aboutsummaryrefslogblamecommitdiffstats
path: root/vendor/github.com/naoina/go-stringutil/strings.go
blob: 881ca2c8f67068eaca55e1621dbfdb133c299173 (plain) (tree)































































































































































































































































































































                                                                                                             
package stringutil

import (
    "sync"
    "unicode"
    "unicode/utf8"
)

var (
    // mu is held by AddCommonInitialism and DelCommonInitialism while they
    // modify the variables below.
    // NOTE(review): the conversion functions in this file read these
    // variables without taking mu — confirm the set is only changed during
    // start-up.
    mu sync.Mutex

    // commonInitialismMap is the set of words treated as initialisms.
    // Based on https://github.com/golang/lint/blob/32a87160691b3c96046c0c678fe57c5bef761456/lint.go#L702
    commonInitialismMap = map[string]struct{}{
        "API":   {},
        "ASCII": {},
        "CPU":   {},
        "CSRF":  {},
        "CSS":   {},
        "DNS":   {},
        "EOF":   {},
        "GUID":  {},
        "HTML":  {},
        "HTTP":  {},
        "HTTPS": {},
        "ID":    {},
        "IP":    {},
        "JSON":  {},
        "LHS":   {},
        "QPS":   {},
        "RAM":   {},
        "RHS":   {},
        "RPC":   {},
        "SLA":   {},
        "SMTP":  {},
        "SQL":   {},
        "SSH":   {},
        "TCP":   {},
        "TLS":   {},
        "TTL":   {},
        "UDP":   {},
        "UI":    {},
        "UID":   {},
        "UUID":  {},
        "URI":   {},
        "URL":   {},
        "UTF8":  {},
        "VM":    {},
        "XML":   {},
        "XSRF":  {},
        "XSS":   {},
    }

    // commonInitialisms holds the keys of commonInitialismMap in map
    // iteration order, i.e. in no particular order.
    commonInitialisms = keys(commonInitialismMap)

    // commonInitialism is the lookup structure built from commonInitialisms.
    commonInitialism = mustDoubleArray(newDoubleArray(commonInitialisms))

    // longestLen and shortestLen are the rune counts of the longest and the
    // shortest entry of commonInitialisms.
    longestLen  = longestLength(commonInitialisms)
    shortestLen = shortestLength(commonInitialisms, longestLen)
)

// ToUpperCamelCase converts the snake_case string s to UpperCamelCase.
// The first letter of s and every letter that directly follows a '_' are
// mapped to upper case, and the '_' characters themselves are removed.
// A '_'-delimited segment that, once completely upper-cased, is accepted by
// the common initialism lookup is emitted entirely in upper case.
func ToUpperCamelCase(s string) string {
    if len(s) == 0 {
        return ""
    }
    var (
        out        = make([]byte, 0, len(s))
        scratch    []byte
        enc        [utf8.UTFMax]byte
        segStart   = 0
        capitalize = true
    )
    for _, r := range s {
        switch {
        case r == '_':
            // The separator closes the current segment and is not copied.
            out, scratch = upperCamelCloseSegment(out, segStart, scratch)
            segStart = len(out)
            capitalize = true
        case r >= utf8.RuneSelf:
            if capitalize {
                r = unicode.ToUpper(r)
                capitalize = false
            }
            n := utf8.EncodeRune(enc[:], r)
            out = append(out, enc[:n]...)
        case capitalize:
            out = append(out, toUpperASCII(byte(r)))
            capitalize = false
        default:
            out = append(out, byte(r))
        }
    }
    out, _ = upperCamelCloseSegment(out, segStart, scratch)
    return string(out)
}

// upperCamelCloseSegment upper-cases the segment out[segStart:] into scratch
// and, when the initialism lookup reports a positive length for it, replaces
// the segment in out with that upper-cased form. It returns the updated
// output and the scratch buffer so the caller can reuse its storage.
func upperCamelCloseSegment(out []byte, segStart int, scratch []byte) ([]byte, []byte) {
    var enc [utf8.UTFMax]byte
    scratch = scratch[:0]
    for _, r := range string(out[segStart:]) {
        if r >= utf8.RuneSelf {
            n := utf8.EncodeRune(enc[:], unicode.ToUpper(r))
            scratch = append(scratch, enc[:n]...)
            continue
        }
        scratch = append(scratch, toUpperASCII(byte(r)))
    }
    if commonInitialism.LookupByBytes(scratch) > 0 {
        out = append(out[:segStart], scratch...)
    }
    return out, scratch
}

// ToUpperCamelCaseASCII is the byte-oriented counterpart of ToUpperCamelCase.
// It processes s one byte at a time, which makes it faster, but it only
// gives correct results when s consists solely of ASCII characters.
func ToUpperCamelCaseASCII(s string) string {
    if len(s) == 0 {
        return ""
    }
    out := make([]byte, 0, len(s))
    var scratch []byte
    segStart, capitalize := 0, true
    for pos := 0; pos < len(s); pos++ {
        switch b := s[pos]; {
        case b == '_':
            // The separator closes the current segment and is not copied.
            out, scratch = upperCamelASCIICloseSegment(out, segStart, scratch)
            segStart = len(out)
            capitalize = true
        case capitalize:
            out = append(out, toUpperASCII(b))
            capitalize = false
        default:
            out = append(out, b)
        }
    }
    out, _ = upperCamelASCIICloseSegment(out, segStart, scratch)
    return string(out)
}

// upperCamelASCIICloseSegment upper-cases the bytes of out[segStart:] into
// scratch and, when the initialism lookup reports a positive length for
// them, replaces the segment in out with that upper-cased form. It returns
// the updated output and the scratch buffer so its storage can be reused.
func upperCamelASCIICloseSegment(out []byte, segStart int, scratch []byte) ([]byte, []byte) {
    scratch = scratch[:0]
    for _, b := range out[segStart:] {
        scratch = append(scratch, toUpperASCII(b))
    }
    if commonInitialism.LookupByBytes(scratch) > 0 {
        out = append(out[:segStart], scratch...)
    }
    return out, scratch
}

// ToSnakeCase returns a copy of the string s with all Unicode letters mapped to their snake case.
// It will insert letter of '_' at position of previous letter of uppercase and all
// letters convert to lower case.
// ToSnakeCase does not insert '_' letter into a common initialism word like ID, URL and so on.
func ToSnakeCase(s string) string {
    if s == "" {
        return ""
    }
    result := make([]byte, 0, len(s))
    var runeBuf [utf8.UTFMax]byte
    var j, skipCount int
    for i, c := range s {
        if i < skipCount {
            continue
        }
        if unicode.IsUpper(c) {
            if i != 0 {
                result = append(result, '_')
            }
            next := nextIndex(j, len(s))
            if length := commonInitialism.Lookup(s[j:next]); length > 0 {
                for _, r := range s[j : j+length] {
                    if r < utf8.RuneSelf {
                        result = append(result, toLowerASCII(byte(r)))
                    } else {
                        n := utf8.EncodeRune(runeBuf[:], unicode.ToLower(r))
                        result = append(result, runeBuf[:n]...)
                    }
                }
                j += length - 1
                skipCount = i + length
                continue
            }
        }
        if c < utf8.RuneSelf {
            result = append(result, toLowerASCII(byte(c)))
        } else {
            n := utf8.EncodeRune(runeBuf[:], unicode.ToLower(c))
            result = append(result, runeBuf[:n]...)
        }
        j++
    }
    return string(result)
}

// ToSnakeCaseASCII is similar to ToSnakeCase, but optimized for only the ASCII
// characters.
// ToSnakeCaseASCII is faster than ToSnakeCase, but doesn't work correctly if
// contains non-ASCII characters.
// It inserts a '_' before every upper case ASCII letter except one at the
// very start of s, lower-cases all ASCII letters, and emits a matched common
// initialism as a single lower-cased word.
func ToSnakeCaseASCII(s string) string {
    if s == "" {
        return ""
    }
    result := make([]byte, 0, len(s))
    for i := 0; i < len(s); i++ {
        c := s[i]
        if isUpperASCII(c) {
            if i != 0 {
                result = append(result, '_')
            }
            // Cheap pre-filter: only consult the lookup when the byte at the
            // end of the shortest possible initialism is upper case too.
            // shortestLen is 0 when the initialism set is empty or contains
            // an empty string; k would then be i-1, which is -1 for the
            // first byte and would make s[k] panic, so skip the lookup.
            if k := i + shortestLen - 1; shortestLen > 0 && k < len(s) && isUpperASCII(s[k]) {
                if length := commonInitialism.Lookup(s[i:nextIndex(i, len(s))]); length > 0 {
                    for j, buf := 0, s[i:i+length]; j < len(buf); j++ {
                        result = append(result, toLowerASCII(buf[j]))
                    }
                    // The loop's i++ accounts for the final matched byte.
                    i += length - 1
                    continue
                }
            }
        }
        result = append(result, toLowerASCII(c))
    }
    return string(result)
}

// AddCommonInitialism registers every string in ss as a common initialism
// and rebuilds the derived lookup state (key list, lookup structure, longest
// and shortest length). The update runs while holding mu.
func AddCommonInitialism(ss ...string) {
    mu.Lock()
    defer mu.Unlock()

    for i := range ss {
        commonInitialismMap[ss[i]] = struct{}{}
    }

    words := keys(commonInitialismMap)
    commonInitialisms = words
    commonInitialism = mustDoubleArray(newDoubleArray(words))

    // The longest length is computed first because it seeds the search for
    // the shortest one.
    maxLen := longestLength(words)
    longestLen = maxLen
    shortestLen = shortestLength(words, maxLen)
}

// DelCommonInitialism removes every string in ss from the set of common
// initialisms and rebuilds the derived lookup state (key list, lookup
// structure, longest and shortest length). The update runs while holding mu.
func DelCommonInitialism(ss ...string) {
    mu.Lock()
    defer mu.Unlock()

    for i := range ss {
        delete(commonInitialismMap, ss[i])
    }

    words := keys(commonInitialismMap)
    commonInitialisms = words
    commonInitialism = mustDoubleArray(newDoubleArray(words))

    // The longest length is computed first because it seeds the search for
    // the shortest one.
    maxLen := longestLength(words)
    longestLen = maxLen
    shortestLen = shortestLength(words, maxLen)
}

// isUpperASCII reports whether c is one of the ASCII letters 'A' through 'Z'.
func isUpperASCII(c byte) bool {
    if c < 'A' {
        return false
    }
    return c <= 'Z'
}

// isLowerASCII reports whether c is one of the ASCII letters 'a' through 'z'.
func isLowerASCII(c byte) bool {
    if c < 'a' {
        return false
    }
    return c <= 'z'
}

// toUpperASCII maps an ASCII lower case letter to its upper case form and
// returns every other byte unchanged.
func toUpperASCII(c byte) byte {
    if !isLowerASCII(c) {
        return c
    }
    // Distance between a lower case letter and its upper case counterpart.
    const caseGap = 'a' - 'A'
    return c - caseGap
}

// toLowerASCII maps an ASCII upper case letter to its lower case form and
// returns every other byte unchanged.
func toLowerASCII(c byte) byte {
    if !isUpperASCII(c) {
        return c
    }
    // Distance between an upper case letter and its lower case counterpart.
    const caseGap = 'a' - 'A'
    return c + caseGap
}

// nextIndex returns i+longestLen clamped to maxlen. The callers in this file
// use it as the end of the window of text handed to the initialism lookup.
func nextIndex(i, maxlen int) int {
    end := i + longestLen
    if end >= maxlen {
        return maxlen
    }
    return end
}

// keys returns the keys of m as a new slice. The order follows map
// iteration and is therefore unspecified.
func keys(m map[string]struct{}) []string {
    names := make([]string, len(m))
    next := 0
    for name := range m {
        names[next] = name
        next++
    }
    return names
}

// shortestLength returns the smallest rune count among strs, or shortest
// when no element of strs has fewer runes than that starting value.
func shortestLength(strs []string, shortest int) int {
    best := shortest
    for i := range strs {
        n := utf8.RuneCountInString(strs[i])
        if n < best {
            best = n
        }
    }
    return best
}

// longestLength returns the largest rune count among strs, or 0 when strs
// is empty.
func longestLength(strs []string) (longest int) {
    for i := range strs {
        n := utf8.RuneCountInString(strs[i])
        if longest < n {
            longest = n
        }
    }
    return longest
}