aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/naoina/go-stringutil/strings.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/naoina/go-stringutil/strings.go')
-rw-r--r-- vendor/github.com/naoina/go-stringutil/strings.go | 320
1 files changed, 320 insertions, 0 deletions
diff --git a/vendor/github.com/naoina/go-stringutil/strings.go b/vendor/github.com/naoina/go-stringutil/strings.go
new file mode 100644
index 000000000..881ca2c8f
--- /dev/null
+++ b/vendor/github.com/naoina/go-stringutil/strings.go
@@ -0,0 +1,320 @@
+package stringutil
+
+import (
+ "sync"
+ "unicode"
+ "unicode/utf8"
+)
+
+var (
+	// mu is held by AddCommonInitialism and DelCommonInitialism while they
+	// rewrite the tables below. The conversion functions in this file read
+	// those tables without taking it.
+	mu sync.Mutex
+
+	// commonInitialismMap is the set of words treated as initialisms.
+	// Based on https://github.com/golang/lint/blob/32a87160691b3c96046c0c678fe57c5bef761456/lint.go#L702
+	commonInitialismMap = map[string]struct{}{
+		"API":   {},
+		"ASCII": {},
+		"CPU":   {},
+		"CSRF":  {},
+		"CSS":   {},
+		"DNS":   {},
+		"EOF":   {},
+		"GUID":  {},
+		"HTML":  {},
+		"HTTP":  {},
+		"HTTPS": {},
+		"ID":    {},
+		"IP":    {},
+		"JSON":  {},
+		"LHS":   {},
+		"QPS":   {},
+		"RAM":   {},
+		"RHS":   {},
+		"RPC":   {},
+		"SLA":   {},
+		"SMTP":  {},
+		"SQL":   {},
+		"SSH":   {},
+		"TCP":   {},
+		"TLS":   {},
+		"TTL":   {},
+		"UDP":   {},
+		"UI":    {},
+		"UID":   {},
+		"UUID":  {},
+		"URI":   {},
+		"URL":   {},
+		"UTF8":  {},
+		"VM":    {},
+		"XML":   {},
+		"XSRF":  {},
+		"XSS":   {},
+	}
+
+	// commonInitialisms lists the keys of commonInitialismMap (map iteration
+	// order, so unordered).
+	commonInitialisms = keys(commonInitialismMap)
+
+	// commonInitialism is the lookup structure built from commonInitialisms;
+	// newDoubleArray and mustDoubleArray are defined outside this file.
+	commonInitialism = mustDoubleArray(newDoubleArray(commonInitialisms))
+
+	// longestLen and shortestLen are the rune counts of the longest and the
+	// shortest entry of commonInitialisms.
+	longestLen  = longestLength(commonInitialisms)
+	shortestLen = shortestLength(commonInitialisms, longestLen)
+)
+
+// ToUpperCamelCase converts an underscore-separated string to upper camel
+// case. The first rune of s and every rune that follows a '_' are upper-cased,
+// the '_' runes themselves are dropped, and all other runes are copied
+// unchanged. A word (the text between two separators) whose upper-cased form
+// is found in the common-initialism table is emitted fully upper-cased.
+func ToUpperCamelCase(s string) string {
+	if s == "" {
+		return ""
+	}
+	var (
+		encoded   [utf8.UTFMax]byte // scratch space for encoding one rune
+		upperWord []byte            // reused buffer: upper-cased copy of the current word
+		wordStart int               // offset in out where the current word begins
+	)
+	out := make([]byte, 0, len(s))
+	capitalize := true
+
+	// promote upper-cases the word out[wordStart:] into upperWord and, when
+	// the table lookup reports a hit, overwrites the word in out with it.
+	promote := func() {
+		upperWord = upperWord[:0]
+		for word := out[wordStart:]; len(word) > 0; {
+			r, size := utf8.DecodeRune(word)
+			word = word[size:]
+			if r < utf8.RuneSelf {
+				upperWord = append(upperWord, toUpperASCII(byte(r)))
+				continue
+			}
+			n := utf8.EncodeRune(encoded[:], unicode.ToUpper(r))
+			upperWord = append(upperWord, encoded[:n]...)
+		}
+		if commonInitialism.LookupByBytes(upperWord) > 0 {
+			out = append(out[:wordStart], upperWord...)
+		}
+	}
+
+	for _, r := range s {
+		switch {
+		case r == '_':
+			// A separator closes the current word and is not copied.
+			promote()
+			wordStart = len(out)
+			capitalize = true
+		case r < utf8.RuneSelf:
+			b := byte(r)
+			if capitalize {
+				b = toUpperASCII(b)
+			}
+			out = append(out, b)
+			capitalize = false
+		default:
+			if capitalize {
+				r = unicode.ToUpper(r)
+			}
+			n := utf8.EncodeRune(encoded[:], r)
+			out = append(out, encoded[:n]...)
+			capitalize = false
+		}
+	}
+	// The last word has no trailing separator, so it is checked here.
+	promote()
+	return string(out)
+}
+
+// ToUpperCamelCaseASCII is the byte-oriented counterpart of ToUpperCamelCase.
+// It walks s one byte at a time and only changes the case of ASCII letters,
+// which makes it cheaper but unsuitable for input containing non-ASCII
+// characters.
+func ToUpperCamelCaseASCII(s string) string {
+	if s == "" {
+		return ""
+	}
+	out := make([]byte, 0, len(s))
+	var upperWord []byte // reused buffer: upper-cased copy of the current word
+	wordStart, capitalize := 0, true
+
+	// promote upper-cases the word out[wordStart:] into upperWord and, when
+	// the table lookup reports a hit, overwrites the word in out with it.
+	promote := func() {
+		upperWord = upperWord[:0]
+		for _, b := range out[wordStart:] {
+			upperWord = append(upperWord, toUpperASCII(b))
+		}
+		if commonInitialism.LookupByBytes(upperWord) > 0 {
+			out = append(out[:wordStart], upperWord...)
+		}
+	}
+
+	for _, b := range []byte(s) {
+		switch {
+		case b == '_':
+			// A separator closes the current word and is not copied.
+			promote()
+			wordStart = len(out)
+			capitalize = true
+		case capitalize:
+			out = append(out, toUpperASCII(b))
+			capitalize = false
+		default:
+			out = append(out, b)
+		}
+	}
+	// The last word has no trailing separator, so it is checked here.
+	promote()
+	return string(out)
+}
+
+// ToSnakeCase returns a copy of s converted to snake case: every rune is
+// lower-cased and a '_' is inserted in front of each upper-case rune except
+// one at offset 0.
+// A run that the common-initialism table recognises (ID, URL and so on) is
+// kept together: it gets a single leading '_' and none inside.
+func ToSnakeCase(s string) string {
+	if s == "" {
+		return ""
+	}
+	result := make([]byte, 0, len(s))
+	var runeBuf [utf8.UTFMax]byte
+	// skipUntil is the byte offset just past the initialism emitted most
+	// recently; runes that start before it have already been written.
+	skipUntil := 0
+	for i, c := range s {
+		if i < skipUntil {
+			continue
+		}
+		if unicode.IsUpper(c) {
+			if i != 0 {
+				result = append(result, '_')
+			}
+			// The lookup window must begin at the current rune. The range
+			// index i is that rune's byte offset, so it is used directly;
+			// a separately maintained counter falls behind i after
+			// multi-byte runes and after every matched initialism, which
+			// made later lookups and copies read the wrong bytes.
+			if length := commonInitialism.Lookup(s[i:nextIndex(i, len(s))]); length > 0 {
+				for _, r := range s[i : i+length] {
+					if r < utf8.RuneSelf {
+						result = append(result, toLowerASCII(byte(r)))
+					} else {
+						n := utf8.EncodeRune(runeBuf[:], unicode.ToLower(r))
+						result = append(result, runeBuf[:n]...)
+					}
+				}
+				skipUntil = i + length
+				continue
+			}
+		}
+		if c < utf8.RuneSelf {
+			result = append(result, toLowerASCII(byte(c)))
+		} else {
+			n := utf8.EncodeRune(runeBuf[:], unicode.ToLower(c))
+			result = append(result, runeBuf[:n]...)
+		}
+	}
+	return string(result)
+}
+
+// ToSnakeCaseASCII is the byte-oriented counterpart of ToSnakeCase. It
+// inspects s one byte at a time and only recognises ASCII letters, so it is
+// cheaper but does not give correct results for input containing non-ASCII
+// characters.
+func ToSnakeCaseASCII(s string) string {
+	if s == "" {
+		return ""
+	}
+	out := make([]byte, 0, len(s))
+	for pos := 0; pos < len(s); pos++ {
+		ch := s[pos]
+		if !isUpperASCII(ch) {
+			out = append(out, toLowerASCII(ch))
+			continue
+		}
+		if pos != 0 {
+			out = append(out, '_')
+		}
+		// Cheap pre-check before the table lookup: the byte where the
+		// shortest initialism would end must itself be upper case.
+		matched := 0
+		if probe := pos + shortestLen - 1; probe < len(s) && isUpperASCII(s[probe]) {
+			matched = commonInitialism.Lookup(s[pos:nextIndex(pos, len(s))])
+		}
+		if matched <= 0 {
+			out = append(out, toLowerASCII(ch))
+			continue
+		}
+		for _, b := range []byte(s[pos : pos+matched]) {
+			out = append(out, toLowerASCII(b))
+		}
+		// Step to the last byte of the initialism; the loop's own
+		// increment then moves past it.
+		pos += matched - 1
+	}
+	return string(out)
+}
+
+// AddCommonInitialism registers every string in ss as a common initialism
+// and rebuilds the derived lookup tables. The whole update runs with mu held.
+func AddCommonInitialism(ss ...string) {
+	mu.Lock()
+	defer mu.Unlock()
+	for i := range ss {
+		commonInitialismMap[ss[i]] = struct{}{}
+	}
+	// Everything below is derived from the map and must be regenerated.
+	words := keys(commonInitialismMap)
+	commonInitialisms = words
+	commonInitialism = mustDoubleArray(newDoubleArray(words))
+	longest := longestLength(words)
+	longestLen = longest
+	shortestLen = shortestLength(words, longest)
+}
+
+// DelCommonInitialism removes every string in ss from the set of common
+// initialisms and rebuilds the derived lookup tables. The whole update runs
+// with mu held.
+func DelCommonInitialism(ss ...string) {
+	mu.Lock()
+	defer mu.Unlock()
+	for i := range ss {
+		delete(commonInitialismMap, ss[i])
+	}
+	// Everything below is derived from the map and must be regenerated.
+	words := keys(commonInitialismMap)
+	commonInitialisms = words
+	commonInitialism = mustDoubleArray(newDoubleArray(words))
+	longest := longestLength(words)
+	longestLen = longest
+	shortestLen = shortestLength(words, longest)
+}
+
+// isUpperASCII reports whether c is one of the ASCII letters 'A' through 'Z'.
+func isUpperASCII(c byte) bool {
+	if c < 'A' {
+		return false
+	}
+	return c <= 'Z'
+}
+
+// isLowerASCII reports whether c is one of the ASCII letters 'a' through 'z'.
+func isLowerASCII(c byte) bool {
+	if c < 'a' {
+		return false
+	}
+	return c <= 'z'
+}
+
+// toUpperASCII returns the upper-case form of c when c is an ASCII lower-case
+// letter and returns every other byte unchanged.
+func toUpperASCII(c byte) byte {
+	if c < 'a' || c > 'z' {
+		return c
+	}
+	// Upper- and lower-case ASCII letters sit a fixed distance apart.
+	return c - ('a' - 'A')
+}
+
+// toLowerASCII returns the lower-case form of c when c is an ASCII upper-case
+// letter and returns every other byte unchanged.
+func toLowerASCII(c byte) byte {
+	if c < 'A' || c > 'Z' {
+		return c
+	}
+	// Upper- and lower-case ASCII letters sit a fixed distance apart.
+	return c + ('a' - 'A')
+}
+
+// nextIndex returns i+longestLen, capped at maxlen. Callers in this file use
+// it as the end of the window handed to the initialism lookup, passing a
+// byte offset for i and len(s) for maxlen; longestLen itself is a rune count.
+func nextIndex(i, maxlen int) int {
+	end := i + longestLen
+	if end >= maxlen {
+		return maxlen
+	}
+	return end
+}
+
+// keys returns the keys of m as a new slice. The order follows map iteration
+// and is therefore unspecified.
+func keys(m map[string]struct{}) []string {
+	out := make([]string, len(m))
+	next := 0
+	for name := range m {
+		out[next] = name
+		next++
+	}
+	return out
+}
+
+// shortestLength returns the smallest rune count among strs, or the seed
+// value shortest when no element is shorter than it (including when strs is
+// empty).
+func shortestLength(strs []string, shortest int) int {
+	for i := range strs {
+		n := utf8.RuneCountInString(strs[i])
+		if n >= shortest {
+			continue
+		}
+		shortest = n
+	}
+	return shortest
+}
+
+// longestLength returns the largest rune count among strs, or 0 when strs is
+// empty.
+func longestLength(strs []string) (longest int) {
+	for i := range strs {
+		if n := utf8.RuneCountInString(strs[i]); longest < n {
+			longest = n
+		}
+	}
+	return longest
+}