diff options
Diffstat (limited to 'Godeps/_workspace/src/golang.org/x/text/transform/transform.go')
-rw-r--r-- | Godeps/_workspace/src/golang.org/x/text/transform/transform.go | 173 |
1 files changed, 102 insertions, 71 deletions
diff --git a/Godeps/_workspace/src/golang.org/x/text/transform/transform.go b/Godeps/_workspace/src/golang.org/x/text/transform/transform.go index f79b461b8..51862b02b 100644 --- a/Godeps/_workspace/src/golang.org/x/text/transform/transform.go +++ b/Godeps/_workspace/src/golang.org/x/text/transform/transform.go @@ -207,7 +207,9 @@ func (w *Writer) Write(data []byte) (n int, err error) { return n, werr } src = src[nSrc:] - if w.n > 0 && len(src) <= n { + if w.n == 0 { + n += nSrc + } else if len(src) <= n { // Enough bytes from w.src have been consumed. We make src point // to data instead to reduce the copying. w.n = 0 @@ -216,35 +218,46 @@ func (w *Writer) Write(data []byte) (n int, err error) { if n < len(data) && (err == nil || err == ErrShortSrc) { continue } - } else { - n += nSrc } - switch { - case err == ErrShortDst && (nDst > 0 || nSrc > 0): - case err == ErrShortSrc && len(src) < len(w.src): - m := copy(w.src, src) - // If w.n > 0, bytes from data were already copied to w.src and n - // was already set to the number of bytes consumed. - if w.n == 0 { - n += m + switch err { + case ErrShortDst: + // This error is okay as long as we are making progress. + if nDst > 0 || nSrc > 0 { + continue + } + case ErrShortSrc: + if len(src) < len(w.src) { + m := copy(w.src, src) + // If w.n > 0, bytes from data were already copied to w.src and n + // was already set to the number of bytes consumed. + if w.n == 0 { + n += m + } + w.n = m + err = nil + } else if nDst > 0 || nSrc > 0 { + // Not enough buffer to store the remainder. Keep processing as + // long as there is progress. Without this case, transforms that + // require a lookahead larger than the buffer may result in an + // error. This is not something one may expect to be common in + // practice, but it may occur when buffers are set to small + // sizes during testing. + continue + } + case nil: + if w.n > 0 { + err = errInconsistentByteCount } - w.n = m - return n, nil - case err == nil && w.n > 0: - return n, errInconsistentByteCount - default: - return n, err } + return n, err } } // Close implements the io.Closer interface. func (w *Writer) Close() error { - for src := w.src[:w.n]; len(src) > 0; { + src := w.src[:w.n] + for { nDst, nSrc, err := w.t.Transform(w.dst, src, true) - if nDst == 0 { - return err - } if _, werr := w.w.Write(w.dst[:nDst]); werr != nil { return werr } @@ -253,7 +266,6 @@ func (w *Writer) Close() error { } src = src[nSrc:] } - return nil } type nop struct{ NopResetter } @@ -493,7 +505,9 @@ func (t removeF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err err // of b to the start of the new slice. func grow(b []byte, n int) []byte { m := len(b) - if m <= 256 { + if m <= 32 { + m = 64 + } else if m <= 256 { m *= 2 } else { m += m >> 1 @@ -508,86 +522,103 @@ const initialBufSize = 128 // String returns a string with the result of converting s[:n] using t, where // n <= len(s). If err == nil, n will be len(s). It calls Reset on t. func String(t Transformer, s string) (result string, n int, err error) { + t.Reset() if s == "" { - return "", 0, nil + // Fast path for the common case for empty input. Results in about a + // 86% reduction of running time for BenchmarkStringLowerEmpty. + if _, _, err := t.Transform(nil, nil, true); err == nil { + return "", 0, nil + } } - t.Reset() - // Allocate only once. Note that both dst and src escape when passed to // Transform. buf := [2 * initialBufSize]byte{} dst := buf[:initialBufSize:initialBufSize] src := buf[initialBufSize : 2*initialBufSize] - // Avoid allocation if the transformed string is identical to the original. - // After this loop, pDst will point to the furthest point in s for which it - // could be detected that t gives equal results, src[:nSrc] will - // indicated the last processed chunk of s for which the output is not equal - // and dst[:nDst] will be the transform of this chunk. - var nDst, nSrc int - pDst := 0 // Used as index in both src and dst in this loop. + // The input string s is transformed in multiple chunks (starting with a + // chunk size of initialBufSize). nDst and nSrc are per-chunk (or + // per-Transform-call) indexes, pDst and pSrc are overall indexes. + nDst, nSrc := 0, 0 + pDst, pSrc := 0, 0 + + // pPrefix is the length of a common prefix: the first pPrefix bytes of the + // result will equal the first pPrefix bytes of s. It is not guaranteed to + // be the largest such value, but if pPrefix, len(result) and len(s) are + // all equal after the final transform (i.e. calling Transform with atEOF + // being true returned nil error) then we don't need to allocate a new + // result string. + pPrefix := 0 for { - n := copy(src, s[pDst:]) - nDst, nSrc, err = t.Transform(dst, src[:n], pDst+n == len(s)) - - // Note 1: we will not enter the loop with pDst == len(s) and we will - // not end the loop with it either. So if nSrc is 0, this means there is - // some kind of error from which we cannot recover given the current - // buffer sizes. We will give up in this case. - // Note 2: it is not entirely correct to simply do a bytes.Equal as - // a Transformer may buffer internally. It will work in most cases, - // though, and no harm is done if it doesn't work. + // Invariant: pDst == pPrefix && pSrc == pPrefix. + + n := copy(src, s[pSrc:]) + nDst, nSrc, err = t.Transform(dst, src[:n], pSrc+n == len(s)) + pDst += nDst + pSrc += nSrc + // TODO: let transformers implement an optional Spanner interface, akin // to norm's QuickSpan. This would even allow us to avoid any allocation. - if nSrc == 0 || !bytes.Equal(dst[:nDst], src[:nSrc]) { + if !bytes.Equal(dst[:nDst], src[:nSrc]) { break } - - if pDst += nDst; pDst == len(s) { - return s, pDst, nil + pPrefix = pSrc + if err == ErrShortDst { + // A buffer can only be short if a transformer modifies its input. + break + } else if err == ErrShortSrc { + if nSrc == 0 { + // No progress was made. + break + } + // Equal so far and !atEOF, so continue checking. + } else if err != nil || pPrefix == len(s) { + return string(s[:pPrefix]), pPrefix, err } } - - // Move the bytes seen so far to dst. - pSrc := pDst + nSrc - if pDst+nDst <= initialBufSize { - copy(dst[pDst:], dst[:nDst]) - } else { - b := make([]byte, len(s)+nDst-nSrc) - copy(b[pDst:], dst[:nDst]) - dst = b + // Post-condition: pDst == pPrefix + nDst && pSrc == pPrefix + nSrc. + + // We have transformed the first pSrc bytes of the input s to become pDst + // transformed bytes. Those transformed bytes are discontiguous: the first + // pPrefix of them equal s[:pPrefix] and the last nDst of them equal + // dst[:nDst]. We copy them around, into a new dst buffer if necessary, so + // that they become one contiguous slice: dst[:pDst]. + if pPrefix != 0 { + newDst := dst + if pDst > len(newDst) { + newDst = make([]byte, len(s)+nDst-nSrc) + } + copy(newDst[pPrefix:pDst], dst[:nDst]) + copy(newDst[:pPrefix], s[:pPrefix]) + dst = newDst } - copy(dst, s[:pDst]) - pDst += nDst - if err != nil && err != ErrShortDst && err != ErrShortSrc { + // Prevent duplicate Transform calls with atEOF being true at the end of + // the input. Also return if we have an unrecoverable error. + if (err == nil && pSrc == len(s)) || + (err != nil && err != ErrShortDst && err != ErrShortSrc) { return string(dst[:pDst]), pSrc, err } - // Complete the string with the remainder. + // Transform the remaining input, growing dst and src buffers as necessary. for { n := copy(src, s[pSrc:]) - nDst, nSrc, err = t.Transform(dst[pDst:], src[:n], pSrc+n == len(s)) + nDst, nSrc, err := t.Transform(dst[pDst:], src[:n], pSrc+n == len(s)) pDst += nDst pSrc += nSrc - switch err { - case nil: - if pSrc == len(s) { - return string(dst[:pDst]), pSrc, nil - } - case ErrShortDst: - // Do not grow as long as we can make progress. This may avoid - // excessive allocations. + // If we got ErrShortDst or ErrShortSrc, do not grow as long as we can + // make progress. This may avoid excessive allocations. + if err == ErrShortDst { if nDst == 0 { dst = grow(dst, pDst) } - case ErrShortSrc: + } else if err == ErrShortSrc { if nSrc == 0 { src = grow(src, 0) } - default: + } else if err != nil || pSrc == len(s) { return string(dst[:pDst]), pSrc, err } } |