aboutsummaryrefslogtreecommitdiffstats
path: root/common/bitutil/compress.go
diff options
context:
space:
mode:
authorPéter Szilágyi <peterke@gmail.com>2017-05-06 23:35:59 +0800
committerPéter Szilágyi <peterke@gmail.com>2017-05-07 00:06:17 +0800
commitcf19586cfbe5aa379c8fdb046dc5a8c0fa1cebbb (patch)
treef6ca01371745603c7921713853fab601de44ca4d /common/bitutil/compress.go
parentfd5d51c9ae3256a1f24cf974dcd02433a259677e (diff)
downloadgo-tangerine-cf19586cfbe5aa379c8fdb046dc5a8c0fa1cebbb.tar
go-tangerine-cf19586cfbe5aa379c8fdb046dc5a8c0fa1cebbb.tar.gz
go-tangerine-cf19586cfbe5aa379c8fdb046dc5a8c0fa1cebbb.tar.bz2
go-tangerine-cf19586cfbe5aa379c8fdb046dc5a8c0fa1cebbb.tar.lz
go-tangerine-cf19586cfbe5aa379c8fdb046dc5a8c0fa1cebbb.tar.xz
go-tangerine-cf19586cfbe5aa379c8fdb046dc5a8c0fa1cebbb.tar.zst
go-tangerine-cf19586cfbe5aa379c8fdb046dc5a8c0fa1cebbb.zip
common/bitutil: fix decompression corner cases; fuzz, test & bench
Diffstat (limited to 'common/bitutil/compress.go')
-rw-r--r--common/bitutil/compress.go137
1 files changed, 94 insertions, 43 deletions
diff --git a/common/bitutil/compress.go b/common/bitutil/compress.go
index c6c139ab9..a806c0e8b 100644
--- a/common/bitutil/compress.go
+++ b/common/bitutil/compress.go
@@ -16,78 +16,129 @@
package bitutil
-/*
-The compression algorithm implemented by CompressBytes and DecompressBytes is
-optimized for "sparse" input data which contains a lot of zero bytes. Decompression
-requires knowledge of the decompressed data length. Compression works as follows:
+import "errors"
-if data only contains zeroes,
- CompressBytes(data) == nil
-otherwise if len(data) <= 1,
- CompressBytes(data) == data
-otherwise:
- CompressBytes(data) == append(CompressBytes(nonZeroBits(data)), nonZeroBytes(data)...)
-where
- nonZeroBits(data) is a bit vector with len(data) bits (MSB first):
- nonZeroBits(data)[i/8] && (1 << (7-i%8)) != 0 if data[i] != 0
- len(nonZeroBits(data)) == (len(data)+7)/8
- nonZeroBytes(data) contains the non-zero bytes of data in the same order
-*/
+var (
+ // ErrMissingData is returned from decompression if the byte referenced by
+ // the bitset header overflows the input data.
+ ErrMissingData = errors.New("missing bytes on input")
-// CompressBytes compresses the input byte slice
+ // ErrUnreferencedData is returned from decompression if not all bytes were used
+ // up from the input data after decompressing it.
+ ErrUnreferencedData = errors.New("extra bytes on input")
+
+ // ErrExceededTarget is returned from decompression if the bitset header has
+ // more bits defined than there is space available in the target buffer.
+ ErrExceededTarget = errors.New("target data size exceeded")
+
+ // ErrZeroContent is returned from decompression if a data byte referenced in
+ // the bitset header is actually a zero byte.
+ ErrZeroContent = errors.New("zero byte in input content")
+)
+
+// The compression algorithm implemented by CompressBytes and DecompressBytes is
+// optimized for sparse input data which contains a lot of zero bytes. Decompression
+// requires knowledge of the decompressed data length.
+//
+// Compression works as follows:
+//
+// if data only contains zeroes,
+// CompressBytes(data) == nil
+// otherwise if len(data) <= 1,
+// CompressBytes(data) == data
+// otherwise:
+// CompressBytes(data) == append(CompressBytes(nonZeroBitset(data)), nonZeroBytes(data)...)
+// where
+// nonZeroBitset(data) is a bit vector with len(data) bits (MSB first):
+// nonZeroBitset(data)[i/8] & (1 << (7-i%8)) != 0 if data[i] != 0
+// len(nonZeroBitset(data)) == (len(data)+7)/8
+// nonZeroBytes(data) contains the non-zero bytes of data in the same order
+
+// CompressBytes compresses the input byte slice according to the sparse bitset
+// representation algorithm.
func CompressBytes(data []byte) []byte {
+ // Empty slices get compressed to nil
if len(data) == 0 {
return nil
}
+ // One byte slices compress to nil or retain the single byte
if len(data) == 1 {
if data[0] == 0 {
return nil
- } else {
- return data
}
+ return data
}
-
- bitsLen := (len(data) + 7) / 8
- nonZeroBits := make([]byte, bitsLen)
+ // Calculate the bitset of set bytes, and gather the non-zero bytes
+ nonZeroBitset := make([]byte, (len(data)+7)/8)
nonZeroBytes := make([]byte, 0, len(data))
+
for i, b := range data {
if b != 0 {
nonZeroBytes = append(nonZeroBytes, b)
- nonZeroBits[i/8] |= 1 << byte(7-i%8)
+ nonZeroBitset[i/8] |= 1 << byte(7-i%8)
}
}
if len(nonZeroBytes) == 0 {
return nil
}
- return append(CompressBytes(nonZeroBits), nonZeroBytes...)
+ return append(CompressBytes(nonZeroBitset), nonZeroBytes...)
}
-// DecompressBytes decompresses data with a known target size.
-// In addition to the decompressed output, the function returns the length of
-// compressed input data corresponding to the output. The input slice may be longer.
-// If the input slice is too short, (nil, -1) is returned.
-func DecompressBytes(data []byte, targetLen int) ([]byte, int) {
- decomp := make([]byte, targetLen)
- if len(data) == 0 {
- return decomp, 0
+// DecompressBytes decompresses data with a known target size. The whole input
+// must be consumed by the decompression: if any bytes are left over once the
+// output has been produced, ErrUnreferencedData is returned.
+func DecompressBytes(data []byte, target int) ([]byte, error) {
+ out, size, err := decompressBytes(data, target)
+ if err != nil {
+ return nil, err
}
- if targetLen == 1 {
- return data[0:1], 1
+ if size != len(data) {
+ return nil, ErrUnreferencedData
}
+ return out, nil
+}
- bitsLen := (targetLen + 7) / 8
- nonZeroBits, ptr := DecompressBytes(data, bitsLen)
- if ptr < 0 {
- return nil, -1
+// decompressBytes decompresses data with a known target size. In addition to the
+// decompressed output, the function returns the length of compressed input data
+// corresponding to the output as the input slice may be longer.
+func decompressBytes(data []byte, target int) ([]byte, int, error) {
+ // Sanity check 0 targets to avoid infinite recursion
+ if target == 0 {
+ return nil, 0, nil
+ }
+ // Handle the zero and single byte corner cases
+ decomp := make([]byte, target)
+ if len(data) == 0 {
+ return decomp, 0, nil
+ }
+ if target == 1 {
+ decomp[0] = data[0] // copy to avoid referencing the input slice
+ if data[0] != 0 {
+ return decomp, 1, nil
+ }
+ return decomp, 0, nil
+ }
+ // Decompress the bitset of set bytes and distribute the non zero bytes
+ nonZeroBitset, ptr, err := decompressBytes(data, (target+7)/8)
+ if err != nil {
+ return nil, ptr, err
}
- for i, _ := range decomp {
- if nonZeroBits[i/8]&(1<<byte(7-i%8)) != 0 {
- if ptr == len(data) {
- return nil, -1
+ for i := 0; i < 8*len(nonZeroBitset); i++ {
+ if nonZeroBitset[i/8]&(1<<byte(7-i%8)) != 0 {
+ // Make sure we have enough data to push into the correct slot
+ if ptr >= len(data) {
+ return nil, 0, ErrMissingData
+ }
+ if i >= len(decomp) {
+ return nil, 0, ErrExceededTarget
+ }
+ // Make sure the data is valid and push into the slot
+ if data[ptr] == 0 {
+ return nil, 0, ErrZeroContent
}
decomp[i] = data[ptr]
ptr++
}
}
- return decomp, ptr
+ return decomp, ptr, nil
}