aboutsummaryrefslogtreecommitdiffstats
path: root/common/bitutil
diff options
context:
space:
mode:
authorZsolt Felfoldi <zsfelfoldi@gmail.com>2017-05-06 02:11:55 +0800
committerZsolt Felfoldi <zsfelfoldi@gmail.com>2017-05-06 02:24:48 +0800
commitfd5d51c9ae3256a1f24cf974dcd02433a259677e (patch)
tree6fb1136c056980d023ca2197b68cc81be24948f4 /common/bitutil
parent2ec5cf1673e19da85471892875934fe0564a209d (diff)
downloadgo-tangerine-fd5d51c9ae3256a1f24cf974dcd02433a259677e.tar
go-tangerine-fd5d51c9ae3256a1f24cf974dcd02433a259677e.tar.gz
go-tangerine-fd5d51c9ae3256a1f24cf974dcd02433a259677e.tar.bz2
go-tangerine-fd5d51c9ae3256a1f24cf974dcd02433a259677e.tar.lz
go-tangerine-fd5d51c9ae3256a1f24cf974dcd02433a259677e.tar.xz
go-tangerine-fd5d51c9ae3256a1f24cf974dcd02433a259677e.tar.zst
go-tangerine-fd5d51c9ae3256a1f24cf974dcd02433a259677e.zip
common/bitutil: added data compression algorithm
Diffstat (limited to 'common/bitutil')
-rw-r--r--common/bitutil/compress.go93
1 files changed, 93 insertions, 0 deletions
diff --git a/common/bitutil/compress.go b/common/bitutil/compress.go
new file mode 100644
index 000000000..c6c139ab9
--- /dev/null
+++ b/common/bitutil/compress.go
@@ -0,0 +1,93 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package bitutil
+
+/*
+The compression algorithm implemented by CompressBytes and DecompressBytes is
+optimized for "sparse" input data which contains a lot of zero bytes. Decompression
+requires knowledge of the decompressed data length. Compression works as follows:
+
+if data only contains zeroes,
+ CompressBytes(data) == nil
+otherwise if len(data) <= 1,
+ CompressBytes(data) == data
+otherwise:
+ CompressBytes(data) == append(CompressBytes(nonZeroBits(data)), nonZeroBytes(data)...)
+where
+ nonZeroBits(data) is a bit vector with len(data) bits (MSB first):
+ nonZeroBits(data)[i/8] && (1 << (7-i%8)) != 0 if data[i] != 0
+ len(nonZeroBits(data)) == (len(data)+7)/8
+ nonZeroBytes(data) contains the non-zero bytes of data in the same order
+*/
+
+// CompressBytes compresses the input byte slice
+func CompressBytes(data []byte) []byte {
+ if len(data) == 0 {
+ return nil
+ }
+ if len(data) == 1 {
+ if data[0] == 0 {
+ return nil
+ } else {
+ return data
+ }
+ }
+
+ bitsLen := (len(data) + 7) / 8
+ nonZeroBits := make([]byte, bitsLen)
+ nonZeroBytes := make([]byte, 0, len(data))
+ for i, b := range data {
+ if b != 0 {
+ nonZeroBytes = append(nonZeroBytes, b)
+ nonZeroBits[i/8] |= 1 << byte(7-i%8)
+ }
+ }
+ if len(nonZeroBytes) == 0 {
+ return nil
+ }
+ return append(CompressBytes(nonZeroBits), nonZeroBytes...)
+}
+
+// DecompressBytes decompresses data with a known target size.
+// In addition to the decompressed output, the function returns the length of
+// compressed input data corresponding to the output. The input slice may be longer.
+// If the input slice is too short, (nil, -1) is returned.
+func DecompressBytes(data []byte, targetLen int) ([]byte, int) {
+ decomp := make([]byte, targetLen)
+ if len(data) == 0 {
+ return decomp, 0
+ }
+ if targetLen == 1 {
+ return data[0:1], 1
+ }
+
+ bitsLen := (targetLen + 7) / 8
+ nonZeroBits, ptr := DecompressBytes(data, bitsLen)
+ if ptr < 0 {
+ return nil, -1
+ }
+ for i, _ := range decomp {
+ if nonZeroBits[i/8]&(1<<byte(7-i%8)) != 0 {
+ if ptr == len(data) {
+ return nil, -1
+ }
+ decomp[i] = data[ptr]
+ ptr++
+ }
+ }
+ return decomp, ptr
+}