diff options
Diffstat (limited to 'swarm/bmt')
-rw-r--r-- | swarm/bmt/bmt.go | 543 | ||||
-rw-r--r-- | swarm/bmt/bmt_r.go | 85 | ||||
-rw-r--r-- | swarm/bmt/bmt_test.go | 390 |
3 files changed, 1018 insertions, 0 deletions
diff --git a/swarm/bmt/bmt.go b/swarm/bmt/bmt.go new file mode 100644 index 000000000..71aee2495 --- /dev/null +++ b/swarm/bmt/bmt.go @@ -0,0 +1,543 @@ +// Copyright 2018 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +// Package bmt provides a binary merkle tree implementation +package bmt + +import ( + "fmt" + "hash" + "strings" + "sync" + "sync/atomic" +) + +/* +Binary Merkle Tree Hash is a hash function over arbitrary datachunks of limited size +It is defined as the root hash of the binary merkle tree built over fixed size segments +of the underlying chunk using any base hash function (e.g keccak 256 SHA3). +Chunk with data shorter than the fixed size are hashed as if they had zero padding + +BMT hash is used as the chunk hash function in swarm which in turn is the basis for the +128 branching swarm hash http://swarm-guide.readthedocs.io/en/latest/architecture.html#swarm-hash + +The BMT is optimal for providing compact inclusion proofs, i.e. 
prove that a +segment is a substring of a chunk starting at a particular offset +The size of the underlying segments is fixed to the size of the base hash (called the resolution +of the BMT hash). Using Keccak256 SHA3 this is 32 bytes: the EVM word size, which optimizes for on-chain BMT verification, +as well as the hash size optimal for inclusion proofs in the merkle tree of the swarm hash. + +Two implementations are provided: + +* RefHasher is optimized for code simplicity and meant as a reference implementation + that is simple to understand +* Hasher is optimized for speed taking advantage of concurrency with minimalistic + control structure to coordinate the concurrent routines + It implements the following interfaces + * standard golang hash.Hash + * SwarmHash + * io.Writer + * TODO: SegmentWriter +*/ + +const ( + // SegmentCount is the maximum number of segments of the underlying chunk + // Should be equal to max-chunk-data-size / hash-size + SegmentCount = 128 + // PoolSize is the maximum number of bmt trees used by the hashers, i.e., + // the maximum number of concurrent BMT hashing operations performed by the same hasher + PoolSize = 8 +) + +// BaseHasherFunc is a hash.Hash constructor function used for the base hash of the BMT. 
// implemented by Keccak256 SHA3 sha3.NewKeccak256
type BaseHasherFunc func() hash.Hash

// Hasher a reusable hasher for fixed maximum size chunks representing a BMT
// - implements the hash.Hash interface
// - reuses a pool of trees for amortised memory allocation and resource control
// - supports order-agnostic concurrent segment writes (TODO:)
//   as well as sequential read and write
// - the same hasher instance must not be called concurrently on more than one chunk
// - the same hasher instance is synchronously reusable
// - Sum gives back the tree to the pool and guaranteed to leave
//   the tree and itself in a state reusable for hashing a new chunk
// - generates and verifies segment inclusion proofs (TODO:)
type Hasher struct {
	pool *TreePool // BMT resource pool
	bmt  *tree     // prebuilt BMT resource for flowcontrol and proofs
}

// New creates a reusable Hasher
// implements the hash.Hash interface
// pulls a new tree from a resource pool for hashing each chunk
func New(p *TreePool) *Hasher {
	return &Hasher{
		pool: p,
	}
}

// TreePool provides a pool of trees used as resources by Hasher
// a tree popped from the pool is guaranteed to have clean state
// for hashing a new chunk
type TreePool struct {
	lock         sync.Mutex
	c            chan *tree     // the channel to obtain a resource from the pool
	hasher       BaseHasherFunc // base hasher to use for the BMT levels
	SegmentSize  int            // size of leaf segments, stipulated to be = hash size
	SegmentCount int            // the number of segments on the base level of the BMT
	Capacity     int            // pool capacity, controls concurrency
	Depth        int            // depth of the bmt trees = int(log2(segmentCount))+1
	Datalength   int            // the total length of the data (count * size)
	count        int            // current count of (ever) allocated resources
	zerohashes   [][]byte       // lookup table for predictable padding subtrees for all levels
}

// NewTreePool creates a tree pool with hasher, segment count and capacity;
// the segment size is taken from the size of the base hash.
// On Hasher.getTree it reuses free trees or creates a new one if capacity is not reached
func NewTreePool(hasher BaseHasherFunc, segmentCount, capacity int) *TreePool {
	// initialises the zerohashes lookup table:
	// zerohashes[0] is an all-zero segment, zerohashes[i] = H(zerohashes[i-1] | zerohashes[i-1])
	depth := calculateDepthFor(segmentCount)
	segmentSize := hasher().Size()
	zerohashes := make([][]byte, depth)
	zeros := make([]byte, segmentSize)
	zerohashes[0] = zeros
	h := hasher()
	for i := 1; i < depth; i++ {
		h.Reset()
		h.Write(zeros)
		h.Write(zeros)
		zeros = h.Sum(nil)
		zerohashes[i] = zeros
	}
	return &TreePool{
		c:            make(chan *tree, capacity),
		hasher:       hasher,
		SegmentSize:  segmentSize,
		SegmentCount: segmentCount,
		Capacity:     capacity,
		Datalength:   segmentCount * segmentSize,
		Depth:        depth,
		zerohashes:   zerohashes,
	}
}

// Drain drains the pool until it has no more than n resources
func (p *TreePool) Drain(n int) {
	p.lock.Lock()
	defer p.lock.Unlock()
	for len(p.c) > n {
		<-p.c
		p.count--
	}
}

// reserve is blocking until it returns an available tree
// it reuses free trees or creates a new one if size is not reached
// The pool lock is held for the whole call, including while waiting on the
// channel once capacity is reached; release does not take the lock, so a
// returned tree can always unblock the waiter.
// TODO: should use a context here
func (p *TreePool) reserve() *tree {
	p.lock.Lock()
	defer p.lock.Unlock()
	if p.count == p.Capacity {
		return <-p.c
	}
	select {
	case t := <-p.c:
		return t
	default:
	}
	p.count++
	return newTree(p.SegmentSize, p.Depth)
}

// release gives back a tree to the pool.
// this tree is guaranteed to be in reusable state
func (p *TreePool) release(t *tree) {
	p.c <- t // can never fail ...
}

// tree is a reusable control structure representing a BMT
// organised in a binary tree
// Hasher uses a TreePool to obtain a tree for each chunk hash
// the tree is 'locked' while not in the pool
type tree struct {
	leaves  []*node     // leaf nodes of the tree, other nodes accessible via parent links
	cur     int         // index of rightmost currently open segment
	offset  int         // offset (cursor position) within currently open segment
	segment []byte      // the rightmost open segment (not complete)
	section []byte      // the rightmost open section (double segment)
	depth   int         // number of levels
	result  chan []byte // result channel
	hash    []byte      // to record the result
	span    []byte      // The span of the data subsumed under the chunk
}

// node is a reusable segment hasher representing a node in a BMT
type node struct {
	isLeft      bool   // whether it is left side of the parent double segment
	parent      *node  // pointer to parent node in the BMT
	state       int32  // atomic increment impl concurrent boolean toggle
	left, right []byte // this is where the content segment is set
}

// newNode constructs a segment hasher node in the BMT (used by newTree)
func newNode(index int, parent *node) *node {
	return &node{
		parent: parent,
		isLeft: index%2 == 0,
	}
}

// draw draws the BMT (badly): one row per level from the given root hash down
// to the leaves, followed by the left and the right contents of the leaf nodes
func (t *tree) draw(hash []byte) string {
	var left, right []string
	for _, n := range t.leaves {
		left = append(left, fmt.Sprintf("%v", hashstr(n.left)))
		right = append(right, fmt.Sprintf("%v", hashstr(n.right)))
	}
	// walk up level by level, every left child contributes its parent
	anc := t.leaves
	var hashes [][]string
	for len(anc) > 0 {
		var nodes []*node
		row := []string{""}
		for i, n := range anc {
			row = append(row, fmt.Sprintf("%v|%v", hashstr(n.left), hashstr(n.right)))
			if i%2 == 0 && n.parent != nil {
				nodes = append(nodes, n.parent)
			}
		}
		row = append(row, "")
		hashes = append(hashes, row)
		anc = nodes
	}
	hashes = append(hashes, []string{"", fmt.Sprintf("%v", hashstr(hash)), ""})
	total := 60
	// NOTE(review): this separator reads as a single space in the copy at hand,
	// which looks whitespace-collapsed - confirm the intended width
	del := " "
	var rows []string
	for i := len(hashes) - 1; i >= 0; i-- {
		var textlen int
		row := hashes[i]
		for _, s := range row {
			textlen += len(s)
		}
		if total < textlen {
			total = textlen + len(row)
		}
		// every row has at least the two empty border cells, so len(row)-1 > 0
		delsize := (total - textlen) / (len(row) - 1)
		if delsize > len(del) {
			delsize = len(del)
		}
		rows = append(rows, fmt.Sprintf("%v: %v", len(hashes)-i-1, strings.Join(row, del[:delsize])))
	}
	rows = append(rows, strings.Join(left, " "))
	rows = append(rows, strings.Join(right, " "))
	return strings.Join(rows, "\n") + "\n"
}

// newTree initialises a tree by building up the nodes of a BMT
// - segment size is stipulated to be the size of the hash
func newTree(segmentSize, depth int) *tree {
	n := newNode(0, nil)
	prevlevel := []*node{n}
	// iterate over levels and creates 2^(depth-level) nodes
	count := 2
	for level := depth - 2; level >= 0; level-- {
		nodes := make([]*node, count)
		for i := 0; i < count; i++ {
			parent := prevlevel[i/2]
			nodes[i] = newNode(i, parent)
		}
		prevlevel = nodes
		count *= 2
	}
	// the datanode level is the nodes on the last level
	return &tree{
		leaves:  prevlevel,
		depth:   depth,
		result:  make(chan []byte, 1),
		segment: make([]byte, segmentSize),
		section: make([]byte, 2*segmentSize),
	}
}

// methods needed by hash.Hash

// Size returns the size
func (h *Hasher) Size() int {
	return h.pool.SegmentSize
}

// BlockSize returns the block size
func (h *Hasher) BlockSize() int {
	return h.pool.SegmentSize
}

// Hash hashes the data and the span using the bmt hasher
func Hash(h *Hasher, span, data []byte) []byte {
	h.ResetWithLength(span)
	h.Write(data)
	return h.Sum(nil)
}

// DataLength returns the maximum data size that is hashed by the hasher =
// segment count times segment size
func (h *Hasher) DataLength() int {
	return h.pool.Datalength
}

// Sum returns the hash of the data written so far appended to b,
// as required by the hash.Hash interface
// caller must make sure Sum is not called concurrently with Write, writeSection
// and WriteSegment (TODO:)
func (h *Hasher) Sum(b []byte) (r []byte) {
	return h.sum(b, true, true)
}

// sum implements Sum taking parameters
// * if the tree is released right away
// * if sequential write is used (can read sections)
// The result is always appended to b: the bare BMT root if no span is set,
// otherwise the base hash of span|BMT root
func (h *Hasher) sum(b []byte, release, section bool) (r []byte) {
	// getTree rather than h.bmt so that Sum without a preceding Reset/Write
	// hashes the empty chunk instead of dereferencing a nil tree
	t := h.getTree()
	h.finalise(section)
	if t.offset > 0 { // get the last node (double segment)
		// padding the segment with zero
		copy(t.segment[t.offset:], h.pool.zerohashes[0])
	}
	if !section {
		// TODO: h.writeSegment(t.cur, t.segment)
		panic("SegmentWriter not implemented")
	}
	if t.cur%2 == 1 {
		// if just finished current segment, copy it to the right half of the chunk
		copy(t.section[h.pool.SegmentSize:], t.segment)
	} else {
		// copy segment to front of section, zero pad the right half
		copy(t.section, t.segment)
		copy(t.section[h.pool.SegmentSize:], h.pool.zerohashes[0])
	}
	h.writeSection(t.cur, t.section)
	bmtHash := <-t.result
	span := t.span

	if release {
		h.releaseTree()
	}
	if span == nil {
		return append(b, bmtHash...)
	}
	// sha3(span + BMT(pure_chunk))
	bh := h.pool.hasher()
	bh.Write(span)
	bh.Write(bmtHash)
	return bh.Sum(b)
}

// Hasher implements the SwarmHash interface

// Hasher implements the io.Writer interface

// Write fills the buffer to hash,
// with every full segment calls writeSection
// Data beyond the capacity of the tree is dropped, the full length of b is
// reported as written nevertheless
func (h *Hasher) Write(b []byte) (int, error) {
	l := len(b)
	if l <= 0 {
		return 0, nil
	}
	// reserves a tree on demand, so Write is safe without a preceding Reset
	t := h.getTree()
	need := (h.pool.SegmentCount - t.cur) * h.pool.SegmentSize
	if l < need {
		need = l
	}
	// calculate missing bit to complete current open segment
	rest := h.pool.SegmentSize - t.offset
	if need < rest {
		rest = need
	}
	copy(t.segment[t.offset:], b[:rest])
	need -= rest
	size := (t.offset + rest) % h.pool.SegmentSize
	// read full segments and the last possibly partial segment
	for need > 0 {
		// push all finished chunks we read
		if t.cur%2 == 0 {
			copy(t.section, t.segment)
		} else {
			copy(t.section[h.pool.SegmentSize:], t.segment)
			h.writeSection(t.cur, t.section)
		}
		size = h.pool.SegmentSize
		if need < size {
			size = need
		}
		copy(t.segment, b[rest:rest+size])
		need -= size
		rest += size
		t.cur++
	}
	t.offset = size % h.pool.SegmentSize
	return l, nil
}

// Reset needs to be called before writing to the hasher
// it reserves a tree from the pool unless the hasher already holds one,
// in which case that tree is kept as it is
func (h *Hasher) Reset() {
	h.getTree()
}

// Hasher implements the SwarmHash interface

// ResetWithLength needs to be called before writing to the hasher
// the argument is supposed to be the byte slice binary representation of
// the length of the data subsumed under the hash, i.e., span
func (h *Hasher) ResetWithLength(span []byte) {
	h.Reset()
	h.bmt.span = span
}

// releaseTree gives back the Tree to the pool whereby it unlocks
// it resets tree, segment and index
func (h *Hasher) releaseTree() {
	t := h.bmt
	if t == nil {
		return
	}
	t.cur = 0
	t.offset = 0
	t.span = nil
	t.hash = nil
	// wipe the open segment: with cur == 0 and offset == 0 sum hashes the
	// segment buffer as is, so leftovers of this chunk would end up in the
	// hash of an empty chunk hashed next on the same tree
	copy(t.segment, h.pool.zerohashes[0])
	h.bmt = nil
	h.pool.release(t)
}

// TODO: writeSegment writes the ith segment into the BMT tree
// func (h *Hasher) writeSegment(i int, s []byte) {
// 	go h.run(h.bmt.leaves[i/2], h.pool.hasher(), i%2 == 0, s)
// }

// writeSection writes the hash of i/2-th section into right level 1 node of the BMT tree
// the section is fed to the base hasher before the goroutine is started,
// so the caller is free to reuse the section buffer once writeSection returns
func (h *Hasher) writeSection(i int, section []byte) {
	n := h.bmt.leaves[i/2]
	isLeft := n.isLeft
	n = n.parent
	bh := h.pool.hasher()
	bh.Write(section)
	go func() {
		sum := bh.Sum(nil)
		if n == nil {
			// single section tree: the section hash is the BMT root
			h.bmt.result <- sum
			return
		}
		h.run(n, bh, isLeft, sum)
	}()
}

// run pushes the data to the node
// if it is the first of 2 sisters written the routine returns
// if it is the second, it calculates the hash and writes it
// to the parent node recursively
func (h *Hasher) run(n *node, bh hash.Hash, isLeft bool, s []byte) {
	for {
		if isLeft {
			n.left = s
		} else {
			n.right = s
		}
		// the child-thread first arriving will quit
		if n.toggle() {
			return
		}
		// the second thread now can be sure both left and right children are written
		// it calculates the hash of left|right and take it to the next level
		bh.Reset()
		bh.Write(n.left)
		bh.Write(n.right)
		s = bh.Sum(nil)

		// at the root of the bmt just write the result to the result channel
		if n.parent == nil {
			h.bmt.result <- s
			return
		}

		// otherwise iterate on parent
		isLeft = n.isLeft
		n = n.parent
	}
}

// finalise is following the path starting from the final datasegment to the
// BMT root via parents
// for unbalanced trees it fills in the missing right sister nodes using
// the pool's lookup table for BMT subtree root hashes for all-zero sections
// if skip is set, the node on the lowest level is left untouched
func (h *Hasher) finalise(skip bool) {
	t := h.bmt
	isLeft := t.cur%2 == 0
	n := t.leaves[t.cur/2]
	for level := 0; n != nil; level++ {
		// when the final segment's path is going via left child node
		// we include an all-zero subtree hash for the right level and toggle the node.
		// when the path is going through right child node, nothing to do
		if isLeft && !skip {
			n.right = h.pool.zerohashes[level]
			n.toggle()
		}
		skip = false
		isLeft = n.isLeft
		n = n.parent
	}
}

// getTree obtains a BMT resource by reserving one from the pool
// if the hasher already holds a tree, that tree is returned
func (h *Hasher) getTree() *tree {
	if h.bmt != nil {
		return h.bmt
	}
	t := h.pool.reserve()
	h.bmt = t
	return t
}

// atomic bool toggle implementing a concurrent reusable 2-state object
// atomic AddInt32 with %2 implements atomic bool toggle
// it returns true if the toggler just put it in the active/waiting state
func (n *node) toggle() bool {
	return atomic.AddInt32(&n.state, 1)%2 == 1
}

// hashstr returns the hex representation of at most the first 4 bytes of b
func hashstr(b []byte) string {
	end := len(b)
	if end > 4 {
		end = 4
	}
	return fmt.Sprintf("%x", b[:end])
}

// calculateDepthFor calculates the depth (number of levels) in the BMT tree
func calculateDepthFor(n int) (d int) {
	c := 2
	for ; c < n; c *= 2 {
		d++
	}
	return d + 1
}

// diff --git a/swarm/bmt/bmt_r.go b/swarm/bmt/bmt_r.go new file mode 100644 index 000000000..c61d2dc73 --- /dev/null +++ b/swarm/bmt/bmt_r.go @@ -0,0 +1,85 @@

// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+ +// Package bmt is a simple nonconcurrent reference implementation for hashsize segment based +// Binary Merkle tree hash on arbitrary but fixed maximum chunksize +// +// This implementation does not take advantage of any paralellisms and uses +// far more memory than necessary, but it is easy to see that it is correct. +// It can be used for generating test cases for optimized implementations. +// There is extra check on reference hasher correctness in bmt_test.go +// * TestRefHasher +// * testBMTHasherCorrectness function +package bmt + +import ( + "hash" +) + +// RefHasher is the non-optimized easy-to-read reference implementation of BMT +type RefHasher struct { + maxDataLength int // c * hashSize, where c = 2 ^ ceil(log2(count)), where count = ceil(length / hashSize) + sectionLength int // 2 * hashSize + hasher hash.Hash // base hash func (Keccak256 SHA3) +} + +// NewRefHasher returns a new RefHasher +func NewRefHasher(hasher BaseHasherFunc, count int) *RefHasher { + h := hasher() + hashsize := h.Size() + c := 2 + for ; c < count; c *= 2 { + } + return &RefHasher{ + sectionLength: 2 * hashsize, + maxDataLength: c * hashsize, + hasher: h, + } +} + +// Hash returns the BMT hash of the byte slice +// implements the SwarmHash interface +func (rh *RefHasher) Hash(data []byte) []byte { + // if data is shorter than the base length (maxDataLength), we provide padding with zeros + d := make([]byte, rh.maxDataLength) + length := len(data) + if length > rh.maxDataLength { + length = rh.maxDataLength + } + copy(d, data[:length]) + return rh.hash(d, rh.maxDataLength) +} + +// data has length maxDataLength = segmentSize * 2^k +// hash calls itself recursively on both halves of the given slice +// concatenates the results, and returns the hash of that +// if the length of d is 2 * segmentSize then just returns the hash of that section +func (rh *RefHasher) hash(data []byte, length int) []byte { + var section []byte + if length == rh.sectionLength { + // section contains two 
data segments (d) + section = data + } else { + // section contains hashes of left and right BMT subtreea + // to be calculated by calling hash recursively on left and right half of d + length /= 2 + section = append(rh.hash(data[:length], length), rh.hash(data[length:], length)...) + } + rh.hasher.Reset() + rh.hasher.Write(section) + s := rh.hasher.Sum(nil) + return s +} diff --git a/swarm/bmt/bmt_test.go b/swarm/bmt/bmt_test.go new file mode 100644 index 000000000..e074d90e7 --- /dev/null +++ b/swarm/bmt/bmt_test.go @@ -0,0 +1,390 @@ +// Copyright 2017 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 
+ +package bmt + +import ( + "bytes" + crand "crypto/rand" + "encoding/binary" + "fmt" + "io" + "math/rand" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/ethereum/go-ethereum/crypto/sha3" +) + +// the actual data length generated (could be longer than max datalength of the BMT) +const BufferSize = 4128 + +func sha3hash(data ...[]byte) []byte { + h := sha3.NewKeccak256() + for _, v := range data { + h.Write(v) + } + return h.Sum(nil) +} + +// TestRefHasher tests that the RefHasher computes the expected BMT hash for +// all data lengths between 0 and 256 bytes +func TestRefHasher(t *testing.T) { + + // the test struct is used to specify the expected BMT hash for + // segment counts between from and to and lengths from 1 to datalength + type test struct { + from int + to int + expected func([]byte) []byte + } + + var tests []*test + // all lengths in [0,64] should be: + // + // sha3hash(data) + // + tests = append(tests, &test{ + from: 1, + to: 2, + expected: func(d []byte) []byte { + data := make([]byte, 64) + copy(data, d) + return sha3hash(data) + }, + }) + + // all lengths in [3,4] should be: + // + // sha3hash( + // sha3hash(data[:64]) + // sha3hash(data[64:]) + // ) + // + tests = append(tests, &test{ + from: 3, + to: 4, + expected: func(d []byte) []byte { + data := make([]byte, 128) + copy(data, d) + return sha3hash(sha3hash(data[:64]), sha3hash(data[64:])) + }, + }) + + // all segmentCounts in [5,8] should be: + // + // sha3hash( + // sha3hash( + // sha3hash(data[:64]) + // sha3hash(data[64:128]) + // ) + // sha3hash( + // sha3hash(data[128:192]) + // sha3hash(data[192:]) + // ) + // ) + // + tests = append(tests, &test{ + from: 5, + to: 8, + expected: func(d []byte) []byte { + data := make([]byte, 256) + copy(data, d) + return sha3hash(sha3hash(sha3hash(data[:64]), sha3hash(data[64:128])), sha3hash(sha3hash(data[128:192]), sha3hash(data[192:]))) + }, + }) + + // run the tests + for _, x := range tests { + for segmentCount := x.from; 
segmentCount <= x.to; segmentCount++ { + for length := 1; length <= segmentCount*32; length++ { + t.Run(fmt.Sprintf("%d_segments_%d_bytes", segmentCount, length), func(t *testing.T) { + data := make([]byte, length) + if _, err := io.ReadFull(crand.Reader, data); err != nil && err != io.EOF { + t.Fatal(err) + } + expected := x.expected(data) + actual := NewRefHasher(sha3.NewKeccak256, segmentCount).Hash(data) + if !bytes.Equal(actual, expected) { + t.Fatalf("expected %x, got %x", expected, actual) + } + }) + } + } + } +} + +func TestHasherCorrectness(t *testing.T) { + err := testHasher(testBaseHasher) + if err != nil { + t.Fatal(err) + } +} + +func testHasher(f func(BaseHasherFunc, []byte, int, int) error) error { + data := newData(BufferSize) + hasher := sha3.NewKeccak256 + size := hasher().Size() + counts := []int{1, 2, 3, 4, 5, 8, 16, 32, 64, 128} + + var err error + for _, count := range counts { + max := count * size + incr := 1 + for n := 1; n <= max; n += incr { + err = f(hasher, data, n, count) + if err != nil { + return err + } + } + } + return nil +} + +// Tests that the BMT hasher can be synchronously reused with poolsizes 1 and PoolSize +func TestHasherReuse(t *testing.T) { + t.Run(fmt.Sprintf("poolsize_%d", 1), func(t *testing.T) { + testHasherReuse(1, t) + }) + t.Run(fmt.Sprintf("poolsize_%d", PoolSize), func(t *testing.T) { + testHasherReuse(PoolSize, t) + }) +} + +func testHasherReuse(poolsize int, t *testing.T) { + hasher := sha3.NewKeccak256 + pool := NewTreePool(hasher, SegmentCount, poolsize) + defer pool.Drain(0) + bmt := New(pool) + + for i := 0; i < 100; i++ { + data := newData(BufferSize) + n := rand.Intn(bmt.DataLength()) + err := testHasherCorrectness(bmt, hasher, data, n, SegmentCount) + if err != nil { + t.Fatal(err) + } + } +} + +// Tests if pool can be cleanly reused even in concurrent use +func TestBMTHasherConcurrentUse(t *testing.T) { + hasher := sha3.NewKeccak256 + pool := NewTreePool(hasher, SegmentCount, PoolSize) + defer 
pool.Drain(0) + cycles := 100 + errc := make(chan error) + + for i := 0; i < cycles; i++ { + go func() { + bmt := New(pool) + data := newData(BufferSize) + n := rand.Intn(bmt.DataLength()) + errc <- testHasherCorrectness(bmt, hasher, data, n, 128) + }() + } +LOOP: + for { + select { + case <-time.NewTimer(5 * time.Second).C: + t.Fatal("timed out") + case err := <-errc: + if err != nil { + t.Fatal(err) + } + cycles-- + if cycles == 0 { + break LOOP + } + } + } +} + +// helper function that creates a tree pool +func testBaseHasher(hasher BaseHasherFunc, d []byte, n, count int) error { + pool := NewTreePool(hasher, count, 1) + defer pool.Drain(0) + bmt := New(pool) + return testHasherCorrectness(bmt, hasher, d, n, count) +} + +// helper function that compares reference and optimised implementations on +// correctness +func testHasherCorrectness(bmt *Hasher, hasher BaseHasherFunc, d []byte, n, count int) (err error) { + span := make([]byte, 8) + if len(d) < n { + n = len(d) + } + binary.BigEndian.PutUint64(span, uint64(n)) + data := d[:n] + rbmt := NewRefHasher(hasher, count) + exp := sha3hash(span, rbmt.Hash(data)) + got := Hash(bmt, span, data) + if !bytes.Equal(got, exp) { + return fmt.Errorf("wrong hash: expected %x, got %x", exp, got) + } + return err +} + +func BenchmarkSHA3_4k(t *testing.B) { benchmarkSHA3(4096, t) } +func BenchmarkSHA3_2k(t *testing.B) { benchmarkSHA3(4096/2, t) } +func BenchmarkSHA3_1k(t *testing.B) { benchmarkSHA3(4096/4, t) } +func BenchmarkSHA3_512b(t *testing.B) { benchmarkSHA3(4096/8, t) } +func BenchmarkSHA3_256b(t *testing.B) { benchmarkSHA3(4096/16, t) } +func BenchmarkSHA3_128b(t *testing.B) { benchmarkSHA3(4096/32, t) } + +func BenchmarkBMTBaseline_4k(t *testing.B) { benchmarkBMTBaseline(4096, t) } +func BenchmarkBMTBaseline_2k(t *testing.B) { benchmarkBMTBaseline(4096/2, t) } +func BenchmarkBMTBaseline_1k(t *testing.B) { benchmarkBMTBaseline(4096/4, t) } +func BenchmarkBMTBaseline_512b(t *testing.B) { benchmarkBMTBaseline(4096/8, t) 
} +func BenchmarkBMTBaseline_256b(t *testing.B) { benchmarkBMTBaseline(4096/16, t) } +func BenchmarkBMTBaseline_128b(t *testing.B) { benchmarkBMTBaseline(4096/32, t) } + +func BenchmarkRefHasher_4k(t *testing.B) { benchmarkRefHasher(4096, t) } +func BenchmarkRefHasher_2k(t *testing.B) { benchmarkRefHasher(4096/2, t) } +func BenchmarkRefHasher_1k(t *testing.B) { benchmarkRefHasher(4096/4, t) } +func BenchmarkRefHasher_512b(t *testing.B) { benchmarkRefHasher(4096/8, t) } +func BenchmarkRefHasher_256b(t *testing.B) { benchmarkRefHasher(4096/16, t) } +func BenchmarkRefHasher_128b(t *testing.B) { benchmarkRefHasher(4096/32, t) } + +func BenchmarkBMTHasher_4k(t *testing.B) { benchmarkBMTHasher(4096, t) } +func BenchmarkBMTHasher_2k(t *testing.B) { benchmarkBMTHasher(4096/2, t) } +func BenchmarkBMTHasher_1k(t *testing.B) { benchmarkBMTHasher(4096/4, t) } +func BenchmarkBMTHasher_512b(t *testing.B) { benchmarkBMTHasher(4096/8, t) } +func BenchmarkBMTHasher_256b(t *testing.B) { benchmarkBMTHasher(4096/16, t) } +func BenchmarkBMTHasher_128b(t *testing.B) { benchmarkBMTHasher(4096/32, t) } + +func BenchmarkBMTHasherNoPool_4k(t *testing.B) { benchmarkBMTHasherPool(1, 4096, t) } +func BenchmarkBMTHasherNoPool_2k(t *testing.B) { benchmarkBMTHasherPool(1, 4096/2, t) } +func BenchmarkBMTHasherNoPool_1k(t *testing.B) { benchmarkBMTHasherPool(1, 4096/4, t) } +func BenchmarkBMTHasherNoPool_512b(t *testing.B) { benchmarkBMTHasherPool(1, 4096/8, t) } +func BenchmarkBMTHasherNoPool_256b(t *testing.B) { benchmarkBMTHasherPool(1, 4096/16, t) } +func BenchmarkBMTHasherNoPool_128b(t *testing.B) { benchmarkBMTHasherPool(1, 4096/32, t) } + +func BenchmarkBMTHasherPool_4k(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096, t) } +func BenchmarkBMTHasherPool_2k(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096/2, t) } +func BenchmarkBMTHasherPool_1k(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096/4, t) } +func BenchmarkBMTHasherPool_512b(t *testing.B) { 
benchmarkBMTHasherPool(PoolSize, 4096/8, t) } +func BenchmarkBMTHasherPool_256b(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096/16, t) } +func BenchmarkBMTHasherPool_128b(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096/32, t) } + +// benchmarks simple sha3 hash on chunks +func benchmarkSHA3(n int, t *testing.B) { + data := newData(n) + hasher := sha3.NewKeccak256 + h := hasher() + + t.ReportAllocs() + t.ResetTimer() + for i := 0; i < t.N; i++ { + h.Reset() + h.Write(data) + h.Sum(nil) + } +} + +// benchmarks the minimum hashing time for a balanced (for simplicity) BMT +// by doing count/segmentsize parallel hashings of 2*segmentsize bytes +// doing it on n PoolSize each reusing the base hasher +// the premise is that this is the minimum computation needed for a BMT +// therefore this serves as a theoretical optimum for concurrent implementations +func benchmarkBMTBaseline(n int, t *testing.B) { + hasher := sha3.NewKeccak256 + hashSize := hasher().Size() + data := newData(hashSize) + + t.ReportAllocs() + t.ResetTimer() + for i := 0; i < t.N; i++ { + count := int32((n-1)/hashSize + 1) + wg := sync.WaitGroup{} + wg.Add(PoolSize) + var i int32 + for j := 0; j < PoolSize; j++ { + go func() { + defer wg.Done() + h := hasher() + for atomic.AddInt32(&i, 1) < count { + h.Reset() + h.Write(data) + h.Sum(nil) + } + }() + } + wg.Wait() + } +} + +// benchmarks BMT Hasher +func benchmarkBMTHasher(n int, t *testing.B) { + data := newData(n) + hasher := sha3.NewKeccak256 + pool := NewTreePool(hasher, SegmentCount, PoolSize) + + t.ReportAllocs() + t.ResetTimer() + for i := 0; i < t.N; i++ { + bmt := New(pool) + Hash(bmt, nil, data) + } +} + +// benchmarks 100 concurrent bmt hashes with pool capacity +func benchmarkBMTHasherPool(poolsize, n int, t *testing.B) { + data := newData(n) + hasher := sha3.NewKeccak256 + pool := NewTreePool(hasher, SegmentCount, poolsize) + cycles := 100 + + t.ReportAllocs() + t.ResetTimer() + wg := sync.WaitGroup{} + for i := 0; i < t.N; i++ { 
+ wg.Add(cycles) + for j := 0; j < cycles; j++ { + go func() { + defer wg.Done() + bmt := New(pool) + Hash(bmt, nil, data) + }() + } + wg.Wait() + } +} + +// benchmarks the reference hasher +func benchmarkRefHasher(n int, t *testing.B) { + data := newData(n) + hasher := sha3.NewKeccak256 + rbmt := NewRefHasher(hasher, 128) + + t.ReportAllocs() + t.ResetTimer() + for i := 0; i < t.N; i++ { + rbmt.Hash(data) + } +} + +func newData(bufferSize int) []byte { + data := make([]byte, bufferSize) + _, err := io.ReadFull(crand.Reader, data) + if err != nil { + panic(err.Error()) + } + return data +} |