aboutsummaryrefslogtreecommitdiffstats
path: root/swarm/bmt
diff options
context:
space:
mode:
authorethersphere <thesw@rm.eth>2018-06-20 20:06:27 +0800
committerethersphere <thesw@rm.eth>2018-06-22 03:10:31 +0800
commite187711c6545487d4cac3701f0f506bb536234e2 (patch)
treed2f6150f70b84b36e49a449082aeda267b4b9046 /swarm/bmt
parent574378edb50c907b532946a1d4654dbd6701b20a (diff)
downloaddexon-e187711c6545487d4cac3701f0f506bb536234e2.tar
dexon-e187711c6545487d4cac3701f0f506bb536234e2.tar.gz
dexon-e187711c6545487d4cac3701f0f506bb536234e2.tar.bz2
dexon-e187711c6545487d4cac3701f0f506bb536234e2.tar.lz
dexon-e187711c6545487d4cac3701f0f506bb536234e2.tar.xz
dexon-e187711c6545487d4cac3701f0f506bb536234e2.tar.zst
dexon-e187711c6545487d4cac3701f0f506bb536234e2.zip
swarm: network rewrite merge
Diffstat (limited to 'swarm/bmt')
-rw-r--r--swarm/bmt/bmt.go543
-rw-r--r--swarm/bmt/bmt_r.go85
-rw-r--r--swarm/bmt/bmt_test.go390
3 files changed, 1018 insertions, 0 deletions
diff --git a/swarm/bmt/bmt.go b/swarm/bmt/bmt.go
new file mode 100644
index 000000000..71aee2495
--- /dev/null
+++ b/swarm/bmt/bmt.go
@@ -0,0 +1,543 @@
+// Copyright 2018 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+// Package bmt provides a binary merkle tree implementation
+package bmt
+
+import (
+ "fmt"
+ "hash"
+ "strings"
+ "sync"
+ "sync/atomic"
+)
+
+/*
+Binary Merkle Tree Hash is a hash function over arbitrary data chunks of limited size.
+It is defined as the root hash of the binary merkle tree built over fixed size segments
+of the underlying chunk using any base hash function (e.g. Keccak256 SHA3).
+Chunks with data shorter than the fixed size are hashed as if they had zero padding.
+
+BMT hash is used as the chunk hash function in swarm which in turn is the basis for the
+128 branching swarm hash http://swarm-guide.readthedocs.io/en/latest/architecture.html#swarm-hash
+
+The BMT is optimal for providing compact inclusion proofs, i.e. proofs that a
+segment is a substring of a chunk starting at a particular offset.
+The size of the underlying segments is fixed to the size of the base hash (called the resolution
+of the BMT hash). With Keccak256 SHA3 the hash is 32 bytes, the EVM word size, which optimizes for on-chain BMT verification
+and is also the hash size optimal for inclusion proofs in the merkle tree of the swarm hash.
+
+Two implementations are provided:
+
+* RefHasher is optimized for code simplicity and meant as a reference implementation
+ that is simple to understand
+* Hasher is optimized for speed taking advantage of concurrency with minimalistic
+ control structure to coordinate the concurrent routines
+ It implements the following interfaces
+ * standard golang hash.Hash
+ * SwarmHash
+ * io.Writer
+ * TODO: SegmentWriter
+*/
+
+const (
+ // SegmentCount is the maximum number of segments of the underlying chunk.
+ // Should be equal to max-chunk-data-size / hash-size
+ // (e.g. 128 segments of 32 bytes cover 4096 bytes of chunk data).
+ SegmentCount = 128
+ // PoolSize is the maximum number of bmt trees used by the hashers, i.e.,
+ // the maximum number of concurrent BMT hashing operations performed by the same hasher.
+ // It is meant to be passed as the capacity argument of NewTreePool.
+ PoolSize = 8
+)
+
+// BaseHasherFunc is a hash.Hash constructor function used for the base hash of the BMT.
+// The pool calls it every time it needs a base hasher instance (see NewTreePool,
+// writeSection and sum), so each call is expected to return a fresh hash.Hash.
+// Typically implemented by Keccak256 SHA3 (sha3.NewKeccak256).
+type BaseHasherFunc func() hash.Hash
+
+// Hasher is a reusable hasher for fixed maximum size chunks representing a BMT
+// - implements the hash.Hash interface
+// - reuses a pool of trees for amortised memory allocation and resource control
+// - supports order-agnostic concurrent segment writes (TODO:)
+// as well as sequential read and write
+// - the same hasher instance must not be called concurrently on more than one chunk
+// - the same hasher instance is synchronously reusable
+// - Sum gives back the tree to the pool and is guaranteed to leave
+// the tree and itself in a state reusable for hashing a new chunk
+// - generates and verifies segment inclusion proofs (TODO:)
+//
+// The bmt field is nil until Reset/ResetWithLength reserves a tree from the pool
+// and is set back to nil when the tree is released.
+type Hasher struct {
+ pool *TreePool // BMT resource pool
+ bmt *tree // prebuilt BMT resource for flowcontrol and proofs
+}
+
+// New returns a reusable Hasher backed by the tree pool p.
+// The Hasher implements the hash.Hash interface; no tree is reserved here,
+// one is pulled from the pool when the hasher is reset for a chunk.
+func New(p *TreePool) *Hasher {
+	h := new(Hasher)
+	h.pool = p
+	return h
+}
+
+// TreePool provides a pool of trees used as resources by Hasher.
+// A tree popped from the pool is guaranteed to have clean state
+// for hashing a new chunk.
+// The exported fields are filled in by NewTreePool and are only read afterwards
+// by the code in this file.
+type TreePool struct {
+ lock sync.Mutex
+ c chan *tree // the channel to obtain a resource from the pool
+ hasher BaseHasherFunc // base hasher to use for the BMT levels
+ SegmentSize int // size of leaf segments, stipulated to be = hash size
+ SegmentCount int // the number of segments on the base level of the BMT
+ Capacity int // pool capacity, controls concurrency
+ Depth int // depth of the bmt trees as computed by calculateDepthFor(SegmentCount)
+ Datalength int // the total length of the data (count * size)
+ count int // current count of (ever) allocated resources
+ zerohashes [][]byte // lookup table for predictable padding subtrees for all levels
+}
+
+// NewTreePool creates a tree pool for the given base hasher, segment count and capacity.
+// The segment size is taken from the size of the base hash.
+// Trees are allocated lazily: reserve reuses free trees or creates a new one
+// as long as capacity is not reached.
+func NewTreePool(hasher BaseHasherFunc, segmentCount, capacity int) *TreePool {
+	depth := calculateDepthFor(segmentCount)
+	segmentSize := hasher().Size()
+
+	// build the lookup table of all-zero subtree hashes:
+	// level 0 is the all-zero segment, every further level is the base hash
+	// of two concatenated copies of the level below
+	table := make([][]byte, depth)
+	table[0] = make([]byte, segmentSize)
+	base := hasher()
+	for level := 1; level < depth; level++ {
+		below := table[level-1]
+		base.Reset()
+		base.Write(below)
+		base.Write(below)
+		table[level] = base.Sum(nil)
+	}
+
+	pool := new(TreePool)
+	pool.c = make(chan *tree, capacity)
+	pool.hasher = hasher
+	pool.SegmentSize = segmentSize
+	pool.SegmentCount = segmentCount
+	pool.Capacity = capacity
+	pool.Datalength = segmentCount * segmentSize
+	pool.Depth = depth
+	pool.zerohashes = table
+	return pool
+}
+
+// Drain removes idle trees from the pool until at most n of them are left,
+// decrementing the allocation count for every tree dropped.
+func (p *TreePool) Drain(n int) {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+	for {
+		if len(p.c) <= n {
+			return
+		}
+		<-p.c
+		p.count--
+	}
+}
+
+// reserve blocks until it can return an available tree.
+// It reuses a free tree from the channel, or creates a new one as long as the
+// number of allocated trees is below Capacity.
+// NOTE: the pool lock is held for the whole call, including while waiting on
+// the channel once the pool is exhausted, so concurrent reserve and Drain calls
+// queue up behind the waiting caller; release does not take the lock.
+// TODO: should use a context here
+func (p *TreePool) reserve() *tree {
+ p.lock.Lock()
+ defer p.lock.Unlock()
+ var t *tree
+ // every tree is already allocated: wait until one is handed back
+ if p.count == p.Capacity {
+ return <-p.c
+ }
+ select {
+ case t = <-p.c:
+ default:
+ // nothing idle in the channel and capacity not reached: allocate a new tree
+ t = newTree(p.SegmentSize, p.Depth)
+ p.count++
+ }
+ return t
+}
+
+// release gives back a tree to the pool.
+// The caller must hand over the tree in reusable state (see Hasher.releaseTree).
+// It does not take the pool lock.
+func (p *TreePool) release(t *tree) {
+ // the channel is created with room for Capacity trees and reserve never
+ // allocates more than that, so the send is not expected to block
+ p.c <- t // can never fail ...
+}
+
+// tree is a reusable control structure representing a BMT
+// organised in a binary tree.
+// Hasher uses a TreePool to obtain a tree for each chunk hash;
+// the tree is 'locked' while not in the pool.
+// cur and offset form the write cursor maintained by Hasher.Write and are
+// reset by Hasher.releaseTree.
+type tree struct {
+ leaves []*node // leaf nodes of the tree, other nodes accessible via parent links
+ cur int // index of rightmost currently open segment
+ offset int // offset (cursor position) within currently open segment
+ segment []byte // the rightmost open segment (not complete)
+ section []byte // the rightmost open section (double segment)
+ depth int // number of levels
+ result chan []byte // result channel
+ hash []byte // to record the result
+ span []byte // The span of the data subsumed under the chunk
+}
+
+// node is a reusable segment hasher representing a node in a BMT.
+// Its two children deposit their hashes in left and right; state is flipped
+// via toggle by each of them so that the second arrival knows both are present.
+type node struct {
+ isLeft bool // whether it is left side of the parent double segment
+ parent *node // pointer to parent node in the BMT
+ state int32 // atomic increment impl concurrent boolean toggle
+ left, right []byte // this is where the content segment is set
+}
+
+// newNode constructs a segment hasher node in the BMT (used by newTree).
+// index is the node's position within its level: even positions are left
+// children of their parent, odd positions are right children.
+func newNode(index int, parent *node) *node {
+	n := new(node)
+	n.isLeft = index%2 == 0
+	n.parent = parent
+	return n
+}
+
+// draw renders the BMT as a multi-line string for debugging (badly).
+// Rows are printed from the root level down to the leaf level; each row is
+// prefixed with its distance from the top and lists the "left|right" content
+// of the nodes on that level (abbreviated by hashstr). The topmost row shows
+// the given hash. The last two lines list the left and right contents of the
+// leaf nodes.
+func (t *tree) draw(hash []byte) string {
+	// abbreviated left and right contents of the leaves for the trailing two lines
+	var left, right []string
+	for _, n := range t.leaves {
+		left = append(left, hashstr(n.left))
+		right = append(right, hashstr(n.right))
+	}
+
+	// collect one row per level, walking up from the leaves via the parent
+	// link of every even-positioned node
+	var hashes [][]string
+	for anc := t.leaves; len(anc) > 0; {
+		var parents []*node
+		row := []string{""}
+		for i, n := range anc {
+			row = append(row, hashstr(n.left)+"|"+hashstr(n.right))
+			if i%2 == 0 && n.parent != nil {
+				parents = append(parents, n.parent)
+			}
+		}
+		row = append(row, "")
+		hashes = append(hashes, row)
+		anc = parents
+	}
+	hashes = append(hashes, []string{"", hashstr(hash), ""})
+
+	total := 60
+	del := " "
+	var rows []string
+	// print top-down: the last collected row is the topmost one
+	for i := len(hashes) - 1; i >= 0; i-- {
+		row := hashes[i]
+		var textlen int
+		for _, s := range row {
+			textlen += len(s)
+		}
+		if total < textlen {
+			total = textlen + len(row)
+		}
+		// every row has at least the two empty sentinels, so len(row)-1 > 0
+		delsize := (total - textlen) / (len(row) - 1)
+		if delsize > len(del) {
+			delsize = len(del)
+		}
+		rows = append(rows, fmt.Sprintf("%v: %v", len(hashes)-i-1, strings.Join(row, del[:delsize])))
+	}
+	rows = append(rows, strings.Join(left, " "))
+	rows = append(rows, strings.Join(right, " "))
+	return strings.Join(rows, "\n") + "\n"
+}
+
+// newTree initialises a tree by building up the nodes of a BMT.
+// - segment size is stipulated to be the size of the hash
+// - depth is the number of node levels; the root is created first and each
+// further level has twice as many nodes as the one above it
+// The nodes of the last level built become the leaves of the tree; with
+// depth 1 the root itself is the only leaf.
+func newTree(segmentSize, depth int) *tree {
+	root := newNode(0, nil)
+	level := []*node{root}
+	// build the remaining depth-1 levels top-down, linking every node to its parent
+	width := 2
+	for remaining := depth - 1; remaining > 0; remaining-- {
+		nodes := make([]*node, width)
+		for i := range nodes {
+			nodes[i] = newNode(i, level[i/2])
+		}
+		level = nodes
+		width *= 2
+	}
+	// the datanode level is the nodes on the last level
+	return &tree{
+		leaves:  level,
+		depth:   depth, // record the number of levels the tree was built with
+		result:  make(chan []byte, 1),
+		segment: make([]byte, segmentSize),
+		section: make([]byte, 2*segmentSize),
+	}
+}
+
+// methods needed by hash.Hash
+
+// Size returns the number of bytes of the hash, which is the pool's segment size
+// (the size of the base hash).
+func (h *Hasher) Size() int {
+ return h.pool.SegmentSize
+}
+
+// BlockSize returns the block size, reported as the pool's segment size.
+func (h *Hasher) BlockSize() int {
+ return h.pool.SegmentSize
+}
+
+// Hash hashes the data and the span using the bmt hasher.
+// It resets h with the span, writes data and returns h.Sum(nil).
+// The return values of Write are not checked.
+func Hash(h *Hasher, span, data []byte) []byte {
+ h.ResetWithLength(span)
+ h.Write(data)
+ return h.Sum(nil)
+}
+
+// DataLength returns the maximum data size that is hashed by the hasher =
+// segment count times segment size
+func (h *Hasher) DataLength() int {
+ return h.pool.Datalength
+}
+
+// Sum returns the hash of the data written so far.
+// It is the hash.Hash interface Sum method; b is passed on to sum, which hands
+// it to the base hasher's Sum when a span was set with ResetWithLength.
+// Sum finalises the tree and releases it back to the pool, so Reset or
+// ResetWithLength must be called before the hasher is written to again.
+// caller must make sure Sum is not called concurrently with Write, writeSection
+// and WriteSegment (TODO:)
+func (h *Hasher) Sum(b []byte) (r []byte) {
+ return h.sum(b, true, true)
+}
+
+// sum implements Sum taking parameters
+// * release: if the tree is given back to the pool right away
+// * section: if sequential write is used (can read sections); the segment
+// writer alternative is not implemented and panics
+//
+// It pads the open segment and section with zeros, pushes the last section
+// into the tree and waits for the BMT root hash on the tree's result channel.
+// Following the hash.Hash contract the resulting hash is appended to b:
+// * without a span the result is b followed by the BMT root hash
+// * with a span the result is b followed by basehash(span + BMT root hash)
+//
+// The hasher must have been reset before (h.bmt must not be nil).
+func (h *Hasher) sum(b []byte, release, section bool) (r []byte) {
+	t := h.bmt
+	// fill in zero subtree hashes for the missing right siblings along the path to the root
+	h.finalise(section)
+	if t.offset > 0 {
+		// pad the partially filled open segment with zeros
+		copy(t.segment[t.offset:], h.pool.zerohashes[0])
+	}
+	if !section {
+		// TODO: h.writeSegment(t.cur, t.segment)
+		panic("SegmentWriter not implemented")
+	}
+	if t.cur%2 == 1 {
+		// the open segment is the right half of the section, the left half is already in place
+		copy(t.section[h.pool.SegmentSize:], t.segment)
+	} else {
+		// copy segment to front of section, zero pad the right half
+		copy(t.section, t.segment)
+		copy(t.section[h.pool.SegmentSize:], h.pool.zerohashes[0])
+	}
+	h.writeSection(t.cur, t.section)
+
+	bmtHash := <-t.result
+	// read the span before the tree is released: releaseTree clears it
+	span := t.span
+	if release {
+		h.releaseTree()
+	}
+	if span == nil {
+		// no span given: the hash is the plain BMT root, appended to b
+		return append(b, bmtHash...)
+	}
+	// sha3(span + BMT(pure_chunk))
+	bh := h.pool.hasher()
+	bh.Reset()
+	bh.Write(span)
+	bh.Write(bmtHash)
+	return bh.Sum(b)
+}
+
+// Hasher implements the SwarmHash interface
+
+// Hasher implements the io.Writer interface
+
+// Write fills the buffer to hash.
+// Data is copied segment by segment into the tree's open segment; every time a
+// segment with an odd index is completed and more data follows, the finished
+// section (double segment) is handed to writeSection.
+// At most (SegmentCount - cur) * SegmentSize bytes of b are consumed, the rest
+// is ignored, yet the returned count is always len(b) and the error is always nil.
+// The hasher must have been reset before (h.bmt must not be nil).
+func (h *Hasher) Write(b []byte) (int, error) {
+ l := len(b)
+ if l <= 0 {
+ return 0, nil
+ }
+ t := h.bmt
+ // need is the number of bytes of b that will be consumed by this call
+ need := (h.pool.SegmentCount - t.cur) * h.pool.SegmentSize
+ if l < need {
+ need = l
+ }
+ // calculate missing bit to complete current open segment
+ rest := h.pool.SegmentSize - t.offset
+ if need < rest {
+ rest = need
+ }
+ copy(t.segment[t.offset:], b[:rest])
+ need -= rest
+ // size is the fill level of the open segment (0 when it has just been completed)
+ size := (t.offset + rest) % h.pool.SegmentSize
+ // read full segments and the last possibly partial segment
+ // from here on rest doubles as the read position within b
+ for need > 0 {
+ // push all finished chunks we read
+ if t.cur%2 == 0 {
+ // left half of a section: park it until the right half is complete
+ copy(t.section, t.segment)
+ } else {
+ copy(t.section[h.pool.SegmentSize:], t.segment)
+ h.writeSection(t.cur, t.section)
+ }
+ size = h.pool.SegmentSize
+ if need < size {
+ size = need
+ }
+ copy(t.segment, b[rest:rest+size])
+ need -= size
+ rest += size
+ t.cur++
+ }
+ // a completely filled last segment is recorded as offset 0
+ t.offset = size % h.pool.SegmentSize
+ return l, nil
+}
+
+// Reset needs to be called before writing to the hasher.
+// It makes sure the hasher holds a tree, reserving one from the pool if it
+// does not have one yet (which may block, see TreePool.reserve).
+func (h *Hasher) Reset() {
+ h.getTree()
+}
+
+// Hasher implements the SwarmHash interface
+
+// ResetWithLength needs to be called before writing to the hasher
+// the argument is supposed to be the byte slice binary representation of
+// the length of the data subsumed under the hash, i.e., span
+// The slice is stored as is (not copied) and is hashed together with the BMT
+// root in sum.
+func (h *Hasher) ResetWithLength(span []byte) {
+ h.Reset()
+ h.bmt.span = span
+}
+
+// releaseTree hands the hasher's tree back to the pool, thereby unlocking it.
+// The write cursor, span and recorded hash of the tree are cleared first and
+// the hasher forgets the tree. It is a no-op if the hasher holds no tree.
+func (h *Hasher) releaseTree() {
+	t := h.bmt
+	if t == nil {
+		return
+	}
+	t.cur, t.offset = 0, 0
+	t.span, t.hash = nil, nil
+	h.bmt = nil
+	h.pool.release(t)
+}
+
+// TODO: writeSegment writes the ith segment into the BMT tree
+// func (h *Hasher) writeSegment(i int, s []byte) {
+// go h.run(h.bmt.leaves[i/2], h.pool.hasher(), i%2 == 0, s)
+// }
+
+// writeSection writes the hash of the i/2-th section into the level 1 node of the BMT tree,
+// i.e. the parent of the leaf the section belongs to.
+// i is the index of a segment within the section; section is the double segment itself.
+func (h *Hasher) writeSection(i int, section []byte) {
+ n := h.bmt.leaves[i/2]
+ isLeft := n.isLeft
+ n = n.parent
+ bh := h.pool.hasher()
+ // the section is fed to the base hasher before the goroutine is started,
+ // so the caller is free to reuse the section buffer once this returns
+ bh.Write(section)
+ go func() {
+ sum := bh.Sum(nil)
+ // the leaf has no parent: the section hash is the BMT root already
+ if n == nil {
+ h.bmt.result <- sum
+ return
+ }
+ h.run(n, bh, isLeft, sum)
+ }()
+}
+
+// run pushes the data to the node
+// if it is the first of 2 sisters written the routine returns
+// if it is the second, it calculates the hash and writes it
+// to the parent node recursively
+// n is the node to write to, bh the base hasher reused on the way up,
+// isLeft tells which side of n the data s belongs to.
+// Reaching the root sends the final hash on the tree's result channel.
+func (h *Hasher) run(n *node, bh hash.Hash, isLeft bool, s []byte) {
+ for {
+ if isLeft {
+ n.left = s
+ } else {
+ n.right = s
+ }
+ // the child-thread first arriving will quit
+ if n.toggle() {
+ return
+ }
+ // the second thread now can be sure both left and right children are written
+ // it calculates the hash of left|right and take it to the next level
+ bh.Reset()
+ bh.Write(n.left)
+ bh.Write(n.right)
+ s = bh.Sum(nil)
+
+ // at the root of the bmt just write the result to the result channel
+ if n.parent == nil {
+ h.bmt.result <- s
+ return
+ }
+
+ // otherwise iterate on parent
+ isLeft = n.isLeft
+ n = n.parent
+ }
+}
+
+// finalise is following the path starting from the final datasegment to the
+// BMT root via parents
+// for unbalanced trees it fills in the missing right sister nodes using
+// the pool's lookup table for BMT subtree root hashes for all-zero sections
+// skip suppresses the fill-in on the starting (leaf) node only; sum passes
+// true for it when whole sections are written.
+func (h *Hasher) finalise(skip bool) {
+ t := h.bmt
+ isLeft := t.cur%2 == 0
+ n := t.leaves[t.cur/2]
+ for level := 0; n != nil; level++ {
+ // when the final segment's path is going via left child node
+ // we include an all-zero subtree hash for the right level and toggle the node.
+ // when the path is going through right child node, nothing to do
+ if isLeft && !skip {
+ n.right = h.pool.zerohashes[level]
+ // stands in for the right child that will never report to this node
+ n.toggle()
+ }
+ skip = false
+ isLeft = n.isLeft
+ n = n.parent
+ }
+}
+
+// getTree returns the tree the hasher currently holds; if it holds none,
+// one is reserved from the pool (blocking until available) and remembered.
+func (h *Hasher) getTree() *tree {
+	if h.bmt == nil {
+		h.bmt = h.pool.reserve()
+	}
+	return h.bmt
+}
+
+// toggle is an atomic bool toggle implementing a concurrent reusable 2-state object.
+// The state counter is atomically incremented and its lowest bit is the boolean.
+// It returns true if the toggler just put it in the active/waiting state.
+// The lowest bit is tested with a bit mask rather than %2 so that the result
+// stays correct after the int32 counter wraps around to negative values
+// (in Go, the remainder of a negative odd number is -1, not 1).
+func (n *node) toggle() bool {
+	return atomic.AddInt32(&n.state, 1)&1 == 1
+}
+
+// hashstr returns the lowercase hex encoding of at most the first 4 bytes of b.
+func hashstr(b []byte) string {
+	prefix := b
+	if len(prefix) > 4 {
+		prefix = prefix[:4]
+	}
+	return fmt.Sprintf("%x", prefix)
+}
+
+// calculateDepthFor calculates the depth (number of levels) in the BMT tree
+// for n segments: 1 for up to 2 segments, plus one for every doubling of 2
+// needed to reach n.
+func calculateDepthFor(n int) (d int) {
+	d = 1
+	for width := 2; width < n; width *= 2 {
+		d++
+	}
+	return d
+}
diff --git a/swarm/bmt/bmt_r.go b/swarm/bmt/bmt_r.go
new file mode 100644
index 000000000..c61d2dc73
--- /dev/null
+++ b/swarm/bmt/bmt_r.go
@@ -0,0 +1,85 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+// Package bmt is a simple nonconcurrent reference implementation for hashsize segment based
+// Binary Merkle tree hash on arbitrary but fixed maximum chunksize
+//
+// This implementation does not take advantage of any parallelism and uses
+// far more memory than necessary, but it is easy to see that it is correct.
+// It can be used for generating test cases for optimized implementations.
+// There is extra check on reference hasher correctness in bmt_test.go
+// * TestRefHasher
+// * testBMTHasherCorrectness function
+package bmt
+
+import (
+ "hash"
+)
+
+// RefHasher is the non-optimized easy-to-read reference implementation of BMT.
+// It owns a single base hasher instance that is reset and reused for every
+// node hash.
+type RefHasher struct {
+ maxDataLength int // c * hashSize, where c = 2 ^ ceil(log2(count)), where count = ceil(length / hashSize)
+ sectionLength int // 2 * hashSize
+ hasher hash.Hash // base hash func (Keccak256 SHA3)
+}
+
+// NewRefHasher returns a new RefHasher for chunks of up to count segments.
+// The segment count is rounded up to the next power of two (at least 2) to
+// obtain the padded data length.
+func NewRefHasher(hasher BaseHasherFunc, count int) *RefHasher {
+	base := hasher()
+	size := base.Size()
+	segments := 2
+	for segments < count {
+		segments *= 2
+	}
+	rh := new(RefHasher)
+	rh.sectionLength = 2 * size
+	rh.maxDataLength = segments * size
+	rh.hasher = base
+	return rh
+}
+
+// Hash returns the BMT hash of the byte slice
+// implements the SwarmHash interface
+// Data shorter than maxDataLength is zero padded, data beyond it is ignored.
+func (rh *RefHasher) Hash(data []byte) []byte {
+	// copy stops at the shorter of the two slices, which both truncates
+	// oversized input and leaves the zero padding in place for short input
+	padded := make([]byte, rh.maxDataLength)
+	copy(padded, data)
+	return rh.hash(padded, rh.maxDataLength)
+}
+
+// hash computes the BMT root of data, whose length is segmentSize * 2^k.
+// If data is exactly one section (2 * segmentSize) its base hash is returned;
+// otherwise hash calls itself on the left and the right half of the slice
+// and returns the base hash of the two concatenated results.
+func (rh *RefHasher) hash(data []byte, length int) []byte {
+	// by default the section is the two data segments themselves
+	section := data
+	if length != rh.sectionLength {
+		// the section is made of the hashes of the left and right BMT subtrees
+		half := length / 2
+		left := rh.hash(data[:half], half)
+		right := rh.hash(data[half:], half)
+		section = append(left, right...)
+	}
+	rh.hasher.Reset()
+	rh.hasher.Write(section)
+	return rh.hasher.Sum(nil)
+}
diff --git a/swarm/bmt/bmt_test.go b/swarm/bmt/bmt_test.go
new file mode 100644
index 000000000..e074d90e7
--- /dev/null
+++ b/swarm/bmt/bmt_test.go
@@ -0,0 +1,390 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package bmt
+
+import (
+ "bytes"
+ crand "crypto/rand"
+ "encoding/binary"
+ "fmt"
+ "io"
+ "math/rand"
+ "sync"
+ "sync/atomic"
+ "testing"
+ "time"
+
+ "github.com/ethereum/go-ethereum/crypto/sha3"
+)
+
+// BufferSize is the actual data length generated for the tests
+// (could be longer than max datalength of the BMT)
+const BufferSize = 4128
+
+// sha3hash returns the Keccak256 hash of the concatenation of the given byte slices.
+func sha3hash(data ...[]byte) []byte {
+	h := sha3.NewKeccak256()
+	for i := range data {
+		h.Write(data[i])
+	}
+	return h.Sum(nil)
+}
+
+// TestRefHasher tests that the RefHasher computes the expected BMT hash for
+// segment counts 1 to 8 and, for each segment count, all data lengths
+// between 1 and segmentCount*32 bytes (i.e. up to 256 bytes)
+func TestRefHasher(t *testing.T) {
+
+ // the test struct is used to specify the expected BMT hash for
+ // segment counts between from and to and lengths from 1 to datalength
+ type test struct {
+ from int
+ to int
+ expected func([]byte) []byte
+ }
+
+ var tests []*test
+ // for segment counts 1 and 2 (data zero padded to 64 bytes) the hash should be:
+ //
+ // sha3hash(data)
+ //
+ tests = append(tests, &test{
+ from: 1,
+ to: 2,
+ expected: func(d []byte) []byte {
+ data := make([]byte, 64)
+ copy(data, d)
+ return sha3hash(data)
+ },
+ })
+
+ // for segment counts 3 and 4 (data zero padded to 128 bytes) the hash should be:
+ //
+ // sha3hash(
+ // sha3hash(data[:64])
+ // sha3hash(data[64:])
+ // )
+ //
+ tests = append(tests, &test{
+ from: 3,
+ to: 4,
+ expected: func(d []byte) []byte {
+ data := make([]byte, 128)
+ copy(data, d)
+ return sha3hash(sha3hash(data[:64]), sha3hash(data[64:]))
+ },
+ })
+
+ // for segment counts 5 to 8 (data zero padded to 256 bytes) the hash should be:
+ //
+ // sha3hash(
+ // sha3hash(
+ // sha3hash(data[:64])
+ // sha3hash(data[64:128])
+ // )
+ // sha3hash(
+ // sha3hash(data[128:192])
+ // sha3hash(data[192:])
+ // )
+ // )
+ //
+ tests = append(tests, &test{
+ from: 5,
+ to: 8,
+ expected: func(d []byte) []byte {
+ data := make([]byte, 256)
+ copy(data, d)
+ return sha3hash(sha3hash(sha3hash(data[:64]), sha3hash(data[64:128])), sha3hash(sha3hash(data[128:192]), sha3hash(data[192:])))
+ },
+ })
+
+ // run the tests: one subtest per (segment count, data length) combination
+ // on freshly generated random data
+ for _, x := range tests {
+ for segmentCount := x.from; segmentCount <= x.to; segmentCount++ {
+ for length := 1; length <= segmentCount*32; length++ {
+ t.Run(fmt.Sprintf("%d_segments_%d_bytes", segmentCount, length), func(t *testing.T) {
+ data := make([]byte, length)
+ if _, err := io.ReadFull(crand.Reader, data); err != nil && err != io.EOF {
+ t.Fatal(err)
+ }
+ expected := x.expected(data)
+ actual := NewRefHasher(sha3.NewKeccak256, segmentCount).Hash(data)
+ if !bytes.Equal(actual, expected) {
+ t.Fatalf("expected %x, got %x", expected, actual)
+ }
+ })
+ }
+ }
+ }
+}
+
+// TestHasherCorrectness checks the BMT hasher against the reference hasher
+// for every segment count and data length covered by testHasher.
+func TestHasherCorrectness(t *testing.T) {
+	if err := testHasher(testBaseHasher); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// testHasher runs f for a fixed list of segment counts and, for each count,
+// for every data length from 1 to count * hashsize over the same random
+// buffer. It returns the first error reported by f.
+func testHasher(f func(BaseHasherFunc, []byte, int, int) error) error {
+	data := newData(BufferSize)
+	hasher := sha3.NewKeccak256
+	size := hasher().Size()
+
+	for _, count := range []int{1, 2, 3, 4, 5, 8, 16, 32, 64, 128} {
+		for n, max := 1, count*size; n <= max; n++ {
+			if err := f(hasher, data, n, count); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+// TestHasherReuse tests that the BMT hasher can be synchronously reused
+// with poolsizes 1 and PoolSize
+func TestHasherReuse(t *testing.T) {
+	for _, poolsize := range []int{1, PoolSize} {
+		poolsize := poolsize
+		t.Run(fmt.Sprintf("poolsize_%d", poolsize), func(t *testing.T) {
+			testHasherReuse(poolsize, t)
+		})
+	}
+}
+
+// testHasherReuse hashes 100 random chunks of random length one after the
+// other with a single Hasher on a pool of the given size and checks every
+// result against the reference implementation.
+func testHasherReuse(poolsize int, t *testing.T) {
+	hasher := sha3.NewKeccak256
+	pool := NewTreePool(hasher, SegmentCount, poolsize)
+	defer pool.Drain(0)
+	bmt := New(pool)
+
+	for round := 0; round < 100; round++ {
+		data := newData(BufferSize)
+		n := rand.Intn(bmt.DataLength())
+		if err := testHasherCorrectness(bmt, hasher, data, n, SegmentCount); err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
+// TestBMTHasherConcurrentUse tests if the pool can be cleanly reused even in concurrent use:
+// 100 goroutines, each with its own Hasher on a shared pool, hash a random
+// chunk and report the comparison with the reference implementation.
+// The test fails if any result is wrong or if no result arrives within
+// 5 seconds of the previous one.
+func TestBMTHasherConcurrentUse(t *testing.T) {
+	hasher := sha3.NewKeccak256
+	pool := NewTreePool(hasher, SegmentCount, PoolSize)
+	defer pool.Drain(0)
+	cycles := 100
+	// buffered with one slot per worker so that every worker can deliver its
+	// result and exit even after the test has failed and stopped receiving
+	errc := make(chan error, cycles)
+
+	for i := 0; i < cycles; i++ {
+		go func() {
+			bmt := New(pool)
+			data := newData(BufferSize)
+			n := rand.Intn(bmt.DataLength())
+			errc <- testHasherCorrectness(bmt, hasher, data, n, 128)
+		}()
+	}
+	// collect exactly one result per worker
+	for i := 0; i < cycles; i++ {
+		select {
+		case <-time.After(5 * time.Second):
+			t.Fatal("timed out")
+		case err := <-errc:
+			if err != nil {
+				t.Fatal(err)
+			}
+		}
+	}
+}
+
+// testBaseHasher is a helper that creates a tree pool of capacity 1 for the
+// given segment count and checks a fresh Hasher on it against the reference
+// implementation for the first n bytes of d.
+func testBaseHasher(hasher BaseHasherFunc, d []byte, n, count int) error {
+	pool := NewTreePool(hasher, count, 1)
+	defer pool.Drain(0)
+	return testHasherCorrectness(New(pool), hasher, d, n, count)
+}
+
+// testHasherCorrectness is a helper that compares the reference and the
+// optimised implementation on the first n bytes of d (n is capped at len(d)).
+// The span is the big endian uint64 encoding of n; the expected hash is
+// sha3(span + reference BMT hash).
+func testHasherCorrectness(bmt *Hasher, hasher BaseHasherFunc, d []byte, n, count int) (err error) {
+	if n > len(d) {
+		n = len(d)
+	}
+	data := d[:n]
+	span := make([]byte, 8)
+	binary.BigEndian.PutUint64(span, uint64(n))
+
+	exp := sha3hash(span, NewRefHasher(hasher, count).Hash(data))
+	if got := Hash(bmt, span, data); !bytes.Equal(got, exp) {
+		return fmt.Errorf("wrong hash: expected %x, got %x", exp, got)
+	}
+	return nil
+}
+
+// Benchmark entry points. Every group runs one helper for data sizes
+// from 4096 bytes down to 128 bytes.
+
+// plain sha3 over the whole data
+func BenchmarkSHA3_4k(t *testing.B) { benchmarkSHA3(4096, t) }
+func BenchmarkSHA3_2k(t *testing.B) { benchmarkSHA3(4096/2, t) }
+func BenchmarkSHA3_1k(t *testing.B) { benchmarkSHA3(4096/4, t) }
+func BenchmarkSHA3_512b(t *testing.B) { benchmarkSHA3(4096/8, t) }
+func BenchmarkSHA3_256b(t *testing.B) { benchmarkSHA3(4096/16, t) }
+func BenchmarkSHA3_128b(t *testing.B) { benchmarkSHA3(4096/32, t) }
+
+// baseline of independent parallel base hashings
+func BenchmarkBMTBaseline_4k(t *testing.B) { benchmarkBMTBaseline(4096, t) }
+func BenchmarkBMTBaseline_2k(t *testing.B) { benchmarkBMTBaseline(4096/2, t) }
+func BenchmarkBMTBaseline_1k(t *testing.B) { benchmarkBMTBaseline(4096/4, t) }
+func BenchmarkBMTBaseline_512b(t *testing.B) { benchmarkBMTBaseline(4096/8, t) }
+func BenchmarkBMTBaseline_256b(t *testing.B) { benchmarkBMTBaseline(4096/16, t) }
+func BenchmarkBMTBaseline_128b(t *testing.B) { benchmarkBMTBaseline(4096/32, t) }
+
+// reference (RefHasher) implementation
+func BenchmarkRefHasher_4k(t *testing.B) { benchmarkRefHasher(4096, t) }
+func BenchmarkRefHasher_2k(t *testing.B) { benchmarkRefHasher(4096/2, t) }
+func BenchmarkRefHasher_1k(t *testing.B) { benchmarkRefHasher(4096/4, t) }
+func BenchmarkRefHasher_512b(t *testing.B) { benchmarkRefHasher(4096/8, t) }
+func BenchmarkRefHasher_256b(t *testing.B) { benchmarkRefHasher(4096/16, t) }
+func BenchmarkRefHasher_128b(t *testing.B) { benchmarkRefHasher(4096/32, t) }
+
+// BMT Hasher used sequentially
+func BenchmarkBMTHasher_4k(t *testing.B) { benchmarkBMTHasher(4096, t) }
+func BenchmarkBMTHasher_2k(t *testing.B) { benchmarkBMTHasher(4096/2, t) }
+func BenchmarkBMTHasher_1k(t *testing.B) { benchmarkBMTHasher(4096/4, t) }
+func BenchmarkBMTHasher_512b(t *testing.B) { benchmarkBMTHasher(4096/8, t) }
+func BenchmarkBMTHasher_256b(t *testing.B) { benchmarkBMTHasher(4096/16, t) }
+func BenchmarkBMTHasher_128b(t *testing.B) { benchmarkBMTHasher(4096/32, t) }
+
+// concurrent BMT hashing on a pool of capacity 1
+func BenchmarkBMTHasherNoPool_4k(t *testing.B) { benchmarkBMTHasherPool(1, 4096, t) }
+func BenchmarkBMTHasherNoPool_2k(t *testing.B) { benchmarkBMTHasherPool(1, 4096/2, t) }
+func BenchmarkBMTHasherNoPool_1k(t *testing.B) { benchmarkBMTHasherPool(1, 4096/4, t) }
+func BenchmarkBMTHasherNoPool_512b(t *testing.B) { benchmarkBMTHasherPool(1, 4096/8, t) }
+func BenchmarkBMTHasherNoPool_256b(t *testing.B) { benchmarkBMTHasherPool(1, 4096/16, t) }
+func BenchmarkBMTHasherNoPool_128b(t *testing.B) { benchmarkBMTHasherPool(1, 4096/32, t) }
+
+// concurrent BMT hashing on a pool of capacity PoolSize
+func BenchmarkBMTHasherPool_4k(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096, t) }
+func BenchmarkBMTHasherPool_2k(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096/2, t) }
+func BenchmarkBMTHasherPool_1k(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096/4, t) }
+func BenchmarkBMTHasherPool_512b(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096/8, t) }
+func BenchmarkBMTHasherPool_256b(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096/16, t) }
+func BenchmarkBMTHasherPool_128b(t *testing.B) { benchmarkBMTHasherPool(PoolSize, 4096/32, t) }
+
+// benchmarkSHA3 benchmarks a simple sha3 hash over n bytes of random data,
+// reusing a single hasher instance
+func benchmarkSHA3(n int, t *testing.B) {
+	data := newData(n)
+	h := sha3.NewKeccak256()
+
+	t.ReportAllocs()
+	t.ResetTimer()
+	for round := 0; round < t.N; round++ {
+		h.Reset()
+		h.Write(data)
+		h.Sum(nil)
+	}
+}
+
+// benchmarkBMTBaseline benchmarks the minimum hashing time for a balanced
+// (for simplicity) BMT: PoolSize goroutines, each reusing its own base hasher,
+// share a counter and keep hashing a hashsize-long random buffer until the
+// counter reaches the number of segments in n bytes.
+// the premise is that this is the minimum computation needed for a BMT
+// therefore this serves as a theoretical optimum for concurrent implementations
+func benchmarkBMTBaseline(n int, t *testing.B) {
+	hasher := sha3.NewKeccak256
+	hashSize := hasher().Size()
+	data := newData(hashSize)
+
+	t.ReportAllocs()
+	t.ResetTimer()
+	for round := 0; round < t.N; round++ {
+		count := int32((n-1)/hashSize + 1)
+		var (
+			wg   sync.WaitGroup
+			next int32 // shared work counter, advanced atomically by the workers
+		)
+		wg.Add(PoolSize)
+		for w := 0; w < PoolSize; w++ {
+			go func() {
+				defer wg.Done()
+				h := hasher()
+				for atomic.AddInt32(&next, 1) < count {
+					h.Reset()
+					h.Write(data)
+					h.Sum(nil)
+				}
+			}()
+		}
+		wg.Wait()
+	}
+}
+
+// benchmarkBMTHasher benchmarks the BMT Hasher on n bytes of random data,
+// creating a new Hasher on a shared pool for every iteration (no span)
+func benchmarkBMTHasher(n int, t *testing.B) {
+	data := newData(n)
+	pool := NewTreePool(sha3.NewKeccak256, SegmentCount, PoolSize)
+
+	t.ReportAllocs()
+	t.ResetTimer()
+	for round := 0; round < t.N; round++ {
+		Hash(New(pool), nil, data)
+	}
+}
+
+// benchmarkBMTHasherPool benchmarks 100 concurrent bmt hashes of n bytes
+// on a pool with the given capacity
+func benchmarkBMTHasherPool(poolsize, n int, t *testing.B) {
+	data := newData(n)
+	pool := NewTreePool(sha3.NewKeccak256, SegmentCount, poolsize)
+	const cycles = 100
+
+	t.ReportAllocs()
+	t.ResetTimer()
+	var wg sync.WaitGroup
+	for round := 0; round < t.N; round++ {
+		wg.Add(cycles)
+		for c := 0; c < cycles; c++ {
+			go func() {
+				defer wg.Done()
+				Hash(New(pool), nil, data)
+			}()
+		}
+		wg.Wait()
+	}
+}
+
+// benchmarkRefHasher benchmarks the reference hasher with 128 segments
+// on n bytes of random data
+func benchmarkRefHasher(n int, t *testing.B) {
+	data := newData(n)
+	rbmt := NewRefHasher(sha3.NewKeccak256, 128)
+
+	t.ReportAllocs()
+	t.ResetTimer()
+	for round := 0; round < t.N; round++ {
+		rbmt.Hash(data)
+	}
+}
+
+// newData returns bufferSize bytes read from the cryptographic random source.
+// It panics if the buffer cannot be filled.
+func newData(bufferSize int) []byte {
+	data := make([]byte, bufferSize)
+	if _, err := io.ReadFull(crand.Reader, data); err != nil {
+		panic(err.Error())
+	}
+	return data
+}