4 files changed, 405 insertions, 86 deletions
diff --git a/common/bitutil/bitutil.go b/common/bitutil/bitutil.go
new file mode 100644
index 000000000..117616543
--- /dev/null
+++ b/common/bitutil/bitutil.go
@@ -0,0 +1,188 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Adapted from: https://golang.org/src/crypto/cipher/xor.go
+
+// Package bitutil implements fast bitwise operations.
+package bitutil
+
+import (
+	"runtime"
+	"unsafe"
+)
+
+const wordSize = int(unsafe.Sizeof(uintptr(0))) // size of a uintptr in bytes
+const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x"
+
+// XORBytes xors the first min(len(a), len(b)) bytes of a and b into dst, which
+// is assumed to have enough space. Returns the number of bytes xor'd.
+func XORBytes(dst, a, b []byte) int {
+	if supportsUnaligned {
+		return fastXORBytes(dst, a, b) // word-wise bulk path
+	}
+	return safeXORBytes(dst, a, b)
+}
+
+// fastXORBytes xors whole machine words in bulk and finishes the tail byte by
+// byte. It only works on architectures that support unaligned read/writes.
+func fastXORBytes(dst, a, b []byte) int {
+	n := len(a)
+	if len(b) < n { // n = min(len(a), len(b))
+		n = len(b)
+	}
+	w := n / wordSize // number of whole words in n bytes
+	if w > 0 {
+		dw := *(*[]uintptr)(unsafe.Pointer(&dst)) // reinterpret dst's slice header as []uintptr
+		aw := *(*[]uintptr)(unsafe.Pointer(&a))
+		bw := *(*[]uintptr)(unsafe.Pointer(&b))
+		for i := 0; i < w; i++ {
+			dw[i] = aw[i] ^ bw[i]
+		}
+	}
+	for i := (n - n%wordSize); i < n; i++ { // remaining bytes past the last whole word
+		dst[i] = a[i] ^ b[i]
+	}
+	return n
+}
+
+// safeXORBytes xors one byte at a time. It works on all architectures,
+// regardless of whether they support unaligned read/writes.
+func safeXORBytes(dst, a, b []byte) int {
+	n := len(a)
+	if len(b) < n { // n = min(len(a), len(b))
+		n = len(b)
+	}
+	for i := 0; i < n; i++ {
+		dst[i] = a[i] ^ b[i]
+	}
+	return n
+}
+
+// ANDBytes ands the first min(len(a), len(b)) bytes of a and b into dst, which
+// is assumed to have enough space. Returns the number of bytes and'd.
+func ANDBytes(dst, a, b []byte) int {
+	if supportsUnaligned {
+		return fastANDBytes(dst, a, b) // word-wise bulk path
+	}
+	return safeANDBytes(dst, a, b)
+}
+
+// fastANDBytes ands whole machine words in bulk and finishes the tail byte by
+// byte. It only works on architectures that support unaligned read/writes.
+func fastANDBytes(dst, a, b []byte) int {
+	n := len(a)
+	if len(b) < n { // n = min(len(a), len(b))
+		n = len(b)
+	}
+	w := n / wordSize // number of whole words in n bytes
+	if w > 0 {
+		dw := *(*[]uintptr)(unsafe.Pointer(&dst)) // reinterpret dst's slice header as []uintptr
+		aw := *(*[]uintptr)(unsafe.Pointer(&a))
+		bw := *(*[]uintptr)(unsafe.Pointer(&b))
+		for i := 0; i < w; i++ {
+			dw[i] = aw[i] & bw[i]
+		}
+	}
+	for i := (n - n%wordSize); i < n; i++ { // remaining bytes past the last whole word
+		dst[i] = a[i] & b[i]
+	}
+	return n
+}
+
+// safeANDBytes ands one byte at a time. It works on all architectures,
+// regardless of whether they support unaligned read/writes.
+func safeANDBytes(dst, a, b []byte) int {
+	n := len(a)
+	if len(b) < n { // n = min(len(a), len(b))
+		n = len(b)
+	}
+	for i := 0; i < n; i++ {
+		dst[i] = a[i] & b[i]
+	}
+	return n
+}
+
+// ORBytes ors the first min(len(a), len(b)) bytes of a and b into dst, which
+// is assumed to have enough space. Returns the number of bytes or'd.
+func ORBytes(dst, a, b []byte) int {
+	if supportsUnaligned {
+		return fastORBytes(dst, a, b) // word-wise bulk path
+	}
+	return safeORBytes(dst, a, b)
+}
+
+// fastORBytes ors whole machine words in bulk and finishes the tail byte by
+// byte. It only works on architectures that support unaligned read/writes.
+func fastORBytes(dst, a, b []byte) int {
+	n := len(a)
+	if len(b) < n { // n = min(len(a), len(b))
+		n = len(b)
+	}
+	w := n / wordSize // number of whole words in n bytes
+	if w > 0 {
+		dw := *(*[]uintptr)(unsafe.Pointer(&dst)) // reinterpret dst's slice header as []uintptr
+		aw := *(*[]uintptr)(unsafe.Pointer(&a))
+		bw := *(*[]uintptr)(unsafe.Pointer(&b))
+		for i := 0; i < w; i++ {
+			dw[i] = aw[i] | bw[i]
+		}
+	}
+	for i := (n - n%wordSize); i < n; i++ { // remaining bytes past the last whole word
+		dst[i] = a[i] | b[i]
+	}
+	return n
+}
+
+// safeORBytes ors one byte at a time. It works on all architectures,
+// regardless of whether they support unaligned read/writes.
+func safeORBytes(dst, a, b []byte) int {
+	n := len(a)
+	if len(b) < n { // n = min(len(a), len(b))
+		n = len(b)
+	}
+	for i := 0; i < n; i++ {
+		dst[i] = a[i] | b[i]
+	}
+	return n
+}
+
+// TestBytes reports whether any bit is set in p; an empty slice yields false.
+func TestBytes(p []byte) bool {
+	if supportsUnaligned {
+		return fastTestBytes(p) // word-wise bulk path
+	}
+	return safeTestBytes(p)
+}
+
+// fastTestBytes tests for set bits a machine word at a time, then byte-wise for
+// the tail. It only works on architectures that support unaligned read/writes.
+func fastTestBytes(p []byte) bool {
+	n := len(p)
+	w := n / wordSize // number of whole words in n bytes
+	if w > 0 {
+		pw := *(*[]uintptr)(unsafe.Pointer(&p)) // reinterpret p's slice header as []uintptr
+		for i := 0; i < w; i++ {
+			if pw[i] != 0 {
+				return true
+			}
+		}
+	}
+	for i := (n - n%wordSize); i < n; i++ { // remaining bytes past the last whole word
+		if p[i] != 0 {
+			return true
+		}
+	}
+	return false
+}
+
+// safeTestBytes tests for set bits one byte at a time. It works on all
+// architectures, regardless of whether they support unaligned read/writes.
+func safeTestBytes(p []byte) bool {
+	for i := 0; i < len(p); i++ {
+		if p[i] != 0 {
+			return true
+		}
+	}
+	return false
+}
diff --git a/common/bitutil/bitutil_test.go b/common/bitutil/bitutil_test.go
new file mode 100644
index 000000000..93647031e
--- /dev/null
+++ b/common/bitutil/bitutil_test.go
@@ -0,0 +1,215 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Adapted from: https://golang.org/src/crypto/cipher/xor_test.go
+
+package bitutil
+
+import (
+	"bytes"
+	"testing"
+)
+
+// Tests that XORBytes matches safeXORBytes for 0/1-byte offsets of p, q and dst.
+func TestXOR(t *testing.T) {
+	for alignP := 0; alignP < 2; alignP++ {
+		for alignQ := 0; alignQ < 2; alignQ++ {
+			for alignD := 0; alignD < 2; alignD++ {
+				p := make([]byte, 1023)[alignP:] // offset 1 also trims the length to 1022
+				q := make([]byte, 1023)[alignQ:]
+
+				for i := 0; i < len(p); i++ {
+					p[i] = byte(i)
+				}
+				for i := 0; i < len(q); i++ {
+					q[i] = byte(len(q) - i)
+				}
+				d1 := make([]byte, 1023+alignD)[alignD:] // always 1023 bytes, optionally offset by one
+				d2 := make([]byte, 1023+alignD)[alignD:]
+
+				XORBytes(d1, p, q) // same as the safe path when supportsUnaligned is false
+				safeXORBytes(d2, p, q)
+				if !bytes.Equal(d1, d2) {
+					t.Error("not equal", d1, d2)
+				}
+			}
+		}
+	}
+}
+
+// Tests that ANDBytes matches safeANDBytes for 0/1-byte offsets of p, q and dst.
+func TestAND(t *testing.T) {
+	for alignP := 0; alignP < 2; alignP++ {
+		for alignQ := 0; alignQ < 2; alignQ++ {
+			for alignD := 0; alignD < 2; alignD++ {
+				p := make([]byte, 1023)[alignP:] // offset 1 also trims the length to 1022
+				q := make([]byte, 1023)[alignQ:]
+
+				for i := 0; i < len(p); i++ {
+					p[i] = byte(i)
+				}
+				for i := 0; i < len(q); i++ {
+					q[i] = byte(len(q) - i)
+				}
+				d1 := make([]byte, 1023+alignD)[alignD:] // always 1023 bytes, optionally offset by one
+				d2 := make([]byte, 1023+alignD)[alignD:]
+
+				ANDBytes(d1, p, q) // same as the safe path when supportsUnaligned is false
+				safeANDBytes(d2, p, q)
+				if !bytes.Equal(d1, d2) {
+					t.Error("not equal")
+				}
+			}
+		}
+	}
+}
+
+// Tests that ORBytes matches safeORBytes for 0/1-byte offsets of p, q and dst.
+func TestOR(t *testing.T) {
+	for alignP := 0; alignP < 2; alignP++ {
+		for alignQ := 0; alignQ < 2; alignQ++ {
+			for alignD := 0; alignD < 2; alignD++ {
+				p := make([]byte, 1023)[alignP:] // offset 1 also trims the length to 1022
+				q := make([]byte, 1023)[alignQ:]
+
+				for i := 0; i < len(p); i++ {
+					p[i] = byte(i)
+				}
+				for i := 0; i < len(q); i++ {
+					q[i] = byte(len(q) - i)
+				}
+				d1 := make([]byte, 1023+alignD)[alignD:] // always 1023 bytes, optionally offset by one
+				d2 := make([]byte, 1023+alignD)[alignD:]
+
+				ORBytes(d1, p, q) // same as the safe path when supportsUnaligned is false
+				safeORBytes(d2, p, q)
+				if !bytes.Equal(d1, d2) {
+					t.Error("not equal")
+				}
+			}
+		}
+	}
+}
+
+// Tests that TestBytes agrees with safeTestBytes for a bit set in the bulk or tail.
+func TestTest(t *testing.T) { // NOTE(review): the all-zero (false) case is never exercised
+	for align := 0; align < 2; align++ {
+		// Test for bits set in the bulk part
+		p := make([]byte, 1023)[align:]
+		p[100] = 1
+
+		if TestBytes(p) != safeTestBytes(p) {
+			t.Error("not equal")
+		}
+		// Test for bits set in the tail part
+		q := make([]byte, 1023)[align:]
+		q[len(q)-1] = 1 // last byte of the slice
+
+		if TestBytes(q) != safeTestBytes(q) {
+			t.Error("not equal")
+		}
+	}
+}
+
+// Benchmarks XORBytes (word-wise where supported) on 1KB, 2KB and 4KB buffers.
+func BenchmarkFastXOR1KB(b *testing.B) { benchmarkFastXOR(b, 1024) }
+func BenchmarkFastXOR2KB(b *testing.B) { benchmarkFastXOR(b, 2048) }
+func BenchmarkFastXOR4KB(b *testing.B) { benchmarkFastXOR(b, 4096) }
+
+func benchmarkFastXOR(b *testing.B, size int) {
+	p, q := make([]byte, size), make([]byte, size)
+
+	for i := 0; i < b.N; i++ {
+		XORBytes(p, p, q) // dst aliases p; q stays all zeros
+	}
+}
+
+// Benchmarks the byte-wise safeXORBytes baseline on 1KB, 2KB and 4KB buffers.
+func BenchmarkBaseXOR1KB(b *testing.B) { benchmarkBaseXOR(b, 1024) }
+func BenchmarkBaseXOR2KB(b *testing.B) { benchmarkBaseXOR(b, 2048) }
+func BenchmarkBaseXOR4KB(b *testing.B) { benchmarkBaseXOR(b, 4096) }
+
+func benchmarkBaseXOR(b *testing.B, size int) {
+	p, q := make([]byte, size), make([]byte, size)
+
+	for i := 0; i < b.N; i++ {
+		safeXORBytes(p, p, q) // dst aliases p; q stays all zeros
+	}
+}
+
+// Benchmarks ANDBytes (word-wise where supported) on 1KB, 2KB and 4KB buffers.
+func BenchmarkFastAND1KB(b *testing.B) { benchmarkFastAND(b, 1024) }
+func BenchmarkFastAND2KB(b *testing.B) { benchmarkFastAND(b, 2048) }
+func BenchmarkFastAND4KB(b *testing.B) { benchmarkFastAND(b, 4096) }
+
+func benchmarkFastAND(b *testing.B, size int) {
+	p, q := make([]byte, size), make([]byte, size)
+
+	for i := 0; i < b.N; i++ {
+		ANDBytes(p, p, q) // dst aliases p; q stays all zeros
+	}
+}
+
+// Benchmarks the byte-wise safeANDBytes baseline on 1KB, 2KB and 4KB buffers.
+func BenchmarkBaseAND1KB(b *testing.B) { benchmarkBaseAND(b, 1024) }
+func BenchmarkBaseAND2KB(b *testing.B) { benchmarkBaseAND(b, 2048) }
+func BenchmarkBaseAND4KB(b *testing.B) { benchmarkBaseAND(b, 4096) }
+
+func benchmarkBaseAND(b *testing.B, size int) {
+	p, q := make([]byte, size), make([]byte, size)
+
+	for i := 0; i < b.N; i++ {
+		safeANDBytes(p, p, q) // dst aliases p; q stays all zeros
+	}
+}
+
+// Benchmarks ORBytes (word-wise where supported) on 1KB, 2KB and 4KB buffers.
+func BenchmarkFastOR1KB(b *testing.B) { benchmarkFastOR(b, 1024) }
+func BenchmarkFastOR2KB(b *testing.B) { benchmarkFastOR(b, 2048) }
+func BenchmarkFastOR4KB(b *testing.B) { benchmarkFastOR(b, 4096) }
+
+func benchmarkFastOR(b *testing.B, size int) {
+	p, q := make([]byte, size), make([]byte, size)
+
+	for i := 0; i < b.N; i++ {
+		ORBytes(p, p, q) // dst aliases p; q stays all zeros
+	}
+}
+
+// Benchmarks the byte-wise safeORBytes baseline on 1KB, 2KB and 4KB buffers.
+func BenchmarkBaseOR1KB(b *testing.B) { benchmarkBaseOR(b, 1024) }
+func BenchmarkBaseOR2KB(b *testing.B) { benchmarkBaseOR(b, 2048) }
+func BenchmarkBaseOR4KB(b *testing.B) { benchmarkBaseOR(b, 4096) }
+
+func benchmarkBaseOR(b *testing.B, size int) {
+	p, q := make([]byte, size), make([]byte, size)
+
+	for i := 0; i < b.N; i++ {
+		safeORBytes(p, p, q) // dst aliases p; q stays all zeros
+	}
+}
+
+// Benchmarks TestBytes (word-wise where supported) on 1KB, 2KB and 4KB buffers.
+func BenchmarkFastTest1KB(b *testing.B) { benchmarkFastTest(b, 1024) }
+func BenchmarkFastTest2KB(b *testing.B) { benchmarkFastTest(b, 2048) }
+func BenchmarkFastTest4KB(b *testing.B) { benchmarkFastTest(b, 4096) }
+
+func benchmarkFastTest(b *testing.B, size int) {
+	p := make([]byte, size) // all zeros, so every call scans the whole buffer
+	for i := 0; i < b.N; i++ {
+		TestBytes(p)
+	}
+}
+
+// Benchmarks the byte-wise safeTestBytes baseline on 1KB, 2KB and 4KB buffers.
+func BenchmarkBaseTest1KB(b *testing.B) { benchmarkBaseTest(b, 1024) }
+func BenchmarkBaseTest2KB(b *testing.B) { benchmarkBaseTest(b, 2048) }
+func BenchmarkBaseTest4KB(b *testing.B) { benchmarkBaseTest(b, 4096) }
+
+func benchmarkBaseTest(b *testing.B, size int) {
+	p := make([]byte, size) // all zeros, so every call scans the whole buffer
+	for i := 0; i < b.N; i++ {
+		safeTestBytes(p)
+	}
+}
diff --git a/consensus/ethash/algorithm.go b/consensus/ethash/algorithm.go
index 7e8fbfc37..365fe5520 100644
--- a/consensus/ethash/algorithm.go
+++ b/consensus/ethash/algorithm.go
@@ -27,6 +27,7 @@ import (
 	"unsafe"
 
 	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/common/bitutil"
 	"github.com/ethereum/go-ethereum/crypto"
 	"github.com/ethereum/go-ethereum/crypto/sha3"
 	"github.com/ethereum/go-ethereum/log"
@@ -142,7 +143,7 @@ func generateCache(dest []uint32, epoch uint64, seed []byte) {
 				dstOff = j * hashBytes
 				xorOff = (binary.LittleEndian.Uint32(cache[dstOff:]) % uint32(rows)) * hashBytes
 			)
-			xorBytes(temp, cache[srcOff:srcOff+hashBytes], cache[xorOff:xorOff+hashBytes])
+			bitutil.XORBytes(temp, cache[srcOff:srcOff+hashBytes], cache[xorOff:xorOff+hashBytes])
 			keccak512(cache[dstOff:], temp)
 
 			atomic.AddUint32(&progress, 1)
diff --git a/consensus/ethash/xor.go b/consensus/ethash/xor.go
deleted file mode 100644
index 90e232746..000000000
--- a/consensus/ethash/xor.go
+++ /dev/null
@@ -1,85 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Source: https://golang.org/src/crypto/cipher/xor.go
-
-package ethash
-
-import (
-	"runtime"
-	"unsafe"
-)
-
-const wordSize = int(unsafe.Sizeof(uintptr(0)))
-const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x"
-
-// fastXORBytes xors in bulk. It only works on architectures that
-// support unaligned read/writes.
-func fastXORBytes(dst, a, b []byte) int {
-	n := len(a)
-	if len(b) < n {
-		n = len(b)
-	}
-
-	w := n / wordSize
-	if w > 0 {
-		dw := *(*[]uintptr)(unsafe.Pointer(&dst))
-		aw := *(*[]uintptr)(unsafe.Pointer(&a))
-		bw := *(*[]uintptr)(unsafe.Pointer(&b))
-		for i := 0; i < w; i++ {
-			dw[i] = aw[i] ^ bw[i]
-		}
-	}
-
-	for i := (n - n%wordSize); i < n; i++ {
-		dst[i] = a[i] ^ b[i]
-	}
-
-	return n
-}
-
-func safeXORBytes(dst, a, b []byte) int {
-	n := len(a)
-	if len(b) < n {
-		n = len(b)
-	}
-	for i := 0; i < n; i++ {
-		dst[i] = a[i] ^ b[i]
-	}
-	return n
-}
-
-// xorBytes xors the bytes in a and b. The destination is assumed to have enough
-// space. Returns the number of bytes xor'd.
-func xorBytes(dst, a, b []byte) int {
-	if supportsUnaligned {
-		return fastXORBytes(dst, a, b)
-	}
-	// TODO(hanwen): if (dst, a, b) have common alignment
-	// we could still try fastXORBytes. It is not clear
-	// how often this happens, and it's only worth it if
-	// the block encryption itself is hardware
-	// accelerated.
-	return safeXORBytes(dst, a, b)
-}
-
-// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.)
-// The arguments are assumed to be of equal length.
-func fastXORWords(dst, a, b []byte) {
-	dw := *(*[]uintptr)(unsafe.Pointer(&dst))
-	aw := *(*[]uintptr)(unsafe.Pointer(&a))
-	bw := *(*[]uintptr)(unsafe.Pointer(&b))
-	n := len(b) / wordSize
-	for i := 0; i < n; i++ {
-		dw[i] = aw[i] ^ bw[i]
-	}
-}
-
-func xorWords(dst, a, b []byte) {
-	if supportsUnaligned {
-		fastXORWords(dst, a, b)
-	} else {
-		safeXORBytes(dst, a, b)
-	}
-}