From 36a800a1d299836e6fef226db54390044829a00e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Fri, 5 May 2017 16:00:11 +0300 Subject: common/bitutil, consensus/ethash: reusable bitutil package --- common/bitutil/bitutil.go | 188 +++++++++++++++++++++++++++++++++++ common/bitutil/bitutil_test.go | 215 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 403 insertions(+) create mode 100644 common/bitutil/bitutil.go create mode 100644 common/bitutil/bitutil_test.go (limited to 'common') diff --git a/common/bitutil/bitutil.go b/common/bitutil/bitutil.go new file mode 100644 index 000000000..117616543 --- /dev/null +++ b/common/bitutil/bitutil.go @@ -0,0 +1,188 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Adapted from: https://golang.org/src/crypto/cipher/xor.go + +// Package bitutil implements fast bitwise operations. +package bitutil + +import ( + "runtime" + "unsafe" +) + +const wordSize = int(unsafe.Sizeof(uintptr(0))) +const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x" + +// XORBytes xors the bytes in a and b. The destination is assumed to have enough +// space. Returns the number of bytes xor'd. +func XORBytes(dst, a, b []byte) int { + if supportsUnaligned { + return fastXORBytes(dst, a, b) + } + return safeXORBytes(dst, a, b) +} + +// fastXORBytes xors in bulk. It only works on architectures that support +// unaligned read/writes. +func fastXORBytes(dst, a, b []byte) int { + n := len(a) + if len(b) < n { + n = len(b) + } + w := n / wordSize + if w > 0 { + dw := *(*[]uintptr)(unsafe.Pointer(&dst)) + aw := *(*[]uintptr)(unsafe.Pointer(&a)) + bw := *(*[]uintptr)(unsafe.Pointer(&b)) + for i := 0; i < w; i++ { + dw[i] = aw[i] ^ bw[i] + } + } + for i := (n - n%wordSize); i < n; i++ { + dst[i] = a[i] ^ b[i] + } + return n +} + +// safeXORBytes xors one by one. It works on all architectures, independent if +// it supports unaligned read/writes or not. +func safeXORBytes(dst, a, b []byte) int { + n := len(a) + if len(b) < n { + n = len(b) + } + for i := 0; i < n; i++ { + dst[i] = a[i] ^ b[i] + } + return n +} + +// ANDBytes ands the bytes in a and b. The destination is assumed to have enough +// space. Returns the number of bytes and'd. +func ANDBytes(dst, a, b []byte) int { + if supportsUnaligned { + return fastANDBytes(dst, a, b) + } + return safeANDBytes(dst, a, b) +} + +// fastANDBytes ands in bulk. It only works on architectures that support +// unaligned read/writes. +func fastANDBytes(dst, a, b []byte) int { + n := len(a) + if len(b) < n { + n = len(b) + } + w := n / wordSize + if w > 0 { + dw := *(*[]uintptr)(unsafe.Pointer(&dst)) + aw := *(*[]uintptr)(unsafe.Pointer(&a)) + bw := *(*[]uintptr)(unsafe.Pointer(&b)) + for i := 0; i < w; i++ { + dw[i] = aw[i] & bw[i] + } + } + for i := (n - n%wordSize); i < n; i++ { + dst[i] = a[i] & b[i] + } + return n +} + +// safeANDBytes ands one by one. It works on all architectures, independent if +// it supports unaligned read/writes or not. +func safeANDBytes(dst, a, b []byte) int { + n := len(a) + if len(b) < n { + n = len(b) + } + for i := 0; i < n; i++ { + dst[i] = a[i] & b[i] + } + return n +} + +// ORBytes ors the bytes in a and b. The destination is assumed to have enough +// space. Returns the number of bytes or'd. +func ORBytes(dst, a, b []byte) int { + if supportsUnaligned { + return fastORBytes(dst, a, b) + } + return safeORBytes(dst, a, b) +} + +// fastORBytes ors in bulk. It only works on architectures that support +// unaligned read/writes. +func fastORBytes(dst, a, b []byte) int { + n := len(a) + if len(b) < n { + n = len(b) + } + w := n / wordSize + if w > 0 { + dw := *(*[]uintptr)(unsafe.Pointer(&dst)) + aw := *(*[]uintptr)(unsafe.Pointer(&a)) + bw := *(*[]uintptr)(unsafe.Pointer(&b)) + for i := 0; i < w; i++ { + dw[i] = aw[i] | bw[i] + } + } + for i := (n - n%wordSize); i < n; i++ { + dst[i] = a[i] | b[i] + } + return n +} + +// safeORBytes ors one by one. It works on all architectures, independent if +// it supports unaligned read/writes or not. +func safeORBytes(dst, a, b []byte) int { + n := len(a) + if len(b) < n { + n = len(b) + } + for i := 0; i < n; i++ { + dst[i] = a[i] | b[i] + } + return n +} + +// TestBytes tests whether any bit is set in the input byte slice. +func TestBytes(p []byte) bool { + if supportsUnaligned { + return fastTestBytes(p) + } + return safeTestBytes(p) +} + +// fastTestBytes tests for set bits in bulk. It only works on architectures that +// support unaligned read/writes. +func fastTestBytes(p []byte) bool { + n := len(p) + w := n / wordSize + if w > 0 { + pw := *(*[]uintptr)(unsafe.Pointer(&p)) + for i := 0; i < w; i++ { + if pw[i] != 0 { + return true + } + } + } + for i := (n - n%wordSize); i < n; i++ { + if p[i] != 0 { + return true + } + } + return false +} + +// safeTestBytes tests for set bits one byte at a time. It works on all +// architectures, independent if it supports unaligned read/writes or not. +func safeTestBytes(p []byte) bool { + for i := 0; i < len(p); i++ { + if p[i] != 0 { + return true + } + } + return false +} diff --git a/common/bitutil/bitutil_test.go b/common/bitutil/bitutil_test.go new file mode 100644 index 000000000..93647031e --- /dev/null +++ b/common/bitutil/bitutil_test.go @@ -0,0 +1,215 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Adapted from: https://golang.org/src/crypto/cipher/xor_test.go + +package bitutil + +import ( + "bytes" + "testing" +) + +// Tests that bitwise XOR works for various alignments. +func TestXOR(t *testing.T) { + for alignP := 0; alignP < 2; alignP++ { + for alignQ := 0; alignQ < 2; alignQ++ { + for alignD := 0; alignD < 2; alignD++ { + p := make([]byte, 1023)[alignP:] + q := make([]byte, 1023)[alignQ:] + + for i := 0; i < len(p); i++ { + p[i] = byte(i) + } + for i := 0; i < len(q); i++ { + q[i] = byte(len(q) - i) + } + d1 := make([]byte, 1023+alignD)[alignD:] + d2 := make([]byte, 1023+alignD)[alignD:] + + XORBytes(d1, p, q) + safeXORBytes(d2, p, q) + if !bytes.Equal(d1, d2) { + t.Error("not equal", d1, d2) + } + } + } + } +} + +// Tests that bitwise AND works for various alignments. +func TestAND(t *testing.T) { + for alignP := 0; alignP < 2; alignP++ { + for alignQ := 0; alignQ < 2; alignQ++ { + for alignD := 0; alignD < 2; alignD++ { + p := make([]byte, 1023)[alignP:] + q := make([]byte, 1023)[alignQ:] + + for i := 0; i < len(p); i++ { + p[i] = byte(i) + } + for i := 0; i < len(q); i++ { + q[i] = byte(len(q) - i) + } + d1 := make([]byte, 1023+alignD)[alignD:] + d2 := make([]byte, 1023+alignD)[alignD:] + + ANDBytes(d1, p, q) + safeANDBytes(d2, p, q) + if !bytes.Equal(d1, d2) { + t.Error("not equal") + } + } + } + } +} + +// Tests that bitwise OR works for various alignments. +func TestOR(t *testing.T) { + for alignP := 0; alignP < 2; alignP++ { + for alignQ := 0; alignQ < 2; alignQ++ { + for alignD := 0; alignD < 2; alignD++ { + p := make([]byte, 1023)[alignP:] + q := make([]byte, 1023)[alignQ:] + + for i := 0; i < len(p); i++ { + p[i] = byte(i) + } + for i := 0; i < len(q); i++ { + q[i] = byte(len(q) - i) + } + d1 := make([]byte, 1023+alignD)[alignD:] + d2 := make([]byte, 1023+alignD)[alignD:] + + ORBytes(d1, p, q) + safeORBytes(d2, p, q) + if !bytes.Equal(d1, d2) { + t.Error("not equal") + } + } + } + } +} + +// Tests that bit testing works for various alignments. +func TestTest(t *testing.T) { + for align := 0; align < 2; align++ { + // Test for bits set in the bulk part + p := make([]byte, 1023)[align:] + p[100] = 1 + + if TestBytes(p) != safeTestBytes(p) { + t.Error("not equal") + } + // Test for bits set in the tail part + q := make([]byte, 1023)[align:] + q[len(q)-1] = 1 + + if TestBytes(q) != safeTestBytes(q) { + t.Error("not equal") + } + } +} + +// Benchmarks the potentially optimized XOR performance. +func BenchmarkFastXOR1KB(b *testing.B) { benchmarkFastXOR(b, 1024) } +func BenchmarkFastXOR2KB(b *testing.B) { benchmarkFastXOR(b, 2048) } +func BenchmarkFastXOR4KB(b *testing.B) { benchmarkFastXOR(b, 4096) } + +func benchmarkFastXOR(b *testing.B, size int) { + p, q := make([]byte, size), make([]byte, size) + + for i := 0; i < b.N; i++ { + XORBytes(p, p, q) + } +} + +// Benchmarks the baseline XOR performance. +func BenchmarkBaseXOR1KB(b *testing.B) { benchmarkBaseXOR(b, 1024) } +func BenchmarkBaseXOR2KB(b *testing.B) { benchmarkBaseXOR(b, 2048) } +func BenchmarkBaseXOR4KB(b *testing.B) { benchmarkBaseXOR(b, 4096) } + +func benchmarkBaseXOR(b *testing.B, size int) { + p, q := make([]byte, size), make([]byte, size) + + for i := 0; i < b.N; i++ { + safeXORBytes(p, p, q) + } +} + +// Benchmarks the potentially optimized AND performance. +func BenchmarkFastAND1KB(b *testing.B) { benchmarkFastAND(b, 1024) } +func BenchmarkFastAND2KB(b *testing.B) { benchmarkFastAND(b, 2048) } +func BenchmarkFastAND4KB(b *testing.B) { benchmarkFastAND(b, 4096) } + +func benchmarkFastAND(b *testing.B, size int) { + p, q := make([]byte, size), make([]byte, size) + + for i := 0; i < b.N; i++ { + ANDBytes(p, p, q) + } +} + +// Benchmarks the baseline AND performance. +func BenchmarkBaseAND1KB(b *testing.B) { benchmarkBaseAND(b, 1024) } +func BenchmarkBaseAND2KB(b *testing.B) { benchmarkBaseAND(b, 2048) } +func BenchmarkBaseAND4KB(b *testing.B) { benchmarkBaseAND(b, 4096) } + +func benchmarkBaseAND(b *testing.B, size int) { + p, q := make([]byte, size), make([]byte, size) + + for i := 0; i < b.N; i++ { + safeANDBytes(p, p, q) + } +} + +// Benchmarks the potentially optimized OR performance. +func BenchmarkFastOR1KB(b *testing.B) { benchmarkFastOR(b, 1024) } +func BenchmarkFastOR2KB(b *testing.B) { benchmarkFastOR(b, 2048) } +func BenchmarkFastOR4KB(b *testing.B) { benchmarkFastOR(b, 4096) } + +func benchmarkFastOR(b *testing.B, size int) { + p, q := make([]byte, size), make([]byte, size) + + for i := 0; i < b.N; i++ { + ORBytes(p, p, q) + } +} + +// Benchmarks the baseline OR performance. +func BenchmarkBaseOR1KB(b *testing.B) { benchmarkBaseOR(b, 1024) } +func BenchmarkBaseOR2KB(b *testing.B) { benchmarkBaseOR(b, 2048) } +func BenchmarkBaseOR4KB(b *testing.B) { benchmarkBaseOR(b, 4096) } + +func benchmarkBaseOR(b *testing.B, size int) { + p, q := make([]byte, size), make([]byte, size) + + for i := 0; i < b.N; i++ { + safeORBytes(p, p, q) + } +} + +// Benchmarks the potentially optimized bit testing performance. +func BenchmarkFastTest1KB(b *testing.B) { benchmarkFastTest(b, 1024) } +func BenchmarkFastTest2KB(b *testing.B) { benchmarkFastTest(b, 2048) } +func BenchmarkFastTest4KB(b *testing.B) { benchmarkFastTest(b, 4096) } + +func benchmarkFastTest(b *testing.B, size int) { + p := make([]byte, size) + for i := 0; i < b.N; i++ { + TestBytes(p) + } +} + +// Benchmarks the baseline bit testing performance. +func BenchmarkBaseTest1KB(b *testing.B) { benchmarkBaseTest(b, 1024) } +func BenchmarkBaseTest2KB(b *testing.B) { benchmarkBaseTest(b, 2048) } +func BenchmarkBaseTest4KB(b *testing.B) { benchmarkBaseTest(b, 4096) } + +func benchmarkBaseTest(b *testing.B, size int) { + p := make([]byte, size) + for i := 0; i < b.N; i++ { + safeTestBytes(p) + } +} -- cgit v1.2.3