aboutsummaryrefslogtreecommitdiffstats
path: root/core/bloombits
diff options
context:
space:
mode:
authorPéter Szilágyi <peterke@gmail.com>2017-08-29 19:13:11 +0800
committerPéter Szilágyi <peterke@gmail.com>2017-09-06 16:14:19 +0800
commitf585f9eee8cb18423c23fe8b517b5b4cbe3b3755 (patch)
tree08c232ee58318c20f971cf8e3f5dfa09f1e2caf7 /core/bloombits
parent4ea4d2dc3473afd9d2eda6ef6b359accce1f0946 (diff)
downloaddexon-f585f9eee8cb18423c23fe8b517b5b4cbe3b3755.tar
dexon-f585f9eee8cb18423c23fe8b517b5b4cbe3b3755.tar.gz
dexon-f585f9eee8cb18423c23fe8b517b5b4cbe3b3755.tar.bz2
dexon-f585f9eee8cb18423c23fe8b517b5b4cbe3b3755.tar.lz
dexon-f585f9eee8cb18423c23fe8b517b5b4cbe3b3755.tar.xz
dexon-f585f9eee8cb18423c23fe8b517b5b4cbe3b3755.tar.zst
dexon-f585f9eee8cb18423c23fe8b517b5b4cbe3b3755.zip
core, eth: clean up bloom filtering, add some tests
Diffstat (limited to 'core/bloombits')
-rw-r--r--core/bloombits/doc.go18
-rw-r--r--core/bloombits/fetcher_test.go101
-rw-r--r--core/bloombits/generator.go84
-rw-r--r--core/bloombits/generator_test.go60
-rw-r--r--core/bloombits/matcher.go878
-rw-r--r--core/bloombits/matcher_test.go283
-rw-r--r--core/bloombits/scheduler.go181
-rw-r--r--core/bloombits/scheduler_test.go105
-rw-r--r--core/bloombits/utils.go63
9 files changed, 1086 insertions, 687 deletions
diff --git a/core/bloombits/doc.go b/core/bloombits/doc.go
new file mode 100644
index 000000000..3d159e74f
--- /dev/null
+++ b/core/bloombits/doc.go
@@ -0,0 +1,18 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+// Package bloombits implements bloom filtering on batches of data.
+package bloombits
diff --git a/core/bloombits/fetcher_test.go b/core/bloombits/fetcher_test.go
deleted file mode 100644
index 9c229cf8d..000000000
--- a/core/bloombits/fetcher_test.go
+++ /dev/null
@@ -1,101 +0,0 @@
-// Copyright 2017 The go-ethereum Authors
-// This file is part of the go-ethereum library.
-//
-// The go-ethereum library is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Lesser General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// The go-ethereum library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public License
-// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
-package bloombits
-
-import (
- "bytes"
- "encoding/binary"
- "math/rand"
- "sync"
- "sync/atomic"
- "testing"
- "time"
-)
-
-const testFetcherReqCount = 5000
-
-func fetcherTestVector(b uint, s uint64) []byte {
- r := make([]byte, 10)
- binary.BigEndian.PutUint16(r[0:2], uint16(b))
- binary.BigEndian.PutUint64(r[2:10], s)
- return r
-}
-
-func TestFetcher(t *testing.T) {
- testFetcher(t, 1)
-}
-
-func TestFetcherMultipleReaders(t *testing.T) {
- testFetcher(t, 10)
-}
-
-func testFetcher(t *testing.T, cnt int) {
- f := &fetcher{
- requestMap: make(map[uint64]fetchRequest),
- }
- distCh := make(chan distRequest, channelCap)
- stop := make(chan struct{})
- var reqCount uint32
-
- for i := 0; i < 10; i++ {
- go func() {
- for {
- req, ok := <-distCh
- if !ok {
- return
- }
- time.Sleep(time.Duration(rand.Intn(100000)))
- atomic.AddUint32(&reqCount, 1)
- f.deliver([]uint64{req.sectionIndex}, [][]byte{fetcherTestVector(req.bloomIndex, req.sectionIndex)})
- }
- }()
- }
-
- var wg, wg2 sync.WaitGroup
- for cc := 0; cc < cnt; cc++ {
- wg.Add(1)
- in := make(chan uint64, channelCap)
- out := f.fetch(in, distCh, stop, &wg2)
-
- time.Sleep(time.Millisecond * 10 * time.Duration(cc))
- go func() {
- for i := uint64(0); i < testFetcherReqCount; i++ {
- in <- i
- }
- }()
-
- go func() {
- for i := uint64(0); i < testFetcherReqCount; i++ {
- bv := <-out
- if !bytes.Equal(bv, fetcherTestVector(0, i)) {
- if len(bv) != 10 {
- t.Errorf("Vector #%d length is %d, expected 10", i, len(bv))
- } else {
- j := binary.BigEndian.Uint64(bv[2:10])
- t.Errorf("Expected vector #%d, fetched #%d", i, j)
- }
- }
- }
- wg.Done()
- }()
- }
-
- wg.Wait()
- close(stop)
- if reqCount != testFetcherReqCount {
- t.Errorf("Request count mismatch: expected %v, got %v", testFetcherReqCount, reqCount)
- }
-}
diff --git a/core/bloombits/generator.go b/core/bloombits/generator.go
new file mode 100644
index 000000000..04a7f5146
--- /dev/null
+++ b/core/bloombits/generator.go
@@ -0,0 +1,84 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package bloombits
+
+import (
+ "errors"
+
+ "github.com/ethereum/go-ethereum/core/types"
+)
+
+// errSectionOutOfBounds is returned if the user tried to add more bloom filters
+// to the batch than available space, or if they try to retrieve above the capacity.
+var errSectionOutOfBounds = errors.New("section out of bounds")
+
+// Generator takes a number of bloom filters and generates the rotated bloom bits
+// to be used for batched filtering.
+type Generator struct {
+ blooms [types.BloomBitLength][]byte // Rotated blooms for per-bit matching
+ sections uint // Number of sections to batch together
+ nextBit uint // Next bit to set when adding a bloom
+}
+
+// NewGenerator creates a rotated bloom generator that can iteratively fill a
+// batched bloom filter's bits.
+func NewGenerator(sections uint) (*Generator, error) {
+ if sections%8 != 0 {
+ return nil, errors.New("section count not multiple of 8")
+ }
+ b := &Generator{sections: sections}
+ for i := 0; i < types.BloomBitLength; i++ {
+ b.blooms[i] = make([]byte, sections/8)
+ }
+ return b, nil
+}
+
+// AddBloom takes a single bloom filter and sets the corresponding bit column
+// in memory accordingly.
+func (b *Generator) AddBloom(bloom types.Bloom) error {
+ // Make sure we're not adding more bloom filters than our capacity
+ if b.nextBit >= b.sections {
+ return errSectionOutOfBounds
+ }
+ // Rotate the bloom and insert into our collection
+ byteMask := b.nextBit / 8
+ bitMask := byte(1) << byte(7-b.nextBit%8)
+
+ for i := 0; i < types.BloomBitLength; i++ {
+ bloomByteMask := types.BloomByteLength - 1 - i/8
+ bloomBitMask := byte(1) << byte(i%8)
+
+ if (bloom[bloomByteMask] & bloomBitMask) != 0 {
+ b.blooms[i][byteMask] |= bitMask
+ }
+ }
+ b.nextBit++
+
+ return nil
+}
+
+// Bitset returns the bit vector belonging to the given bloom bit index after
+// all blooms have been added; idx must be below types.BloomBitLength.
+func (b *Generator) Bitset(idx uint) ([]byte, error) {
+ if b.nextBit != b.sections {
+ return nil, errors.New("bloom not fully generated yet")
+ }
+ if idx >= types.BloomBitLength { // blooms holds one vector per bloom bit, not per section
+ return nil, errSectionOutOfBounds
+ }
+ return b.blooms[idx], nil
+}
diff --git a/core/bloombits/generator_test.go b/core/bloombits/generator_test.go
new file mode 100644
index 000000000..f4aa9551c
--- /dev/null
+++ b/core/bloombits/generator_test.go
@@ -0,0 +1,60 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package bloombits
+
+import (
+ "bytes"
+ "math/rand"
+ "testing"
+
+ "github.com/ethereum/go-ethereum/core/types"
+)
+
+// Tests that batched bloom bits are correctly rotated from the input bloom
+// filters.
+func TestGenerator(t *testing.T) {
+ // Generate the input and the rotated output
+ var input, output [types.BloomBitLength][types.BloomByteLength]byte
+
+ for i := 0; i < types.BloomBitLength; i++ {
+ for j := 0; j < types.BloomBitLength; j++ {
+ bit := byte(rand.Int() % 2)
+
+ input[i][j/8] |= bit << byte(7-j%8)
+ output[types.BloomBitLength-1-j][i/8] |= bit << byte(7-i%8)
+ }
+ }
+ // Crunch the input through the generator and verify the result
+ gen, err := NewGenerator(types.BloomBitLength)
+ if err != nil {
+ t.Fatalf("failed to create bloombit generator: %v", err)
+ }
+ for i, bloom := range input {
+ if err := gen.AddBloom(bloom); err != nil {
+ t.Fatalf("bloom %d: failed to add: %v", i, err)
+ }
+ }
+ for i, want := range output {
+ have, err := gen.Bitset(uint(i))
+ if err != nil {
+ t.Fatalf("output %d: failed to retrieve bits: %v", i, err)
+ }
+ if !bytes.Equal(have, want[:]) {
+ t.Errorf("output %d: bit vector mismatch have %x, want %x", i, have, want)
+ }
+ }
+}
diff --git a/core/bloombits/matcher.go b/core/bloombits/matcher.go
index 5a7df6b1c..e365fd6d0 100644
--- a/core/bloombits/matcher.go
+++ b/core/bloombits/matcher.go
@@ -13,327 +13,350 @@
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
package bloombits
import (
+ "errors"
+ "math"
+ "sort"
"sync"
+ "sync/atomic"
+ "time"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/common/bitutil"
- "github.com/ethereum/go-ethereum/core/types"
+ "github.com/ethereum/go-ethereum/crypto"
)
-const channelCap = 100
+// bloomIndexes represents the bit indexes inside the bloom filter that belong
+// to some key.
+type bloomIndexes [3]uint
-// fetcher handles bit vector retrieval pipelines for a single bit index
-type fetcher struct {
- bloomIndex uint
- requestMap map[uint64]fetchRequest
- requestLock sync.RWMutex
-}
+// calcBloomIndexes returns the bloom filter bit indexes belonging to the given key.
+func calcBloomIndexes(b []byte) bloomIndexes {
+ b = crypto.Keccak256(b)
-// fetchRequest represents the state of a bit vector requested from a fetcher. When a distRequest has been sent to the distributor but
-// the data has not been delivered yet, queued is true. When delivered, it is stored in the data field and the delivered channel is closed.
-type fetchRequest struct {
- data []byte
- queued bool
- delivered chan struct{}
+ var idxs bloomIndexes
+ for i := 0; i < len(idxs); i++ {
+ idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1])
+ }
+ return idxs
}
-// distRequest is sent by the fetcher to the distributor which groups and prioritizes these requests.
-type distRequest struct {
- bloomIndex uint
- sectionIndex uint64
+// partialMatches with a non-nil bitset represents a section in which some sub-
+// matchers have already found potential matches. Subsequent sub-matchers will
+// binary AND their matches with this bitset. If bitset is nil, it represents a
+// section to be processed by the first sub-matcher.
+type partialMatches struct {
+ section uint64
+ bitset []byte
}
-// fetch creates a retrieval pipeline, receiving section indexes from sectionCh and returning the results
-// in the same order through the returned channel. Multiple fetch instances of the same fetcher are allowed
-// to run in parallel, in case the same bit index appears multiple times in the filter structure. Each section
-// is requested only once, requests are sent to the request distributor (part of Matcher) through distCh.
-func (f *fetcher) fetch(sectionCh chan uint64, distCh chan distRequest, stop chan struct{}, wg *sync.WaitGroup) chan []byte {
- dataCh := make(chan []byte, channelCap)
- returnCh := make(chan uint64, channelCap)
- wg.Add(2)
-
- go func() {
- defer wg.Done()
- defer close(returnCh)
-
- for {
- select {
- case <-stop:
- return
- case idx, ok := <-sectionCh:
- if !ok {
- return
- }
-
- req := false
- f.requestLock.Lock()
- r := f.requestMap[idx]
- if r.data == nil {
- req = !r.queued
- r.queued = true
- if r.delivered == nil {
- r.delivered = make(chan struct{})
- }
- f.requestMap[idx] = r
- }
- f.requestLock.Unlock()
- if req {
- distCh <- distRequest{bloomIndex: f.bloomIndex, sectionIndex: idx} // success is guaranteed, distibuteRequests shuts down after fetch
- }
- select {
- case <-stop:
- return
- case returnCh <- idx:
- }
- }
- }
- }()
-
- go func() {
- defer wg.Done()
- defer close(dataCh)
-
- for {
- select {
- case <-stop:
- return
- case idx, ok := <-returnCh:
- if !ok {
- return
- }
-
- f.requestLock.RLock()
- r := f.requestMap[idx]
- f.requestLock.RUnlock()
-
- if r.data == nil {
- select {
- case <-stop:
- return
- case <-r.delivered:
- f.requestLock.RLock()
- r = f.requestMap[idx]
- f.requestLock.RUnlock()
- }
- }
- select {
- case <-stop:
- return
- case dataCh <- r.data:
- }
- }
- }
- }()
-
- return dataCh
+// Retrieval represents a request for retrieval task assignments for a given
+// bit with the given number of fetch elements, or a response for such a request.
+// It can also have the actual results set to be used as a delivery data struct.
+type Retrieval struct {
+ Bit uint
+ Sections []uint64
+ Bitsets [][]byte
}
-// deliver is called by the request distributor when a reply to a request has
-// arrived
-func (f *fetcher) deliver(sectionIdxList []uint64, data [][]byte) {
- f.requestLock.Lock()
- defer f.requestLock.Unlock()
+// Matcher is a pipelined system of schedulers and logic matchers which perform
+// binary AND/OR operations on the bit-streams, creating a stream of potential
+// blocks to inspect for data content.
+type Matcher struct {
+ sectionSize uint64 // Size of the data batches to filter on
- for i, sectionIdx := range sectionIdxList {
- r := f.requestMap[sectionIdx]
- if r.data != nil {
- panic("BloomBits section data delivered twice")
- }
- r.data = data[i]
- close(r.delivered)
- f.requestMap[sectionIdx] = r
- }
-}
+ addresses []bloomIndexes // Addresses the system is filtering for
+ topics [][]bloomIndexes // Topics the system is filtering for
+ schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits
-// Matcher is a pipelined structure of fetchers and logic matchers which perform
-// binary AND/OR operations on the bitstreams, finally creating a stream of potential matches.
-type Matcher struct {
- addresses []types.BloomIndexList
- topics [][]types.BloomIndexList
- fetchers map[uint]*fetcher
- sectionSize uint64
-
- distCh chan distRequest
- reqs map[uint][]uint64
- freeQueues map[uint]struct{}
- allocQueue []chan uint
- running bool
- stop chan struct{}
- lock sync.Mutex
- wg, distWg sync.WaitGroup
+ retrievers chan chan uint // Retriever processes waiting for bit allocations
+ counters chan chan uint // Retriever processes waiting for task count reports
+ retrievals chan chan *Retrieval // Retriever processes waiting for task allocations
+ deliveries chan *Retrieval // Retriever processes waiting for task response deliveries
+
+ running uint32 // Atomic flag whether a session is live or not
}
-// NewMatcher creates a new Matcher instance
+// NewMatcher creates a new pipeline for retrieving bloom bit streams and doing
+// address and topic filtering on them.
func NewMatcher(sectionSize uint64, addresses []common.Address, topics [][]common.Hash) *Matcher {
m := &Matcher{
- fetchers: make(map[uint]*fetcher),
- reqs: make(map[uint][]uint64),
- freeQueues: make(map[uint]struct{}),
- distCh: make(chan distRequest, channelCap),
sectionSize: sectionSize,
+ schedulers: make(map[uint]*scheduler),
+ retrievers: make(chan chan uint),
+ counters: make(chan chan uint),
+ retrievals: make(chan chan *Retrieval),
+ deliveries: make(chan *Retrieval),
}
m.setAddresses(addresses)
m.setTopics(topics)
return m
}
-// setAddresses matches only logs that are generated from addresses that are included
-// in the given addresses.
+// setAddresses configures the matcher to only return logs that are generated
+// from addresses that are included in the given list.
func (m *Matcher) setAddresses(addresses []common.Address) {
- m.addresses = make([]types.BloomIndexList, len(addresses))
+ // Calculate the bloom bit indexes for the addresses we're interested in
+ m.addresses = make([]bloomIndexes, len(addresses))
for i, address := range addresses {
- m.addresses[i] = types.BloomIndexes(address.Bytes())
+ m.addresses[i] = calcBloomIndexes(address.Bytes())
}
-
+ // For every bit, create a scheduler to load/download the bit vectors
for _, bloomIndexList := range m.addresses {
for _, bloomIndex := range bloomIndexList {
- m.newFetcher(bloomIndex)
+ m.addScheduler(bloomIndex)
}
}
}
-// setTopics matches only logs that have topics matching the given topics.
-func (m *Matcher) setTopics(topics [][]common.Hash) {
+// setTopics configures the matcher to only return logs that have topics matching
+// the given list.
+func (m *Matcher) setTopics(topicsList [][]common.Hash) {
+ // Calculate the bloom bit indexes for the topics we're interested in
m.topics = nil
-loop:
- for _, topicList := range topics {
- t := make([]types.BloomIndexList, len(topicList))
- for i, topic := range topicList {
- if (topic == common.Hash{}) {
- continue loop
- }
- t[i] = types.BloomIndexes(topic.Bytes())
+
+ for _, topics := range topicsList {
+ bloomBits := make([]bloomIndexes, len(topics))
+ for i, topic := range topics {
+ bloomBits[i] = calcBloomIndexes(topic.Bytes())
}
- m.topics = append(m.topics, t)
+ m.topics = append(m.topics, bloomBits)
}
-
+ // For every bit, create a scheduler to load/download the bit vectors
for _, bloomIndexLists := range m.topics {
for _, bloomIndexList := range bloomIndexLists {
for _, bloomIndex := range bloomIndexList {
- m.newFetcher(bloomIndex)
+ m.addScheduler(bloomIndex)
}
}
}
}
-// match creates a daisy-chain of sub-matchers, one for the address set and one for each topic set, each
-// sub-matcher receiving a section only if the previous ones have all found a potential match in one of
-// the blocks of the section, then binary AND-ing its own matches and forwaring the result to the next one
-func (m *Matcher) match(processCh chan partialMatches) chan partialMatches {
- indexLists := m.topics
- if len(m.addresses) > 0 {
- indexLists = append([][]types.BloomIndexList{m.addresses}, indexLists...)
+// addScheduler adds a bit stream retrieval scheduler for the given bit index if
+// it has not existed before. If the bit is already selected for filtering, the
+// existing scheduler can be used.
+func (m *Matcher) addScheduler(idx uint) {
+ if _, ok := m.schedulers[idx]; ok {
+ return
}
- m.distributeRequests()
+ m.schedulers[idx] = newScheduler(idx)
+}
- for _, subIndexList := range indexLists {
- processCh = m.subMatch(processCh, subIndexList)
+// Start starts the matching process, streaming the numbers of potentially
+// matching blocks in the range [begin, end] into results. If there are no more
+// matches in the range, the result channel is closed.
+func (m *Matcher) Start(begin, end uint64, results chan uint64) (*MatcherSession, error) {
+ // Make sure we're not creating concurrent sessions
+ if atomic.SwapUint32(&m.running, 1) == 1 {
+ return nil, errors.New("matcher already running")
}
- return processCh
-}
+ defer atomic.StoreUint32(&m.running, 0)
-// partialMatches with a non-nil vector represents a section in which some sub-matchers have already
-// found potential matches. Subsequent sub-matchers will binary AND their matches with this vector.
-// If vector is nil, it represents a section to be processed by the first sub-matcher.
-type partialMatches struct {
- sectionIndex uint64
- vector []byte
+ // Initiate a new matching round
+ session := &MatcherSession{
+ matcher: m,
+ quit: make(chan struct{}),
+ kill: make(chan struct{}),
+ }
+ for _, scheduler := range m.schedulers {
+ scheduler.reset()
+ }
+ sink := m.run(begin, end, cap(results), session)
+
+ // Read the output from the result sink and deliver to the user
+ session.pend.Add(1)
+ go func() {
+ defer session.pend.Done()
+ defer close(results)
+
+ for {
+ select {
+ case <-session.quit:
+ return
+
+ case res, ok := <-sink:
+ // New match result found
+ if !ok {
+ return
+ }
+ // Calculate the first and last blocks of the section
+ sectionStart := res.section * m.sectionSize
+
+ first := sectionStart
+ if begin > first {
+ first = begin
+ }
+ last := sectionStart + m.sectionSize - 1
+ if end < last {
+ last = end
+ }
+ // Iterate over all the blocks in the section and return the matching ones
+ for i := first; i <= last; i++ {
+ // If the bitset is nil, we're a special match-all cornercase
+ if res.bitset == nil {
+ select {
+ case <-session.quit:
+ return
+ case results <- i:
+ }
+ continue
+ }
+ // Skip the rest of the current byte (and only that byte) if it has no matches
+ next := res.bitset[(i-sectionStart)/8]
+ if next == 0 {
+ i += 7 - (i-sectionStart)%8
+ continue
+ }
+ // Some bit is set, do the actual submatching
+ if bit := 7 - i%8; next&(1<<bit) != 0 {
+ select {
+ case <-session.quit:
+ return
+ case results <- i:
+ }
+ }
+ }
+ }
+ }
+ }()
+ return session, nil
}
-// newFetcher adds a fetcher for the given bit index if it has not existed before
-func (m *Matcher) newFetcher(idx uint) {
- if _, ok := m.fetchers[idx]; ok {
- return
+// run creates a daisy-chain of sub-matchers, one for the address set and one
+// for each topic set, each sub-matcher receiving a section only if the previous
+// ones have all found a potential match in one of the blocks of the section,
+// then binary AND-ing its own matches and forwarding the result to the next one.
+//
+// The method starts feeding the section indexes into the first sub-matcher on a
+// new goroutine and returns a sink channel receiving the results.
+func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches {
+ // Create the source channel and feed section indexes into it
+ source := make(chan *partialMatches, buffer)
+
+ session.pend.Add(1)
+ go func() {
+ defer session.pend.Done()
+ defer close(source)
+
+ for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ {
+ select {
+ case <-session.quit:
+ return
+ case source <- &partialMatches{i, nil}:
+ }
+ }
+ }()
+ // Assemble the daisy-chained filtering pipeline
+ blooms := m.topics
+ if len(m.addresses) > 0 {
+ blooms = append([][]bloomIndexes{m.addresses}, blooms...)
}
- f := &fetcher{
- bloomIndex: idx,
- requestMap: make(map[uint64]fetchRequest),
+ next := source
+ dist := make(chan *request, buffer)
+
+ for _, bloom := range blooms {
+ next = m.subMatch(next, dist, bloom, session)
}
- m.fetchers[idx] = f
+ // Start the request distribution
+ session.pend.Add(1)
+ go m.distributor(dist, session)
+
+ return next
}
// subMatch creates a sub-matcher that filters for a set of addresses or topics, binary OR-s those matches, then
-// binary AND-s the result to the daisy-chain input (processCh) and forwards it to the daisy-chain output.
+// binary AND-s the result to the daisy-chain input (source) and forwards it to the daisy-chain output.
// The matches of each address/topic are calculated by fetching the given sections of the three bloom bit indexes belonging to
// that address/topic, and binary AND-ing those vectors together.
-func (m *Matcher) subMatch(processCh chan partialMatches, bloomIndexLists []types.BloomIndexList) chan partialMatches {
- // set up fetchers
- fetchIndexChannels := make([][3]chan uint64, len(bloomIndexLists))
- fetchDataChannels := make([][3]chan []byte, len(bloomIndexLists))
- for i, bloomIndexList := range bloomIndexLists {
- for j, bloomIndex := range bloomIndexList {
- fetchIndexChannels[i][j] = make(chan uint64, channelCap)
- fetchDataChannels[i][j] = m.fetchers[bloomIndex].fetch(fetchIndexChannels[i][j], m.distCh, m.stop, &m.wg)
+func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches {
+ // Start the concurrent schedulers for each bit required by the bloom filter
+ sectionSources := make([][3]chan uint64, len(bloom))
+ sectionSinks := make([][3]chan []byte, len(bloom))
+ for i, bits := range bloom {
+ for j, bit := range bits {
+ sectionSources[i][j] = make(chan uint64, cap(source))
+ sectionSinks[i][j] = make(chan []byte, cap(source))
+
+ m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend)
}
}
- fetchedCh := make(chan partialMatches, channelCap) // entries from processCh are forwarded here after fetches have been initiated
- resultsCh := make(chan partialMatches, channelCap)
+ process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated
+ results := make(chan *partialMatches, cap(source))
- m.wg.Add(2)
- // goroutine for starting retrievals
+ session.pend.Add(2)
go func() {
- defer m.wg.Done()
-
+ // Tear down the goroutine and terminate all source channels
+ defer session.pend.Done()
+ defer close(process)
+
+ defer func() {
+ for _, bloomSources := range sectionSources {
+ for _, bitSource := range bloomSources {
+ close(bitSource)
+ }
+ }
+ }()
+ // Read sections from the source channel and multiplex into all bit-schedulers
for {
select {
- case <-m.stop:
+ case <-session.quit:
return
- case s, ok := <-processCh:
+
+ case subres, ok := <-source:
+ // New subresult from previous link
if !ok {
- close(fetchedCh)
- for _, fetchIndexChs := range fetchIndexChannels {
- for _, fetchIndexCh := range fetchIndexChs {
- close(fetchIndexCh)
- }
- }
return
}
-
- for _, fetchIndexChs := range fetchIndexChannels {
- for _, fetchIndexCh := range fetchIndexChs {
+ // Multiplex the section index to all bit-schedulers
+ for _, bloomSources := range sectionSources {
+ for _, bitSource := range bloomSources {
select {
- case <-m.stop:
+ case <-session.quit:
return
- case fetchIndexCh <- s.sectionIndex:
+ case bitSource <- subres.section:
}
}
}
+ // Notify the processor that this section will become available
select {
- case <-m.stop:
+ case <-session.quit:
return
- case fetchedCh <- s:
+ case process <- subres:
}
}
}
}()
- // goroutine for processing retrieved data
go func() {
- defer m.wg.Done()
+ // Tear down the goroutine and terminate the final sink channel
+ defer session.pend.Done()
+ defer close(results)
+ // Read the source notifications and collect the delivered results
for {
select {
- case <-m.stop:
+ case <-session.quit:
return
- case s, ok := <-fetchedCh:
+
+ case subres, ok := <-process:
+ // Notified of a section being retrieved
if !ok {
- close(resultsCh)
return
}
-
+ // Gather all the sub-results and merge them together
var orVector []byte
- for _, fetchDataChs := range fetchDataChannels {
+ for _, bloomSinks := range sectionSinks {
var andVector []byte
- for _, fetchDataCh := range fetchDataChs {
+ for _, bitSink := range bloomSinks {
var data []byte
select {
- case <-m.stop:
+ case <-session.quit:
return
- case data = <-fetchDataCh:
+ case data = <-bitSink:
}
if andVector == nil {
andVector = make([]byte, int(m.sectionSize/8))
@@ -352,228 +375,277 @@ func (m *Matcher) subMatch(processCh chan partialMatches, bloomIndexLists []type
if orVector == nil {
orVector = make([]byte, int(m.sectionSize/8))
}
- if s.vector != nil {
- bitutil.ANDBytes(orVector, orVector, s.vector)
+ if subres.bitset != nil {
+ bitutil.ANDBytes(orVector, orVector, subres.bitset)
}
if bitutil.TestBytes(orVector) {
select {
- case <-m.stop:
+ case <-session.quit:
return
- case resultsCh <- partialMatches{s.sectionIndex, orVector}:
+ case results <- &partialMatches{subres.section, orVector}:
}
}
}
}
}()
-
- return resultsCh
+ return results
}
-// Start starts the matching process and returns a stream of bloom matches in
-// a given range of blocks.
-// It returns a results channel immediately and stops if Stop is called or there
-// are no more matches in the range (in which case the results channel is closed).
-// Start/Stop can be called multiple times for different ranges, in which case already
-// delivered bit vectors are not requested again.
-func (m *Matcher) Start(begin, end uint64) chan uint64 {
- m.stop = make(chan struct{})
- processCh := make(chan partialMatches, channelCap)
- resultsCh := make(chan uint64, channelCap)
-
- res := m.match(processCh)
-
- startSection := begin / m.sectionSize
- endSection := end / m.sectionSize
-
- m.wg.Add(2)
- go func() {
- defer m.wg.Done()
- defer close(processCh)
+// distributor receives requests from the schedulers and queues them into a set
+// of pending requests, which are assigned to retrievers wanting to fulfil them.
+func (m *Matcher) distributor(dist chan *request, session *MatcherSession) {
+ defer session.pend.Done()
+
+ var (
+ requests = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number
+ unallocs = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever
+ retrievers chan chan uint // Waiting retrievers (toggled to nil if unallocs is empty)
+ )
+ var (
+ allocs int // Number of active allocations to handle graceful shutdown requests
+ shutdown = session.quit // Shutdown request channel, will gracefully wait for pending requests
+ )
+
+ // assign is a helper method to try to assign a pending bit to an actively
+ // listening servicer, or schedule it up for later when one arrives.
+ assign := func(bit uint) {
+ select {
+ case fetcher := <-m.retrievers:
+ allocs++
+ fetcher <- bit
+ default:
+ // No retrievers active, start listening for new ones
+ retrievers = m.retrievers
+ unallocs[bit] = struct{}{}
+ }
+ }
- for i := startSection; i <= endSection; i++ {
- select {
- case processCh <- partialMatches{i, nil}:
- case <-m.stop:
+ for {
+ select {
+ case <-shutdown:
+ // Graceful shutdown requested, wait until all pending requests are honoured
+ if allocs == 0 {
return
}
- }
- }()
+ shutdown = nil
- go func() {
- defer m.wg.Done()
- defer close(resultsCh)
+ case <-session.kill:
+ // Pending requests not honoured in time, hard terminate
+ return
- for {
- select {
- case r, ok := <-res:
- if !ok {
- return
- }
- sectionStart := r.sectionIndex * m.sectionSize
- s := sectionStart
- if begin > s {
- s = begin
- }
- e := sectionStart + m.sectionSize - 1
- if end < e {
- e = end
- }
- for i := s; i <= e; i++ {
- b := r.vector[(i-sectionStart)/8]
- bit := 7 - i%8
- if b != 0 {
- if b&(1<<bit) != 0 {
- select {
- case <-m.stop:
- return
- case resultsCh <- i:
- }
- }
- } else {
- i += bit
- }
- }
+ case req := <-dist:
+ // New retrieval request arrived to be distributed to some fetcher process
+ queue := requests[req.bit]
+ index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section })
+ requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...)
- case <-m.stop:
- return
+ // If it's a new bit and we have waiting fetchers, allocate to them
+ if len(queue) == 0 {
+ assign(req.bit)
}
- }
- }()
-
- return resultsCh
-}
-
-// Stop stops the matching process
-func (m *Matcher) Stop() {
- close(m.stop)
- m.distWg.Wait()
-}
-// distributeRequests receives requests from the fetchers and either queues them
-// or immediately forwards them to one of the waiting NextRequest functions.
-// Requests with a lower section idx are always prioritized.
-func (m *Matcher) distributeRequests() {
- m.distWg.Add(1)
- stopDist := make(chan struct{})
- go func() {
- <-m.stop
- m.wg.Wait()
- close(stopDist)
- }()
+ case fetcher := <-retrievers:
+ // New retriever arrived, find the lowest section-ed bit to assign
+ bit, best := uint(0), uint64(math.MaxUint64)
+ for idx := range unallocs {
+ if requests[idx][0] < best {
+ bit, best = idx, requests[idx][0]
+ }
+ }
+ // Stop tracking this bit (and alloc notifications if no more work is available)
+ delete(unallocs, bit)
+ if len(unallocs) == 0 {
+ retrievers = nil
+ }
+ allocs++
+ fetcher <- bit
+
+ case fetcher := <-m.counters:
+ // New task count request arrives, return number of items
+ fetcher <- uint(len(requests[<-fetcher]))
+
+ case fetcher := <-m.retrievals:
+ // New fetcher waiting for tasks to retrieve, assign
+ task := <-fetcher
+ if want := len(task.Sections); want >= len(requests[task.Bit]) {
+ task.Sections = requests[task.Bit]
+ delete(requests, task.Bit)
+ } else {
+ task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...)
+ requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...)
+ }
+ fetcher <- task
- m.running = true
+ // If anything was left unallocated, try to assign to someone else
+ if len(requests[task.Bit]) > 0 {
+ assign(task.Bit)
+ }
- go func() {
- for {
- select {
- case r := <-m.distCh:
- m.lock.Lock()
- queue := m.reqs[r.bloomIndex]
- i := 0
- for i < len(queue) && r.sectionIndex > queue[i] {
- i++
+ case result := <-m.deliveries:
+ // New retrieval task response from fetcher, split out missing sections and
+ // deliver complete ones
+ var (
+ sections = make([]uint64, 0, len(result.Sections))
+ bitsets = make([][]byte, 0, len(result.Bitsets))
+ missing = make([]uint64, 0, len(result.Sections))
+ )
+ for i, bitset := range result.Bitsets {
+ if len(bitset) == 0 {
+ missing = append(missing, result.Sections[i])
+ continue
}
- queue = append(queue, 0)
- copy(queue[i+1:], queue[i:len(queue)-1])
- queue[i] = r.sectionIndex
- m.reqs[r.bloomIndex] = queue
- if len(queue) == 1 {
- m.freeQueue(r.bloomIndex)
+ sections = append(sections, result.Sections[i])
+ bitsets = append(bitsets, bitset)
+ }
+ m.schedulers[result.Bit].deliver(sections, bitsets)
+ allocs--
+
+ // Reschedule missing sections and allocate bit if newly available
+ if len(missing) > 0 {
+ queue := requests[result.Bit]
+ for _, section := range missing {
+ index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section })
+ queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...)
}
- m.lock.Unlock()
- case <-stopDist:
- m.lock.Lock()
- for _, ch := range m.allocQueue {
- close(ch)
+ requests[result.Bit] = queue
+
+ if len(queue) == len(missing) {
+ assign(result.Bit)
}
- m.allocQueue = nil
- m.running = false
- m.lock.Unlock()
- m.distWg.Done()
+ }
+ // If we're in the process of shutting down, terminate
+ if allocs == 0 && shutdown == nil {
return
}
}
- }()
+ }
}
-// freeQueue marks a queue as free if there are no AllocSectionQueue functions
-// waiting for allocation. If there is someone waiting, the queue is immediately
-// allocated.
-func (m *Matcher) freeQueue(bloomIndex uint) {
- if len(m.allocQueue) > 0 {
- m.allocQueue[0] <- bloomIndex
- m.allocQueue = m.allocQueue[1:]
- } else {
- m.freeQueues[bloomIndex] = struct{}{}
+// MatcherSession is returned by a started matcher to be used as a terminator
+// for the actively running matching operation.
+type MatcherSession struct {
+ matcher *Matcher
+
+ quit chan struct{} // Quit channel to request pipeline termination
+ kill chan struct{} // Term channel to signal non-graceful forced shutdown
+ pend sync.WaitGroup
+}
+
+// Close stops the matching process and waits for all subprocesses to terminate
+// before returning. The timeout may be used for graceful shutdown, allowing the
+// currently running retrievals to complete before this time.
+func (s *MatcherSession) Close(timeout time.Duration) {
+ // Bail out if the matcher is not running
+ select {
+ case <-s.quit:
+ return
+ default:
}
+ // Signal termination and wait for all goroutines to tear down
+ close(s.quit)
+ time.AfterFunc(timeout, func() { close(s.kill) })
+ s.pend.Wait()
}
-// AllocSectionQueue allocates a queue of requested section indexes belonging to the same
-// bloom bit index for a client process that can either immediately fetch the contents
-// of the queue or wait a little while for more section indexes to be requested.
-func (m *Matcher) AllocSectionQueue() (uint, bool) {
- m.lock.Lock()
- if !m.running {
- m.lock.Unlock()
+// AllocateRetrieval assigns a bloom bit index to a client process that can either
+// immediately request and fetch the section contents assigned to this bit or wait
+// a little while for more sections to be requested.
+func (s *MatcherSession) AllocateRetrieval() (uint, bool) {
+ fetcher := make(chan uint)
+
+ select {
+ case <-s.quit:
return 0, false
+ case s.matcher.retrievers <- fetcher:
+ bit, ok := <-fetcher
+ return bit, ok
}
+}
- var allocCh chan uint
- if len(m.freeQueues) > 0 {
- var (
- found bool
- bestSection uint64
- bestIndex uint
- )
- for bloomIndex, _ := range m.freeQueues {
- if !found || m.reqs[bloomIndex][0] < bestSection {
- found = true
- bestIndex = bloomIndex
- bestSection = m.reqs[bloomIndex][0]
- }
- }
- delete(m.freeQueues, bestIndex)
- m.lock.Unlock()
- return bestIndex, true
- } else {
- allocCh = make(chan uint)
- m.allocQueue = append(m.allocQueue, allocCh)
+// PendingSections returns the number of pending section retrievals belonging to
+// the given bloom bit index.
+func (s *MatcherSession) PendingSections(bit uint) int {
+ fetcher := make(chan uint)
+
+ select {
+ case <-s.quit:
+ return 0
+ case s.matcher.counters <- fetcher:
+ fetcher <- bit
+ return int(<-fetcher)
}
- m.lock.Unlock()
-
- bloomIndex, ok := <-allocCh
- return bloomIndex, ok
}
-// SectionCount returns the length of the section index queue belonging to the given bloom bit index
-func (m *Matcher) SectionCount(bloomIndex uint) int {
- m.lock.Lock()
- defer m.lock.Unlock()
-
- return len(m.reqs[bloomIndex])
+// AllocateSections assigns all or part of an already allocated bit-task queue
+// to the requesting process.
+func (s *MatcherSession) AllocateSections(bit uint, count int) []uint64 {
+ fetcher := make(chan *Retrieval)
+
+ select {
+ case <-s.quit:
+ return nil
+ case s.matcher.retrievals <- fetcher:
+ task := &Retrieval{
+ Bit: bit,
+ Sections: make([]uint64, count),
+ }
+ fetcher <- task
+ return (<-fetcher).Sections
+ }
}
-// FetchSections fetches all or part of an already allocated queue and deallocates it
-func (m *Matcher) FetchSections(bloomIndex uint, maxCount int) []uint64 {
- m.lock.Lock()
- defer m.lock.Unlock()
-
- queue := m.reqs[bloomIndex]
- if maxCount < len(queue) {
- // return only part of the existing queue, mark the rest as free
- m.reqs[bloomIndex] = queue[maxCount:]
- m.freeQueue(bloomIndex)
- return queue[:maxCount]
- } else {
- // return the entire queue
- delete(m.reqs, bloomIndex)
- return queue
+// DeliverSections delivers a batch of section bit-vectors for a specific bloom
+// bit index to be injected into the processing pipeline.
+func (s *MatcherSession) DeliverSections(bit uint, sections []uint64, bitsets [][]byte) {
+ select {
+ case <-s.kill:
+ return
+ case s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets}:
}
}
-// Deliver delivers a bit vector to the appropriate fetcher.
-// It is possible to deliver data even after Stop has been called. Once a vector has been
-// requested, the matcher will keep waiting for delivery.
-func (m *Matcher) Deliver(bloomIndex uint, sectionIdxList []uint64, data [][]byte) {
- m.fetchers[bloomIndex].deliver(sectionIdxList, data)
+// Multiplex polls the matcher session for retrieval tasks and multiplexes them into
+// the requested retrieval queue to be serviced together with other sessions.
+//
+// This method will block for the lifetime of the session. Even after termination
+// of the session, any requests in flight need to be responded to! Empty responses
+// are fine though in that case.
+func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) {
+ for {
+ // Allocate a new bloom bit index to retrieve data for, stopping when done
+ bit, ok := s.AllocateRetrieval()
+ if !ok {
+ return
+ }
+ // Bit allocated, throttle a bit if we're below our batch limit
+ if s.PendingSections(bit) < batch {
+ select {
+ case <-s.quit:
+ // Session terminating, we can't meaningfully service, abort
+ s.AllocateSections(bit, 0)
+ s.DeliverSections(bit, []uint64{}, [][]byte{})
+ return
+
+ case <-time.After(wait):
+ // Throttling up, fetch whatever's available
+ }
+ }
+ // Allocate as much as we can handle and request servicing
+ sections := s.AllocateSections(bit, batch)
+ request := make(chan *Retrieval)
+
+ select {
+ case <-s.quit:
+ // Session terminating, we can't meaningfully service, abort
+ s.DeliverSections(bit, sections, make([][]byte, len(sections)))
+ return
+
+ case mux <- request:
+ // Retrieval accepted, something must arrive before we're aborting
+ request <- &Retrieval{Bit: bit, Sections: sections}
+
+ result := <-request
+ s.DeliverSections(result.Bit, result.Sections, result.Bitsets)
+ }
+ }
}
diff --git a/core/bloombits/matcher_test.go b/core/bloombits/matcher_test.go
index bef1491b8..fc49b43b8 100644
--- a/core/bloombits/matcher_test.go
+++ b/core/bloombits/matcher_test.go
@@ -13,6 +13,7 @@
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
package bloombits
import (
@@ -20,177 +21,219 @@ import (
"sync/atomic"
"testing"
"time"
-
- "github.com/ethereum/go-ethereum/core/types"
)
const testSectionSize = 4096
-func matcherTestVector(b uint, s uint64) []byte {
- r := make([]byte, testSectionSize/8)
- for i, _ := range r {
- var bb byte
- for bit := 0; bit < 8; bit++ {
- blockIdx := s*testSectionSize + uint64(i*8+bit)
- bb += bb
- if (blockIdx % uint64(b)) == 0 {
- bb++
- }
- }
- r[i] = bb
- }
- return r
+// Tests the matcher pipeline on a single continuous workflow without interrupts.
+func TestMatcherContinuous(t *testing.T) {
+ testMatcherDiffBatches(t, [][]bloomIndexes{{{10, 20, 30}}}, 100000, false, 75)
+ testMatcherDiffBatches(t, [][]bloomIndexes{{{32, 3125, 100}}, {{40, 50, 10}}}, 100000, false, 81)
+ testMatcherDiffBatches(t, [][]bloomIndexes{{{4, 8, 11}, {7, 8, 17}}, {{9, 9, 12}, {15, 20, 13}}, {{18, 15, 15}, {12, 10, 4}}}, 10000, false, 36)
}
-func expMatch1(idxs types.BloomIndexList, i uint64) bool {
- for _, ii := range idxs {
- if (i % uint64(ii)) != 0 {
- return false
- }
- }
- return true
+// Tests the matcher pipeline on a constantly interrupted and resumed work pattern
+// with the aim of ensuring data items are requested only once.
+func TestMatcherIntermittent(t *testing.T) {
+ testMatcherDiffBatches(t, [][]bloomIndexes{{{10, 20, 30}}}, 100000, true, 75)
+ testMatcherDiffBatches(t, [][]bloomIndexes{{{32, 3125, 100}}, {{40, 50, 10}}}, 100000, true, 81)
+ testMatcherDiffBatches(t, [][]bloomIndexes{{{4, 8, 11}, {7, 8, 17}}, {{9, 9, 12}, {15, 20, 13}}, {{18, 15, 15}, {12, 10, 4}}}, 10000, true, 36)
}
-func expMatch2(idxs []types.BloomIndexList, i uint64) bool {
- for _, ii := range idxs {
- if expMatch1(ii, i) {
- return true
- }
+// Tests the matcher pipeline on random input to hopefully catch anomalies.
+func TestMatcherRandom(t *testing.T) {
+ for i := 0; i < 10; i++ {
+ testMatcherBothModes(t, makeRandomIndexes([]int{1}, 50), 10000, 0)
+ testMatcherBothModes(t, makeRandomIndexes([]int{3}, 50), 10000, 0)
+ testMatcherBothModes(t, makeRandomIndexes([]int{2, 2, 2}, 20), 10000, 0)
+ testMatcherBothModes(t, makeRandomIndexes([]int{5, 5, 5}, 50), 10000, 0)
+ testMatcherBothModes(t, makeRandomIndexes([]int{4, 4, 4}, 20), 10000, 0)
}
- return false
}
-func expMatch3(idxs [][]types.BloomIndexList, i uint64) bool {
- for _, ii := range idxs {
- if !expMatch2(ii, i) {
- return false
+// makeRandomIndexes generates a random filter system, composed of multiple filter
+// criteria, each having one bloom list component for the address and arbitrarily
+// many topic bloom list components.
+func makeRandomIndexes(lengths []int, max int) [][]bloomIndexes {
+ res := make([][]bloomIndexes, len(lengths))
+ for i, topics := range lengths {
+ res[i] = make([]bloomIndexes, topics)
+ for j := 0; j < topics; j++ {
+ for k := 0; k < len(res[i][j]); k++ {
+ res[i][j][k] = uint(rand.Intn(max-1) + 2)
+ }
}
}
- return true
+ return res
}
-func testServeMatcher(m *Matcher, stop chan struct{}, cnt *uint32, maxRequestLen int) {
- // serve matcher with test vectors
- for i := 0; i < 10; i++ {
- go func() {
- for {
- select {
- case <-stop:
- return
- default:
- }
- b, ok := m.AllocSectionQueue()
- if !ok {
- return
- }
- if m.SectionCount(b) < maxRequestLen {
- time.Sleep(time.Microsecond * 100)
- }
- s := m.FetchSections(b, maxRequestLen)
- res := make([][]byte, len(s))
- for i, ss := range s {
- res[i] = matcherTestVector(b, ss)
- atomic.AddUint32(cnt, 1)
- }
- m.Deliver(b, s, res)
- }
- }()
+// testMatcherDiffBatches runs the given matcher test in single-delivery and also
+// in batched delivery mode, verifying that both kinds of deliveries are handled
+// correctly and result in the same request count.
+func testMatcherDiffBatches(t *testing.T, filter [][]bloomIndexes, blocks uint64, intermittent bool, retrievals uint32) {
+ singleton := testMatcher(t, filter, blocks, intermittent, retrievals, 1)
+ batched := testMatcher(t, filter, blocks, intermittent, retrievals, 16)
+
+ if singleton != batched {
+ t.Errorf("filter = %v blocks = %v intermittent = %v: request count mismatch, %v in signleton vs. %v in batched mode", filter, blocks, intermittent, singleton, batched)
}
}
-func testMatcher(t *testing.T, idxs [][]types.BloomIndexList, cnt uint64, stopOnMatches bool, expCount uint32) uint32 {
- count1 := testMatcherWithReqCount(t, idxs, cnt, stopOnMatches, expCount, 1)
- count16 := testMatcherWithReqCount(t, idxs, cnt, stopOnMatches, expCount, 16)
- if count1 != count16 {
- t.Errorf("Error matching idxs = %v count = %v stopOnMatches = %v: request count mismatch, %v with maxReqCount = 1 vs. %v with maxReqCount = 16", idxs, cnt, stopOnMatches, count1, count16)
+// testMatcherBothModes runs the given matcher test in both continuous as well as
+// in intermittent mode, verifying that the request counts match each other.
+func testMatcherBothModes(t *testing.T, filter [][]bloomIndexes, blocks uint64, retrievals uint32) {
+ continuous := testMatcher(t, filter, blocks, false, retrievals, 16)
+ intermittent := testMatcher(t, filter, blocks, true, retrievals, 16)
+
+ if continuous != intermittent {
+ t.Errorf("filter = %v blocks = %v: request count mismatch, %v in continuous vs. %v in intermittent mode", filter, blocks, continuous, intermittent)
}
- return count1
}
-func testMatcherWithReqCount(t *testing.T, idxs [][]types.BloomIndexList, cnt uint64, stopOnMatches bool, expCount uint32, maxReqCount int) uint32 {
- m := NewMatcher(testSectionSize, nil, nil)
+// testMatcher is a generic tester to run the given matcher test and return the
+// number of requests made for cross validation between different modes.
+func testMatcher(t *testing.T, filter [][]bloomIndexes, blocks uint64, intermittent bool, retrievals uint32, maxReqCount int) uint32 {
+ // Create a new matcher and simulate our explicit random bitsets
+ matcher := NewMatcher(testSectionSize, nil, nil)
- for _, idxss := range idxs {
- for _, idxs := range idxss {
- for _, idx := range idxs {
- m.newFetcher(idx)
+ matcher.addresses = filter[0]
+ matcher.topics = filter[1:]
+
+ for _, rule := range filter {
+ for _, topic := range rule {
+ for _, bit := range topic {
+ matcher.addScheduler(bit)
}
}
}
+ // Track the number of retrieval requests made
+ var requested uint32
- m.addresses = idxs[0]
- m.topics = idxs[1:]
- var reqCount uint32
+ // Start the matching session for the filter and the retriever goroutines
+ quit := make(chan struct{})
+ matches := make(chan uint64, 16)
- stop := make(chan struct{})
- chn := m.Start(0, cnt-1)
- testServeMatcher(m, stop, &reqCount, maxReqCount)
+ session, err := matcher.Start(0, blocks-1, matches)
+ if err != nil {
+ t.Fatalf("failed to stat matcher session: %v", err)
+ }
+ startRetrievers(session, quit, &requested, maxReqCount)
- for i := uint64(0); i < cnt; i++ {
- if expMatch3(idxs, i) {
- match, ok := <-chn
+ // Iterate over all the blocks and verify that the pipeline produces the correct matches
+ for i := uint64(0); i < blocks; i++ {
+ if expMatch3(filter, i) {
+ match, ok := <-matches
if !ok {
- t.Errorf("Error matching idxs = %v count = %v stopOnMatches = %v: expected #%v, results channel closed", idxs, cnt, stopOnMatches, i)
+ t.Errorf("filter = %v blocks = %v intermittent = %v: expected #%v, results channel closed", filter, blocks, intermittent, i)
return 0
}
if match != i {
- t.Errorf("Error matching idxs = %v count = %v stopOnMatches = %v: expected #%v, got #%v", idxs, cnt, stopOnMatches, i, match)
+ t.Errorf("filter = %v blocks = %v intermittent = %v: expected #%v, got #%v", filter, blocks, intermittent, i, match)
}
- if stopOnMatches {
- m.Stop()
- close(stop)
- stop = make(chan struct{})
- chn = m.Start(i+1, cnt-1)
- testServeMatcher(m, stop, &reqCount, maxReqCount)
+ // If we're testing intermittent mode, abort and restart the pipeline
+ if intermittent {
+ session.Close(time.Second)
+ close(quit)
+
+ quit = make(chan struct{})
+ matches = make(chan uint64, 16)
+
+ session, err = matcher.Start(i+1, blocks-1, matches)
+ if err != nil {
+ t.Fatalf("failed to stat matcher session: %v", err)
+ }
+ startRetrievers(session, quit, &requested, maxReqCount)
}
}
}
- match, ok := <-chn
+ // Ensure the result channel is torn down after the last block
+ match, ok := <-matches
if ok {
- t.Errorf("Error matching idxs = %v count = %v stopOnMatches = %v: expected closed channel, got #%v", idxs, cnt, stopOnMatches, match)
+ t.Errorf("filter = %v blocks = %v intermittent = %v: expected closed channel, got #%v", filter, blocks, intermittent, match)
}
- m.Stop()
- close(stop)
+ // Clean up the session and ensure we match the expected retrieval count
+ session.Close(time.Second)
+ close(quit)
- if expCount != 0 && expCount != reqCount {
- t.Errorf("Error matching idxs = %v count = %v stopOnMatches = %v: request count mismatch, expected #%v, got #%v", idxs, cnt, stopOnMatches, expCount, reqCount)
+ if retrievals != 0 && requested != retrievals {
+ t.Errorf("filter = %v blocks = %v intermittent = %v: request count mismatch, have #%v, want #%v", filter, blocks, intermittent, requested, retrievals)
}
+ return requested
+}
+
+// startRetrievers starts a batch of goroutines listening for section requests
+// and serving them.
+func startRetrievers(session *MatcherSession, quit chan struct{}, retrievals *uint32, batch int) {
+ requests := make(chan chan *Retrieval)
+
+ for i := 0; i < 10; i++ {
+ // Start a multiplexer to test multiple threaded execution
+ go session.Multiplex(batch, 100*time.Microsecond, requests)
- return reqCount
+ // Start a service to match the above multiplexer
+ go func() {
+ for {
+ // Wait for a service request or a shutdown
+ select {
+ case <-quit:
+ return
+
+ case request := <-requests:
+ task := <-request
+
+ task.Bitsets = make([][]byte, len(task.Sections))
+ for i, section := range task.Sections {
+ if rand.Int()%4 != 0 { // Handle occasional missing deliveries
+ task.Bitsets[i] = generateBitset(task.Bit, section)
+ atomic.AddUint32(retrievals, 1)
+ }
+ }
+ request <- task
+ }
+ }
+ }()
+ }
}
-func testRandomIdxs(l []int, max int) [][]types.BloomIndexList {
- res := make([][]types.BloomIndexList, len(l))
- for i, ll := range l {
- res[i] = make([]types.BloomIndexList, ll)
- for j, _ := range res[i] {
- for k, _ := range res[i][j] {
- res[i][j][k] = uint(rand.Intn(max-1) + 2)
+// generateBitset generates the rotated bitset for the given bloom bit and section
+// numbers.
+func generateBitset(bit uint, section uint64) []byte {
+ bitset := make([]byte, testSectionSize/8)
+ for i := 0; i < len(bitset); i++ {
+ for b := 0; b < 8; b++ {
+ blockIdx := section*testSectionSize + uint64(i*8+b)
+ bitset[i] += bitset[i]
+ if (blockIdx % uint64(bit)) == 0 {
+ bitset[i]++
}
}
}
- return res
+ return bitset
}
-func TestMatcher(t *testing.T) {
- testMatcher(t, [][]types.BloomIndexList{{{10, 20, 30}}}, 100000, false, 75)
- testMatcher(t, [][]types.BloomIndexList{{{32, 3125, 100}}, {{40, 50, 10}}}, 100000, false, 81)
- testMatcher(t, [][]types.BloomIndexList{{{4, 8, 11}, {7, 8, 17}}, {{9, 9, 12}, {15, 20, 13}}, {{18, 15, 15}, {12, 10, 4}}}, 10000, false, 36)
+func expMatch1(filter bloomIndexes, i uint64) bool {
+ for _, ii := range filter {
+ if (i % uint64(ii)) != 0 {
+ return false
+ }
+ }
+ return true
}
-func TestMatcherStopOnMatches(t *testing.T) {
- testMatcher(t, [][]types.BloomIndexList{{{10, 20, 30}}}, 100000, true, 75)
- testMatcher(t, [][]types.BloomIndexList{{{4, 8, 11}, {7, 8, 17}}, {{9, 9, 12}, {15, 20, 13}}, {{18, 15, 15}, {12, 10, 4}}}, 10000, true, 36)
+func expMatch2(filter []bloomIndexes, i uint64) bool {
+ for _, ii := range filter {
+ if expMatch1(ii, i) {
+ return true
+ }
+ }
+ return false
}
-func TestMatcherRandom(t *testing.T) {
- for i := 0; i < 20; i++ {
- testMatcher(t, testRandomIdxs([]int{1}, 50), 100000, false, 0)
- testMatcher(t, testRandomIdxs([]int{3}, 50), 100000, false, 0)
- testMatcher(t, testRandomIdxs([]int{2, 2, 2}, 20), 100000, false, 0)
- testMatcher(t, testRandomIdxs([]int{5, 5, 5}, 50), 100000, false, 0)
- idxs := testRandomIdxs([]int{2, 2, 2}, 20)
- reqCount := testMatcher(t, idxs, 10000, false, 0)
- testMatcher(t, idxs, 10000, true, reqCount)
+func expMatch3(filter [][]bloomIndexes, i uint64) bool {
+ for _, ii := range filter {
+ if !expMatch2(ii, i) {
+ return false
+ }
}
+ return true
}
diff --git a/core/bloombits/scheduler.go b/core/bloombits/scheduler.go
new file mode 100644
index 000000000..6449c7465
--- /dev/null
+++ b/core/bloombits/scheduler.go
@@ -0,0 +1,181 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package bloombits
+
+import (
+ "sync"
+)
+
+// request represents a bloom retrieval task to prioritize and pull from the local
+// database or remotely from the network.
+type request struct {
+ section uint64 // Section index to retrieve the bit-vector from
+ bit uint // Bit index within the section to retrieve the vector of
+}
+
+// response represents the state of a requested bit-vector through a scheduler.
+type response struct {
+ cached []byte // Cached bits to dedup multiple requests
+ done chan struct{} // Channel to allow waiting for completion
+}
+
+// scheduler handles the scheduling of bloom-filter retrieval operations for
+// entire section-batches belonging to a single bloom bit. Beside scheduling the
+// retrieval operations, this struct also deduplicates the requests and caches
+// the results to minimize network/database overhead even in complex filtering
+// scenarios.
+type scheduler struct {
+ bit uint // Index of the bit in the bloom filter this scheduler is responsible for
+ responses map[uint64]*response // Currently pending retrieval requests or already cached responses
+ lock sync.Mutex // Lock protecting the responses from concurrent access
+}
+
+// newScheduler creates a new bloom-filter retrieval scheduler for a specific
+// bit index.
+func newScheduler(idx uint) *scheduler {
+ return &scheduler{
+ bit: idx,
+ responses: make(map[uint64]*response),
+ }
+}
+
+// run creates a retrieval pipeline, receiving section indexes from sections and
+// returning the results in the same order through the done channel. Concurrent
+// runs of the same scheduler are allowed, leading to retrieval task deduplication.
+func (s *scheduler) run(sections chan uint64, dist chan *request, done chan []byte, quit chan struct{}, wg *sync.WaitGroup) {
+ // Create a forwarder channel between requests and responses of the same size as
+ // the distribution channel (since that will block the pipeline anyway).
+ pend := make(chan uint64, cap(dist))
+
+ // Start the pipeline schedulers to forward between user -> distributor -> user
+ wg.Add(2)
+ go s.scheduleRequests(sections, dist, pend, quit, wg)
+ go s.scheduleDeliveries(pend, done, quit, wg)
+}
+
+// reset cleans up any leftovers from previous runs. This is required before a
+// restart to ensure that no previously requested but never delivered state will
+// cause a lockup.
+func (s *scheduler) reset() {
+ s.lock.Lock()
+ defer s.lock.Unlock()
+
+ for section, res := range s.responses {
+ if res.cached == nil {
+ delete(s.responses, section)
+ }
+ }
+}
+
+// scheduleRequests reads section retrieval requests from the input channel,
+// deduplicates the stream and pushes unique retrieval tasks into the distribution
+// channel for a database or network layer to honour.
+func (s *scheduler) scheduleRequests(reqs chan uint64, dist chan *request, pend chan uint64, quit chan struct{}, wg *sync.WaitGroup) {
+ // Clean up the goroutine and pipeline when done
+ defer wg.Done()
+ defer close(pend)
+
+ // Keep reading and scheduling section requests
+ for {
+ select {
+ case <-quit:
+ return
+
+ case section, ok := <-reqs:
+ // New section retrieval requested
+ if !ok {
+ return
+ }
+ // Deduplicate retrieval requests
+ unique := false
+
+ s.lock.Lock()
+ if s.responses[section] == nil {
+ s.responses[section] = &response{
+ done: make(chan struct{}),
+ }
+ unique = true
+ }
+ s.lock.Unlock()
+
+ // Schedule the section for retrieval and notify the deliverer to expect this section
+ if unique {
+ select {
+ case <-quit:
+ return
+ case dist <- &request{bit: s.bit, section: section}:
+ }
+ }
+ select {
+ case <-quit:
+ return
+ case pend <- section:
+ }
+ }
+ }
+}
+
+// scheduleDeliveries reads section acceptance notifications and waits for them
+// to be delivered, pushing them into the output data buffer.
+func (s *scheduler) scheduleDeliveries(pend chan uint64, done chan []byte, quit chan struct{}, wg *sync.WaitGroup) {
+ // Clean up the goroutine and pipeline when done
+ defer wg.Done()
+ defer close(done)
+
+ // Keep reading notifications and scheduling deliveries
+ for {
+ select {
+ case <-quit:
+ return
+
+ case idx, ok := <-pend:
+ // New section retrieval pending
+ if !ok {
+ return
+ }
+ // Wait until the request is honoured
+ s.lock.Lock()
+ res := s.responses[idx]
+ s.lock.Unlock()
+
+ select {
+ case <-quit:
+ return
+ case <-res.done:
+ }
+ // Deliver the result
+ select {
+ case <-quit:
+ return
+ case done <- res.cached:
+ }
+ }
+ }
+}
+
+// deliver is called by the request distributor when a reply to a request arrives.
+func (s *scheduler) deliver(sections []uint64, data [][]byte) {
+ s.lock.Lock()
+ defer s.lock.Unlock()
+
+ for i, section := range sections {
+ if res := s.responses[section]; res != nil && res.cached == nil { // Avoid non-requests and double deliveries
+ res.cached = data[i]
+ close(res.done)
+ }
+ }
+}
diff --git a/core/bloombits/scheduler_test.go b/core/bloombits/scheduler_test.go
new file mode 100644
index 000000000..8a159c237
--- /dev/null
+++ b/core/bloombits/scheduler_test.go
@@ -0,0 +1,105 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package bloombits
+
+import (
+ "bytes"
+ "math/big"
+ "math/rand"
+ "sync"
+ "sync/atomic"
+ "testing"
+ "time"
+)
+
+// Tests that the scheduler can deduplicate and forward retrieval requests to
+// underlying fetchers and serve responses back, irrespective of the concurrency
+// of the requesting clients or serving data fetchers.
+func TestSchedulerSingleClientSingleFetcher(t *testing.T) { testScheduler(t, 1, 1, 5000) } // 1 client, 1 fetcher, 5000 requests
+func TestSchedulerSingleClientMultiFetcher(t *testing.T) { testScheduler(t, 1, 10, 5000) } // 1 client, 10 fetchers, 5000 requests
+func TestSchedulerMultiClientSingleFetcher(t *testing.T) { testScheduler(t, 10, 1, 5000) } // 10 clients, 1 fetcher, 5000 requests
+func TestSchedulerMultiClientMultiFetcher(t *testing.T) { testScheduler(t, 10, 10, 5000) } // 10 clients, 10 fetchers, 5000 requests
+
+func testScheduler(t *testing.T, clients int, fetchers int, requests int) { // Runs `clients` request pipelines against `fetchers` delivery goroutines on one shared scheduler
+ f := newScheduler(0) // presumably 0 is the bloom bit index being scheduled - TODO confirm against newScheduler
+
+ // Create a batch of handler goroutines that respond to bloom bit requests and
+ // deliver them to the scheduler.
+ var fetchPend sync.WaitGroup
+ fetchPend.Add(fetchers)
+ defer fetchPend.Wait() // deferred first, so it runs after close(fetch) below and waits for the fetchers to exit
+
+ fetch := make(chan *request, 16)
+ defer close(fetch) // closing ends every fetcher's range loop
+
+ var delivered uint32 // number of requests picked up by fetchers, updated atomically
+ for i := 0; i < fetchers; i++ {
+ go func() {
+ defer fetchPend.Done()
+
+ for req := range fetch {
+ time.Sleep(time.Duration(rand.Intn(int(100 * time.Microsecond)))) // random delay below 100µs before delivering
+ atomic.AddUint32(&delivered, 1)
+
+ f.deliver([]uint64{
+ req.section + uint64(requests), // Non-requested data: clients only ask for sections below `requests` (ensure it doesn't go out of bounds)
+ req.section, // Requested data
+ req.section, // Duplicated data (ensure it doesn't double close anything)
+ }, [][]byte{
+ []byte{},
+ new(big.Int).SetUint64(req.section).Bytes(),
+ new(big.Int).SetUint64(req.section).Bytes(),
+ })
+ }
+ }()
+ }
+ // Start a batch of goroutines to concurrently run scheduling tasks
+ quit := make(chan struct{}) // never closed in this test
+
+ var pend sync.WaitGroup
+ pend.Add(clients)
+
+ for i := 0; i < clients; i++ {
+ go func() {
+ defer pend.Done()
+
+ in := make(chan uint64, 16)
+ out := make(chan []byte, 16)
+
+ f.run(in, fetch, out, quit, &pend) // presumably starts the pipeline goroutines without blocking and registers them on pend - TODO confirm against run
+
+ go func() {
+ for j := 0; j < requests; j++ {
+ in <- uint64(j)
+ }
+ close(in)
+ }()
+
+ for j := 0; j < requests; j++ {
+ bits := <-out // results are expected back in the same order the sections were requested
+ if want := new(big.Int).SetUint64(uint64(j)).Bytes(); !bytes.Equal(bits, want) {
+ t.Errorf("vector %d: delivered content mismatch: have %x, want %x", j, bits, want)
+ }
+ }
+ }()
+ }
+ pend.Wait()
+
+ if have := atomic.LoadUint32(&delivered); int(have) != requests { // total fetches must equal the number of distinct sections, i.e. duplicates across clients were not re-fetched
+ t.Errorf("request count mismatch: have %v, want %v", have, requests)
+ }
+}
diff --git a/core/bloombits/utils.go b/core/bloombits/utils.go
deleted file mode 100644
index d0755cb65..000000000
--- a/core/bloombits/utils.go
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright 2017 The go-ethereum Authors
-// This file is part of the go-ethereum library.
-//
-// The go-ethereum library is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Lesser General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// The go-ethereum library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public License
-// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
-package bloombits
-
-import (
- "github.com/ethereum/go-ethereum/core/types"
-)
-
-const BloomLength = 2048
-
-// BloomBitsCreator takes SectionSize number of header bloom filters and calculates the bloomBits vectors of the section
-type BloomBitsCreator struct {
- blooms [BloomLength][]byte
- sectionSize, bitIndex uint64
-}
-
-func NewBloomBitsCreator(sectionSize uint64) *BloomBitsCreator {
- b := &BloomBitsCreator{sectionSize: sectionSize}
- for i, _ := range b.blooms {
- b.blooms[i] = make([]byte, sectionSize/8)
- }
- return b
-}
-
-// AddHeaderBloom takes a single bloom filter and sets the corresponding bit column in memory accordingly
-func (b *BloomBitsCreator) AddHeaderBloom(bloom types.Bloom) {
- if b.bitIndex >= b.sectionSize {
- panic("too many header blooms added")
- }
-
- byteIdx := b.bitIndex / 8
- bitMask := byte(1) << byte(7-b.bitIndex%8)
- for bloomBitIdx, _ := range b.blooms {
- bloomByteIdx := BloomLength/8 - 1 - bloomBitIdx/8
- bloomBitMask := byte(1) << byte(bloomBitIdx%8)
- if (bloom[bloomByteIdx] & bloomBitMask) != 0 {
- b.blooms[bloomBitIdx][byteIdx] |= bitMask
- }
- }
- b.bitIndex++
-}
-
-// GetBitVector returns the bit vector belonging to the given bit index after header blooms have been added
-func (b *BloomBitsCreator) GetBitVector(idx uint) []byte {
- if b.bitIndex != b.sectionSize {
- panic("not enough header blooms added")
- }
-
- return b.blooms[idx][:]
-}