diff options
Diffstat (limited to 'core')
-rw-r--r-- | core/blockchain.go | 15 | ||||
-rw-r--r-- | core/bloombits/fetcher_test.go | 101 | ||||
-rw-r--r-- | core/bloombits/matcher.go | 579 | ||||
-rw-r--r-- | core/bloombits/matcher_test.go | 196 | ||||
-rw-r--r-- | core/bloombits/utils.go | 63 | ||||
-rw-r--r-- | core/chain_indexer.go | 76 | ||||
-rw-r--r-- | core/chain_indexer_test.go | 7 | ||||
-rw-r--r-- | core/database_util.go | 69 | ||||
-rw-r--r-- | core/database_util_test.go | 108 | ||||
-rw-r--r-- | core/types/bloom9.go | 14 |
10 files changed, 1037 insertions, 191 deletions
diff --git a/core/blockchain.go b/core/blockchain.go index 0bb12fc19..d74b3520b 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -759,12 +759,6 @@ func (bc *BlockChain) InsertReceiptChain(blockChain types.Blocks, receiptChain [ log.Crit("Failed to write block receipts", "err", err) return } - if err := WriteMipmapBloom(bc.chainDb, block.NumberU64(), receipts); err != nil { - errs[index] = fmt.Errorf("failed to write log blooms: %v", err) - atomic.AddInt32(&failed, 1) - log.Crit("Failed to write log blooms", "err", err) - return - } if err := WriteTxLookupEntries(bc.chainDb, block); err != nil { errs[index] = fmt.Errorf("failed to write lookup metadata: %v", err) atomic.AddInt32(&failed, 1) @@ -1017,10 +1011,6 @@ func (bc *BlockChain) InsertChain(chain types.Blocks) (int, error) { if err := WriteTxLookupEntries(bc.chainDb, block); err != nil { return i, err } - // Write map map bloom filters - if err := WriteMipmapBloom(bc.chainDb, block.NumberU64(), receipts); err != nil { - return i, err - } // Write hash preimages if err := WritePreimages(bc.chainDb, block.NumberU64(), state.Preimages()); err != nil { return i, err @@ -1178,11 +1168,6 @@ func (bc *BlockChain) reorg(oldBlock, newBlock *types.Block) error { if err := WriteTxLookupEntries(bc.chainDb, block); err != nil { return err } - // Write map map bloom filters - receipts := GetBlockReceipts(bc.chainDb, block.Hash(), block.NumberU64()) - if err := WriteMipmapBloom(bc.chainDb, block.NumberU64(), receipts); err != nil { - return err - } addedTxs = append(addedTxs, block.Transactions()...) } diff --git a/core/bloombits/fetcher_test.go b/core/bloombits/fetcher_test.go new file mode 100644 index 000000000..9c229cf8d --- /dev/null +++ b/core/bloombits/fetcher_test.go @@ -0,0 +1,101 @@ +// Copyright 2017 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. +package bloombits + +import ( + "bytes" + "encoding/binary" + "math/rand" + "sync" + "sync/atomic" + "testing" + "time" +) + +const testFetcherReqCount = 5000 + +func fetcherTestVector(b uint, s uint64) []byte { + r := make([]byte, 10) + binary.BigEndian.PutUint16(r[0:2], uint16(b)) + binary.BigEndian.PutUint64(r[2:10], s) + return r +} + +func TestFetcher(t *testing.T) { + testFetcher(t, 1) +} + +func TestFetcherMultipleReaders(t *testing.T) { + testFetcher(t, 10) +} + +func testFetcher(t *testing.T, cnt int) { + f := &fetcher{ + requestMap: make(map[uint64]fetchRequest), + } + distCh := make(chan distRequest, channelCap) + stop := make(chan struct{}) + var reqCount uint32 + + for i := 0; i < 10; i++ { + go func() { + for { + req, ok := <-distCh + if !ok { + return + } + time.Sleep(time.Duration(rand.Intn(100000))) + atomic.AddUint32(&reqCount, 1) + f.deliver([]uint64{req.sectionIndex}, [][]byte{fetcherTestVector(req.bloomIndex, req.sectionIndex)}) + } + }() + } + + var wg, wg2 sync.WaitGroup + for cc := 0; cc < cnt; cc++ { + wg.Add(1) + in := make(chan uint64, channelCap) + out := f.fetch(in, distCh, stop, &wg2) + + time.Sleep(time.Millisecond * 10 * time.Duration(cc)) + go func() { + for i := uint64(0); i < testFetcherReqCount; i++ { + in <- i + } + }() + + go func() { + for i := uint64(0); i < testFetcherReqCount; i++ { + bv := <-out + if !bytes.Equal(bv, fetcherTestVector(0, i)) { + if len(bv) != 10 { + t.Errorf("Vector #%d length is %d, expected 10", i, len(bv)) + } else { + j := binary.BigEndian.Uint64(bv[2:10]) + t.Errorf("Expected vector #%d, fetched #%d", i, j) + } + } + } + wg.Done() + }() + } + + wg.Wait() + close(stop) + if reqCount != testFetcherReqCount { + t.Errorf("Request count mismatch: expected %v, got %v", testFetcherReqCount, reqCount) + } +} diff --git a/core/bloombits/matcher.go b/core/bloombits/matcher.go new file mode 100644 index 000000000..5a7df6b1c --- /dev/null +++ b/core/bloombits/matcher.go @@ -0,0 +1,579 @@ +// Copyright 2017 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. +package bloombits + +import ( + "sync" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/common/bitutil" + "github.com/ethereum/go-ethereum/core/types" +) + +const channelCap = 100 + +// fetcher handles bit vector retrieval pipelines for a single bit index +type fetcher struct { + bloomIndex uint + requestMap map[uint64]fetchRequest + requestLock sync.RWMutex +} + +// fetchRequest represents the state of a bit vector requested from a fetcher. When a distRequest has been sent to the distributor but +// the data has not been delivered yet, queued is true. When delivered, it is stored in the data field and the delivered channel is closed. +type fetchRequest struct { + data []byte + queued bool + delivered chan struct{} +} + +// distRequest is sent by the fetcher to the distributor which groups and prioritizes these requests. +type distRequest struct { + bloomIndex uint + sectionIndex uint64 +} + +// fetch creates a retrieval pipeline, receiving section indexes from sectionCh and returning the results +// in the same order through the returned channel. Multiple fetch instances of the same fetcher are allowed +// to run in parallel, in case the same bit index appears multiple times in the filter structure. Each section +// is requested only once, requests are sent to the request distributor (part of Matcher) through distCh. +func (f *fetcher) fetch(sectionCh chan uint64, distCh chan distRequest, stop chan struct{}, wg *sync.WaitGroup) chan []byte { + dataCh := make(chan []byte, channelCap) + returnCh := make(chan uint64, channelCap) + wg.Add(2) + + go func() { + defer wg.Done() + defer close(returnCh) + + for { + select { + case <-stop: + return + case idx, ok := <-sectionCh: + if !ok { + return + } + + req := false + f.requestLock.Lock() + r := f.requestMap[idx] + if r.data == nil { + req = !r.queued + r.queued = true + if r.delivered == nil { + r.delivered = make(chan struct{}) + } + f.requestMap[idx] = r + } + f.requestLock.Unlock() + if req { + distCh <- distRequest{bloomIndex: f.bloomIndex, sectionIndex: idx} // success is guaranteed, distibuteRequests shuts down after fetch + } + select { + case <-stop: + return + case returnCh <- idx: + } + } + } + }() + + go func() { + defer wg.Done() + defer close(dataCh) + + for { + select { + case <-stop: + return + case idx, ok := <-returnCh: + if !ok { + return + } + + f.requestLock.RLock() + r := f.requestMap[idx] + f.requestLock.RUnlock() + + if r.data == nil { + select { + case <-stop: + return + case <-r.delivered: + f.requestLock.RLock() + r = f.requestMap[idx] + f.requestLock.RUnlock() + } + } + select { + case <-stop: + return + case dataCh <- r.data: + } + } + } + }() + + return dataCh +} + +// deliver is called by the request distributor when a reply to a request has +// arrived +func (f *fetcher) deliver(sectionIdxList []uint64, data [][]byte) { + f.requestLock.Lock() + defer f.requestLock.Unlock() + + for i, sectionIdx := range sectionIdxList { + r := f.requestMap[sectionIdx] + if r.data != nil { + panic("BloomBits section data delivered twice") + } + r.data = data[i] + close(r.delivered) + f.requestMap[sectionIdx] = r + } +} + +// Matcher is a pipelined structure of fetchers and logic matchers which perform +// binary AND/OR operations on the bitstreams, finally creating a stream of potential matches. +type Matcher struct { + addresses []types.BloomIndexList + topics [][]types.BloomIndexList + fetchers map[uint]*fetcher + sectionSize uint64 + + distCh chan distRequest + reqs map[uint][]uint64 + freeQueues map[uint]struct{} + allocQueue []chan uint + running bool + stop chan struct{} + lock sync.Mutex + wg, distWg sync.WaitGroup +} + +// NewMatcher creates a new Matcher instance +func NewMatcher(sectionSize uint64, addresses []common.Address, topics [][]common.Hash) *Matcher { + m := &Matcher{ + fetchers: make(map[uint]*fetcher), + reqs: make(map[uint][]uint64), + freeQueues: make(map[uint]struct{}), + distCh: make(chan distRequest, channelCap), + sectionSize: sectionSize, + } + m.setAddresses(addresses) + m.setTopics(topics) + return m +} + +// setAddresses matches only logs that are generated from addresses that are included +// in the given addresses. +func (m *Matcher) setAddresses(addresses []common.Address) { + m.addresses = make([]types.BloomIndexList, len(addresses)) + for i, address := range addresses { + m.addresses[i] = types.BloomIndexes(address.Bytes()) + } + + for _, bloomIndexList := range m.addresses { + for _, bloomIndex := range bloomIndexList { + m.newFetcher(bloomIndex) + } + } +} + +// setTopics matches only logs that have topics matching the given topics. +func (m *Matcher) setTopics(topics [][]common.Hash) { + m.topics = nil +loop: + for _, topicList := range topics { + t := make([]types.BloomIndexList, len(topicList)) + for i, topic := range topicList { + if (topic == common.Hash{}) { + continue loop + } + t[i] = types.BloomIndexes(topic.Bytes()) + } + m.topics = append(m.topics, t) + } + + for _, bloomIndexLists := range m.topics { + for _, bloomIndexList := range bloomIndexLists { + for _, bloomIndex := range bloomIndexList { + m.newFetcher(bloomIndex) + } + } + } +} + +// match creates a daisy-chain of sub-matchers, one for the address set and one for each topic set, each +// sub-matcher receiving a section only if the previous ones have all found a potential match in one of +// the blocks of the section, then binary AND-ing its own matches and forwaring the result to the next one +func (m *Matcher) match(processCh chan partialMatches) chan partialMatches { + indexLists := m.topics + if len(m.addresses) > 0 { + indexLists = append([][]types.BloomIndexList{m.addresses}, indexLists...) + } + m.distributeRequests() + + for _, subIndexList := range indexLists { + processCh = m.subMatch(processCh, subIndexList) + } + return processCh +} + +// partialMatches with a non-nil vector represents a section in which some sub-matchers have already +// found potential matches. Subsequent sub-matchers will binary AND their matches with this vector. +// If vector is nil, it represents a section to be processed by the first sub-matcher. +type partialMatches struct { + sectionIndex uint64 + vector []byte +} + +// newFetcher adds a fetcher for the given bit index if it has not existed before +func (m *Matcher) newFetcher(idx uint) { + if _, ok := m.fetchers[idx]; ok { + return + } + f := &fetcher{ + bloomIndex: idx, + requestMap: make(map[uint64]fetchRequest), + } + m.fetchers[idx] = f +} + +// subMatch creates a sub-matcher that filters for a set of addresses or topics, binary OR-s those matches, then +// binary AND-s the result to the daisy-chain input (processCh) and forwards it to the daisy-chain output. +// The matches of each address/topic are calculated by fetching the given sections of the three bloom bit indexes belonging to +// that address/topic, and binary AND-ing those vectors together. +func (m *Matcher) subMatch(processCh chan partialMatches, bloomIndexLists []types.BloomIndexList) chan partialMatches { + // set up fetchers + fetchIndexChannels := make([][3]chan uint64, len(bloomIndexLists)) + fetchDataChannels := make([][3]chan []byte, len(bloomIndexLists)) + for i, bloomIndexList := range bloomIndexLists { + for j, bloomIndex := range bloomIndexList { + fetchIndexChannels[i][j] = make(chan uint64, channelCap) + fetchDataChannels[i][j] = m.fetchers[bloomIndex].fetch(fetchIndexChannels[i][j], m.distCh, m.stop, &m.wg) + } + } + + fetchedCh := make(chan partialMatches, channelCap) // entries from processCh are forwarded here after fetches have been initiated + resultsCh := make(chan partialMatches, channelCap) + + m.wg.Add(2) + // goroutine for starting retrievals + go func() { + defer m.wg.Done() + + for { + select { + case <-m.stop: + return + case s, ok := <-processCh: + if !ok { + close(fetchedCh) + for _, fetchIndexChs := range fetchIndexChannels { + for _, fetchIndexCh := range fetchIndexChs { + close(fetchIndexCh) + } + } + return + } + + for _, fetchIndexChs := range fetchIndexChannels { + for _, fetchIndexCh := range fetchIndexChs { + select { + case <-m.stop: + return + case fetchIndexCh <- s.sectionIndex: + } + } + } + select { + case <-m.stop: + return + case fetchedCh <- s: + } + } + } + }() + + // goroutine for processing retrieved data + go func() { + defer m.wg.Done() + + for { + select { + case <-m.stop: + return + case s, ok := <-fetchedCh: + if !ok { + close(resultsCh) + return + } + + var orVector []byte + for _, fetchDataChs := range fetchDataChannels { + var andVector []byte + for _, fetchDataCh := range fetchDataChs { + var data []byte + select { + case <-m.stop: + return + case data = <-fetchDataCh: + } + if andVector == nil { + andVector = make([]byte, int(m.sectionSize/8)) + copy(andVector, data) + } else { + bitutil.ANDBytes(andVector, andVector, data) + } + } + if orVector == nil { + orVector = andVector + } else { + bitutil.ORBytes(orVector, orVector, andVector) + } + } + + if orVector == nil { + orVector = make([]byte, int(m.sectionSize/8)) + } + if s.vector != nil { + bitutil.ANDBytes(orVector, orVector, s.vector) + } + if bitutil.TestBytes(orVector) { + select { + case <-m.stop: + return + case resultsCh <- partialMatches{s.sectionIndex, orVector}: + } + } + } + } + }() + + return resultsCh +} + +// Start starts the matching process and returns a stream of bloom matches in +// a given range of blocks. +// It returns a results channel immediately and stops if Stop is called or there +// are no more matches in the range (in which case the results channel is closed). +// Start/Stop can be called multiple times for different ranges, in which case already +// delivered bit vectors are not requested again. +func (m *Matcher) Start(begin, end uint64) chan uint64 { + m.stop = make(chan struct{}) + processCh := make(chan partialMatches, channelCap) + resultsCh := make(chan uint64, channelCap) + + res := m.match(processCh) + + startSection := begin / m.sectionSize + endSection := end / m.sectionSize + + m.wg.Add(2) + go func() { + defer m.wg.Done() + defer close(processCh) + + for i := startSection; i <= endSection; i++ { + select { + case processCh <- partialMatches{i, nil}: + case <-m.stop: + return + } + } + }() + + go func() { + defer m.wg.Done() + defer close(resultsCh) + + for { + select { + case r, ok := <-res: + if !ok { + return + } + sectionStart := r.sectionIndex * m.sectionSize + s := sectionStart + if begin > s { + s = begin + } + e := sectionStart + m.sectionSize - 1 + if end < e { + e = end + } + for i := s; i <= e; i++ { + b := r.vector[(i-sectionStart)/8] + bit := 7 - i%8 + if b != 0 { + if b&(1<<bit) != 0 { + select { + case <-m.stop: + return + case resultsCh <- i: + } + } + } else { + i += bit + } + } + + case <-m.stop: + return + } + } + }() + + return resultsCh +} + +// Stop stops the matching process +func (m *Matcher) Stop() { + close(m.stop) + m.distWg.Wait() +} + +// distributeRequests receives requests from the fetchers and either queues them +// or immediately forwards them to one of the waiting NextRequest functions. +// Requests with a lower section idx are always prioritized. +func (m *Matcher) distributeRequests() { + m.distWg.Add(1) + stopDist := make(chan struct{}) + go func() { + <-m.stop + m.wg.Wait() + close(stopDist) + }() + + m.running = true + + go func() { + for { + select { + case r := <-m.distCh: + m.lock.Lock() + queue := m.reqs[r.bloomIndex] + i := 0 + for i < len(queue) && r.sectionIndex > queue[i] { + i++ + } + queue = append(queue, 0) + copy(queue[i+1:], queue[i:len(queue)-1]) + queue[i] = r.sectionIndex + m.reqs[r.bloomIndex] = queue + if len(queue) == 1 { + m.freeQueue(r.bloomIndex) + } + m.lock.Unlock() + case <-stopDist: + m.lock.Lock() + for _, ch := range m.allocQueue { + close(ch) + } + m.allocQueue = nil + m.running = false + m.lock.Unlock() + m.distWg.Done() + return + } + } + }() +} + +// freeQueue marks a queue as free if there are no AllocSectionQueue functions +// waiting for allocation. If there is someone waiting, the queue is immediately +// allocated. +func (m *Matcher) freeQueue(bloomIndex uint) { + if len(m.allocQueue) > 0 { + m.allocQueue[0] <- bloomIndex + m.allocQueue = m.allocQueue[1:] + } else { + m.freeQueues[bloomIndex] = struct{}{} + } +} + +// AllocSectionQueue allocates a queue of requested section indexes belonging to the same +// bloom bit index for a client process that can either immediately fetch the contents +// of the queue or wait a little while for more section indexes to be requested. +func (m *Matcher) AllocSectionQueue() (uint, bool) { + m.lock.Lock() + if !m.running { + m.lock.Unlock() + return 0, false + } + + var allocCh chan uint + if len(m.freeQueues) > 0 { + var ( + found bool + bestSection uint64 + bestIndex uint + ) + for bloomIndex, _ := range m.freeQueues { + if !found || m.reqs[bloomIndex][0] < bestSection { + found = true + bestIndex = bloomIndex + bestSection = m.reqs[bloomIndex][0] + } + } + delete(m.freeQueues, bestIndex) + m.lock.Unlock() + return bestIndex, true + } else { + allocCh = make(chan uint) + m.allocQueue = append(m.allocQueue, allocCh) + } + m.lock.Unlock() + + bloomIndex, ok := <-allocCh + return bloomIndex, ok +} + +// SectionCount returns the length of the section index queue belonging to the given bloom bit index +func (m *Matcher) SectionCount(bloomIndex uint) int { + m.lock.Lock() + defer m.lock.Unlock() + + return len(m.reqs[bloomIndex]) +} + +// FetchSections fetches all or part of an already allocated queue and deallocates it +func (m *Matcher) FetchSections(bloomIndex uint, maxCount int) []uint64 { + m.lock.Lock() + defer m.lock.Unlock() + + queue := m.reqs[bloomIndex] + if maxCount < len(queue) { + // return only part of the existing queue, mark the rest as free + m.reqs[bloomIndex] = queue[maxCount:] + m.freeQueue(bloomIndex) + return queue[:maxCount] + } else { + // return the entire queue + delete(m.reqs, bloomIndex) + return queue + } +} + +// Deliver delivers a bit vector to the appropriate fetcher. +// It is possible to deliver data even after Stop has been called. Once a vector has been +// requested, the matcher will keep waiting for delivery. +func (m *Matcher) Deliver(bloomIndex uint, sectionIdxList []uint64, data [][]byte) { + m.fetchers[bloomIndex].deliver(sectionIdxList, data) +} diff --git a/core/bloombits/matcher_test.go b/core/bloombits/matcher_test.go new file mode 100644 index 000000000..bef1491b8 --- /dev/null +++ b/core/bloombits/matcher_test.go @@ -0,0 +1,196 @@ +// Copyright 2017 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. +package bloombits + +import ( + "math/rand" + "sync/atomic" + "testing" + "time" + + "github.com/ethereum/go-ethereum/core/types" +) + +const testSectionSize = 4096 + +func matcherTestVector(b uint, s uint64) []byte { + r := make([]byte, testSectionSize/8) + for i, _ := range r { + var bb byte + for bit := 0; bit < 8; bit++ { + blockIdx := s*testSectionSize + uint64(i*8+bit) + bb += bb + if (blockIdx % uint64(b)) == 0 { + bb++ + } + } + r[i] = bb + } + return r +} + +func expMatch1(idxs types.BloomIndexList, i uint64) bool { + for _, ii := range idxs { + if (i % uint64(ii)) != 0 { + return false + } + } + return true +} + +func expMatch2(idxs []types.BloomIndexList, i uint64) bool { + for _, ii := range idxs { + if expMatch1(ii, i) { + return true + } + } + return false +} + +func expMatch3(idxs [][]types.BloomIndexList, i uint64) bool { + for _, ii := range idxs { + if !expMatch2(ii, i) { + return false + } + } + return true +} + +func testServeMatcher(m *Matcher, stop chan struct{}, cnt *uint32, maxRequestLen int) { + // serve matcher with test vectors + for i := 0; i < 10; i++ { + go func() { + for { + select { + case <-stop: + return + default: + } + b, ok := m.AllocSectionQueue() + if !ok { + return + } + if m.SectionCount(b) < maxRequestLen { + time.Sleep(time.Microsecond * 100) + } + s := m.FetchSections(b, maxRequestLen) + res := make([][]byte, len(s)) + for i, ss := range s { + res[i] = matcherTestVector(b, ss) + atomic.AddUint32(cnt, 1) + } + m.Deliver(b, s, res) + } + }() + } +} + +func testMatcher(t *testing.T, idxs [][]types.BloomIndexList, cnt uint64, stopOnMatches bool, expCount uint32) uint32 { + count1 := testMatcherWithReqCount(t, idxs, cnt, stopOnMatches, expCount, 1) + count16 := testMatcherWithReqCount(t, idxs, cnt, stopOnMatches, expCount, 16) + if count1 != count16 { + t.Errorf("Error matching idxs = %v count = %v stopOnMatches = %v: request count mismatch, %v with maxReqCount = 1 vs. %v with maxReqCount = 16", idxs, cnt, stopOnMatches, count1, count16) + } + return count1 +} + +func testMatcherWithReqCount(t *testing.T, idxs [][]types.BloomIndexList, cnt uint64, stopOnMatches bool, expCount uint32, maxReqCount int) uint32 { + m := NewMatcher(testSectionSize, nil, nil) + + for _, idxss := range idxs { + for _, idxs := range idxss { + for _, idx := range idxs { + m.newFetcher(idx) + } + } + } + + m.addresses = idxs[0] + m.topics = idxs[1:] + var reqCount uint32 + + stop := make(chan struct{}) + chn := m.Start(0, cnt-1) + testServeMatcher(m, stop, &reqCount, maxReqCount) + + for i := uint64(0); i < cnt; i++ { + if expMatch3(idxs, i) { + match, ok := <-chn + if !ok { + t.Errorf("Error matching idxs = %v count = %v stopOnMatches = %v: expected #%v, results channel closed", idxs, cnt, stopOnMatches, i) + return 0 + } + if match != i { + t.Errorf("Error matching idxs = %v count = %v stopOnMatches = %v: expected #%v, got #%v", idxs, cnt, stopOnMatches, i, match) + } + if stopOnMatches { + m.Stop() + close(stop) + stop = make(chan struct{}) + chn = m.Start(i+1, cnt-1) + testServeMatcher(m, stop, &reqCount, maxReqCount) + } + } + } + match, ok := <-chn + if ok { + t.Errorf("Error matching idxs = %v count = %v stopOnMatches = %v: expected closed channel, got #%v", idxs, cnt, stopOnMatches, match) + } + m.Stop() + close(stop) + + if expCount != 0 && expCount != reqCount { + t.Errorf("Error matching idxs = %v count = %v stopOnMatches = %v: request count mismatch, expected #%v, got #%v", idxs, cnt, stopOnMatches, expCount, reqCount) + } + + return reqCount +} + +func testRandomIdxs(l []int, max int) [][]types.BloomIndexList { + res := make([][]types.BloomIndexList, len(l)) + for i, ll := range l { + res[i] = make([]types.BloomIndexList, ll) + for j, _ := range res[i] { + for k, _ := range res[i][j] { + res[i][j][k] = uint(rand.Intn(max-1) + 2) + } + } + } + return res +} + +func TestMatcher(t *testing.T) { + testMatcher(t, [][]types.BloomIndexList{{{10, 20, 30}}}, 100000, false, 75) + testMatcher(t, [][]types.BloomIndexList{{{32, 3125, 100}}, {{40, 50, 10}}}, 100000, false, 81) + testMatcher(t, [][]types.BloomIndexList{{{4, 8, 11}, {7, 8, 17}}, {{9, 9, 12}, {15, 20, 13}}, {{18, 15, 15}, {12, 10, 4}}}, 10000, false, 36) +} + +func TestMatcherStopOnMatches(t *testing.T) { + testMatcher(t, [][]types.BloomIndexList{{{10, 20, 30}}}, 100000, true, 75) + testMatcher(t, [][]types.BloomIndexList{{{4, 8, 11}, {7, 8, 17}}, {{9, 9, 12}, {15, 20, 13}}, {{18, 15, 15}, {12, 10, 4}}}, 10000, true, 36) +} + +func TestMatcherRandom(t *testing.T) { + for i := 0; i < 20; i++ { + testMatcher(t, testRandomIdxs([]int{1}, 50), 100000, false, 0) + testMatcher(t, testRandomIdxs([]int{3}, 50), 100000, false, 0) + testMatcher(t, testRandomIdxs([]int{2, 2, 2}, 20), 100000, false, 0) + testMatcher(t, testRandomIdxs([]int{5, 5, 5}, 50), 100000, false, 0) + idxs := testRandomIdxs([]int{2, 2, 2}, 20) + reqCount := testMatcher(t, idxs, 10000, false, 0) + testMatcher(t, idxs, 10000, true, reqCount) + } +} diff --git a/core/bloombits/utils.go b/core/bloombits/utils.go new file mode 100644 index 000000000..d0755cb65 --- /dev/null +++ b/core/bloombits/utils.go @@ -0,0 +1,63 @@ +// Copyright 2017 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. +package bloombits + +import ( + "github.com/ethereum/go-ethereum/core/types" +) + +const BloomLength = 2048 + +// BloomBitsCreator takes SectionSize number of header bloom filters and calculates the bloomBits vectors of the section +type BloomBitsCreator struct { + blooms [BloomLength][]byte + sectionSize, bitIndex uint64 +} + +func NewBloomBitsCreator(sectionSize uint64) *BloomBitsCreator { + b := &BloomBitsCreator{sectionSize: sectionSize} + for i, _ := range b.blooms { + b.blooms[i] = make([]byte, sectionSize/8) + } + return b +} + +// AddHeaderBloom takes a single bloom filter and sets the corresponding bit column in memory accordingly +func (b *BloomBitsCreator) AddHeaderBloom(bloom types.Bloom) { + if b.bitIndex >= b.sectionSize { + panic("too many header blooms added") + } + + byteIdx := b.bitIndex / 8 + bitMask := byte(1) << byte(7-b.bitIndex%8) + for bloomBitIdx, _ := range b.blooms { + bloomByteIdx := BloomLength/8 - 1 - bloomBitIdx/8 + bloomBitMask := byte(1) << byte(bloomBitIdx%8) + if (bloom[bloomByteIdx] & bloomBitMask) != 0 { + b.blooms[bloomBitIdx][byteIdx] |= bitMask + } + } + b.bitIndex++ +} + +// GetBitVector returns the bit vector belonging to the given bit index after header blooms have been added +func (b *BloomBitsCreator) GetBitVector(idx uint) []byte { + if b.bitIndex != b.sectionSize { + panic("not enough header blooms added") + } + + return b.blooms[idx][:] +} diff --git a/core/chain_indexer.go b/core/chain_indexer.go index 9a88a5b1b..56360b59a 100644 --- a/core/chain_indexer.go +++ b/core/chain_indexer.go @@ -36,7 +36,7 @@ import ( type ChainIndexerBackend interface { // Reset initiates the processing of a new chain segment, potentially terminating // any partially completed operations (in case of a reorg). - Reset(section uint64) + Reset(section uint64, lastSectionHead common.Hash) // Process crunches through the next header in the chain segment. The caller // will ensure a sequential order of headers. @@ -44,7 +44,7 @@ type ChainIndexerBackend interface { // Commit finalizes the section metadata and stores it into the database. This // interface will usually be a batch writer. - Commit(db ethdb.Database) error + Commit() error } // ChainIndexer does a post-processing job for equally sized sections of the @@ -101,10 +101,34 @@ func NewChainIndexer(chainDb, indexDb ethdb.Database, backend ChainIndexerBacken return c } +// AddKnownSectionHead marks a new section head as known/processed if it is newer +// than the already known best section head +func (c *ChainIndexer) AddKnownSectionHead(section uint64, shead common.Hash) { + c.lock.Lock() + defer c.lock.Unlock() + + if section < c.storedSections { + return + } + c.setSectionHead(section, shead) + c.setValidSections(section + 1) +} + +// IndexerChain interface is used for connecting the indexer to a blockchain +type IndexerChain interface { + CurrentHeader() *types.Header + SubscribeChainEvent(ch chan<- ChainEvent) event.Subscription +} + // Start creates a goroutine to feed chain head events into the indexer for -// cascading background processing. -func (c *ChainIndexer) Start(currentHeader *types.Header, eventMux *event.TypeMux) { - go c.eventLoop(currentHeader, eventMux) +// cascading background processing. Children do not need to be started, they +// are notified about new events by their parents. +func (c *ChainIndexer) Start(chain IndexerChain) { + ch := make(chan ChainEvent, 10) + sub := chain.SubscribeChainEvent(ch) + currentHeader := chain.CurrentHeader() + + go c.eventLoop(currentHeader, ch, sub) } // Close tears down all goroutines belonging to the indexer and returns any error @@ -125,6 +149,14 @@ func (c *ChainIndexer) Close() error { errs = append(errs, err) } } + + // Close all children + for _, child := range c.children { + if err := child.Close(); err != nil { + errs = append(errs, err) + } + } + // Return any failures switch { case len(errs) == 0: @@ -141,12 +173,10 @@ func (c *ChainIndexer) Close() error { // eventLoop is a secondary - optional - event loop of the indexer which is only // started for the outermost indexer to push chain head events into a processing // queue. -func (c *ChainIndexer) eventLoop(currentHeader *types.Header, eventMux *event.TypeMux) { +func (c *ChainIndexer) eventLoop(currentHeader *types.Header, ch chan ChainEvent, sub event.Subscription) { // Mark the chain indexer as active, requiring an additional teardown atomic.StoreUint32(&c.active, 1) - // Subscribe to chain head events - sub := eventMux.Subscribe(ChainEvent{}) defer sub.Unsubscribe() // Fire the initial new head event to start any outstanding processing @@ -163,14 +193,14 @@ func (c *ChainIndexer) eventLoop(currentHeader *types.Header, eventMux *event.Ty errc <- nil return - case ev, ok := <-sub.Chan(): + case ev, ok := <-ch: // Received a new event, ensure it's not nil (closing) and update if !ok { errc := <-c.quit errc <- nil return } - header := ev.Data.(ChainEvent).Block.Header() + header := ev.Block.Header() if header.ParentHash != prevHash { c.newHead(FindCommonAncestor(c.chainDb, prevHeader, header).Number.Uint64(), true) } @@ -226,7 +256,10 @@ func (c *ChainIndexer) newHead(head uint64, reorg bool) { // updateLoop is the main event loop of the indexer which pushes chain segments // down into the processing backend. func (c *ChainIndexer) updateLoop() { - var updated time.Time + var ( + updated time.Time + updateMsg bool + ) for { select { @@ -242,6 +275,7 @@ func (c *ChainIndexer) updateLoop() { // Periodically print an upgrade log message to the user if time.Since(updated) > 8*time.Second { if c.knownSections > c.storedSections+1 { + updateMsg = true c.log.Info("Upgrading chain index", "percentage", c.storedSections*100/c.knownSections) } updated = time.Now() @@ -250,17 +284,24 @@ func (c *ChainIndexer) updateLoop() { section := c.storedSections var oldHead common.Hash if section > 0 { - oldHead = c.sectionHead(section - 1) + oldHead = c.SectionHead(section - 1) } // Process the newly defined section in the background c.lock.Unlock() newHead, err := c.processSection(section, oldHead) + if err != nil { + c.log.Error("Section processing failed", "error", err) + } c.lock.Lock() // If processing succeeded and no reorgs occcurred, mark the section completed - if err == nil && oldHead == c.sectionHead(section-1) { + if err == nil && oldHead == c.SectionHead(section-1) { c.setSectionHead(section, newHead) c.setValidSections(section + 1) + if c.storedSections == c.knownSections && updateMsg { + updateMsg = false + c.log.Info("Finished upgrading chain index") + } c.cascadedHead = c.storedSections*c.sectionSize - 1 for _, child := range c.children { @@ -295,7 +336,7 @@ func (c *ChainIndexer) processSection(section uint64, lastHead common.Hash) (com c.log.Trace("Processing new chain section", "section", section) // Reset and partial processing - c.backend.Reset(section) + c.backend.Reset(section, lastHead) for number := section * c.sectionSize; number < (section+1)*c.sectionSize; number++ { hash := GetCanonicalHash(c.chainDb, number) @@ -311,7 +352,8 @@ func (c *ChainIndexer) processSection(section uint64, lastHead common.Hash) (com c.backend.Process(header) lastHead = header.Hash() } - if err := c.backend.Commit(c.chainDb); err != nil { + if err := c.backend.Commit(); err != nil { + c.log.Error("Section commit failed", "error", err) return common.Hash{}, err } return lastHead, nil @@ -324,7 +366,7 @@ func (c *ChainIndexer) Sections() (uint64, uint64, common.Hash) { c.lock.Lock() defer c.lock.Unlock() - return c.storedSections, c.storedSections*c.sectionSize - 1, c.sectionHead(c.storedSections - 1) + return c.storedSections, c.storedSections*c.sectionSize - 1, c.SectionHead(c.storedSections - 1) } // AddChildIndexer adds a child ChainIndexer that can use the output of this one @@ -366,7 +408,7 @@ func (c *ChainIndexer) setValidSections(sections uint64) { // sectionHead retrieves the last block hash of a processed section from the // index database. -func (c *ChainIndexer) sectionHead(section uint64) common.Hash { +func (c *ChainIndexer) SectionHead(section uint64) common.Hash { var data [8]byte binary.BigEndian.PutUint64(data[:], section) diff --git a/core/chain_indexer_test.go b/core/chain_indexer_test.go index 780e46e43..247f52cf9 100644 --- a/core/chain_indexer_test.go +++ b/core/chain_indexer_test.go @@ -23,6 +23,7 @@ import ( "testing" "time" + "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" ) @@ -58,7 +59,6 @@ func testChainIndexer(t *testing.T, count int) { ) backends[i] = &testChainIndexBackend{t: t, processCh: make(chan uint64)} backends[i].indexer = NewChainIndexer(db, ethdb.NewTable(db, string([]byte{byte(i)})), backends[i], sectionSize, confirmsReq, 0, fmt.Sprintf("indexer-%d", i)) - defer backends[i].indexer.Close() if sections, _, _ := backends[i].indexer.Sections(); sections != 0 { t.Fatalf("Canonical section count mismatch: have %v, want %v", sections, 0) @@ -67,6 +67,7 @@ func testChainIndexer(t *testing.T, count int) { backends[i-1].indexer.AddChildIndexer(backends[i].indexer) } } + defer backends[0].indexer.Close() // parent indexer shuts down children // notify pings the root indexer about a new head or reorg, then expect // processed blocks if a section is processable notify := func(headNum, failNum uint64, reorg bool) { @@ -208,7 +209,7 @@ func (b *testChainIndexBackend) reorg(headNum uint64) uint64 { return b.stored * b.indexer.sectionSize } -func (b *testChainIndexBackend) Reset(section uint64) { +func (b *testChainIndexBackend) Reset(section uint64, lastSectionHead common.Hash) { b.section = section b.headerCnt = 0 } @@ -226,7 +227,7 @@ func (b *testChainIndexBackend) Process(header *types.Header) { } } -func (b *testChainIndexBackend) Commit(db ethdb.Database) error { +func (b *testChainIndexBackend) Commit() error { if b.headerCnt != b.indexer.sectionSize { b.t.Error("Not enough headers processed") } diff --git a/core/database_util.go b/core/database_util.go index 697111394..179d6f1b2 100644 --- a/core/database_util.go +++ b/core/database_util.go @@ -23,7 +23,6 @@ import ( "errors" "fmt" "math/big" - "sync" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" @@ -48,9 +47,6 @@ var ( lookupPrefix = []byte("l") // lookupPrefix + hash -> transaction/receipt lookup metadata preimagePrefix = "secure-key-" // preimagePrefix + hash -> preimage - mipmapPre = []byte("mipmap-log-bloom-") - MIPMapLevels = []uint64{1000000, 500000, 100000, 50000, 1000} - configPrefix = []byte("ethereum-config-") // config prefix for the db // used by old db, now only used for conversion @@ -59,10 +55,10 @@ var ( ErrChainConfigNotFound = errors.New("ChainConfig not found") // general config not found error - mipmapBloomMu sync.Mutex // protect against race condition when updating mipmap blooms - preimageCounter = metrics.NewCounter("db/preimage/total") preimageHitCounter = metrics.NewCounter("db/preimage/hits") + + bloomBitsPrefix = []byte("bloomBits-") ) // txLookupEntry is a positional metadata to help looking up the data content of @@ -497,48 +493,6 @@ func DeleteTxLookupEntry(db ethdb.Database, hash common.Hash) { db.Delete(append(lookupPrefix, hash.Bytes()...)) } -// returns a formatted MIP mapped key by adding prefix, canonical number and level -// -// ex. fn(98, 1000) = (prefix || 1000 || 0) -func mipmapKey(num, level uint64) []byte { - lkey := make([]byte, 8) - binary.BigEndian.PutUint64(lkey, level) - key := new(big.Int).SetUint64(num / level * level) - - return append(mipmapPre, append(lkey, key.Bytes()...)...) -} - -// WriteMipmapBloom writes each address included in the receipts' logs to the -// MIP bloom bin. -func WriteMipmapBloom(db ethdb.Database, number uint64, receipts types.Receipts) error { - mipmapBloomMu.Lock() - defer mipmapBloomMu.Unlock() - - batch := db.NewBatch() - for _, level := range MIPMapLevels { - key := mipmapKey(number, level) - bloomDat, _ := db.Get(key) - bloom := types.BytesToBloom(bloomDat) - for _, receipt := range receipts { - for _, log := range receipt.Logs { - bloom.Add(log.Address.Big()) - } - } - batch.Put(key, bloom.Bytes()) - } - if err := batch.Write(); err != nil { - return fmt.Errorf("mipmap write fail for: %d: %v", number, err) - } - return nil -} - -// GetMipmapBloom returns a bloom filter using the number and level as input -// parameters. For available levels see MIPMapLevels. -func GetMipmapBloom(db ethdb.Database, number, level uint64) types.Bloom { - bloomDat, _ := db.Get(mipmapKey(number, level)) - return types.BytesToBloom(bloomDat) -} - // PreimageTable returns a Database instance with the key prefix for preimage entries. func PreimageTable(db ethdb.Database) ethdb.Database { return ethdb.NewTable(db, preimagePrefix) @@ -637,3 +591,22 @@ func FindCommonAncestor(db ethdb.Database, a, b *types.Header) *types.Header { } return a } + +// GetBloomBits reads the compressed bloomBits vector belonging to the given section and bit index from the db +func GetBloomBits(db ethdb.Database, bitIdx, sectionIdx uint64, sectionHead common.Hash) ([]byte, error) { + var encKey [10]byte + binary.BigEndian.PutUint16(encKey[0:2], uint16(bitIdx)) + binary.BigEndian.PutUint64(encKey[2:10], sectionIdx) + key := append(append(bloomBitsPrefix, encKey[:]...), sectionHead.Bytes()...) + bloomBits, err := db.Get(key) + return bloomBits, err +} + +// StoreBloomBits writes the compressed bloomBits vector belonging to the given section and bit index to the db +func StoreBloomBits(db ethdb.Database, bitIdx, sectionIdx uint64, sectionHead common.Hash, bloomBits []byte) { + var encKey [10]byte + binary.BigEndian.PutUint16(encKey[0:2], uint16(bitIdx)) + binary.BigEndian.PutUint64(encKey[2:10], sectionIdx) + key := append(append(bloomBitsPrefix, encKey[:]...), sectionHead.Bytes()...) + db.Put(key, bloomBits) +} diff --git a/core/database_util_test.go b/core/database_util_test.go index e91f1b593..940221a29 100644 --- a/core/database_util_test.go +++ b/core/database_util_test.go @@ -18,17 +18,13 @@ package core import ( "bytes" - "io/ioutil" "math/big" - "os" "testing" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" - "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/crypto/sha3" "github.com/ethereum/go-ethereum/ethdb" - "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rlp" ) @@ -390,107 +386,3 @@ func TestBlockReceiptStorage(t *testing.T) { t.Fatalf("deleted receipts returned: %v", rs) } } - -func TestMipmapBloom(t *testing.T) { - db, _ := ethdb.NewMemDatabase() - - receipt1 := new(types.Receipt) - receipt1.Logs = []*types.Log{ - {Address: common.BytesToAddress([]byte("test"))}, - {Address: common.BytesToAddress([]byte("address"))}, - } - receipt2 := new(types.Receipt) - receipt2.Logs = []*types.Log{ - {Address: common.BytesToAddress([]byte("test"))}, - {Address: common.BytesToAddress([]byte("address1"))}, - } - - WriteMipmapBloom(db, 1, types.Receipts{receipt1}) - WriteMipmapBloom(db, 2, types.Receipts{receipt2}) - - for _, level := range MIPMapLevels { - bloom := GetMipmapBloom(db, 2, level) - if !bloom.Test(new(big.Int).SetBytes([]byte("address1"))) { - t.Error("expected test to be included on level:", level) - } - } - - // reset - db, _ = ethdb.NewMemDatabase() - receipt := new(types.Receipt) - receipt.Logs = []*types.Log{ - {Address: common.BytesToAddress([]byte("test"))}, - } - WriteMipmapBloom(db, 999, types.Receipts{receipt1}) - - receipt = new(types.Receipt) - receipt.Logs = []*types.Log{ - {Address: common.BytesToAddress([]byte("test 1"))}, - } - WriteMipmapBloom(db, 1000, types.Receipts{receipt}) - - bloom := GetMipmapBloom(db, 1000, 1000) - if bloom.TestBytes([]byte("test")) { - t.Error("test should not have been included") - } -} - -func TestMipmapChain(t *testing.T) { - dir, err := ioutil.TempDir("", "mipmap") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(dir) - - var ( - db, _ = ethdb.NewLDBDatabase(dir, 0, 0) - key1, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") - addr = crypto.PubkeyToAddress(key1.PublicKey) - addr2 = common.BytesToAddress([]byte("jeff")) - - hash1 = common.BytesToHash([]byte("topic1")) - ) - defer db.Close() - - gspec := &Genesis{ - Config: params.TestChainConfig, - Alloc: GenesisAlloc{addr: {Balance: big.NewInt(1000000)}}, - } - genesis := gspec.MustCommit(db) - chain, receipts := GenerateChain(params.TestChainConfig, genesis, db, 1010, func(i int, gen *BlockGen) { - var receipts types.Receipts - switch i { - case 1: - receipt := types.NewReceipt(nil, false, new(big.Int)) - receipt.Logs = []*types.Log{{Address: addr, Topics: []common.Hash{hash1}}} - gen.AddUncheckedReceipt(receipt) - receipts = types.Receipts{receipt} - case 1000: - receipt := types.NewReceipt(nil, false, new(big.Int)) - receipt.Logs = []*types.Log{{Address: addr2}} - gen.AddUncheckedReceipt(receipt) - receipts = types.Receipts{receipt} - - } - - // store the receipts - WriteMipmapBloom(db, uint64(i+1), receipts) - }) - for i, block := range chain { - WriteBlock(db, block) - if err := WriteCanonicalHash(db, block.Hash(), block.NumberU64()); err != nil { - t.Fatalf("failed to insert block number: %v", err) - } - if err := WriteHeadBlockHash(db, block.Hash()); err != nil { - t.Fatalf("failed to insert block number: %v", err) - } - if err := WriteBlockReceipts(db, block.Hash(), block.NumberU64(), receipts[i]); err != nil { - t.Fatal("error writing block receipts:", err) - } - } - - bloom := GetMipmapBloom(db, 0, 1000) - if bloom.TestBytes(addr2[:]) { - t.Error("address was included in bloom and should not have") - } -} diff --git a/core/types/bloom9.go b/core/types/bloom9.go index 60aacc301..bdc6e60e7 100644 --- a/core/types/bloom9.go +++ b/core/types/bloom9.go @@ -106,6 +106,20 @@ func LogsBloom(logs []*Log) *big.Int { return bin } +type BloomIndexList [3]uint + +// BloomIndexes returns the bloom filter bit indexes belonging to the given key +func BloomIndexes(b []byte) BloomIndexList { + b = crypto.Keccak256(b[:]) + + var r [3]uint + for i, _ := range r { + r[i] = (uint(b[i+i+1]) + (uint(b[i+i]) << 8)) & 2047 + } + + return r +} + func bloom9(b []byte) *big.Int { b = crypto.Keccak256(b[:]) |