From 6dc14788a238f3e0ec786c6c04d476a3b957e645 Mon Sep 17 00:00:00 2001
From: Jeffrey Wilcke <geffobscura@gmail.com>
Date: Mon, 12 Oct 2015 17:58:51 +0200
Subject: core, eth/filters, miner, xeth: Optimised log filtering

Log filtering is now using a MIPmap like approach where addresses of
logs are added to a mapped bloom bin. The current levels for the MIP are
in ranges of 1.000.000, 500.000, 100.000, 50.000, 1.000. Logs are
therefor filtered in batches of 1.000.
---
 eth/filters/filter.go | 116 +++++++++++++++++++++++++++++---------------------
 1 file changed, 68 insertions(+), 48 deletions(-)

(limited to 'eth/filters/filter.go')

diff --git a/eth/filters/filter.go b/eth/filters/filter.go
index d3d430775..2e81ea177 100644
--- a/eth/filters/filter.go
+++ b/eth/filters/filter.go
@@ -17,6 +17,8 @@
 package filters
 
 import (
+	"math"
+
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/core"
 	"github.com/ethereum/go-ethereum/core/types"
@@ -30,13 +32,10 @@ type AccountChange struct {
 
 // Filtering interface
 type Filter struct {
-	db       ethdb.Database
-	earliest int64
-	latest   int64
-	skip     int
-	address  []common.Address
-	max      int
-	topics   [][]common.Hash
+	db         ethdb.Database
+	begin, end int64
+	addresses  []common.Address
+	topics     [][]common.Hash
 
 	BlockCallback       func(*types.Block, vm.Logs)
 	TransactionCallback func(*types.Transaction)
@@ -52,59 +51,82 @@ func New(db ethdb.Database) *Filter {
 // Set the earliest and latest block for filtering.
 // -1 = latest block (i.e., the current block)
 // hash = particular hash from-to
-func (self *Filter) SetEarliestBlock(earliest int64) {
-	self.earliest = earliest
+func (self *Filter) SetBeginBlock(begin int64) {
+	self.begin = begin
 }
 
-func (self *Filter) SetLatestBlock(latest int64) {
-	self.latest = latest
+func (self *Filter) SetEndBlock(end int64) {
+	self.end = end
 }
 
-func (self *Filter) SetAddress(addr []common.Address) {
-	self.address = addr
+func (self *Filter) SetAddresses(addr []common.Address) {
+	self.addresses = addr
 }
 
 func (self *Filter) SetTopics(topics [][]common.Hash) {
 	self.topics = topics
 }
 
-func (self *Filter) SetMax(max int) {
-	self.max = max
-}
-
-func (self *Filter) SetSkip(skip int) {
-	self.skip = skip
-}
-
 // Run filters logs with the current parameters set
 func (self *Filter) Find() vm.Logs {
-	earliestBlock := core.GetBlock(self.db, core.GetHeadBlockHash(self.db))
-	var earliestBlockNo uint64 = uint64(self.earliest)
-	if self.earliest == -1 {
-		earliestBlockNo = earliestBlock.NumberU64()
+	latestBlock := core.GetBlock(self.db, core.GetHeadBlockHash(self.db))
+	var beginBlockNo uint64 = uint64(self.begin)
+	if self.begin == -1 {
+		beginBlockNo = latestBlock.NumberU64()
 	}
-	var latestBlockNo uint64 = uint64(self.latest)
-	if self.latest == -1 {
-		latestBlockNo = earliestBlock.NumberU64()
+	var endBlockNo uint64 = uint64(self.end)
+	if self.end == -1 {
+		endBlockNo = latestBlock.NumberU64()
 	}
 
-	var (
-		logs  vm.Logs
-		block *types.Block
-	)
-	hash := core.GetCanonicalHash(self.db, latestBlockNo)
-	if hash != (common.Hash{}) {
-		block = core.GetBlock(self.db, hash)
+	// if no addresses are present we can't make use of fast search which
+	// uses the mipmap bloom filters to check for fast inclusion and uses
+	// higher range probability in order to ensure at least a false positive
+	if len(self.addresses) == 0 {
+		return self.getLogs(beginBlockNo, endBlockNo)
 	}
+	return self.mipFind(beginBlockNo, endBlockNo, 0)
+}
 
-done:
-	for i := 0; block != nil; i++ {
-		// Quit on latest
-		switch {
-		case block.NumberU64() == 0:
-			break done
-		case block.NumberU64() < earliestBlockNo:
-			break done
+func (self *Filter) mipFind(start, end uint64, depth int) (logs vm.Logs) {
+	level := core.MIPMapLevels[depth]
+	// normalise numerator so we can work in level specific batches and
+	// work with the proper range checks
+	for num := start / level * level; num <= end; num += level {
+		// find addresses in bloom filters
+		bloom := core.GetMipmapBloom(self.db, num, level)
+		for _, addr := range self.addresses {
+			if bloom.TestBytes(addr[:]) {
+				// range check normalised values and make sure that
+				// we're resolving the correct range instead of the
+				// normalised values.
+				start := uint64(math.Max(float64(num), float64(start)))
+				end := uint64(math.Min(float64(num+level-1), float64(end)))
+				if depth+1 == len(core.MIPMapLevels) {
+					logs = append(logs, self.getLogs(start, end)...)
+				} else {
+					logs = append(logs, self.mipFind(start, end, depth+1)...)
+				}
+				// break so we don't check the same range for each
+				// possible address. Checks on multiple addresses
+				// are handled further down the stack.
+				break
+			}
+		}
+	}
+
+	return logs
+}
+
+func (self *Filter) getLogs(start, end uint64) (logs vm.Logs) {
+	var block *types.Block
+
+	for i := start; i <= end; i++ {
+		hash := core.GetCanonicalHash(self.db, i)
+		if hash != (common.Hash{}) {
+			block = core.GetBlock(self.db, hash)
+		} else { // block not found
+			return logs
 		}
 
 		// Use bloom filtering to see if this block is interesting given the
@@ -120,8 +142,6 @@ done:
 			}
 			logs = append(logs, self.FilterLogs(unfiltered)...)
 		}
-
-		block = core.GetBlock(self.db, block.ParentHash())
 	}
 
 	return logs
@@ -143,7 +163,7 @@ func (self *Filter) FilterLogs(logs vm.Logs) vm.Logs {
 	// Filter the logs for interesting stuff
 Logs:
 	for _, log := range logs {
-		if len(self.address) > 0 && !includes(self.address, log.Address) {
+		if len(self.addresses) > 0 && !includes(self.addresses, log.Address) {
 			continue
 		}
 
@@ -179,9 +199,9 @@ Logs:
 }
 
 func (self *Filter) bloomFilter(block *types.Block) bool {
-	if len(self.address) > 0 {
+	if len(self.addresses) > 0 {
 		var included bool
-		for _, addr := range self.address {
+		for _, addr := range self.addresses {
 			if types.BloomLookup(block.Bloom(), addr) {
 				included = true
 				break
-- 
cgit v1.2.3