From 2e5d28c73f1d97865def3ffe8c7ad0a4819f15f3 Mon Sep 17 00:00:00 2001 From: obscuren Date: Mon, 11 Aug 2014 16:23:17 +0200 Subject: Added bloom filter & block filter methods --- ethchain/bloom.go | 47 ++++++++++++++++ ethchain/bloom_test.go | 20 +++++++ ethchain/filter.go | 146 ++++++++++++++++++++++++++++++++++++++++++++++++ ethchain/filter_test.go | 7 +++ ethstate/manifest.go | 67 ++++++++++++++++++++++ 5 files changed, 287 insertions(+) create mode 100644 ethchain/bloom.go create mode 100644 ethchain/bloom_test.go create mode 100644 ethchain/filter.go create mode 100644 ethchain/filter_test.go create mode 100644 ethstate/manifest.go diff --git a/ethchain/bloom.go b/ethchain/bloom.go new file mode 100644 index 000000000..320ce73fc --- /dev/null +++ b/ethchain/bloom.go @@ -0,0 +1,47 @@ +package ethchain + +type BloomFilter struct { + bin []byte +} + +func NewBloomFilter(bin []byte) *BloomFilter { + if bin == nil { + bin = make([]byte, 255) + } + + return &BloomFilter{ + bin: bin, + } +} + +func (self *BloomFilter) Set(addr []byte) { + if len(addr) < 8 { + chainlogger.Warnf("err: bloom set to small: %x\n", addr) + + return + } + + for _, i := range addr[len(addr)-8:] { + self.bin[i] = 1 + } +} + +func (self *BloomFilter) Search(addr []byte) bool { + if len(addr) < 8 { + chainlogger.Warnf("err: bloom search to small: %x\n", addr) + + return false + } + + for _, i := range addr[len(addr)-8:] { + if self.bin[i] == 0 { + return false + } + } + + return true +} + +func (self *BloomFilter) Bin() []byte { + return self.bin +} diff --git a/ethchain/bloom_test.go b/ethchain/bloom_test.go new file mode 100644 index 000000000..ea53d539c --- /dev/null +++ b/ethchain/bloom_test.go @@ -0,0 +1,20 @@ +package ethchain + +import "testing" + +func TestBloomFilter(t *testing.T) { + bf := NewBloomFilter(nil) + + a := []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 0} + bf.Set(a) + + b := []byte{10, 11, 12, 13, 14, 15, 16, 17, 18, 19} + + if bf.Search(a) == false { + t.Error("Expected 'a' to yield true using a bloom filter") + } + + if bf.Search(b) { + t.Error("Expected 'b' not to field trie using a bloom filter") + } +} diff --git a/ethchain/filter.go b/ethchain/filter.go new file mode 100644 index 000000000..c3b0a7f94 --- /dev/null +++ b/ethchain/filter.go @@ -0,0 +1,146 @@ +package ethchain + +import ( + "bytes" + "fmt" + + "github.com/ethereum/eth-go/ethstate" + "github.com/ethereum/eth-go/ethutil" +) + +// Filtering interface +type Filter struct { + eth EthManager + earliest []byte + latest []byte + skip int + from, to []byte + max int +} + +// Create a new filter which uses a bloom filter on blocks to figure out whether a particular block +// is interesting or not. +func NewFilter(eth EthManager) *Filter { + return &Filter{eth: eth} +} + +// Set the earliest and latest block for filtering. +// -1 = latest block (i.e., the current block) +// hash = particular hash from-to +func (self *Filter) SetEarliestBlock(earliest interface{}) { + e := ethutil.NewValue(earliest) + + // Check for -1 (latest) otherwise assume bytes + if e.Int() == -1 { + self.earliest = self.eth.BlockChain().CurrentBlock.Hash() + } else if e.Len() > 0 { + self.earliest = e.Bytes() + } else { + panic(fmt.Sprintf("earliest has to be either -1 or a valid hash: %v (%T)", e, e.Val)) + } +} + +func (self *Filter) SetLatestBlock(latest interface{}) { + l := ethutil.NewValue(latest) + + // Check for -1 (latest) otherwise assume bytes + if l.Int() == -1 { + self.latest = self.eth.BlockChain().CurrentBlock.Hash() + } else if l.Len() > 0 { + self.latest = l.Bytes() + } else { + panic(fmt.Sprintf("latest has to be either -1 or a valid hash: %v", l)) + } +} + +func (self *Filter) SetFrom(addr []byte) { + self.from = addr +} + +func (self *Filter) SetTo(addr []byte) { + self.to = addr +} + +func (self *Filter) SetMax(max int) { + self.max = max +} + +func (self *Filter) SetSkip(skip int) { + self.skip = skip +} + +// Run filters messages with the current parameters set +func (self *Filter) Find() []*ethstate.Message { + var messages []*ethstate.Message + + block := self.eth.BlockChain().GetBlock(self.latest) + + // skip N blocks (useful for pagination) + if self.skip > 0 { + for i := 0; i < i; i++ { + block = self.eth.BlockChain().GetBlock(block.PrevHash) + } + } + + // Start block filtering + quit := false + for i := 1; !quit && block != nil; i++ { + // Mark last check + if self.max == i || (len(self.earliest) > 0 && bytes.Compare(block.Hash(), self.earliest) == 0) { + quit = true + } + + // Use bloom filtering to see if this block is interesting given the + // current parameters + if self.bloomFilter(block) { + // Get the messages of the block + msgs, err := self.eth.StateManager().GetMessages(block) + if err != nil { + chainlogger.Warnln("err: filter get messages ", err) + + break + } + + // Filter the messages for interesting stuff + for _, message := range msgs { + if len(self.to) > 0 && bytes.Compare(message.To, self.to) != 0 { + continue + } + + if len(self.from) > 0 && bytes.Compare(message.From, self.from) != 0 { + continue + } + + messages = append(messages, message) + } + } + + block = self.eth.BlockChain().GetBlock(block.PrevHash) + } + + return messages +} + +func (self *Filter) bloomFilter(block *Block) bool { + fk := append([]byte("bloom"), block.Hash()...) + bin, err := self.eth.Db().Get(fk) + if err != nil { + panic(err) + } + + bloom := NewBloomFilter(bin) + + if len(self.from) > 0 { + if !bloom.Search(self.from) { + return false + } + } + + if len(self.to) > 0 { + if !bloom.Search(self.to) { + return false + } + } + + return true +} diff --git a/ethchain/filter_test.go b/ethchain/filter_test.go new file mode 100644 index 000000000..6dce51b3b --- /dev/null +++ b/ethchain/filter_test.go @@ -0,0 +1,7 @@ +package ethchain + +import "testing" + +func TestFilter(t *testing.T) { + filter := NewFilter() +} diff --git a/ethstate/manifest.go b/ethstate/manifest.go new file mode 100644 index 000000000..b771127a6 --- /dev/null +++ b/ethstate/manifest.go @@ -0,0 +1,67 @@ +package ethstate + +import ( + "fmt" + "math/big" +) + +// Object manifest +// +// The object manifest is used to keep changes to the state so we can keep track of the changes +// that occurred during a state transitioning phase. +type Manifest struct { + // XXX These will be handy in the future. Not important for now. + objectAddresses map[string]bool + storageAddresses map[string]map[string]bool + + ObjectChanges map[string]*StateObject + StorageChanges map[string]map[string]*big.Int + + Messages []*Message +} + +func NewManifest() *Manifest { + m := &Manifest{objectAddresses: make(map[string]bool), storageAddresses: make(map[string]map[string]bool)} + m.Reset() + + return m +} + +func (m *Manifest) Reset() { + m.ObjectChanges = make(map[string]*StateObject) + m.StorageChanges = make(map[string]map[string]*big.Int) +} + +func (m *Manifest) AddObjectChange(stateObject *StateObject) { + m.ObjectChanges[string(stateObject.Address())] = stateObject +} + +func (m *Manifest) AddStorageChange(stateObject *StateObject, storageAddr []byte, storage *big.Int) { + if m.StorageChanges[string(stateObject.Address())] == nil { + m.StorageChanges[string(stateObject.Address())] = make(map[string]*big.Int) + } + + m.StorageChanges[string(stateObject.Address())][string(storageAddr)] = storage +} + +func (self *Manifest) AddMessage(msg *Message) *Message { + self.Messages = append(self.Messages, msg) + + return msg +} + +type Message struct { + To, From []byte + Input []byte + Output []byte + Path int + Origin []byte + Timestamp int64 + Coinbase []byte + Block []byte + Number *big.Int +} + +func (self *Message) String() string { + return fmt.Sprintf("Message{to: %x from: %x input: %x output: %x origin: %x coinbase: %x block: %x number: %v timestamp: %d path: %d", self.To, self.From, self.Input, self.Output, self.Origin, self.Coinbase, self.Block, self.Number, self.Timestamp, self.Path) +} -- cgit v1.2.3