aboutsummaryrefslogtreecommitdiffstats
path: root/trie/hasher.go
blob: 7b1d7793fa65dbdd998f357d50c4bd23e351785b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package trie

import (
    "hash"
    "sync"

    "github.com/ethereum/go-ethereum/common"
    "github.com/ethereum/go-ethereum/crypto/sha3"
    "github.com/ethereum/go-ethereum/rlp"
)

type hasher struct {
    tmp        sliceBuffer
    sha        keccakState
    cachegen   uint16
    cachelimit uint16
    onleaf     LeafCallback
}

// keccakState wraps sha3.state. In addition to the usual hash methods, it also supports
// Read to get a variable amount of data from the hash state. Read is faster than Sum
// because it doesn't copy the internal state, but also modifies the internal state.
type keccakState interface {
    hash.Hash
    Read([]byte) (int, error)
}

type sliceBuffer []byte

func (b *sliceBuffer) Write(data []byte) (n int, err error) {
    *b = append(*b, data...)
    return len(data), nil
}

func (b *sliceBuffer) Reset() {
    *b = (*b)[:0]
}

// hashers live in a global db.
var hasherPool = sync.Pool{
    New: func() interface{} {
        return &hasher{
            tmp: make(sliceBuffer, 0, 550), // cap is as large as a full fullNode.
            sha: sha3.NewKeccak256().(keccakState),
        }
    },
}

func newHasher(cachegen, cachelimit uint16, onleaf LeafCallback) *hasher {
    h := hasherPool.Get().(*hasher)
    h.cachegen, h.cachelimit, h.onleaf = cachegen, cachelimit, onleaf
    return h
}

func returnHasherToPool(h *hasher) {
    hasherPool.Put(h)
}

// hash collapses a node down into a hash node, also returning a copy of the
// original node initialized with the computed hash to replace the original one.
func (h *hasher) hash(n node, db *Database, force bool) (node, node, error) {
    // If we're not storing the node, just hashing, use available cached data
    if hash, dirty := n.cache(); hash != nil {
        if db == nil {
            return hash, n, nil
        }
        if n.canUnload(h.cachegen, h.cachelimit) {
            // Unload the node from cache. All of its subnodes will have a lower or equal
            // cache generation number.
            cacheUnloadCounter.Inc(1)
            return hash, hash, nil
        }
        if !dirty {
            return hash, n, nil
        }
    }
    // Trie not processed yet or needs storage, walk the children
    collapsed, cached, err := h.hashChildren(n, db)
    if err != nil {
        return hashNode{}, n, err
    }
    hashed, err := h.store(collapsed, db, force)
    if err != nil {
        return hashNode{}, n, err
    }
    // Cache the hash of the node for later reuse and remove
    // the dirty flag in commit mode. It's fine to assign these values directly
    // without copying the node first because hashChildren copies it.
    cachedHash, _ := hashed.(hashNode)
    switch cn := cached.(type) {
    case *shortNode:
        cn.flags.hash = cachedHash
        if db != nil {
            cn.flags.dirty = false
        }
    case *fullNode:
        cn.flags.hash = cachedHash
        if db != nil {
            cn.flags.dirty = false
        }
    }
    return hashed, cached, nil
}

// hashChildren replaces the children of a node with their hashes if the encoded
// size of the child is larger than a hash, returning the collapsed node as well
// as a replacement for the original node with the child hashes cached in.
func (h *hasher) hashChildren(original node, db *Database) (node, node, error) {
    var err error

    switch n := original.(type) {
    case *shortNode:
        // Hash the short node's child, caching the newly hashed subtree
        collapsed, cached := n.copy(), n.copy()
        collapsed.Key = hexToCompact(n.Key)
        cached.Key = common.CopyBytes(n.Key)

        if _, ok := n.Val.(valueNode); !ok {
            collapsed.Val, cached.Val, err = h.hash(n.Val, db, false)
            if err != nil {
                return original, original, err
            }
        }
        return collapsed, cached, nil

    case *fullNode:
        // Hash the full node's children, caching the newly hashed subtrees
        collapsed, cached := n.copy(), n.copy()

        for i := 0; i < 16; i++ {
            if n.Children[i] != nil {
                collapsed.Children[i], cached.Children[i], err = h.hash(n.Children[i], db, false)
                if err != nil {
                    return original, original, err
                }
            }
        }
        cached.Children[16] = n.Children[16]
        return collapsed, cached, nil

    default:
        // Value and hash nodes don't have children so they're left as were
        return n, original, nil
    }
}

// store hashes the node n and if we have a storage layer specified, it writes
// the key/value pair to it and tracks any node->child references as well as any
// node->external trie references.
func (h *hasher) store(n node, db *Database, force bool) (node, error) {
    // Don't store hashes or empty nodes.
    if _, isHash := n.(hashNode); n == nil || isHash {
        return n, nil
    }
    // Generate the RLP encoding of the node
    h.tmp.Reset()
    if err := rlp.Encode(&h.tmp, n); err != nil {
        panic("encode error: " + err.Error())
    }
    if len(h.tmp) < 32 && !force {
        return n, nil // Nodes smaller than 32 bytes are stored inside their parent
    }
    // Larger nodes are replaced by their hash and stored in the database.
    hash, _ := n.cache()
    if hash == nil {
        hash = h.makeHashNode(h.tmp)
    }

    if db != nil {
        // We are pooling the trie nodes into an intermediate memory cache
        hash := common.BytesToHash(hash)

        db.lock.Lock()
        db.insert(hash, h.tmp, n)
        db.lock.Unlock()

        // Track external references from account->storage trie
        if h.onleaf != nil {
            switch n := n.(type) {
            case *shortNode:
                if child, ok := n.Val.(valueNode); ok {
                    h.onleaf(child, hash)
                }
            case *fullNode:
                for i := 0; i < 16; i++ {
                    if child, ok := n.Children[i].(valueNode); ok {
                        h.onleaf(child, hash)
                    }
                }
            }
        }
    }
    return hash, nil
}

func (h *hasher) makeHashNode(data []byte) hashNode {
    n := make(hashNode, h.sha.Size())
    h.sha.Reset()
    h.sha.Write(data)
    h.sha.Read(n)
    return n
}