Diffstat (limited to 'vendor/github.com/syndtr')
-rw-r--r-- vendor/github.com/syndtr/goleveldb/.travis.yml | 12
-rw-r--r-- vendor/github.com/syndtr/goleveldb/LICENSE | 24
-rw-r--r-- vendor/github.com/syndtr/goleveldb/README.md | 105
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/batch.go | 349
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go | 705
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/cache/lru.go | 195
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/comparer.go | 67
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go | 51
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go | 57
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/db.go | 1091
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go | 826
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go | 360
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go | 183
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/db_state.go | 234
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go | 325
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/db_util.go | 102
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/db_write.go | 443
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/doc.go | 90
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/errors.go | 20
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go | 78
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/filter.go | 31
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go | 116
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/filter/filter.go | 60
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go | 184
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go | 242
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/iterator/iter.go | 132
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go | 304
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go | 524
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/key.go | 143
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go | 475
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go | 684
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/options.go | 107
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/session.go | 208
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go | 302
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/session_record.go | 323
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/session_util.go | 258
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go | 583
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go | 34
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go | 65
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go | 81
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go | 86
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go | 78
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go | 218
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go | 179
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/table.go | 529
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go | 1134
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/table/table.go | 177
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/table/writer.go | 375
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/util.go | 98
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/util/buffer.go | 293
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go | 239
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/util/crc32.go | 30
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go | 48
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/util/range.go | 32
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/util/util.go | 73
-rw-r--r-- vendor/github.com/syndtr/goleveldb/leveldb/version.go | 524
56 files changed, 14286 insertions(+), 0 deletions(-)
diff --git a/vendor/github.com/syndtr/goleveldb/.travis.yml b/vendor/github.com/syndtr/goleveldb/.travis.yml
new file mode 100644
index 000000000..82de37735
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/.travis.yml
@@ -0,0 +1,12 @@
+language: go
+
+go:
+ - 1.4
+ - 1.5
+ - 1.6
+ - 1.7
+ - tip
+
+script:
+ - go test -timeout 1h ./...
+ - go test -timeout 30m -race -run "TestDB_(Concurrent|GoleveldbIssue74)" ./leveldb
diff --git a/vendor/github.com/syndtr/goleveldb/LICENSE b/vendor/github.com/syndtr/goleveldb/LICENSE
new file mode 100644
index 000000000..4a772d1ab
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/LICENSE
@@ -0,0 +1,24 @@
+Copyright 2012 Suryandaru Triandana <syndtr@gmail.com>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/syndtr/goleveldb/README.md b/vendor/github.com/syndtr/goleveldb/README.md
new file mode 100644
index 000000000..259286f55
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/README.md
@@ -0,0 +1,105 @@
+This is an implementation of the [LevelDB key/value database](http://code.google.com/p/leveldb) in the [Go programming language](http://golang.org).
+
+[![Build Status](https://travis-ci.org/syndtr/goleveldb.png?branch=master)](https://travis-ci.org/syndtr/goleveldb)
+
+Installation
+-----------
+
+ go get github.com/syndtr/goleveldb/leveldb
+
+Requirements
+-----------
+
+* Requires `go1.4` or newer.
+
+Usage
+-----------
+
+Create or open a database:
+```go
+db, err := leveldb.OpenFile("path/to/db", nil)
+...
+defer db.Close()
+...
+```
+Read or modify the database content:
+```go
+// Remember that the contents of the returned slice should not be modified.
+data, err := db.Get([]byte("key"), nil)
+...
+err = db.Put([]byte("key"), []byte("value"), nil)
+...
+err = db.Delete([]byte("key"), nil)
+...
+```
+
+Iterate over database content:
+```go
+iter := db.NewIterator(nil, nil)
+for iter.Next() {
+ // Remember that the contents of the returned slice should not be modified, and
+ // are only valid until the next call to Next.
+ key := iter.Key()
+ value := iter.Value()
+ ...
+}
+iter.Release()
+err = iter.Error()
+...
+```
+Seek-then-Iterate:
+```go
+iter := db.NewIterator(nil, nil)
+for ok := iter.Seek(key); ok; ok = iter.Next() {
+ // Use key/value.
+ ...
+}
+iter.Release()
+err = iter.Error()
+...
+```
+Iterate over a subset of database content:
+```go
+iter := db.NewIterator(&util.Range{Start: []byte("foo"), Limit: []byte("xoo")}, nil)
+for iter.Next() {
+ // Use key/value.
+ ...
+}
+iter.Release()
+err = iter.Error()
+...
+```
+Iterate over a subset of database content with a particular prefix:
+```go
+iter := db.NewIterator(util.BytesPrefix([]byte("foo-")), nil)
+for iter.Next() {
+ // Use key/value.
+ ...
+}
+iter.Release()
+err = iter.Error()
+...
+```
+Batch writes:
+```go
+batch := new(leveldb.Batch)
+batch.Put([]byte("foo"), []byte("value"))
+batch.Put([]byte("bar"), []byte("another value"))
+batch.Delete([]byte("baz"))
+err = db.Write(batch, nil)
+...
+```
+Use a bloom filter:
+```go
+o := &opt.Options{
+ Filter: filter.NewBloomFilter(10),
+}
+db, err := leveldb.OpenFile("path/to/db", o)
+...
+defer db.Close()
+...
+```
+Documentation
+-----------
+
+You can read the package documentation [here](http://godoc.org/github.com/syndtr/goleveldb).
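
The README snippets above are fragments that elide error handling with `...`. A minimal, self-contained sketch that strings them together; the `example.db` path and `foo-` keys are illustrative, not part of the library:

```go
package main

import (
	"fmt"
	"log"

	"github.com/syndtr/goleveldb/leveldb"
	"github.com/syndtr/goleveldb/leveldb/util"
)

func main() {
	// Open (or create) a database at an illustrative path.
	db, err := leveldb.OpenFile("example.db", nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Basic write and read, as in the README fragments.
	if err := db.Put([]byte("foo-1"), []byte("one"), nil); err != nil {
		log.Fatal(err)
	}
	v, err := db.Get([]byte("foo-1"), nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("foo-1 = %s\n", v) // The returned slice must not be modified.

	// Prefix iteration; always Release the iterator and check Error after.
	iter := db.NewIterator(util.BytesPrefix([]byte("foo-")), nil)
	for iter.Next() {
		fmt.Printf("%s = %s\n", iter.Key(), iter.Value())
	}
	iter.Release()
	if err := iter.Error(); err != nil {
		log.Fatal(err)
	}
}
```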
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/batch.go b/vendor/github.com/syndtr/goleveldb/leveldb/batch.go
new file mode 100644
index 000000000..225920002
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/batch.go
@@ -0,0 +1,349 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "encoding/binary"
+ "fmt"
+ "io"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/memdb"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+// ErrBatchCorrupted records the reason of batch corruption. This error will be
+// wrapped with errors.ErrCorrupted.
+type ErrBatchCorrupted struct {
+ Reason string
+}
+
+func (e *ErrBatchCorrupted) Error() string {
+ return fmt.Sprintf("leveldb: batch corrupted: %s", e.Reason)
+}
+
+func newErrBatchCorrupted(reason string) error {
+ return errors.NewErrCorrupted(storage.FileDesc{}, &ErrBatchCorrupted{reason})
+}
+
+const (
+ batchHeaderLen = 8 + 4
+ batchGrowRec = 3000
+ batchBufioSize = 16
+)
+
+// BatchReplay wraps basic batch operations.
+type BatchReplay interface {
+ Put(key, value []byte)
+ Delete(key []byte)
+}
+
+type batchIndex struct {
+ keyType keyType
+ keyPos, keyLen int
+ valuePos, valueLen int
+}
+
+func (index batchIndex) k(data []byte) []byte {
+ return data[index.keyPos : index.keyPos+index.keyLen]
+}
+
+func (index batchIndex) v(data []byte) []byte {
+ if index.valueLen != 0 {
+ return data[index.valuePos : index.valuePos+index.valueLen]
+ }
+ return nil
+}
+
+func (index batchIndex) kv(data []byte) (key, value []byte) {
+ return index.k(data), index.v(data)
+}
+
+// Batch is a write batch.
+type Batch struct {
+ data []byte
+ index []batchIndex
+
+ // internalLen is the sum of key/value pair lengths plus 8 bytes per record for the internal key.
+ internalLen int
+}
+
+func (b *Batch) grow(n int) {
+ o := len(b.data)
+ if cap(b.data)-o < n {
+ div := 1
+ if len(b.index) > batchGrowRec {
+ div = len(b.index) / batchGrowRec
+ }
+ ndata := make([]byte, o, o+n+o/div)
+ copy(ndata, b.data)
+ b.data = ndata
+ }
+}
+
+func (b *Batch) appendRec(kt keyType, key, value []byte) {
+ n := 1 + binary.MaxVarintLen32 + len(key)
+ if kt == keyTypeVal {
+ n += binary.MaxVarintLen32 + len(value)
+ }
+ b.grow(n)
+ index := batchIndex{keyType: kt}
+ o := len(b.data)
+ data := b.data[:o+n]
+ data[o] = byte(kt)
+ o++
+ o += binary.PutUvarint(data[o:], uint64(len(key)))
+ index.keyPos = o
+ index.keyLen = len(key)
+ o += copy(data[o:], key)
+ if kt == keyTypeVal {
+ o += binary.PutUvarint(data[o:], uint64(len(value)))
+ index.valuePos = o
+ index.valueLen = len(value)
+ o += copy(data[o:], value)
+ }
+ b.data = data[:o]
+ b.index = append(b.index, index)
+ b.internalLen += index.keyLen + index.valueLen + 8
+}
+
+// Put appends a 'put operation' of the given key/value pair to the batch.
+// It is safe to modify the contents of the argument after Put returns but not
+// before.
+func (b *Batch) Put(key, value []byte) {
+ b.appendRec(keyTypeVal, key, value)
+}
+
+// Delete appends a 'delete operation' of the given key to the batch.
+// It is safe to modify the contents of the argument after Delete returns but
+// not before.
+func (b *Batch) Delete(key []byte) {
+ b.appendRec(keyTypeDel, key, nil)
+}
+
+// Dump dumps the batch contents. The returned slice can be loaded into the
+// batch using the Load method.
+// The returned slice is not its own copy, so the contents should not be
+// modified.
+func (b *Batch) Dump() []byte {
+ return b.data
+}
+
+// Load loads the given slice into the batch. Previous contents of the batch
+// will be discarded.
+// The given slice will not be copied and will be used as batch buffer, so
+// it is not safe to modify the contents of the slice.
+func (b *Batch) Load(data []byte) error {
+ return b.decode(data, -1)
+}
+
+// Replay replays batch contents.
+func (b *Batch) Replay(r BatchReplay) error {
+ for _, index := range b.index {
+ switch index.keyType {
+ case keyTypeVal:
+ r.Put(index.k(b.data), index.v(b.data))
+ case keyTypeDel:
+ r.Delete(index.k(b.data))
+ }
+ }
+ return nil
+}
+
+// Len returns the number of records in the batch.
+func (b *Batch) Len() int {
+ return len(b.index)
+}
+
+// Reset resets the batch.
+func (b *Batch) Reset() {
+ b.data = b.data[:0]
+ b.index = b.index[:0]
+ b.internalLen = 0
+}
+
+func (b *Batch) replayInternal(fn func(i int, kt keyType, k, v []byte) error) error {
+ for i, index := range b.index {
+ if err := fn(i, index.keyType, index.k(b.data), index.v(b.data)); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (b *Batch) append(p *Batch) {
+ ob := len(b.data)
+ oi := len(b.index)
+ b.data = append(b.data, p.data...)
+ b.index = append(b.index, p.index...)
+ b.internalLen += p.internalLen
+
+ // Updating index offset.
+ if ob != 0 {
+ for ; oi < len(b.index); oi++ {
+ index := &b.index[oi]
+ index.keyPos += ob
+ if index.valueLen != 0 {
+ index.valuePos += ob
+ }
+ }
+ }
+}
+
+func (b *Batch) decode(data []byte, expectedLen int) error {
+ b.data = data
+ b.index = b.index[:0]
+ b.internalLen = 0
+ err := decodeBatch(data, func(i int, index batchIndex) error {
+ b.index = append(b.index, index)
+ b.internalLen += index.keyLen + index.valueLen + 8
+ return nil
+ })
+ if err != nil {
+ return err
+ }
+ if expectedLen >= 0 && len(b.index) != expectedLen {
+ return newErrBatchCorrupted(fmt.Sprintf("invalid records length: %d vs %d", expectedLen, len(b.index)))
+ }
+ return nil
+}
+
+func (b *Batch) putMem(seq uint64, mdb *memdb.DB) error {
+ var ik []byte
+ for i, index := range b.index {
+ ik = makeInternalKey(ik, index.k(b.data), seq+uint64(i), index.keyType)
+ if err := mdb.Put(ik, index.v(b.data)); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (b *Batch) revertMem(seq uint64, mdb *memdb.DB) error {
+ var ik []byte
+ for i, index := range b.index {
+ ik = makeInternalKey(ik, index.k(b.data), seq+uint64(i), index.keyType)
+ if err := mdb.Delete(ik); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func newBatch() interface{} {
+ return &Batch{}
+}
+
+func decodeBatch(data []byte, fn func(i int, index batchIndex) error) error {
+ var index batchIndex
+ for i, o := 0, 0; o < len(data); i++ {
+ // Key type.
+ index.keyType = keyType(data[o])
+ if index.keyType > keyTypeVal {
+ return newErrBatchCorrupted(fmt.Sprintf("bad record: invalid type %#x", uint(index.keyType)))
+ }
+ o++
+
+ // Key.
+ x, n := binary.Uvarint(data[o:])
+ o += n
+ if n <= 0 || o+int(x) > len(data) {
+ return newErrBatchCorrupted("bad record: invalid key length")
+ }
+ index.keyPos = o
+ index.keyLen = int(x)
+ o += index.keyLen
+
+ // Value.
+ if index.keyType == keyTypeVal {
+ x, n = binary.Uvarint(data[o:])
+ o += n
+ if n <= 0 || o+int(x) > len(data) {
+ return newErrBatchCorrupted("bad record: invalid value length")
+ }
+ index.valuePos = o
+ index.valueLen = int(x)
+ o += index.valueLen
+ } else {
+ index.valuePos = 0
+ index.valueLen = 0
+ }
+
+ if err := fn(i, index); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func decodeBatchToMem(data []byte, expectSeq uint64, mdb *memdb.DB) (seq uint64, batchLen int, err error) {
+ seq, batchLen, err = decodeBatchHeader(data)
+ if err != nil {
+ return 0, 0, err
+ }
+ if seq < expectSeq {
+ return 0, 0, newErrBatchCorrupted("invalid sequence number")
+ }
+ data = data[batchHeaderLen:]
+ var ik []byte
+ var decodedLen int
+ err = decodeBatch(data, func(i int, index batchIndex) error {
+ if i >= batchLen {
+ return newErrBatchCorrupted("invalid records length")
+ }
+ ik = makeInternalKey(ik, index.k(data), seq+uint64(i), index.keyType)
+ if err := mdb.Put(ik, index.v(data)); err != nil {
+ return err
+ }
+ decodedLen++
+ return nil
+ })
+ if err == nil && decodedLen != batchLen {
+ err = newErrBatchCorrupted(fmt.Sprintf("invalid records length: %d vs %d", batchLen, decodedLen))
+ }
+ return
+}
+
+func encodeBatchHeader(dst []byte, seq uint64, batchLen int) []byte {
+ dst = ensureBuffer(dst, batchHeaderLen)
+ binary.LittleEndian.PutUint64(dst, seq)
+ binary.LittleEndian.PutUint32(dst[8:], uint32(batchLen))
+ return dst
+}
+
+func decodeBatchHeader(data []byte) (seq uint64, batchLen int, err error) {
+ if len(data) < batchHeaderLen {
+ return 0, 0, newErrBatchCorrupted("too short")
+ }
+
+ seq = binary.LittleEndian.Uint64(data)
+ batchLen = int(binary.LittleEndian.Uint32(data[8:]))
+ if batchLen < 0 {
+ return 0, 0, newErrBatchCorrupted("invalid records length")
+ }
+ return
+}
+
+func batchesLen(batches []*Batch) int {
+ batchLen := 0
+ for _, batch := range batches {
+ batchLen += batch.Len()
+ }
+ return batchLen
+}
+
+func writeBatchesWithHeader(wr io.Writer, batches []*Batch, seq uint64) error {
+ if _, err := wr.Write(encodeBatchHeader(nil, seq, batchesLen(batches))); err != nil {
+ return err
+ }
+ for _, batch := range batches {
+ if _, err := wr.Write(batch.data); err != nil {
+ return err
+ }
+ }
+ return nil
+}
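
A sketch of the public batch API defined above: `BatchReplay` receives one callback per record in insertion order, and `Dump`/`Load` round-trip the batch's internal encoding (a shared buffer, not a copy). The `printReplay` type is illustrative:

```go
package main

import (
	"fmt"
	"log"

	"github.com/syndtr/goleveldb/leveldb"
)

// printReplay implements the BatchReplay interface from batch.go.
type printReplay struct{}

func (printReplay) Put(key, value []byte) { fmt.Printf("put %s = %s\n", key, value) }
func (printReplay) Delete(key []byte)     { fmt.Printf("del %s\n", key) }

func main() {
	b := new(leveldb.Batch)
	b.Put([]byte("foo"), []byte("value"))
	b.Delete([]byte("baz"))

	// Dump returns the batch's internal buffer; Load rebuilds the index
	// from it. The slice is shared, so it must not be modified.
	data := b.Dump()
	var c leveldb.Batch
	if err := c.Load(data); err != nil {
		log.Fatal(err)
	}
	fmt.Println("records:", c.Len()) // 2

	// Replay walks the records in order: one Put, then one Delete.
	if err := c.Replay(printReplay{}); err != nil {
		log.Fatal(err)
	}
}
```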
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go b/vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go
new file mode 100644
index 000000000..c5940b232
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go
@@ -0,0 +1,705 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package cache provides interfaces and implementations of cache algorithms.
+package cache
+
+import (
+ "sync"
+ "sync/atomic"
+ "unsafe"
+
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// Cacher provides an interface to implement caching functionality.
+// An implementation must be safe for concurrent use.
+type Cacher interface {
+ // Capacity returns cache capacity.
+ Capacity() int
+
+ // SetCapacity sets cache capacity.
+ SetCapacity(capacity int)
+
+ // Promote promotes the 'cache node'.
+ Promote(n *Node)
+
+ // Ban evicts the 'cache node' and prevents subsequent 'promote'.
+ Ban(n *Node)
+
+ // Evict evicts the 'cache node'.
+ Evict(n *Node)
+
+ // EvictNS evicts 'cache node' with the given namespace.
+ EvictNS(ns uint64)
+
+ // EvictAll evicts all 'cache node'.
+ EvictAll()
+
+ // Close closes the 'cache tree'.
+ Close() error
+}
+
+// Value is a 'cacheable object'. It may implement util.Releaser; if so,
+// the Release method will be called once the object is released.
+type Value interface{}
+
+// NamespaceGetter provides a convenient wrapper for a namespace.
+type NamespaceGetter struct {
+ Cache *Cache
+ NS uint64
+}
+
+// Get simply calls the Cache.Get() method.
+func (g *NamespaceGetter) Get(key uint64, setFunc func() (size int, value Value)) *Handle {
+ return g.Cache.Get(g.NS, key, setFunc)
+}
+
+// The hash tables implementation is based on:
+// "Dynamic-Sized Nonblocking Hash Tables", by Yujie Liu,
+// Kunlong Zhang, and Michael Spear.
+// ACM Symposium on Principles of Distributed Computing, Jul 2014.
+
+const (
+ mInitialSize = 1 << 4
+ mOverflowThreshold = 1 << 5
+ mOverflowGrowThreshold = 1 << 7
+)
+
+type mBucket struct {
+ mu sync.Mutex
+ node []*Node
+ frozen bool
+}
+
+func (b *mBucket) freeze() []*Node {
+ b.mu.Lock()
+ defer b.mu.Unlock()
+ if !b.frozen {
+ b.frozen = true
+ }
+ return b.node
+}
+
+func (b *mBucket) get(r *Cache, h *mNode, hash uint32, ns, key uint64, noset bool) (done, added bool, n *Node) {
+ b.mu.Lock()
+
+ if b.frozen {
+ b.mu.Unlock()
+ return
+ }
+
+ // Scan the node.
+ for _, n := range b.node {
+ if n.hash == hash && n.ns == ns && n.key == key {
+ atomic.AddInt32(&n.ref, 1)
+ b.mu.Unlock()
+ return true, false, n
+ }
+ }
+
+ // Get only.
+ if noset {
+ b.mu.Unlock()
+ return true, false, nil
+ }
+
+ // Create node.
+ n = &Node{
+ r: r,
+ hash: hash,
+ ns: ns,
+ key: key,
+ ref: 1,
+ }
+ // Add node to bucket.
+ b.node = append(b.node, n)
+ bLen := len(b.node)
+ b.mu.Unlock()
+
+ // Update counter.
+ grow := atomic.AddInt32(&r.nodes, 1) >= h.growThreshold
+ if bLen > mOverflowThreshold {
+ grow = grow || atomic.AddInt32(&h.overflow, 1) >= mOverflowGrowThreshold
+ }
+
+ // Grow.
+ if grow && atomic.CompareAndSwapInt32(&h.resizeInProgess, 0, 1) {
+ nhLen := len(h.buckets) << 1
+ nh := &mNode{
+ buckets: make([]unsafe.Pointer, nhLen),
+ mask: uint32(nhLen) - 1,
+ pred: unsafe.Pointer(h),
+ growThreshold: int32(nhLen * mOverflowThreshold),
+ shrinkThreshold: int32(nhLen >> 1),
+ }
+ ok := atomic.CompareAndSwapPointer(&r.mHead, unsafe.Pointer(h), unsafe.Pointer(nh))
+ if !ok {
+ panic("BUG: failed swapping head")
+ }
+ go nh.initBuckets()
+ }
+
+ return true, true, n
+}
+
+func (b *mBucket) delete(r *Cache, h *mNode, hash uint32, ns, key uint64) (done, deleted bool) {
+ b.mu.Lock()
+
+ if b.frozen {
+ b.mu.Unlock()
+ return
+ }
+
+ // Scan the node.
+ var (
+ n *Node
+ bLen int
+ )
+ for i := range b.node {
+ n = b.node[i]
+ if n.ns == ns && n.key == key {
+ if atomic.LoadInt32(&n.ref) == 0 {
+ deleted = true
+
+ // Call releaser.
+ if n.value != nil {
+ if r, ok := n.value.(util.Releaser); ok {
+ r.Release()
+ }
+ n.value = nil
+ }
+
+ // Remove node from bucket.
+ b.node = append(b.node[:i], b.node[i+1:]...)
+ bLen = len(b.node)
+ }
+ break
+ }
+ }
+ b.mu.Unlock()
+
+ if deleted {
+ // Call OnDel.
+ for _, f := range n.onDel {
+ f()
+ }
+
+ // Update counter.
+ atomic.AddInt32(&r.size, int32(n.size)*-1)
+ shrink := atomic.AddInt32(&r.nodes, -1) < h.shrinkThreshold
+ if bLen >= mOverflowThreshold {
+ atomic.AddInt32(&h.overflow, -1)
+ }
+
+ // Shrink.
+ if shrink && len(h.buckets) > mInitialSize && atomic.CompareAndSwapInt32(&h.resizeInProgess, 0, 1) {
+ nhLen := len(h.buckets) >> 1
+ nh := &mNode{
+ buckets: make([]unsafe.Pointer, nhLen),
+ mask: uint32(nhLen) - 1,
+ pred: unsafe.Pointer(h),
+ growThreshold: int32(nhLen * mOverflowThreshold),
+ shrinkThreshold: int32(nhLen >> 1),
+ }
+ ok := atomic.CompareAndSwapPointer(&r.mHead, unsafe.Pointer(h), unsafe.Pointer(nh))
+ if !ok {
+ panic("BUG: failed swapping head")
+ }
+ go nh.initBuckets()
+ }
+ }
+
+ return true, deleted
+}
+
+type mNode struct {
+ buckets []unsafe.Pointer // []*mBucket
+ mask uint32
+ pred unsafe.Pointer // *mNode
+ resizeInProgess int32
+
+ overflow int32
+ growThreshold int32
+ shrinkThreshold int32
+}
+
+func (n *mNode) initBucket(i uint32) *mBucket {
+ if b := (*mBucket)(atomic.LoadPointer(&n.buckets[i])); b != nil {
+ return b
+ }
+
+ p := (*mNode)(atomic.LoadPointer(&n.pred))
+ if p != nil {
+ var node []*Node
+ if n.mask > p.mask {
+ // Grow.
+ pb := (*mBucket)(atomic.LoadPointer(&p.buckets[i&p.mask]))
+ if pb == nil {
+ pb = p.initBucket(i & p.mask)
+ }
+ m := pb.freeze()
+ // Split nodes.
+ for _, x := range m {
+ if x.hash&n.mask == i {
+ node = append(node, x)
+ }
+ }
+ } else {
+ // Shrink.
+ pb0 := (*mBucket)(atomic.LoadPointer(&p.buckets[i]))
+ if pb0 == nil {
+ pb0 = p.initBucket(i)
+ }
+ pb1 := (*mBucket)(atomic.LoadPointer(&p.buckets[i+uint32(len(n.buckets))]))
+ if pb1 == nil {
+ pb1 = p.initBucket(i + uint32(len(n.buckets)))
+ }
+ m0 := pb0.freeze()
+ m1 := pb1.freeze()
+ // Merge nodes.
+ node = make([]*Node, 0, len(m0)+len(m1))
+ node = append(node, m0...)
+ node = append(node, m1...)
+ }
+ b := &mBucket{node: node}
+ if atomic.CompareAndSwapPointer(&n.buckets[i], nil, unsafe.Pointer(b)) {
+ if len(node) > mOverflowThreshold {
+ atomic.AddInt32(&n.overflow, int32(len(node)-mOverflowThreshold))
+ }
+ return b
+ }
+ }
+
+ return (*mBucket)(atomic.LoadPointer(&n.buckets[i]))
+}
+
+func (n *mNode) initBuckets() {
+ for i := range n.buckets {
+ n.initBucket(uint32(i))
+ }
+ atomic.StorePointer(&n.pred, nil)
+}
+
+// Cache is a 'cache map'.
+type Cache struct {
+ mu sync.RWMutex
+ mHead unsafe.Pointer // *mNode
+ nodes int32
+ size int32
+ cacher Cacher
+ closed bool
+}
+
+// NewCache creates a new 'cache map'. The cacher is optional and
+// may be nil.
+func NewCache(cacher Cacher) *Cache {
+ h := &mNode{
+ buckets: make([]unsafe.Pointer, mInitialSize),
+ mask: mInitialSize - 1,
+ growThreshold: int32(mInitialSize * mOverflowThreshold),
+ shrinkThreshold: 0,
+ }
+ for i := range h.buckets {
+ h.buckets[i] = unsafe.Pointer(&mBucket{})
+ }
+ r := &Cache{
+ mHead: unsafe.Pointer(h),
+ cacher: cacher,
+ }
+ return r
+}
+
+func (r *Cache) getBucket(hash uint32) (*mNode, *mBucket) {
+ h := (*mNode)(atomic.LoadPointer(&r.mHead))
+ i := hash & h.mask
+ b := (*mBucket)(atomic.LoadPointer(&h.buckets[i]))
+ if b == nil {
+ b = h.initBucket(i)
+ }
+ return h, b
+}
+
+func (r *Cache) delete(n *Node) bool {
+ for {
+ h, b := r.getBucket(n.hash)
+ done, deleted := b.delete(r, h, n.hash, n.ns, n.key)
+ if done {
+ return deleted
+ }
+ }
+ return false
+}
+
+// Nodes returns the number of 'cache node' entries in the map.
+func (r *Cache) Nodes() int {
+ return int(atomic.LoadInt32(&r.nodes))
+}
+
+// Size returns the sum of 'cache node' sizes in the map.
+func (r *Cache) Size() int {
+ return int(atomic.LoadInt32(&r.size))
+}
+
+// Capacity returns cache capacity.
+func (r *Cache) Capacity() int {
+ if r.cacher == nil {
+ return 0
+ }
+ return r.cacher.Capacity()
+}
+
+// SetCapacity sets cache capacity.
+func (r *Cache) SetCapacity(capacity int) {
+ if r.cacher != nil {
+ r.cacher.SetCapacity(capacity)
+ }
+}
+
+// Get gets 'cache node' with the given namespace and key.
+// If the 'cache node' is not found and setFunc is not nil, Get will atomically
+// create the 'cache node' by calling setFunc. Otherwise Get will return nil.
+//
+// The returned 'cache handle' should be released after use by calling Release
+// method.
+func (r *Cache) Get(ns, key uint64, setFunc func() (size int, value Value)) *Handle {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed {
+ return nil
+ }
+
+ hash := murmur32(ns, key, 0xf00)
+ for {
+ h, b := r.getBucket(hash)
+ done, _, n := b.get(r, h, hash, ns, key, setFunc == nil)
+ if done {
+ if n != nil {
+ n.mu.Lock()
+ if n.value == nil {
+ if setFunc == nil {
+ n.mu.Unlock()
+ n.unref()
+ return nil
+ }
+
+ n.size, n.value = setFunc()
+ if n.value == nil {
+ n.size = 0
+ n.mu.Unlock()
+ n.unref()
+ return nil
+ }
+ atomic.AddInt32(&r.size, int32(n.size))
+ }
+ n.mu.Unlock()
+ if r.cacher != nil {
+ r.cacher.Promote(n)
+ }
+ return &Handle{unsafe.Pointer(n)}
+ }
+
+ break
+ }
+ }
+ return nil
+}
+
+// Delete removes and bans 'cache node' with the given namespace and key.
+// A banned 'cache node' will never be inserted into the 'cache tree'. The
+// ban only applies to that particular 'cache node', so when a 'cache node'
+// is recreated it will not be banned.
+//
+// If onDel is not nil, it will be executed if such 'cache node' doesn't
+// exist or once the 'cache node' is released.
+//
+// Delete returns true if such 'cache node' exists.
+func (r *Cache) Delete(ns, key uint64, onDel func()) bool {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed {
+ return false
+ }
+
+ hash := murmur32(ns, key, 0xf00)
+ for {
+ h, b := r.getBucket(hash)
+ done, _, n := b.get(r, h, hash, ns, key, true)
+ if done {
+ if n != nil {
+ if onDel != nil {
+ n.mu.Lock()
+ n.onDel = append(n.onDel, onDel)
+ n.mu.Unlock()
+ }
+ if r.cacher != nil {
+ r.cacher.Ban(n)
+ }
+ n.unref()
+ return true
+ }
+
+ break
+ }
+ }
+
+ if onDel != nil {
+ onDel()
+ }
+
+ return false
+}
+
+// Evict evicts 'cache node' with the given namespace and key. This will
+// simply call Cacher.Evict.
+//
+// Evict returns true if such 'cache node' exists.
+func (r *Cache) Evict(ns, key uint64) bool {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed {
+ return false
+ }
+
+ hash := murmur32(ns, key, 0xf00)
+ for {
+ h, b := r.getBucket(hash)
+ done, _, n := b.get(r, h, hash, ns, key, true)
+ if done {
+ if n != nil {
+ if r.cacher != nil {
+ r.cacher.Evict(n)
+ }
+ n.unref()
+ return true
+ }
+
+ break
+ }
+ }
+
+ return false
+}
+
+// EvictNS evicts 'cache node' with the given namespace. This will
+// simply call Cacher.EvictNS.
+func (r *Cache) EvictNS(ns uint64) {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed {
+ return
+ }
+
+ if r.cacher != nil {
+ r.cacher.EvictNS(ns)
+ }
+}
+
+// EvictAll evicts all 'cache node'. This will simply call Cacher.EvictAll.
+func (r *Cache) EvictAll() {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed {
+ return
+ }
+
+ if r.cacher != nil {
+ r.cacher.EvictAll()
+ }
+}
+
+// Close closes the 'cache map' and forcefully releases all 'cache node'.
+func (r *Cache) Close() error {
+ r.mu.Lock()
+ if !r.closed {
+ r.closed = true
+
+ h := (*mNode)(r.mHead)
+ h.initBuckets()
+
+ for i := range h.buckets {
+ b := (*mBucket)(h.buckets[i])
+ for _, n := range b.node {
+ // Call releaser.
+ if n.value != nil {
+ if r, ok := n.value.(util.Releaser); ok {
+ r.Release()
+ }
+ n.value = nil
+ }
+
+ // Call OnDel.
+ for _, f := range n.onDel {
+ f()
+ }
+ n.onDel = nil
+ }
+ }
+ }
+ r.mu.Unlock()
+
+ // Avoid deadlock.
+ if r.cacher != nil {
+ if err := r.cacher.Close(); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// CloseWeak closes the 'cache map' and evicts all 'cache node' from the
+// cacher, but unlike Close it doesn't forcefully release the 'cache node'.
+func (r *Cache) CloseWeak() error {
+ r.mu.Lock()
+ if !r.closed {
+ r.closed = true
+ }
+ r.mu.Unlock()
+
+ // Avoid deadlock.
+ if r.cacher != nil {
+ r.cacher.EvictAll()
+ if err := r.cacher.Close(); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// Node is a 'cache node'.
+type Node struct {
+ r *Cache
+
+ hash uint32
+ ns, key uint64
+
+ mu sync.Mutex
+ size int
+ value Value
+
+ ref int32
+ onDel []func()
+
+ CacheData unsafe.Pointer
+}
+
+// NS returns this 'cache node' namespace.
+func (n *Node) NS() uint64 {
+ return n.ns
+}
+
+// Key returns this 'cache node' key.
+func (n *Node) Key() uint64 {
+ return n.key
+}
+
+// Size returns this 'cache node' size.
+func (n *Node) Size() int {
+ return n.size
+}
+
+// Value returns this 'cache node' value.
+func (n *Node) Value() Value {
+ return n.value
+}
+
+// Ref returns this 'cache node' ref counter.
+func (n *Node) Ref() int32 {
+ return atomic.LoadInt32(&n.ref)
+}
+
+// GetHandle returns a handle for this 'cache node'.
+func (n *Node) GetHandle() *Handle {
+ if atomic.AddInt32(&n.ref, 1) <= 1 {
+ panic("BUG: Node.GetHandle on zero ref")
+ }
+ return &Handle{unsafe.Pointer(n)}
+}
+
+func (n *Node) unref() {
+ if atomic.AddInt32(&n.ref, -1) == 0 {
+ n.r.delete(n)
+ }
+}
+
+func (n *Node) unrefLocked() {
+ if atomic.AddInt32(&n.ref, -1) == 0 {
+ n.r.mu.RLock()
+ if !n.r.closed {
+ n.r.delete(n)
+ }
+ n.r.mu.RUnlock()
+ }
+}
+
+// Handle is a 'cache handle' of a 'cache node'.
+type Handle struct {
+ n unsafe.Pointer // *Node
+}
+
+// Value returns the value of the 'cache node'.
+func (h *Handle) Value() Value {
+ n := (*Node)(atomic.LoadPointer(&h.n))
+ if n != nil {
+ return n.value
+ }
+ return nil
+}
+
+// Release releases this 'cache handle'.
+// It is safe to call release multiple times.
+func (h *Handle) Release() {
+ nPtr := atomic.LoadPointer(&h.n)
+ if nPtr != nil && atomic.CompareAndSwapPointer(&h.n, nPtr, nil) {
+ n := (*Node)(nPtr)
+ n.unrefLocked()
+ }
+}
+
+func murmur32(ns, key uint64, seed uint32) uint32 {
+ const (
+ m = uint32(0x5bd1e995)
+ r = 24
+ )
+
+ k1 := uint32(ns >> 32)
+ k2 := uint32(ns)
+ k3 := uint32(key >> 32)
+ k4 := uint32(key)
+
+ k1 *= m
+ k1 ^= k1 >> r
+ k1 *= m
+
+ k2 *= m
+ k2 ^= k2 >> r
+ k2 *= m
+
+ k3 *= m
+ k3 ^= k3 >> r
+ k3 *= m
+
+ k4 *= m
+ k4 ^= k4 >> r
+ k4 *= m
+
+ h := seed
+
+ h *= m
+ h ^= k1
+ h *= m
+ h ^= k2
+ h *= m
+ h ^= k3
+ h *= m
+ h ^= k4
+
+ h ^= h >> 13
+ h *= m
+ h ^= h >> 15
+
+ return h
+}
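
A sketch of the `Cache`/`Handle` lifecycle described above, assuming no backing `Cacher` (so a node survives only while a handle references it); the namespace/key values and the cached string are illustrative:

```go
package main

import (
	"fmt"

	"github.com/syndtr/goleveldb/leveldb/cache"
)

func main() {
	// A 'cache map' with no Cacher: entries live only while referenced.
	c := cache.NewCache(nil)
	defer c.Close()

	// Get with a setFunc atomically creates the node on a miss.
	h := c.Get(1, 42, func() (int, cache.Value) {
		return 1, "expensive result" // size, value
	})
	fmt.Println(h.Value()) // "expensive result"
	h.Release()            // Drop the reference; safe to call multiple times.

	// Get with a nil setFunc is a pure lookup. With no Cacher holding a
	// reference, the node was deleted when the last handle was released.
	if c.Get(1, 42, nil) == nil {
		fmt.Println("miss")
	}
}
```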
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/cache/lru.go b/vendor/github.com/syndtr/goleveldb/leveldb/cache/lru.go
new file mode 100644
index 000000000..d9a84cde1
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/cache/lru.go
@@ -0,0 +1,195 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package cache
+
+import (
+ "sync"
+ "unsafe"
+)
+
+type lruNode struct {
+ n *Node
+ h *Handle
+ ban bool
+
+ next, prev *lruNode
+}
+
+func (n *lruNode) insert(at *lruNode) {
+ x := at.next
+ at.next = n
+ n.prev = at
+ n.next = x
+ x.prev = n
+}
+
+func (n *lruNode) remove() {
+ if n.prev != nil {
+ n.prev.next = n.next
+ n.next.prev = n.prev
+ n.prev = nil
+ n.next = nil
+ } else {
+ panic("BUG: removing removed node")
+ }
+}
+
+type lru struct {
+ mu sync.Mutex
+ capacity int
+ used int
+ recent lruNode
+}
+
+func (r *lru) reset() {
+ r.recent.next = &r.recent
+ r.recent.prev = &r.recent
+ r.used = 0
+}
+
+func (r *lru) Capacity() int {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+ return r.capacity
+}
+
+func (r *lru) SetCapacity(capacity int) {
+ var evicted []*lruNode
+
+ r.mu.Lock()
+ r.capacity = capacity
+ for r.used > r.capacity {
+ rn := r.recent.prev
+ if rn == nil {
+ panic("BUG: invalid LRU used or capacity counter")
+ }
+ rn.remove()
+ rn.n.CacheData = nil
+ r.used -= rn.n.Size()
+ evicted = append(evicted, rn)
+ }
+ r.mu.Unlock()
+
+ for _, rn := range evicted {
+ rn.h.Release()
+ }
+}
+
+func (r *lru) Promote(n *Node) {
+ var evicted []*lruNode
+
+ r.mu.Lock()
+ if n.CacheData == nil {
+ if n.Size() <= r.capacity {
+ rn := &lruNode{n: n, h: n.GetHandle()}
+ rn.insert(&r.recent)
+ n.CacheData = unsafe.Pointer(rn)
+ r.used += n.Size()
+
+ for r.used > r.capacity {
+ rn := r.recent.prev
+ if rn == nil {
+ panic("BUG: invalid LRU used or capacity counter")
+ }
+ rn.remove()
+ rn.n.CacheData = nil
+ r.used -= rn.n.Size()
+ evicted = append(evicted, rn)
+ }
+ }
+ } else {
+ rn := (*lruNode)(n.CacheData)
+ if !rn.ban {
+ rn.remove()
+ rn.insert(&r.recent)
+ }
+ }
+ r.mu.Unlock()
+
+ for _, rn := range evicted {
+ rn.h.Release()
+ }
+}
+
+func (r *lru) Ban(n *Node) {
+ r.mu.Lock()
+ if n.CacheData == nil {
+ n.CacheData = unsafe.Pointer(&lruNode{n: n, ban: true})
+ } else {
+ rn := (*lruNode)(n.CacheData)
+ if !rn.ban {
+ rn.remove()
+ rn.ban = true
+ r.used -= rn.n.Size()
+ r.mu.Unlock()
+
+ rn.h.Release()
+ rn.h = nil
+ return
+ }
+ }
+ r.mu.Unlock()
+}
+
+func (r *lru) Evict(n *Node) {
+ r.mu.Lock()
+ rn := (*lruNode)(n.CacheData)
+ if rn == nil || rn.ban {
+ r.mu.Unlock()
+ return
+ }
+ n.CacheData = nil
+ r.mu.Unlock()
+
+ rn.h.Release()
+}
+
+func (r *lru) EvictNS(ns uint64) {
+ var evicted []*lruNode
+
+ r.mu.Lock()
+ for e := r.recent.prev; e != &r.recent; {
+ rn := e
+ e = e.prev
+ if rn.n.NS() == ns {
+ rn.remove()
+ rn.n.CacheData = nil
+ r.used -= rn.n.Size()
+ evicted = append(evicted, rn)
+ }
+ }
+ r.mu.Unlock()
+
+ for _, rn := range evicted {
+ rn.h.Release()
+ }
+}
+
+func (r *lru) EvictAll() {
+ r.mu.Lock()
+ back := r.recent.prev
+ for rn := back; rn != &r.recent; rn = rn.prev {
+ rn.n.CacheData = nil
+ }
+ r.reset()
+ r.mu.Unlock()
+
+ for rn := back; rn != &r.recent; rn = rn.prev {
+ rn.h.Release()
+ }
+}
+
+func (r *lru) Close() error {
+ return nil
+}
+
+// NewLRU creates a new LRU cache.
+func NewLRU(capacity int) Cacher {
+ r := &lru{capacity: capacity}
+ r.reset()
+ return r
+}
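
A sketch wiring `NewLRU` into `NewCache` to show capacity-driven eviction. Capacity is measured in the same units as the sizes returned by `setFunc` (here one unit per entry); the keys are illustrative:

```go
package main

import (
	"fmt"

	"github.com/syndtr/goleveldb/leveldb/cache"
)

func main() {
	// A 'cache map' backed by the LRU Cacher above, with capacity 2.
	c := cache.NewCache(cache.NewLRU(2))
	defer c.Close()

	// Insert three entries of size 1 each; the LRU holds its own handle
	// on every promoted node until it evicts it.
	for key := uint64(0); key < 3; key++ {
		k := key
		h := c.Get(0, k, func() (int, cache.Value) { return 1, k })
		h.Release()
	}

	// Inserting key 2 pushed usage to 3 > 2, so the least recently used
	// entry (key 0) was evicted and released; it is now a miss.
	fmt.Println("key 0 cached:", c.Get(0, 0, nil) != nil) // false

	if h := c.Get(0, 2, nil); h != nil {
		fmt.Println("key 2 cached:", h.Value()) // uint64(2)
		h.Release()
	}
}
```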
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/comparer.go b/vendor/github.com/syndtr/goleveldb/leveldb/comparer.go
new file mode 100644
index 000000000..448402b82
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/comparer.go
@@ -0,0 +1,67 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/comparer"
+)
+
+type iComparer struct {
+ ucmp comparer.Comparer
+}
+
+func (icmp *iComparer) uName() string {
+ return icmp.ucmp.Name()
+}
+
+func (icmp *iComparer) uCompare(a, b []byte) int {
+ return icmp.ucmp.Compare(a, b)
+}
+
+func (icmp *iComparer) uSeparator(dst, a, b []byte) []byte {
+ return icmp.ucmp.Separator(dst, a, b)
+}
+
+func (icmp *iComparer) uSuccessor(dst, b []byte) []byte {
+ return icmp.ucmp.Successor(dst, b)
+}
+
+func (icmp *iComparer) Name() string {
+ return icmp.uName()
+}
+
+func (icmp *iComparer) Compare(a, b []byte) int {
+ x := icmp.uCompare(internalKey(a).ukey(), internalKey(b).ukey())
+ if x == 0 {
+ if m, n := internalKey(a).num(), internalKey(b).num(); m > n {
+ return -1
+ } else if m < n {
+ return 1
+ }
+ }
+ return x
+}
+
+func (icmp *iComparer) Separator(dst, a, b []byte) []byte {
+ ua, ub := internalKey(a).ukey(), internalKey(b).ukey()
+ dst = icmp.uSeparator(dst, ua, ub)
+ if dst != nil && len(dst) < len(ua) && icmp.uCompare(ua, dst) < 0 {
+ // Append earliest possible number.
+ return append(dst, keyMaxNumBytes...)
+ }
+ return nil
+}
+
+func (icmp *iComparer) Successor(dst, b []byte) []byte {
+ ub := internalKey(b).ukey()
+ dst = icmp.uSuccessor(dst, ub)
+ if dst != nil && len(dst) < len(ub) && icmp.uCompare(ub, dst) < 0 {
+ // Append earliest possible number.
+ return append(dst, keyMaxNumBytes...)
+ }
+ return nil
+}
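
`iComparer.Compare` above orders equal user keys by descending sequence number, so the newest entry for a key is encountered first during iteration. A minimal sketch of that decision, assuming the standard LevelDB internal-key layout (user key followed by 8 little-endian bytes encoding `(seq<<8)|keyType`; the real encoding lives in leveldb/key.go, which this diff lists but does not show here):

```go
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// makeIKey mimics the assumed internal-key layout: user key plus
// 8 little-endian bytes encoding (seq<<8)|keyType.
func makeIKey(ukey []byte, seq, kt uint64) []byte {
	ik := append([]byte{}, ukey...)
	var num [8]byte
	binary.LittleEndian.PutUint64(num[:], seq<<8|kt)
	return append(ik, num[:]...)
}

// compare mirrors iComparer.Compare: bytewise on the user key, then
// descending on the trailing (seq<<8)|keyType number.
func compare(a, b []byte) int {
	ua, ub := a[:len(a)-8], b[:len(b)-8]
	if x := bytes.Compare(ua, ub); x != 0 {
		return x
	}
	na := binary.LittleEndian.Uint64(a[len(a)-8:])
	nb := binary.LittleEndian.Uint64(b[len(b)-8:])
	switch {
	case na > nb:
		return -1
	case na < nb:
		return 1
	}
	return 0
}

func main() {
	newer := makeIKey([]byte("k"), 7, 1) // seq 7, keyTypeVal
	older := makeIKey([]byte("k"), 3, 1) // seq 3, keyTypeVal
	fmt.Println(compare(newer, older))   // -1: the newer entry sorts first
}
```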
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go
new file mode 100644
index 000000000..14dddf88d
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go
@@ -0,0 +1,51 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package comparer
+
+import "bytes"
+
+type bytesComparer struct{}
+
+func (bytesComparer) Compare(a, b []byte) int {
+ return bytes.Compare(a, b)
+}
+
+func (bytesComparer) Name() string {
+ return "leveldb.BytewiseComparator"
+}
+
+func (bytesComparer) Separator(dst, a, b []byte) []byte {
+ i, n := 0, len(a)
+ if n > len(b) {
+ n = len(b)
+ }
+ for ; i < n && a[i] == b[i]; i++ {
+ }
+ if i >= n {
+ // Do not shorten if one string is a prefix of the other
+ } else if c := a[i]; c < 0xff && c+1 < b[i] {
+ dst = append(dst, a[:i+1]...)
+ dst[i]++
+ return dst
+ }
+ return nil
+}
+
+func (bytesComparer) Successor(dst, b []byte) []byte {
+ for i, c := range b {
+ if c != 0xff {
+ dst = append(dst, b[:i+1]...)
+ dst[i]++
+ return dst
+ }
+ }
+ return nil
+}
+
+// DefaultComparer is the default implementation of the Comparer interface.
+// It uses the natural ordering, consistent with bytes.Compare.
+var DefaultComparer = bytesComparer{}
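
A short sketch of how the two shortening functions above behave, using `DefaultComparer`; the expected outputs follow directly from the code:

```go
package main

import (
	"fmt"

	"github.com/syndtr/goleveldb/leveldb/comparer"
)

func main() {
	c := comparer.DefaultComparer

	// Separator shortens "abcdef" toward a key still below "abz":
	// the first differing byte 'c' is bumped to 'd'.
	fmt.Printf("%s\n", c.Separator(nil, []byte("abcdef"), []byte("abz"))) // "abd"

	// No shortening when one key is a prefix of the other: nil is
	// returned, meaning "keep the original key".
	fmt.Println(c.Separator(nil, []byte("ab"), []byte("abc")) == nil) // true

	// Successor bumps the first non-0xff byte and truncates there.
	fmt.Printf("%s\n", c.Successor(nil, []byte("abc"))) // "b"
}
```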
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go
new file mode 100644
index 000000000..14a28f16f
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go
@@ -0,0 +1,57 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package comparer provides interface and implementation for ordering
+// sets of data.
+package comparer
+
+// BasicComparer is the interface that wraps the basic Compare method.
+type BasicComparer interface {
+ // Compare returns -1, 0, or +1 depending on whether a is 'less than',
+ // 'equal to' or 'greater than' b. The two arguments can only be 'equal'
+ // if their contents are exactly equal. Furthermore, the empty slice
+ // must be 'less than' any non-empty slice.
+ Compare(a, b []byte) int
+}
+
+// Comparer defines a total ordering over the space of []byte keys: a 'less
+// than' relationship.
+type Comparer interface {
+ BasicComparer
+
+ // Name returns the name of the comparer.
+ //
+ // The LevelDB on-disk format stores the comparer name, and opening a
+ // database with a different comparer from the one it was created with
+ // will result in an error.
+ //
+ // An implementation should switch to a new name whenever the comparer
+ // implementation changes in a way that would cause the relative ordering
+ // of any two keys to change.
+ //
+ // Names starting with "leveldb." are reserved and should not be used
+ // by any users of this package.
+ Name() string
+
+ // Below are advanced functions used to reduce the space requirements
+ // for internal data structures such as index blocks.
+
+ // Separator appends a sequence of bytes x to dst such that a <= x && x < b,
+ // where 'less than' is consistent with Compare. An implementation should
+ // return nil if x is equal to a.
+ //
+ // The contents of a and b must not be modified in any way. Doing so
+ // may corrupt the internal state.
+ Separator(dst, a, b []byte) []byte
+
+ // Successor appends a sequence of bytes x to dst such that x >= b, where
+ // the ordering is consistent with Compare. An implementation should
+ // return nil if x is equal to b.
+ //
+ // The contents of b must not be modified in any way. Doing so may
+ // corrupt the internal state.
+ Successor(dst, b []byte) []byte
+}
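
A sketch of a custom `Comparer` plugged in through `opt.Options.Comparer`. The length-first ordering, the `lenfirst.db` path, and the name `example.LenFirstComparator` are all illustrative; returning nil from `Separator`/`Successor` is the conservative choice the interface permits, which the library treats as "do not shorten":

```go
package main

import (
	"bytes"
	"log"

	"github.com/syndtr/goleveldb/leveldb"
	"github.com/syndtr/goleveldb/leveldb/opt"
)

// lenFirstComparer orders keys by length, then bytewise. The empty slice
// stays 'less than' any non-empty slice, and keys compare equal only when
// their contents match exactly, as BasicComparer requires.
type lenFirstComparer struct{}

func (lenFirstComparer) Compare(a, b []byte) int {
	if len(a) != len(b) {
		if len(a) < len(b) {
			return -1
		}
		return 1
	}
	return bytes.Compare(a, b)
}

// Name must change whenever the ordering changes; a DB cannot be opened
// with a different comparer than it was created with. The "leveldb."
// prefix is reserved, so an application namespace is used here.
func (lenFirstComparer) Name() string { return "example.LenFirstComparator" }

// Returning nil skips index-key shortening, trading a little index space
// for simplicity.
func (lenFirstComparer) Separator(dst, a, b []byte) []byte { return nil }
func (lenFirstComparer) Successor(dst, b []byte) []byte    { return nil }

func main() {
	db, err := leveldb.OpenFile("lenfirst.db", &opt.Options{Comparer: lenFirstComparer{}})
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
}
```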
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db.go b/vendor/github.com/syndtr/goleveldb/leveldb/db.go
new file mode 100644
index 000000000..a02cb2c50
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db.go
@@ -0,0 +1,1091 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "container/list"
+ "fmt"
+ "io"
+ "os"
+ "runtime"
+ "strings"
+ "sync"
+ "sync/atomic"
+ "time"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/journal"
+ "github.com/syndtr/goleveldb/leveldb/memdb"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/table"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// DB is a LevelDB database.
+type DB struct {
+ // Need 64-bit alignment.
+ seq uint64
+
+ // Session.
+ s *session
+
+ // MemDB.
+ memMu sync.RWMutex
+ memPool chan *memdb.DB
+ mem, frozenMem *memDB
+ journal *journal.Writer
+ journalWriter storage.Writer
+ journalFd storage.FileDesc
+ frozenJournalFd storage.FileDesc
+ frozenSeq uint64
+
+ // Snapshot.
+ snapsMu sync.Mutex
+ snapsList *list.List
+
+ // Stats.
+ aliveSnaps, aliveIters int32
+
+ // Write.
+ batchPool sync.Pool
+ writeMergeC chan writeMerge
+ writeMergedC chan bool
+ writeLockC chan struct{}
+ writeAckC chan error
+ writeDelay time.Duration
+ writeDelayN int
+ tr *Transaction
+
+ // Compaction.
+ compCommitLk sync.Mutex
+ tcompCmdC chan cCmd
+ tcompPauseC chan chan<- struct{}
+ mcompCmdC chan cCmd
+ compErrC chan error
+ compPerErrC chan error
+ compErrSetC chan error
+ compWriteLocking bool
+ compStats cStats
+ memdbMaxLevel int // For testing.
+
+ // Close.
+ closeW sync.WaitGroup
+ closeC chan struct{}
+ closed uint32
+ closer io.Closer
+}
+
+func openDB(s *session) (*DB, error) {
+ s.log("db@open opening")
+ start := time.Now()
+ db := &DB{
+ s: s,
+ // Initial sequence
+ seq: s.stSeqNum,
+ // MemDB
+ memPool: make(chan *memdb.DB, 1),
+ // Snapshot
+ snapsList: list.New(),
+ // Write
+ batchPool: sync.Pool{New: newBatch},
+ writeMergeC: make(chan writeMerge),
+ writeMergedC: make(chan bool),
+ writeLockC: make(chan struct{}, 1),
+ writeAckC: make(chan error),
+ // Compaction
+ tcompCmdC: make(chan cCmd),
+ tcompPauseC: make(chan chan<- struct{}),
+ mcompCmdC: make(chan cCmd),
+ compErrC: make(chan error),
+ compPerErrC: make(chan error),
+ compErrSetC: make(chan error),
+ // Close
+ closeC: make(chan struct{}),
+ }
+
+ // Read-only mode.
+ readOnly := s.o.GetReadOnly()
+
+ if readOnly {
+ // Recover journals (read-only mode).
+ if err := db.recoverJournalRO(); err != nil {
+ return nil, err
+ }
+ } else {
+ // Recover journals.
+ if err := db.recoverJournal(); err != nil {
+ return nil, err
+ }
+
+ // Remove any obsolete files.
+ if err := db.checkAndCleanFiles(); err != nil {
+ // Close journal.
+ if db.journal != nil {
+ db.journal.Close()
+ db.journalWriter.Close()
+ }
+ return nil, err
+ }
+
+ }
+
+ // Doesn't need to be included in the wait group.
+ go db.compactionError()
+ go db.mpoolDrain()
+
+ if readOnly {
+ db.SetReadOnly()
+ } else {
+ db.closeW.Add(2)
+ go db.tCompaction()
+ go db.mCompaction()
+ // go db.jWriter()
+ }
+
+ s.logf("db@open done T·%v", time.Since(start))
+
+ runtime.SetFinalizer(db, (*DB).Close)
+ return db, nil
+}
+
+// Open opens or creates a DB for the given storage.
+// The DB will be created if it does not exist, unless ErrorIfMissing is true.
+// Also, if ErrorIfExist is true and the DB exists, Open will return an
+// os.ErrExist error.
+//
+// Open will return an error with type of ErrCorrupted if corruption
+// detected in the DB. Use errors.IsCorrupted to test whether an error is
+// due to corruption. Corrupted DB can be recovered with Recover function.
+//
+// The returned DB instance is safe for concurrent use.
+// The DB must be closed after use, by calling Close method.
+func Open(stor storage.Storage, o *opt.Options) (db *DB, err error) {
+ s, err := newSession(stor, o)
+ if err != nil {
+ return
+ }
+ defer func() {
+ if err != nil {
+ s.close()
+ s.release()
+ }
+ }()
+
+ err = s.recover()
+ if err != nil {
+ if !os.IsNotExist(err) || s.o.GetErrorIfMissing() {
+ return
+ }
+ err = s.create()
+ if err != nil {
+ return
+ }
+ } else if s.o.GetErrorIfExist() {
+ err = os.ErrExist
+ return
+ }
+
+ return openDB(s)
+}
+
+// OpenFile opens or creates a DB for the given path.
+// The DB will be created if it does not exist, unless ErrorIfMissing is true.
+// Also, if ErrorIfExist is true and the DB exists, OpenFile will return an
+// os.ErrExist error.
+//
+// OpenFile uses standard file-system backed storage implementation as
+// described in the leveldb/storage package.
+//
+// OpenFile will return an error with type of ErrCorrupted if corruption
+// detected in the DB. Use errors.IsCorrupted to test whether an error is
+// due to corruption. Corrupted DB can be recovered with Recover function.
+//
+// The returned DB instance is safe for concurrent use.
+// The DB must be closed after use, by calling Close method.
+func OpenFile(path string, o *opt.Options) (db *DB, err error) {
+ stor, err := storage.OpenFile(path, o.GetReadOnly())
+ if err != nil {
+ return
+ }
+ db, err = Open(stor, o)
+ if err != nil {
+ stor.Close()
+ } else {
+ db.closer = stor
+ }
+ return
+}
+
+// Recover recovers and opens a DB with missing or corrupted manifest files
+// for the given storage. It will ignore any manifest files, valid or not.
+// The DB must already exist or it will return an error.
+// Also, Recover will ignore ErrorIfMissing and ErrorIfExist options.
+//
+// The returned DB instance is safe for concurrent use.
+// The DB must be closed after use, by calling Close method.
+func Recover(stor storage.Storage, o *opt.Options) (db *DB, err error) {
+ s, err := newSession(stor, o)
+ if err != nil {
+ return
+ }
+ defer func() {
+ if err != nil {
+ s.close()
+ s.release()
+ }
+ }()
+
+ err = recoverTable(s, o)
+ if err != nil {
+ return
+ }
+ return openDB(s)
+}
+
+// RecoverFile recovers and opens a DB with missing or corrupted manifest files
+// for the given path. It will ignore any manifest files, valid or not.
+// The DB must already exist or it will return an error.
+// Also, Recover will ignore ErrorIfMissing and ErrorIfExist options.
+//
+// RecoverFile uses standard file-system backed storage implementation as described
+// in the leveldb/storage package.
+//
+// The returned DB instance is safe for concurrent use.
+// The DB must be closed after use, by calling Close method.
+func RecoverFile(path string, o *opt.Options) (db *DB, err error) {
+ stor, err := storage.OpenFile(path, false)
+ if err != nil {
+ return
+ }
+ db, err = Recover(stor, o)
+ if err != nil {
+ stor.Close()
+ } else {
+ db.closer = stor
+ }
+ return
+}
+
+func recoverTable(s *session, o *opt.Options) error {
+ o = dupOptions(o)
+ // Mask StrictReader; let StrictRecovery do its job.
+ o.Strict &= ^opt.StrictReader
+
+ // Get all tables and sort them by file number.
+ fds, err := s.stor.List(storage.TypeTable)
+ if err != nil {
+ return err
+ }
+ sortFds(fds)
+
+ var (
+ maxSeq uint64
+ recoveredKey, goodKey, corruptedKey, corruptedBlock, droppedTable int
+
+ // We will drop corrupted table.
+ strict = o.GetStrict(opt.StrictRecovery)
+ noSync = o.GetNoSync()
+
+ rec = &sessionRecord{}
+ bpool = util.NewBufferPool(o.GetBlockSize() + 5)
+ )
+ buildTable := func(iter iterator.Iterator) (tmpFd storage.FileDesc, size int64, err error) {
+ tmpFd = s.newTemp()
+ writer, err := s.stor.Create(tmpFd)
+ if err != nil {
+ return
+ }
+ defer func() {
+ writer.Close()
+ if err != nil {
+ s.stor.Remove(tmpFd)
+ tmpFd = storage.FileDesc{}
+ }
+ }()
+
+ // Copy entries.
+ tw := table.NewWriter(writer, o)
+ for iter.Next() {
+ key := iter.Key()
+ if validInternalKey(key) {
+ err = tw.Append(key, iter.Value())
+ if err != nil {
+ return
+ }
+ }
+ }
+ err = iter.Error()
+ if err != nil {
+ return
+ }
+ err = tw.Close()
+ if err != nil {
+ return
+ }
+ if !noSync {
+ err = writer.Sync()
+ if err != nil {
+ return
+ }
+ }
+ size = int64(tw.BytesLen())
+ return
+ }
+ recoverTable := func(fd storage.FileDesc) error {
+ s.logf("table@recovery recovering @%d", fd.Num)
+ reader, err := s.stor.Open(fd)
+ if err != nil {
+ return err
+ }
+ var closed bool
+ defer func() {
+ if !closed {
+ reader.Close()
+ }
+ }()
+
+ // Get file size.
+ size, err := reader.Seek(0, 2)
+ if err != nil {
+ return err
+ }
+
+ var (
+ tSeq uint64
+ tgoodKey, tcorruptedKey, tcorruptedBlock int
+ imin, imax []byte
+ )
+ tr, err := table.NewReader(reader, size, fd, nil, bpool, o)
+ if err != nil {
+ return err
+ }
+ iter := tr.NewIterator(nil, nil)
+ if itererr, ok := iter.(iterator.ErrorCallbackSetter); ok {
+ itererr.SetErrorCallback(func(err error) {
+ if errors.IsCorrupted(err) {
+ s.logf("table@recovery block corruption @%d %q", fd.Num, err)
+ tcorruptedBlock++
+ }
+ })
+ }
+
+ // Scan the table.
+ for iter.Next() {
+ key := iter.Key()
+ _, seq, _, kerr := parseInternalKey(key)
+ if kerr != nil {
+ tcorruptedKey++
+ continue
+ }
+ tgoodKey++
+ if seq > tSeq {
+ tSeq = seq
+ }
+ if imin == nil {
+ imin = append([]byte{}, key...)
+ }
+ imax = append(imax[:0], key...)
+ }
+ if err := iter.Error(); err != nil {
+ iter.Release()
+ return err
+ }
+ iter.Release()
+
+ goodKey += tgoodKey
+ corruptedKey += tcorruptedKey
+ corruptedBlock += tcorruptedBlock
+
+ if strict && (tcorruptedKey > 0 || tcorruptedBlock > 0) {
+ droppedTable++
+ s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
+ return nil
+ }
+
+ if tgoodKey > 0 {
+ if tcorruptedKey > 0 || tcorruptedBlock > 0 {
+ // Rebuild the table.
+ s.logf("table@recovery rebuilding @%d", fd.Num)
+ iter := tr.NewIterator(nil, nil)
+ tmpFd, newSize, err := buildTable(iter)
+ iter.Release()
+ if err != nil {
+ return err
+ }
+ closed = true
+ reader.Close()
+ if err := s.stor.Rename(tmpFd, fd); err != nil {
+ return err
+ }
+ size = newSize
+ }
+ if tSeq > maxSeq {
+ maxSeq = tSeq
+ }
+ recoveredKey += tgoodKey
+ // Add table to level 0.
+ rec.addTable(0, fd.Num, size, imin, imax)
+ s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
+ } else {
+ droppedTable++
+ s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", fd.Num, tcorruptedKey, tcorruptedBlock, size)
+ }
+
+ return nil
+ }
+
+ // Recover all tables.
+ if len(fds) > 0 {
+ s.logf("table@recovery F·%d", len(fds))
+
+ // Mark file number as used.
+ s.markFileNum(fds[len(fds)-1].Num)
+
+ for _, fd := range fds {
+ if err := recoverTable(fd); err != nil {
+ return err
+ }
+ }
+
+ s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(fds), recoveredKey, goodKey, corruptedKey, maxSeq)
+ }
+
+ // Set sequence number.
+ rec.setSeqNum(maxSeq)
+
+ // Create new manifest.
+ if err := s.create(); err != nil {
+ return err
+ }
+
+ // Commit.
+ return s.commit(rec)
+}
+
+func (db *DB) recoverJournal() error {
+ // Get all journals and sort them by file number.
+ rawFds, err := db.s.stor.List(storage.TypeJournal)
+ if err != nil {
+ return err
+ }
+ sortFds(rawFds)
+
+ // Journals that will be recovered.
+ var fds []storage.FileDesc
+ for _, fd := range rawFds {
+ if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum {
+ fds = append(fds, fd)
+ }
+ }
+
+ var (
+ ofd storage.FileDesc // Obsolete file.
+ rec = &sessionRecord{}
+ )
+
+ // Recover journals.
+ if len(fds) > 0 {
+ db.logf("journal@recovery F·%d", len(fds))
+
+ // Mark file number as used.
+ db.s.markFileNum(fds[len(fds)-1].Num)
+
+ var (
+ // Options.
+ strict = db.s.o.GetStrict(opt.StrictJournal)
+ checksum = db.s.o.GetStrict(opt.StrictJournalChecksum)
+ writeBuffer = db.s.o.GetWriteBuffer()
+
+ jr *journal.Reader
+ mdb = memdb.New(db.s.icmp, writeBuffer)
+ buf = &util.Buffer{}
+ batchSeq uint64
+ batchLen int
+ )
+
+ for _, fd := range fds {
+ db.logf("journal@recovery recovering @%d", fd.Num)
+
+ fr, err := db.s.stor.Open(fd)
+ if err != nil {
+ return err
+ }
+
+ // Create or reset journal reader instance.
+ if jr == nil {
+ jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum)
+ } else {
+ jr.Reset(fr, dropper{db.s, fd}, strict, checksum)
+ }
+
+ // Flush memdb and remove obsolete journal file.
+ if !ofd.Zero() {
+ if mdb.Len() > 0 {
+ if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
+ fr.Close()
+ return err
+ }
+ }
+
+ rec.setJournalNum(fd.Num)
+ rec.setSeqNum(db.seq)
+ if err := db.s.commit(rec); err != nil {
+ fr.Close()
+ return err
+ }
+ rec.resetAddedTables()
+
+ db.s.stor.Remove(ofd)
+ ofd = storage.FileDesc{}
+ }
+
+ // Replay journal to memdb.
+ mdb.Reset()
+ for {
+ r, err := jr.Next()
+ if err != nil {
+ if err == io.EOF {
+ break
+ }
+
+ fr.Close()
+ return errors.SetFd(err, fd)
+ }
+
+ buf.Reset()
+ if _, err := buf.ReadFrom(r); err != nil {
+ if err == io.ErrUnexpectedEOF {
+ // This is an error returned due to corruption, with strict == false.
+ continue
+ }
+
+ fr.Close()
+ return errors.SetFd(err, fd)
+ }
+ batchSeq, batchLen, err = decodeBatchToMem(buf.Bytes(), db.seq, mdb)
+ if err != nil {
+ if !strict && errors.IsCorrupted(err) {
+ db.s.logf("journal error: %v (skipped)", err)
+ // We won't apply sequence number as it might be corrupted.
+ continue
+ }
+
+ fr.Close()
+ return errors.SetFd(err, fd)
+ }
+
+ // Save sequence number.
+ db.seq = batchSeq + uint64(batchLen)
+
+ // Flush it if large enough.
+ if mdb.Size() >= writeBuffer {
+ if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
+ fr.Close()
+ return err
+ }
+
+ mdb.Reset()
+ }
+ }
+
+ fr.Close()
+ ofd = fd
+ }
+
+ // Flush the last memdb.
+ if mdb.Len() > 0 {
+ if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
+ return err
+ }
+ }
+ }
+
+ // Create a new journal.
+ if _, err := db.newMem(0); err != nil {
+ return err
+ }
+
+ // Commit.
+ rec.setJournalNum(db.journalFd.Num)
+ rec.setSeqNum(db.seq)
+ if err := db.s.commit(rec); err != nil {
+ // Close journal on error.
+ if db.journal != nil {
+ db.journal.Close()
+ db.journalWriter.Close()
+ }
+ return err
+ }
+
+ // Remove the last obsolete journal file.
+ if !ofd.Zero() {
+ db.s.stor.Remove(ofd)
+ }
+
+ return nil
+}
+
+func (db *DB) recoverJournalRO() error {
+ // Get all journals and sort them by file number.
+ rawFds, err := db.s.stor.List(storage.TypeJournal)
+ if err != nil {
+ return err
+ }
+ sortFds(rawFds)
+
+ // Journals that will be recovered.
+ var fds []storage.FileDesc
+ for _, fd := range rawFds {
+ if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum {
+ fds = append(fds, fd)
+ }
+ }
+
+ var (
+ // Options.
+ strict = db.s.o.GetStrict(opt.StrictJournal)
+ checksum = db.s.o.GetStrict(opt.StrictJournalChecksum)
+ writeBuffer = db.s.o.GetWriteBuffer()
+
+ mdb = memdb.New(db.s.icmp, writeBuffer)
+ )
+
+ // Recover journals.
+ if len(fds) > 0 {
+ db.logf("journal@recovery RO·Mode F·%d", len(fds))
+
+ var (
+ jr *journal.Reader
+ buf = &util.Buffer{}
+ batchSeq uint64
+ batchLen int
+ )
+
+ for _, fd := range fds {
+ db.logf("journal@recovery recovering @%d", fd.Num)
+
+ fr, err := db.s.stor.Open(fd)
+ if err != nil {
+ return err
+ }
+
+ // Create or reset journal reader instance.
+ if jr == nil {
+ jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum)
+ } else {
+ jr.Reset(fr, dropper{db.s, fd}, strict, checksum)
+ }
+
+ // Replay journal to memdb.
+ for {
+ r, err := jr.Next()
+ if err != nil {
+ if err == io.EOF {
+ break
+ }
+
+ fr.Close()
+ return errors.SetFd(err, fd)
+ }
+
+ buf.Reset()
+ if _, err := buf.ReadFrom(r); err != nil {
+ if err == io.ErrUnexpectedEOF {
+ // This error is returned due to corruption when strict == false.
+ continue
+ }
+
+ fr.Close()
+ return errors.SetFd(err, fd)
+ }
+ batchSeq, batchLen, err = decodeBatchToMem(buf.Bytes(), db.seq, mdb)
+ if err != nil {
+ if !strict && errors.IsCorrupted(err) {
+ db.s.logf("journal error: %v (skipped)", err)
+ // We won't apply sequence number as it might be corrupted.
+ continue
+ }
+
+ fr.Close()
+ return errors.SetFd(err, fd)
+ }
+
+ // Save sequence number.
+ db.seq = batchSeq + uint64(batchLen)
+ }
+
+ fr.Close()
+ }
+ }
+
+ // Set memDB.
+ db.mem = &memDB{db: db, DB: mdb, ref: 1}
+
+ return nil
+}
+
+func memGet(mdb *memdb.DB, ikey internalKey, icmp *iComparer) (ok bool, mv []byte, err error) {
+ mk, mv, err := mdb.Find(ikey)
+ if err == nil {
+ ukey, _, kt, kerr := parseInternalKey(mk)
+ if kerr != nil {
+ // Shouldn't have happened.
+ panic(kerr)
+ }
+ if icmp.uCompare(ukey, ikey.ukey()) == 0 {
+ if kt == keyTypeDel {
+ return true, nil, ErrNotFound
+ }
+ return true, mv, nil
+ }
+ } else if err != ErrNotFound {
+ return true, nil, err
+ }
+ return
+}
+
+func (db *DB) get(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) {
+ ikey := makeInternalKey(nil, key, seq, keyTypeSeek)
+
+ if auxm != nil {
+ if ok, mv, me := memGet(auxm, ikey, db.s.icmp); ok {
+ return append([]byte{}, mv...), me
+ }
+ }
+
+ em, fm := db.getMems()
+ for _, m := range [...]*memDB{em, fm} {
+ if m == nil {
+ continue
+ }
+ defer m.decref()
+
+ if ok, mv, me := memGet(m.DB, ikey, db.s.icmp); ok {
+ return append([]byte{}, mv...), me
+ }
+ }
+
+ v := db.s.version()
+ value, cSched, err := v.get(auxt, ikey, ro, false)
+ v.release()
+ if cSched {
+ // Trigger table compaction.
+ db.compTrigger(db.tcompCmdC)
+ }
+ return
+}
+
+func nilIfNotFound(err error) error {
+ if err == ErrNotFound {
+ return nil
+ }
+ return err
+}
+
+func (db *DB) has(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err error) {
+ ikey := makeInternalKey(nil, key, seq, keyTypeSeek)
+
+ if auxm != nil {
+ if ok, _, me := memGet(auxm, ikey, db.s.icmp); ok {
+ return me == nil, nilIfNotFound(me)
+ }
+ }
+
+ em, fm := db.getMems()
+ for _, m := range [...]*memDB{em, fm} {
+ if m == nil {
+ continue
+ }
+ defer m.decref()
+
+ if ok, _, me := memGet(m.DB, ikey, db.s.icmp); ok {
+ return me == nil, nilIfNotFound(me)
+ }
+ }
+
+ v := db.s.version()
+ _, cSched, err := v.get(auxt, ikey, ro, true)
+ v.release()
+ if cSched {
+ // Trigger table compaction.
+ db.compTrigger(db.tcompCmdC)
+ }
+ if err == nil {
+ ret = true
+ } else if err == ErrNotFound {
+ err = nil
+ }
+ return
+}
+
+// Get gets the value for the given key. It returns ErrNotFound if the
+// DB does not contain the key.
+//
+// The returned slice is its own copy; it is safe to modify the contents
+// of the returned slice.
+// It is safe to modify the contents of the argument after Get returns.
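+//
+// A minimal usage sketch (illustrative, not part of the original source;
+// assumes an opened *DB):
+//
+//	value, err := db.Get([]byte("foo"), nil)
+//	if err == leveldb.ErrNotFound {
+//		// Key is absent.
+//	}
+//	_ = value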
+func (db *DB) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
+ err = db.ok()
+ if err != nil {
+ return
+ }
+
+ se := db.acquireSnapshot()
+ defer db.releaseSnapshot(se)
+ return db.get(nil, nil, key, se.seq, ro)
+}
+
+// Has returns true if the DB contains the given key.
+//
+// It is safe to modify the contents of the argument after Has returns.
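+//
+// A minimal sketch (illustrative; assumes an opened *DB):
+//
+//	ok, err := db.Has([]byte("foo"), nil)
+//	_, _ = ok, err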
+func (db *DB) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) {
+ err = db.ok()
+ if err != nil {
+ return
+ }
+
+ se := db.acquireSnapshot()
+ defer db.releaseSnapshot(se)
+ return db.has(nil, nil, key, se.seq, ro)
+}
+
+// NewIterator returns an iterator for the latest snapshot of the
+// underlying DB.
+// The returned iterator is not safe for concurrent use, but it is safe to use
+// multiple iterators concurrently, with each in a dedicated goroutine.
+// It is also safe to use an iterator concurrently with modifying its
+// underlying DB. The resultant key/value pairs are guaranteed to be
+// consistent.
+//
+// Slice allows slicing the iterator to only contain keys in the given
+// range. A nil Range.Start is treated as a key before all keys in the
+// DB. And a nil Range.Limit is treated as a key after all keys in
+// the DB.
+//
+// The iterator must be released after use, by calling Release method.
+//
+// Also read Iterator documentation of the leveldb/iterator package.
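+//
+// A usage sketch (illustrative; assumes an opened *DB):
+//
+//	iter := db.NewIterator(nil, nil)
+//	for iter.Next() {
+//		key := iter.Key()
+//		value := iter.Value()
+//		_, _ = key, value
+//	}
+//	iter.Release()
+//	if err := iter.Error(); err != nil {
+//		// Handle error.
+//	}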
+func (db *DB) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+ if err := db.ok(); err != nil {
+ return iterator.NewEmptyIterator(err)
+ }
+
+ se := db.acquireSnapshot()
+ defer db.releaseSnapshot(se)
+ // The iterator holds a 'version' ref; 'version' is immutable, so the snapshot
+ // can be released after the iterator is created.
+ return db.newIterator(nil, nil, se.seq, slice, ro)
+}
+
+// GetSnapshot returns the latest snapshot of the underlying DB. A snapshot
+// is a frozen view of the DB state at a particular point in time. The
+// contents of a snapshot are guaranteed to be consistent.
+//
+// The snapshot must be released after use, by calling Release method.
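+//
+// A usage sketch (illustrative; assumes an opened *DB):
+//
+//	snap, err := db.GetSnapshot()
+//	if err != nil {
+//		return err
+//	}
+//	defer snap.Release()
+//	value, err := snap.Get([]byte("foo"), nil)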
+func (db *DB) GetSnapshot() (*Snapshot, error) {
+ if err := db.ok(); err != nil {
+ return nil, err
+ }
+
+ return db.newSnapshot(), nil
+}
+
+// GetProperty returns value of the given property name.
+//
+// Property names:
+// leveldb.num-files-at-level{n}
+// Returns the number of files at level 'n'.
+// leveldb.stats
+// Returns statistics of the underlying DB.
+// leveldb.sstables
+// Returns sstables list for each level.
+// leveldb.blockpool
+// Returns block pool stats.
+// leveldb.cachedblock
+// Returns size of cached block.
+// leveldb.openedtables
+// Returns number of opened tables.
+// leveldb.alivesnaps
+// Returns number of alive snapshots.
+// leveldb.aliveiters
+// Returns number of alive iterators.
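+//
+// A sketch of querying a property (illustrative; property names as listed above):
+//
+//	stats, err := db.GetProperty("leveldb.stats")
+//	if err == nil {
+//		fmt.Println(stats)
+//	}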
+func (db *DB) GetProperty(name string) (value string, err error) {
+ err = db.ok()
+ if err != nil {
+ return
+ }
+
+ const prefix = "leveldb."
+ if !strings.HasPrefix(name, prefix) {
+ return "", ErrNotFound
+ }
+ p := name[len(prefix):]
+
+ v := db.s.version()
+ defer v.release()
+
+ numFilesPrefix := "num-files-at-level"
+ switch {
+ case strings.HasPrefix(p, numFilesPrefix):
+ var level uint
+ var rest string
+ n, _ := fmt.Sscanf(p[len(numFilesPrefix):], "%d%s", &level, &rest)
+ if n != 1 {
+ err = ErrNotFound
+ } else {
+ value = fmt.Sprint(v.tLen(int(level)))
+ }
+ case p == "stats":
+ value = "Compactions\n" +
+ " Level | Tables | Size(MB) | Time(sec) | Read(MB) | Write(MB)\n" +
+ "-------+------------+---------------+---------------+---------------+---------------\n"
+ for level, tables := range v.levels {
+ duration, read, write := db.compStats.getStat(level)
+ if len(tables) == 0 && duration == 0 {
+ continue
+ }
+ value += fmt.Sprintf(" %3d | %10d | %13.5f | %13.5f | %13.5f | %13.5f\n",
+ level, len(tables), float64(tables.size())/1048576.0, duration.Seconds(),
+ float64(read)/1048576.0, float64(write)/1048576.0)
+ }
+ case p == "sstables":
+ for level, tables := range v.levels {
+ value += fmt.Sprintf("--- level %d ---\n", level)
+ for _, t := range tables {
+ value += fmt.Sprintf("%d:%d[%q .. %q]\n", t.fd.Num, t.size, t.imin, t.imax)
+ }
+ }
+ case p == "blockpool":
+ value = fmt.Sprintf("%v", db.s.tops.bpool)
+ case p == "cachedblock":
+ if db.s.tops.bcache != nil {
+ value = fmt.Sprintf("%d", db.s.tops.bcache.Size())
+ } else {
+ value = "<nil>"
+ }
+ case p == "openedtables":
+ value = fmt.Sprintf("%d", db.s.tops.cache.Size())
+ case p == "alivesnaps":
+ value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveSnaps))
+ case p == "aliveiters":
+ value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveIters))
+ default:
+ err = ErrNotFound
+ }
+
+ return
+}
+
+// SizeOf calculates approximate sizes of the given key ranges.
+// The length of the returned sizes equals the length of the given
+// ranges. The returned sizes measure storage space usage, so if the user
+// data compresses by a factor of ten, the returned sizes will be one-tenth
+// the size of the corresponding user data size.
+// The results may not include the sizes of recently written data.
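+//
+// A sketch (illustrative; assumes an opened *DB):
+//
+//	sizes, err := db.SizeOf([]util.Range{
+//		{Start: []byte("a"), Limit: []byte("z")},
+//	})
+//	if err == nil {
+//		total := sizes.Sum()
+//		_ = total
+//	}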
+func (db *DB) SizeOf(ranges []util.Range) (Sizes, error) {
+ if err := db.ok(); err != nil {
+ return nil, err
+ }
+
+ v := db.s.version()
+ defer v.release()
+
+ sizes := make(Sizes, 0, len(ranges))
+ for _, r := range ranges {
+ imin := makeInternalKey(nil, r.Start, keyMaxSeq, keyTypeSeek)
+ imax := makeInternalKey(nil, r.Limit, keyMaxSeq, keyTypeSeek)
+ start, err := v.offsetOf(imin)
+ if err != nil {
+ return nil, err
+ }
+ limit, err := v.offsetOf(imax)
+ if err != nil {
+ return nil, err
+ }
+ var size int64
+ if limit >= start {
+ size = limit - start
+ }
+ sizes = append(sizes, size)
+ }
+
+ return sizes, nil
+}
+
+// Close closes the DB. This will also release any outstanding snapshots,
+// abort any in-flight compaction and discard any open transaction.
+//
+// It is not safe to close a DB until all outstanding iterators are released.
+// It is valid to call Close multiple times. Other methods should not be
+// called after the DB has been closed.
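+//
+// The typical lifecycle (a sketch; the path is illustrative):
+//
+//	db, err := leveldb.OpenFile("path/to/db", nil)
+//	if err != nil {
+//		return err
+//	}
+//	defer db.Close()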
+func (db *DB) Close() error {
+ if !db.setClosed() {
+ return ErrClosed
+ }
+
+ start := time.Now()
+ db.log("db@close closing")
+
+ // Clear the finalizer.
+ runtime.SetFinalizer(db, nil)
+
+ // Get compaction error.
+ var err error
+ select {
+ case err = <-db.compErrC:
+ if err == ErrReadOnly {
+ err = nil
+ }
+ default:
+ }
+
+ // Signal all goroutines.
+ close(db.closeC)
+
+ // Discard open transaction.
+ if db.tr != nil {
+ db.tr.Discard()
+ }
+
+ // Acquire writer lock.
+ db.writeLockC <- struct{}{}
+
+ // Wait for all goroutines to exit.
+ db.closeW.Wait()
+
+ // Close the journal.
+ if db.journal != nil {
+ db.journal.Close()
+ db.journalWriter.Close()
+ db.journal = nil
+ db.journalWriter = nil
+ }
+
+ if db.writeDelayN > 0 {
+ db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay)
+ }
+
+ // Close session.
+ db.s.close()
+ db.logf("db@close done T·%v", time.Since(start))
+ db.s.release()
+
+ if db.closer != nil {
+ if err1 := db.closer.Close(); err == nil {
+ err = err1
+ }
+ db.closer = nil
+ }
+
+ // Clear memdbs.
+ db.clearMems()
+
+ return err
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go
new file mode 100644
index 000000000..2d0ad0753
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go
@@ -0,0 +1,826 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "sync"
+ "time"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+var (
+ errCompactionTransactExiting = errors.New("leveldb: compaction transact exiting")
+)
+
+type cStat struct {
+ duration time.Duration
+ read int64
+ write int64
+}
+
+func (p *cStat) add(n *cStatStaging) {
+ p.duration += n.duration
+ p.read += n.read
+ p.write += n.write
+}
+
+func (p *cStat) get() (duration time.Duration, read, write int64) {
+ return p.duration, p.read, p.write
+}
+
+type cStatStaging struct {
+ start time.Time
+ duration time.Duration
+ on bool
+ read int64
+ write int64
+}
+
+func (p *cStatStaging) startTimer() {
+ if !p.on {
+ p.start = time.Now()
+ p.on = true
+ }
+}
+
+func (p *cStatStaging) stopTimer() {
+ if p.on {
+ p.duration += time.Since(p.start)
+ p.on = false
+ }
+}
+
+type cStats struct {
+ lk sync.Mutex
+ stats []cStat
+}
+
+func (p *cStats) addStat(level int, n *cStatStaging) {
+ p.lk.Lock()
+ if level >= len(p.stats) {
+ newStats := make([]cStat, level+1)
+ copy(newStats, p.stats)
+ p.stats = newStats
+ }
+ p.stats[level].add(n)
+ p.lk.Unlock()
+}
+
+func (p *cStats) getStat(level int) (duration time.Duration, read, write int64) {
+ p.lk.Lock()
+ defer p.lk.Unlock()
+ if level < len(p.stats) {
+ return p.stats[level].get()
+ }
+ return
+}
+
+func (db *DB) compactionError() {
+ var err error
+noerr:
+ // No error.
+ for {
+ select {
+ case err = <-db.compErrSetC:
+ switch {
+ case err == nil:
+ case err == ErrReadOnly, errors.IsCorrupted(err):
+ goto hasperr
+ default:
+ goto haserr
+ }
+ case <-db.closeC:
+ return
+ }
+ }
+haserr:
+ // Transient error.
+ for {
+ select {
+ case db.compErrC <- err:
+ case err = <-db.compErrSetC:
+ switch {
+ case err == nil:
+ goto noerr
+ case err == ErrReadOnly, errors.IsCorrupted(err):
+ goto hasperr
+ default:
+ }
+ case <-db.closeC:
+ return
+ }
+ }
+hasperr:
+ // Persistent error.
+ for {
+ select {
+ case db.compErrC <- err:
+ case db.compPerErrC <- err:
+ case db.writeLockC <- struct{}{}:
+ // Hold the write lock, so that writes won't pass through.
+ db.compWriteLocking = true
+ case <-db.closeC:
+ if db.compWriteLocking {
+ // We should release the lock or Close will hang.
+ <-db.writeLockC
+ }
+ return
+ }
+ }
+}
+
+type compactionTransactCounter int
+
+func (cnt *compactionTransactCounter) incr() {
+ *cnt++
+}
+
+type compactionTransactInterface interface {
+ run(cnt *compactionTransactCounter) error
+ revert() error
+}
+
+func (db *DB) compactionTransact(name string, t compactionTransactInterface) {
+ defer func() {
+ if x := recover(); x != nil {
+ if x == errCompactionTransactExiting {
+ if err := t.revert(); err != nil {
+ db.logf("%s revert error %q", name, err)
+ }
+ }
+ panic(x)
+ }
+ }()
+
+ const (
+ backoffMin = 1 * time.Second
+ backoffMax = 8 * time.Second
+ backoffMul = 2 * time.Second
+ )
+ var (
+ backoff = backoffMin
+ backoffT = time.NewTimer(backoff)
+ lastCnt = compactionTransactCounter(0)
+
+ disableBackoff = db.s.o.GetDisableCompactionBackoff()
+ )
+ for n := 0; ; n++ {
+ // Check whether the DB is closed.
+ if db.isClosed() {
+ db.logf("%s exiting", name)
+ db.compactionExitTransact()
+ } else if n > 0 {
+ db.logf("%s retrying N·%d", name, n)
+ }
+
+ // Execute.
+ cnt := compactionTransactCounter(0)
+ err := t.run(&cnt)
+ if err != nil {
+ db.logf("%s error I·%d %q", name, cnt, err)
+ }
+
+ // Set compaction error status.
+ select {
+ case db.compErrSetC <- err:
+ case perr := <-db.compPerErrC:
+ if err != nil {
+ db.logf("%s exiting (persistent error %q)", name, perr)
+ db.compactionExitTransact()
+ }
+ case <-db.closeC:
+ db.logf("%s exiting", name)
+ db.compactionExitTransact()
+ }
+ if err == nil {
+ return
+ }
+ if errors.IsCorrupted(err) {
+ db.logf("%s exiting (corruption detected)", name)
+ db.compactionExitTransact()
+ }
+
+ if !disableBackoff {
+ // Reset backoff duration if counter is advancing.
+ if cnt > lastCnt {
+ backoff = backoffMin
+ lastCnt = cnt
+ }
+
+ // Backoff.
+ backoffT.Reset(backoff)
+ if backoff < backoffMax {
+ backoff *= backoffMul
+ if backoff > backoffMax {
+ backoff = backoffMax
+ }
+ }
+ select {
+ case <-backoffT.C:
+ case <-db.closeC:
+ db.logf("%s exiting", name)
+ db.compactionExitTransact()
+ }
+ }
+ }
+}
+
+type compactionTransactFunc struct {
+ runFunc func(cnt *compactionTransactCounter) error
+ revertFunc func() error
+}
+
+func (t *compactionTransactFunc) run(cnt *compactionTransactCounter) error {
+ return t.runFunc(cnt)
+}
+
+func (t *compactionTransactFunc) revert() error {
+ if t.revertFunc != nil {
+ return t.revertFunc()
+ }
+ return nil
+}
+
+func (db *DB) compactionTransactFunc(name string, run func(cnt *compactionTransactCounter) error, revert func() error) {
+ db.compactionTransact(name, &compactionTransactFunc{run, revert})
+}
+
+func (db *DB) compactionExitTransact() {
+ panic(errCompactionTransactExiting)
+}
+
+func (db *DB) compactionCommit(name string, rec *sessionRecord) {
+ db.compCommitLk.Lock()
+ defer db.compCommitLk.Unlock() // Defer is necessary.
+ db.compactionTransactFunc(name+"@commit", func(cnt *compactionTransactCounter) error {
+ return db.s.commit(rec)
+ }, nil)
+}
+
+func (db *DB) memCompaction() {
+ mdb := db.getFrozenMem()
+ if mdb == nil {
+ return
+ }
+ defer mdb.decref()
+
+ db.logf("memdb@flush N·%d S·%s", mdb.Len(), shortenb(mdb.Size()))
+
+ // Don't compact empty memdb.
+ if mdb.Len() == 0 {
+ db.logf("memdb@flush skipping")
+ // drop frozen memdb
+ db.dropFrozenMem()
+ return
+ }
+
+ // Pause table compaction.
+ resumeC := make(chan struct{})
+ select {
+ case db.tcompPauseC <- (chan<- struct{})(resumeC):
+ case <-db.compPerErrC:
+ close(resumeC)
+ resumeC = nil
+ case <-db.closeC:
+ return
+ }
+
+ var (
+ rec = &sessionRecord{}
+ stats = &cStatStaging{}
+ flushLevel int
+ )
+
+ // Generate tables.
+ db.compactionTransactFunc("memdb@flush", func(cnt *compactionTransactCounter) (err error) {
+ stats.startTimer()
+ flushLevel, err = db.s.flushMemdb(rec, mdb.DB, db.memdbMaxLevel)
+ stats.stopTimer()
+ return
+ }, func() error {
+ for _, r := range rec.addedTables {
+ db.logf("memdb@flush revert @%d", r.num)
+ if err := db.s.stor.Remove(storage.FileDesc{Type: storage.TypeTable, Num: r.num}); err != nil {
+ return err
+ }
+ }
+ return nil
+ })
+
+ rec.setJournalNum(db.journalFd.Num)
+ rec.setSeqNum(db.frozenSeq)
+
+ // Commit.
+ stats.startTimer()
+ db.compactionCommit("memdb", rec)
+ stats.stopTimer()
+
+ db.logf("memdb@flush committed F·%d T·%v", len(rec.addedTables), stats.duration)
+
+ for _, r := range rec.addedTables {
+ stats.write += r.size
+ }
+ db.compStats.addStat(flushLevel, stats)
+
+ // Drop frozen memdb.
+ db.dropFrozenMem()
+
+ // Resume table compaction.
+ if resumeC != nil {
+ select {
+ case <-resumeC:
+ close(resumeC)
+ case <-db.closeC:
+ return
+ }
+ }
+
+ // Trigger table compaction.
+ db.compTrigger(db.tcompCmdC)
+}
+
+type tableCompactionBuilder struct {
+ db *DB
+ s *session
+ c *compaction
+ rec *sessionRecord
+ stat0, stat1 *cStatStaging
+
+ snapHasLastUkey bool
+ snapLastUkey []byte
+ snapLastSeq uint64
+ snapIter int
+ snapKerrCnt int
+ snapDropCnt int
+
+ kerrCnt int
+ dropCnt int
+
+ minSeq uint64
+ strict bool
+ tableSize int
+
+ tw *tWriter
+}
+
+func (b *tableCompactionBuilder) appendKV(key, value []byte) error {
+ // Create a new table if not already created.
+ if b.tw == nil {
+ // Check for pause event.
+ if b.db != nil {
+ select {
+ case ch := <-b.db.tcompPauseC:
+ b.db.pauseCompaction(ch)
+ case <-b.db.closeC:
+ b.db.compactionExitTransact()
+ default:
+ }
+ }
+
+ // Create new table.
+ var err error
+ b.tw, err = b.s.tops.create()
+ if err != nil {
+ return err
+ }
+ }
+
+ // Write key/value into table.
+ return b.tw.append(key, value)
+}
+
+func (b *tableCompactionBuilder) needFlush() bool {
+ return b.tw.tw.BytesLen() >= b.tableSize
+}
+
+func (b *tableCompactionBuilder) flush() error {
+ t, err := b.tw.finish()
+ if err != nil {
+ return err
+ }
+ b.rec.addTableFile(b.c.sourceLevel+1, t)
+ b.stat1.write += t.size
+ b.s.logf("table@build created L%d@%d N·%d S·%s %q:%q", b.c.sourceLevel+1, t.fd.Num, b.tw.tw.EntriesLen(), shortenb(int(t.size)), t.imin, t.imax)
+ b.tw = nil
+ return nil
+}
+
+func (b *tableCompactionBuilder) cleanup() {
+ if b.tw != nil {
+ b.tw.drop()
+ b.tw = nil
+ }
+}
+
+func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error {
+ snapResumed := b.snapIter > 0
+ hasLastUkey := b.snapHasLastUkey // The key might have zero length, so this is necessary.
+ lastUkey := append([]byte{}, b.snapLastUkey...)
+ lastSeq := b.snapLastSeq
+ b.kerrCnt = b.snapKerrCnt
+ b.dropCnt = b.snapDropCnt
+ // Restore compaction state.
+ b.c.restore()
+
+ defer b.cleanup()
+
+ b.stat1.startTimer()
+ defer b.stat1.stopTimer()
+
+ iter := b.c.newIterator()
+ defer iter.Release()
+ for i := 0; iter.Next(); i++ {
+ // Incr transact counter.
+ cnt.incr()
+
+ // Skip until last state.
+ if i < b.snapIter {
+ continue
+ }
+
+ resumed := false
+ if snapResumed {
+ resumed = true
+ snapResumed = false
+ }
+
+ ikey := iter.Key()
+ ukey, seq, kt, kerr := parseInternalKey(ikey)
+
+ if kerr == nil {
+ shouldStop := !resumed && b.c.shouldStopBefore(ikey)
+
+ if !hasLastUkey || b.s.icmp.uCompare(lastUkey, ukey) != 0 {
+ // First occurrence of this user key.
+
+ // Only rotate tables if ukey doesn't hop across.
+ if b.tw != nil && (shouldStop || b.needFlush()) {
+ if err := b.flush(); err != nil {
+ return err
+ }
+
+ // Creates snapshot of the state.
+ b.c.save()
+ b.snapHasLastUkey = hasLastUkey
+ b.snapLastUkey = append(b.snapLastUkey[:0], lastUkey...)
+ b.snapLastSeq = lastSeq
+ b.snapIter = i
+ b.snapKerrCnt = b.kerrCnt
+ b.snapDropCnt = b.dropCnt
+ }
+
+ hasLastUkey = true
+ lastUkey = append(lastUkey[:0], ukey...)
+ lastSeq = keyMaxSeq
+ }
+
+ switch {
+ case lastSeq <= b.minSeq:
+ // Dropped because a newer entry for the same user key exists
+ fallthrough // (A)
+ case kt == keyTypeDel && seq <= b.minSeq && b.c.baseLevelForKey(lastUkey):
+ // For this user key:
+ // (1) there is no data in higher levels
+ // (2) data in lower levels will have larger seq numbers
+ // (3) data in layers that are being compacted here and have
+ // smaller seq numbers will be dropped in the next
+ // few iterations of this loop (by rule (A) above).
+ // Therefore this deletion marker is obsolete and can be dropped.
+ lastSeq = seq
+ b.dropCnt++
+ continue
+ default:
+ lastSeq = seq
+ }
+ } else {
+ if b.strict {
+ return kerr
+ }
+
+ // Don't drop corrupted keys.
+ hasLastUkey = false
+ lastUkey = lastUkey[:0]
+ lastSeq = keyMaxSeq
+ b.kerrCnt++
+ }
+
+ if err := b.appendKV(ikey, iter.Value()); err != nil {
+ return err
+ }
+ }
+
+ if err := iter.Error(); err != nil {
+ return err
+ }
+
+ // Finish last table.
+ if b.tw != nil && !b.tw.empty() {
+ return b.flush()
+ }
+ return nil
+}
+
+func (b *tableCompactionBuilder) revert() error {
+ for _, at := range b.rec.addedTables {
+ b.s.logf("table@build revert @%d", at.num)
+ if err := b.s.stor.Remove(storage.FileDesc{Type: storage.TypeTable, Num: at.num}); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (db *DB) tableCompaction(c *compaction, noTrivial bool) {
+ defer c.release()
+
+ rec := &sessionRecord{}
+ rec.addCompPtr(c.sourceLevel, c.imax)
+
+ if !noTrivial && c.trivial() {
+ t := c.levels[0][0]
+ db.logf("table@move L%d@%d -> L%d", c.sourceLevel, t.fd.Num, c.sourceLevel+1)
+ rec.delTable(c.sourceLevel, t.fd.Num)
+ rec.addTableFile(c.sourceLevel+1, t)
+ db.compactionCommit("table-move", rec)
+ return
+ }
+
+ var stats [2]cStatStaging
+ for i, tables := range c.levels {
+ for _, t := range tables {
+ stats[i].read += t.size
+ // Insert deleted tables into record
+ rec.delTable(c.sourceLevel+i, t.fd.Num)
+ }
+ }
+ sourceSize := int(stats[0].read + stats[1].read)
+ minSeq := db.minSeq()
+ db.logf("table@compaction L%d·%d -> L%d·%d S·%s Q·%d", c.sourceLevel, len(c.levels[0]), c.sourceLevel+1, len(c.levels[1]), shortenb(sourceSize), minSeq)
+
+ b := &tableCompactionBuilder{
+ db: db,
+ s: db.s,
+ c: c,
+ rec: rec,
+ stat1: &stats[1],
+ minSeq: minSeq,
+ strict: db.s.o.GetStrict(opt.StrictCompaction),
+ tableSize: db.s.o.GetCompactionTableSize(c.sourceLevel + 1),
+ }
+ db.compactionTransact("table@build", b)
+
+ // Commit.
+ stats[1].startTimer()
+ db.compactionCommit("table", rec)
+ stats[1].stopTimer()
+
+ resultSize := int(stats[1].write)
+ db.logf("table@compaction committed F%s S%s Ke·%d D·%d T·%v", sint(len(rec.addedTables)-len(rec.deletedTables)), sshortenb(resultSize-sourceSize), b.kerrCnt, b.dropCnt, stats[1].duration)
+
+ // Save compaction stats
+ for i := range stats {
+ db.compStats.addStat(c.sourceLevel+1, &stats[i])
+ }
+}
+
+func (db *DB) tableRangeCompaction(level int, umin, umax []byte) error {
+ db.logf("table@compaction range L%d %q:%q", level, umin, umax)
+ if level >= 0 {
+ if c := db.s.getCompactionRange(level, umin, umax, true); c != nil {
+ db.tableCompaction(c, true)
+ }
+ } else {
+ // Retry until nothing to compact.
+ for {
+ compacted := false
+
+ // Scan for maximum level with overlapped tables.
+ v := db.s.version()
+ m := 1
+ for i := m; i < len(v.levels); i++ {
+ tables := v.levels[i]
+ if tables.overlaps(db.s.icmp, umin, umax, false) {
+ m = i
+ }
+ }
+ v.release()
+
+ for level := 0; level < m; level++ {
+ if c := db.s.getCompactionRange(level, umin, umax, false); c != nil {
+ db.tableCompaction(c, true)
+ compacted = true
+ }
+ }
+
+ if !compacted {
+ break
+ }
+ }
+ }
+
+ return nil
+}
+
+func (db *DB) tableAutoCompaction() {
+ if c := db.s.pickCompaction(); c != nil {
+ db.tableCompaction(c, false)
+ }
+}
+
+func (db *DB) tableNeedCompaction() bool {
+ v := db.s.version()
+ defer v.release()
+ return v.needCompaction()
+}
+
+func (db *DB) pauseCompaction(ch chan<- struct{}) {
+ select {
+ case ch <- struct{}{}:
+ case <-db.closeC:
+ db.compactionExitTransact()
+ }
+}
+
+type cCmd interface {
+ ack(err error)
+}
+
+type cAuto struct {
+ ackC chan<- error
+}
+
+func (r cAuto) ack(err error) {
+ if r.ackC != nil {
+ defer func() {
+ recover()
+ }()
+ r.ackC <- err
+ }
+}
+
+type cRange struct {
+ level int
+ min, max []byte
+ ackC chan<- error
+}
+
+func (r cRange) ack(err error) {
+ if r.ackC != nil {
+ defer func() {
+ recover()
+ }()
+ r.ackC <- err
+ }
+}
+
+// This will trigger auto compaction but will not wait for it.
+func (db *DB) compTrigger(compC chan<- cCmd) {
+ select {
+ case compC <- cAuto{}:
+ default:
+ }
+}
+
+// This will trigger auto compaction and/or wait for all compaction to be done.
+func (db *DB) compTriggerWait(compC chan<- cCmd) (err error) {
+ ch := make(chan error)
+ defer close(ch)
+ // Send cmd.
+ select {
+ case compC <- cAuto{ch}:
+ case err = <-db.compErrC:
+ return
+ case <-db.closeC:
+ return ErrClosed
+ }
+ // Wait cmd.
+ select {
+ case err = <-ch:
+ case err = <-db.compErrC:
+ case <-db.closeC:
+ return ErrClosed
+ }
+ return err
+}
+
+// Send range compaction request.
+func (db *DB) compTriggerRange(compC chan<- cCmd, level int, min, max []byte) (err error) {
+ ch := make(chan error)
+ defer close(ch)
+ // Send cmd.
+ select {
+ case compC <- cRange{level, min, max, ch}:
+ case err := <-db.compErrC:
+ return err
+ case <-db.closeC:
+ return ErrClosed
+ }
+ // Wait cmd.
+ select {
+ case err = <-ch:
+ case err = <-db.compErrC:
+ case <-db.closeC:
+ return ErrClosed
+ }
+ return err
+}
+
+func (db *DB) mCompaction() {
+ var x cCmd
+
+ defer func() {
+ if x := recover(); x != nil {
+ if x != errCompactionTransactExiting {
+ panic(x)
+ }
+ }
+ if x != nil {
+ x.ack(ErrClosed)
+ }
+ db.closeW.Done()
+ }()
+
+ for {
+ select {
+ case x = <-db.mcompCmdC:
+ switch x.(type) {
+ case cAuto:
+ db.memCompaction()
+ x.ack(nil)
+ x = nil
+ default:
+ panic("leveldb: unknown command")
+ }
+ case <-db.closeC:
+ return
+ }
+ }
+}
+
+func (db *DB) tCompaction() {
+ var x cCmd
+ var ackQ []cCmd
+
+ defer func() {
+ if x := recover(); x != nil {
+ if x != errCompactionTransactExiting {
+ panic(x)
+ }
+ }
+ for i := range ackQ {
+ ackQ[i].ack(ErrClosed)
+ ackQ[i] = nil
+ }
+ if x != nil {
+ x.ack(ErrClosed)
+ }
+ db.closeW.Done()
+ }()
+
+ for {
+ if db.tableNeedCompaction() {
+ select {
+ case x = <-db.tcompCmdC:
+ case ch := <-db.tcompPauseC:
+ db.pauseCompaction(ch)
+ continue
+ case <-db.closeC:
+ return
+ default:
+ }
+ } else {
+ for i := range ackQ {
+ ackQ[i].ack(nil)
+ ackQ[i] = nil
+ }
+ ackQ = ackQ[:0]
+ select {
+ case x = <-db.tcompCmdC:
+ case ch := <-db.tcompPauseC:
+ db.pauseCompaction(ch)
+ continue
+ case <-db.closeC:
+ return
+ }
+ }
+ if x != nil {
+ switch cmd := x.(type) {
+ case cAuto:
+ ackQ = append(ackQ, x)
+ case cRange:
+ x.ack(db.tableRangeCompaction(cmd.level, cmd.min, cmd.max))
+ default:
+ panic("leveldb: unknown command")
+ }
+ x = nil
+ }
+ db.tableAutoCompaction()
+ }
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go
new file mode 100644
index 000000000..03c24cdab
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go
@@ -0,0 +1,360 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "errors"
+ "math/rand"
+ "runtime"
+ "sync"
+ "sync/atomic"
+
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+var (
+ errInvalidInternalKey = errors.New("leveldb: Iterator: invalid internal key")
+)
+
+type memdbReleaser struct {
+ once sync.Once
+ m *memDB
+}
+
+func (mr *memdbReleaser) Release() {
+ mr.once.Do(func() {
+ mr.m.decref()
+ })
+}
+
+func (db *DB) newRawIterator(auxm *memDB, auxt tFiles, slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+ strict := opt.GetStrict(db.s.o.Options, ro, opt.StrictReader)
+ em, fm := db.getMems()
+ v := db.s.version()
+
+ tableIts := v.getIterators(slice, ro)
+ n := len(tableIts) + len(auxt) + 3
+ its := make([]iterator.Iterator, 0, n)
+
+ if auxm != nil {
+ ami := auxm.NewIterator(slice)
+ ami.SetReleaser(&memdbReleaser{m: auxm})
+ its = append(its, ami)
+ }
+ for _, t := range auxt {
+ its = append(its, v.s.tops.newIterator(t, slice, ro))
+ }
+
+ emi := em.NewIterator(slice)
+ emi.SetReleaser(&memdbReleaser{m: em})
+ its = append(its, emi)
+ if fm != nil {
+ fmi := fm.NewIterator(slice)
+ fmi.SetReleaser(&memdbReleaser{m: fm})
+ its = append(its, fmi)
+ }
+ its = append(its, tableIts...)
+ mi := iterator.NewMergedIterator(its, db.s.icmp, strict)
+ mi.SetReleaser(&versionReleaser{v: v})
+ return mi
+}
+
+func (db *DB) newIterator(auxm *memDB, auxt tFiles, seq uint64, slice *util.Range, ro *opt.ReadOptions) *dbIter {
+ var islice *util.Range
+ if slice != nil {
+ islice = &util.Range{}
+ if slice.Start != nil {
+ islice.Start = makeInternalKey(nil, slice.Start, keyMaxSeq, keyTypeSeek)
+ }
+ if slice.Limit != nil {
+ islice.Limit = makeInternalKey(nil, slice.Limit, keyMaxSeq, keyTypeSeek)
+ }
+ }
+ rawIter := db.newRawIterator(auxm, auxt, islice, ro)
+ iter := &dbIter{
+ db: db,
+ icmp: db.s.icmp,
+ iter: rawIter,
+ seq: seq,
+ strict: opt.GetStrict(db.s.o.Options, ro, opt.StrictReader),
+ key: make([]byte, 0),
+ value: make([]byte, 0),
+ }
+ atomic.AddInt32(&db.aliveIters, 1)
+ runtime.SetFinalizer(iter, (*dbIter).Release)
+ return iter
+}
+
+func (db *DB) iterSamplingRate() int {
+ return rand.Intn(2 * db.s.o.GetIteratorSamplingRate())
+}
+
+type dir int
+
+const (
+ dirReleased dir = iota - 1
+ dirSOI
+ dirEOI
+ dirBackward
+ dirForward
+)
+
+// dbIter represents an iterator over a database session.
+type dbIter struct {
+ db *DB
+ icmp *iComparer
+ iter iterator.Iterator
+ seq uint64
+ strict bool
+
+ samplingGap int
+ dir dir
+ key []byte
+ value []byte
+ err error
+ releaser util.Releaser
+}
+
+func (i *dbIter) sampleSeek() {
+ ikey := i.iter.Key()
+ i.samplingGap -= len(ikey) + len(i.iter.Value())
+ for i.samplingGap < 0 {
+ i.samplingGap += i.db.iterSamplingRate()
+ i.db.sampleSeek(ikey)
+ }
+}
+
+func (i *dbIter) setErr(err error) {
+ i.err = err
+ i.key = nil
+ i.value = nil
+}
+
+func (i *dbIter) iterErr() {
+ if err := i.iter.Error(); err != nil {
+ i.setErr(err)
+ }
+}
+
+func (i *dbIter) Valid() bool {
+ return i.err == nil && i.dir > dirEOI
+}
+
+func (i *dbIter) First() bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if i.iter.First() {
+ i.dir = dirSOI
+ return i.next()
+ }
+ i.dir = dirEOI
+ i.iterErr()
+ return false
+}
+
+func (i *dbIter) Last() bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if i.iter.Last() {
+ return i.prev()
+ }
+ i.dir = dirSOI
+ i.iterErr()
+ return false
+}
+
+func (i *dbIter) Seek(key []byte) bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ ikey := makeInternalKey(nil, key, i.seq, keyTypeSeek)
+ if i.iter.Seek(ikey) {
+ i.dir = dirSOI
+ return i.next()
+ }
+ i.dir = dirEOI
+ i.iterErr()
+ return false
+}
+
+func (i *dbIter) next() bool {
+ for {
+ if ukey, seq, kt, kerr := parseInternalKey(i.iter.Key()); kerr == nil {
+ i.sampleSeek()
+ if seq <= i.seq {
+ switch kt {
+ case keyTypeDel:
+ // Skip deleted key.
+ i.key = append(i.key[:0], ukey...)
+ i.dir = dirForward
+ case keyTypeVal:
+ if i.dir == dirSOI || i.icmp.uCompare(ukey, i.key) > 0 {
+ i.key = append(i.key[:0], ukey...)
+ i.value = append(i.value[:0], i.iter.Value()...)
+ i.dir = dirForward
+ return true
+ }
+ }
+ }
+ } else if i.strict {
+ i.setErr(kerr)
+ break
+ }
+ if !i.iter.Next() {
+ i.dir = dirEOI
+ i.iterErr()
+ break
+ }
+ }
+ return false
+}
+
+func (i *dbIter) Next() bool {
+ if i.dir == dirEOI || i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if !i.iter.Next() || (i.dir == dirBackward && !i.iter.Next()) {
+ i.dir = dirEOI
+ i.iterErr()
+ return false
+ }
+ return i.next()
+}
+
+func (i *dbIter) prev() bool {
+ i.dir = dirBackward
+ del := true
+ if i.iter.Valid() {
+ for {
+ if ukey, seq, kt, kerr := parseInternalKey(i.iter.Key()); kerr == nil {
+ i.sampleSeek()
+ if seq <= i.seq {
+ if !del && i.icmp.uCompare(ukey, i.key) < 0 {
+ return true
+ }
+ del = (kt == keyTypeDel)
+ if !del {
+ i.key = append(i.key[:0], ukey...)
+ i.value = append(i.value[:0], i.iter.Value()...)
+ }
+ }
+ } else if i.strict {
+ i.setErr(kerr)
+ return false
+ }
+ if !i.iter.Prev() {
+ break
+ }
+ }
+ }
+ if del {
+ i.dir = dirSOI
+ i.iterErr()
+ return false
+ }
+ return true
+}
+
+func (i *dbIter) Prev() bool {
+ if i.dir == dirSOI || i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ switch i.dir {
+ case dirEOI:
+ return i.Last()
+ case dirForward:
+ for i.iter.Prev() {
+ if ukey, _, _, kerr := parseInternalKey(i.iter.Key()); kerr == nil {
+ i.sampleSeek()
+ if i.icmp.uCompare(ukey, i.key) < 0 {
+ goto cont
+ }
+ } else if i.strict {
+ i.setErr(kerr)
+ return false
+ }
+ }
+ i.dir = dirSOI
+ i.iterErr()
+ return false
+ }
+
+cont:
+ return i.prev()
+}
+
+func (i *dbIter) Key() []byte {
+ if i.err != nil || i.dir <= dirEOI {
+ return nil
+ }
+ return i.key
+}
+
+func (i *dbIter) Value() []byte {
+ if i.err != nil || i.dir <= dirEOI {
+ return nil
+ }
+ return i.value
+}
+
+func (i *dbIter) Release() {
+ if i.dir != dirReleased {
+ // Clear the finalizer.
+ runtime.SetFinalizer(i, nil)
+
+ if i.releaser != nil {
+ i.releaser.Release()
+ i.releaser = nil
+ }
+
+ i.dir = dirReleased
+ i.key = nil
+ i.value = nil
+ i.iter.Release()
+ i.iter = nil
+ atomic.AddInt32(&i.db.aliveIters, -1)
+ i.db = nil
+ }
+}
+
+func (i *dbIter) SetReleaser(releaser util.Releaser) {
+ if i.dir == dirReleased {
+ panic(util.ErrReleased)
+ }
+ if i.releaser != nil && releaser != nil {
+ panic(util.ErrHasReleaser)
+ }
+ i.releaser = releaser
+}
+
+func (i *dbIter) Error() error {
+ return i.err
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go
new file mode 100644
index 000000000..2c69d2e53
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go
@@ -0,0 +1,183 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "container/list"
+ "fmt"
+ "runtime"
+ "sync"
+ "sync/atomic"
+
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+type snapshotElement struct {
+ seq uint64
+ ref int
+ e *list.Element
+}
+
+// Acquires a snapshot, based on the latest sequence.
+func (db *DB) acquireSnapshot() *snapshotElement {
+ db.snapsMu.Lock()
+ defer db.snapsMu.Unlock()
+
+ seq := db.getSeq()
+
+ if e := db.snapsList.Back(); e != nil {
+ se := e.Value.(*snapshotElement)
+ if se.seq == seq {
+ se.ref++
+ return se
+ } else if seq < se.seq {
+ panic("leveldb: sequence number is not increasing")
+ }
+ }
+ se := &snapshotElement{seq: seq, ref: 1}
+ se.e = db.snapsList.PushBack(se)
+ return se
+}
+
+// Releases given snapshot element.
+func (db *DB) releaseSnapshot(se *snapshotElement) {
+ db.snapsMu.Lock()
+ defer db.snapsMu.Unlock()
+
+ se.ref--
+ if se.ref == 0 {
+ db.snapsList.Remove(se.e)
+ se.e = nil
+ } else if se.ref < 0 {
+ panic("leveldb: Snapshot: negative element reference")
+ }
+}
+
+// Gets the minimum sequence number that is not covered by any snapshot.
+func (db *DB) minSeq() uint64 {
+ db.snapsMu.Lock()
+ defer db.snapsMu.Unlock()
+
+ if e := db.snapsList.Front(); e != nil {
+ return e.Value.(*snapshotElement).seq
+ }
+
+ return db.getSeq()
+}
+
+// Snapshot is a DB snapshot.
+type Snapshot struct {
+ db *DB
+ elem *snapshotElement
+ mu sync.RWMutex
+ released bool
+}
+
+// Creates a new snapshot object.
+func (db *DB) newSnapshot() *Snapshot {
+ snap := &Snapshot{
+ db: db,
+ elem: db.acquireSnapshot(),
+ }
+ atomic.AddInt32(&db.aliveSnaps, 1)
+ runtime.SetFinalizer(snap, (*Snapshot).Release)
+ return snap
+}
+
+func (snap *Snapshot) String() string {
+ return fmt.Sprintf("leveldb.Snapshot{%d}", snap.elem.seq)
+}
+
+// Get gets the value for the given key. It returns ErrNotFound if
+// the DB does not contain the key.
+//
+// The caller should not modify the contents of the returned slice, but
+// it is safe to modify the contents of the argument after Get returns.
+func (snap *Snapshot) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
+ err = snap.db.ok()
+ if err != nil {
+ return
+ }
+ snap.mu.RLock()
+ defer snap.mu.RUnlock()
+ if snap.released {
+ err = ErrSnapshotReleased
+ return
+ }
+ return snap.db.get(nil, nil, key, snap.elem.seq, ro)
+}
+
+// Has returns true if the DB contains the given key.
+//
+// It is safe to modify the contents of the argument after Has returns.
+func (snap *Snapshot) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) {
+ err = snap.db.ok()
+ if err != nil {
+ return
+ }
+ snap.mu.RLock()
+ defer snap.mu.RUnlock()
+ if snap.released {
+ err = ErrSnapshotReleased
+ return
+ }
+ return snap.db.has(nil, nil, key, snap.elem.seq, ro)
+}
+
+// NewIterator returns an iterator for the snapshot of the underlying DB.
+// The returned iterator is not safe for concurrent use, but it is safe to use
+// multiple iterators concurrently, with each in a dedicated goroutine.
+// It is also safe to use an iterator concurrently with modifying its
+// underlying DB. The resultant key/value pairs are guaranteed to be
+// consistent.
+//
+// Slice allows slicing the iterator to only contain keys in the given
+// range. A nil Range.Start is treated as a key before all keys in the
+// DB. And a nil Range.Limit is treated as a key after all keys in
+// the DB.
+//
+// The iterator must be released after use, by calling Release method.
+// Releasing the snapshot doesn't release the iterator too; the
+// iterator remains valid until released.
+//
+// Also read Iterator documentation of the leveldb/iterator package.
+func (snap *Snapshot) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+ if err := snap.db.ok(); err != nil {
+ return iterator.NewEmptyIterator(err)
+ }
+ snap.mu.Lock()
+ defer snap.mu.Unlock()
+ if snap.released {
+ return iterator.NewEmptyIterator(ErrSnapshotReleased)
+ }
+ // Since the iterator already holds a version ref, it doesn't need to
+ // hold a snapshot ref.
+ return snap.db.newIterator(nil, nil, snap.elem.seq, slice, ro)
+}
+
+// Release releases the snapshot. This will not release any returned
+// iterators; they remain valid until released or the
+// underlying DB is closed.
+//
+// Other methods should not be called after the snapshot has been released.
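+//
+// A sketch of an iterator outliving its snapshot (illustrative):
+//
+//	iter := snap.NewIterator(nil, nil)
+//	snap.Release()
+//	for iter.Next() {
+//		// iter is still valid here.
+//	}
+//	iter.Release()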
+func (snap *Snapshot) Release() {
+ snap.mu.Lock()
+ defer snap.mu.Unlock()
+
+ if !snap.released {
+ // Clear the finalizer.
+ runtime.SetFinalizer(snap, nil)
+
+ snap.released = true
+ snap.db.releaseSnapshot(snap.elem)
+ atomic.AddInt32(&snap.db.aliveSnaps, -1)
+ snap.db = nil
+ snap.elem = nil
+ }
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_state.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_state.go
new file mode 100644
index 000000000..85b02d24b
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_state.go
@@ -0,0 +1,234 @@
+// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "sync/atomic"
+ "time"
+
+ "github.com/syndtr/goleveldb/leveldb/journal"
+ "github.com/syndtr/goleveldb/leveldb/memdb"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+type memDB struct {
+ db *DB
+ *memdb.DB
+ ref int32
+}
+
+func (m *memDB) getref() int32 {
+ return atomic.LoadInt32(&m.ref)
+}
+
+func (m *memDB) incref() {
+ atomic.AddInt32(&m.ref, 1)
+}
+
+func (m *memDB) decref() {
+ if ref := atomic.AddInt32(&m.ref, -1); ref == 0 {
+ // Only put back memdb with std capacity.
+ if m.Capacity() == m.db.s.o.GetWriteBuffer() {
+ m.Reset()
+ m.db.mpoolPut(m.DB)
+ }
+ m.db = nil
+ m.DB = nil
+ } else if ref < 0 {
+ panic("negative memdb ref")
+ }
+}
+
+// Get latest sequence number.
+func (db *DB) getSeq() uint64 {
+ return atomic.LoadUint64(&db.seq)
+}
+
+// Atomically adds delta to seq.
+func (db *DB) addSeq(delta uint64) {
+ atomic.AddUint64(&db.seq, delta)
+}
+
+func (db *DB) setSeq(seq uint64) {
+ atomic.StoreUint64(&db.seq, seq)
+}
+
+func (db *DB) sampleSeek(ikey internalKey) {
+ v := db.s.version()
+ if v.sampleSeek(ikey) {
+ // Trigger table compaction.
+ db.compTrigger(db.tcompCmdC)
+ }
+ v.release()
+}
+
+func (db *DB) mpoolPut(mem *memdb.DB) {
+ if !db.isClosed() {
+ select {
+ case db.memPool <- mem:
+ default:
+ }
+ }
+}
+
+func (db *DB) mpoolGet(n int) *memDB {
+ var mdb *memdb.DB
+ select {
+ case mdb = <-db.memPool:
+ default:
+ }
+ if mdb == nil || mdb.Capacity() < n {
+ mdb = memdb.New(db.s.icmp, maxInt(db.s.o.GetWriteBuffer(), n))
+ }
+ return &memDB{
+ db: db,
+ DB: mdb,
+ }
+}
+
+func (db *DB) mpoolDrain() {
+ ticker := time.NewTicker(30 * time.Second)
+ for {
+ select {
+ case <-ticker.C:
+ select {
+ case <-db.memPool:
+ default:
+ }
+ case <-db.closeC:
+ ticker.Stop()
+ // Make sure the pool is drained.
+ select {
+ case <-db.memPool:
+ case <-time.After(time.Second):
+ }
+ close(db.memPool)
+ return
+ }
+ }
+}
+
+// Creates a new memdb and freezes the old one; needs external synchronization.
+// newMem is only called synchronously by the writer.
+func (db *DB) newMem(n int) (mem *memDB, err error) {
+ fd := storage.FileDesc{Type: storage.TypeJournal, Num: db.s.allocFileNum()}
+ w, err := db.s.stor.Create(fd)
+ if err != nil {
+ db.s.reuseFileNum(fd.Num)
+ return
+ }
+
+ db.memMu.Lock()
+ defer db.memMu.Unlock()
+
+ if db.frozenMem != nil {
+ panic("still has frozen mem")
+ }
+
+ if db.journal == nil {
+ db.journal = journal.NewWriter(w)
+ } else {
+ db.journal.Reset(w)
+ db.journalWriter.Close()
+ db.frozenJournalFd = db.journalFd
+ }
+ db.journalWriter = w
+ db.journalFd = fd
+ db.frozenMem = db.mem
+ mem = db.mpoolGet(n)
+ mem.incref() // for self
+ mem.incref() // for caller
+ db.mem = mem
+ // The seq is only incremented by the writer. And whoever called newMem
+ // should hold the write lock, so no additional synchronization is needed here.
+ db.frozenSeq = db.seq
+ return
+}
+
+// Get all memdbs.
+func (db *DB) getMems() (e, f *memDB) {
+ db.memMu.RLock()
+ defer db.memMu.RUnlock()
+ if db.mem != nil {
+ db.mem.incref()
+ } else if !db.isClosed() {
+ panic("nil effective mem")
+ }
+ if db.frozenMem != nil {
+ db.frozenMem.incref()
+ }
+ return db.mem, db.frozenMem
+}
+
+// Get effective memdb.
+func (db *DB) getEffectiveMem() *memDB {
+ db.memMu.RLock()
+ defer db.memMu.RUnlock()
+ if db.mem != nil {
+ db.mem.incref()
+ } else if !db.isClosed() {
+ panic("nil effective mem")
+ }
+ return db.mem
+}
+
+// Check whether we have a frozen memdb.
+func (db *DB) hasFrozenMem() bool {
+ db.memMu.RLock()
+ defer db.memMu.RUnlock()
+ return db.frozenMem != nil
+}
+
+// Get frozen memdb.
+func (db *DB) getFrozenMem() *memDB {
+ db.memMu.RLock()
+ defer db.memMu.RUnlock()
+ if db.frozenMem != nil {
+ db.frozenMem.incref()
+ }
+ return db.frozenMem
+}
+
+// Drop frozen memdb; assume that frozen memdb isn't nil.
+func (db *DB) dropFrozenMem() {
+ db.memMu.Lock()
+ if err := db.s.stor.Remove(db.frozenJournalFd); err != nil {
+ db.logf("journal@remove removing @%d %q", db.frozenJournalFd.Num, err)
+ } else {
+ db.logf("journal@remove removed @%d", db.frozenJournalFd.Num)
+ }
+ db.frozenJournalFd = storage.FileDesc{}
+ db.frozenMem.decref()
+ db.frozenMem = nil
+ db.memMu.Unlock()
+}
+
+// Clear mems ptr; used by DB.Close().
+func (db *DB) clearMems() {
+ db.memMu.Lock()
+ db.mem = nil
+ db.frozenMem = nil
+ db.memMu.Unlock()
+}
+
+// Set closed flag; return true if not already closed.
+func (db *DB) setClosed() bool {
+ return atomic.CompareAndSwapUint32(&db.closed, 0, 1)
+}
+
+// Check whether DB was closed.
+func (db *DB) isClosed() bool {
+ return atomic.LoadUint32(&db.closed) != 0
+}
+
+// Check read ok status.
+func (db *DB) ok() error {
+ if db.isClosed() {
+ return ErrClosed
+ }
+ return nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go
new file mode 100644
index 000000000..b8f7e7d21
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go
@@ -0,0 +1,325 @@
+// Copyright (c) 2016, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "errors"
+ "sync"
+ "time"
+
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+var errTransactionDone = errors.New("leveldb: transaction already closed")
+
+// Transaction is the transaction handle.
+type Transaction struct {
+ db *DB
+ lk sync.RWMutex
+ seq uint64
+ mem *memDB
+ tables tFiles
+ ikScratch []byte
+ rec sessionRecord
+ stats cStatStaging
+ closed bool
+}
+
+// Get gets the value for the given key. It returns ErrNotFound if the
+// DB does not contain the key.
+//
+// The returned slice is its own copy; it is safe to modify the contents
+// of the returned slice.
+// It is safe to modify the contents of the argument after Get returns.
+func (tr *Transaction) Get(key []byte, ro *opt.ReadOptions) ([]byte, error) {
+ tr.lk.RLock()
+ defer tr.lk.RUnlock()
+ if tr.closed {
+ return nil, errTransactionDone
+ }
+ return tr.db.get(tr.mem.DB, tr.tables, key, tr.seq, ro)
+}
+
+// Has returns true if the DB contains the given key.
+//
+// It is safe to modify the contents of the argument after Has returns.
+func (tr *Transaction) Has(key []byte, ro *opt.ReadOptions) (bool, error) {
+ tr.lk.RLock()
+ defer tr.lk.RUnlock()
+ if tr.closed {
+ return false, errTransactionDone
+ }
+ return tr.db.has(tr.mem.DB, tr.tables, key, tr.seq, ro)
+}
+
+// NewIterator returns an iterator for the latest snapshot of the transaction.
+// The returned iterator is not safe for concurrent use, but it is safe to use
+// multiple iterators concurrently, with each in a dedicated goroutine.
+// It is also safe to use an iterator concurrently with writes to the
+// transaction. The resultant key/value pairs are guaranteed to be consistent.
+//
+// Slice allows slicing the iterator to only contain keys in the given
+// range. A nil Range.Start is treated as a key before all keys in the
+// DB. And a nil Range.Limit is treated as a key after all keys in
+// the DB.
+//
+// The iterator must be released after use, by calling Release method.
+//
+// Also read Iterator documentation of the leveldb/iterator package.
+func (tr *Transaction) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+ tr.lk.RLock()
+ defer tr.lk.RUnlock()
+ if tr.closed {
+ return iterator.NewEmptyIterator(errTransactionDone)
+ }
+ tr.mem.incref()
+ return tr.db.newIterator(tr.mem, tr.tables, tr.seq, slice, ro)
+}
+
+func (tr *Transaction) flush() error {
+ // Flush memdb.
+ if tr.mem.Len() != 0 {
+ tr.stats.startTimer()
+ iter := tr.mem.NewIterator(nil)
+ t, n, err := tr.db.s.tops.createFrom(iter)
+ iter.Release()
+ tr.stats.stopTimer()
+ if err != nil {
+ return err
+ }
+ if tr.mem.getref() == 1 {
+ tr.mem.Reset()
+ } else {
+ tr.mem.decref()
+ tr.mem = tr.db.mpoolGet(0)
+ tr.mem.incref()
+ }
+ tr.tables = append(tr.tables, t)
+ tr.rec.addTableFile(0, t)
+ tr.stats.write += t.size
+ tr.db.logf("transaction@flush created L0@%d N·%d S·%s %q:%q", t.fd.Num, n, shortenb(int(t.size)), t.imin, t.imax)
+ }
+ return nil
+}
+
+func (tr *Transaction) put(kt keyType, key, value []byte) error {
+ tr.ikScratch = makeInternalKey(tr.ikScratch, key, tr.seq+1, kt)
+ if tr.mem.Free() < len(tr.ikScratch)+len(value) {
+ if err := tr.flush(); err != nil {
+ return err
+ }
+ }
+ if err := tr.mem.Put(tr.ikScratch, value); err != nil {
+ return err
+ }
+ tr.seq++
+ return nil
+}
+
+// Put sets the value for the given key. It overwrites any previous value
+// for that key; a DB is not a multi-map.
+// Please note that the transaction is not compacted until committed, so if you
+// write the same key 10 times, all 10 entries remain in the transaction.
+//
+// It is safe to modify the contents of the arguments after Put returns.
+func (tr *Transaction) Put(key, value []byte, wo *opt.WriteOptions) error {
+ tr.lk.Lock()
+ defer tr.lk.Unlock()
+ if tr.closed {
+ return errTransactionDone
+ }
+ return tr.put(keyTypeVal, key, value)
+}
+
+// Delete deletes the value for the given key.
+// Please note that the transaction is not compacted until committed, so if you
+// write the same key 10 times, all 10 entries remain in the transaction.
+//
+// It is safe to modify the contents of the arguments after Delete returns.
+func (tr *Transaction) Delete(key []byte, wo *opt.WriteOptions) error {
+ tr.lk.Lock()
+ defer tr.lk.Unlock()
+ if tr.closed {
+ return errTransactionDone
+ }
+ return tr.put(keyTypeDel, key, nil)
+}
+
+// Write applies the given batch to the transaction. The batch will be applied
+// sequentially.
+// Please note that the transaction is not compacted until committed, so if you
+// write the same key 10 times, all 10 entries remain in the transaction.
+//
+// It is safe to modify the contents of the arguments after Write returns.
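+//
+// A sketch using a batch inside a transaction (illustrative; assumes an
+// open transaction tr):
+//
+//	b := new(leveldb.Batch)
+//	b.Put([]byte("k1"), []byte("v1"))
+//	b.Delete([]byte("k2"))
+//	err := tr.Write(b, nil)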
+func (tr *Transaction) Write(b *Batch, wo *opt.WriteOptions) error {
+ if b == nil || b.Len() == 0 {
+ return nil
+ }
+
+ tr.lk.Lock()
+ defer tr.lk.Unlock()
+ if tr.closed {
+ return errTransactionDone
+ }
+ return b.replayInternal(func(i int, kt keyType, k, v []byte) error {
+ return tr.put(kt, k, v)
+ })
+}
+
+func (tr *Transaction) setDone() {
+ tr.closed = true
+ tr.db.tr = nil
+ tr.mem.decref()
+ <-tr.db.writeLockC
+}
+
+// Commit commits the transaction. If error is not nil, then the transaction is
+// not committed, it can then either be retried or discarded.
+//
+// Other methods should not be called after transaction has been committed.
+func (tr *Transaction) Commit() error {
+ if err := tr.db.ok(); err != nil {
+ return err
+ }
+
+ tr.lk.Lock()
+ defer tr.lk.Unlock()
+ if tr.closed {
+ return errTransactionDone
+ }
+ if err := tr.flush(); err != nil {
+ // Return error, lets user decide either to retry or discard
+ // transaction.
+ return err
+ }
+ if len(tr.tables) != 0 {
+ // Committing transaction.
+ tr.rec.setSeqNum(tr.seq)
+ tr.db.compCommitLk.Lock()
+ tr.stats.startTimer()
+ var cerr error
+ for retry := 0; retry < 3; retry++ {
+ cerr = tr.db.s.commit(&tr.rec)
+ if cerr != nil {
+ tr.db.logf("transaction@commit error R·%d %q", retry, cerr)
+ select {
+ case <-time.After(time.Second):
+ case <-tr.db.closeC:
+ tr.db.logf("transaction@commit exiting")
+ tr.db.compCommitLk.Unlock()
+ return cerr
+ }
+ } else {
+ // Success. Set db.seq.
+ tr.db.setSeq(tr.seq)
+ break
+ }
+ }
+ tr.stats.stopTimer()
+ if cerr != nil {
+ // Return error, lets user decide either to retry or discard
+ // transaction.
+ return cerr
+ }
+
+ // Update compaction stats. This is safe as long as we hold compCommitLk.
+ tr.db.compStats.addStat(0, &tr.stats)
+
+ // Trigger table auto-compaction.
+ tr.db.compTrigger(tr.db.tcompCmdC)
+ tr.db.compCommitLk.Unlock()
+
+ // Additionally, wait for compaction when a certain threshold is reached.
+ // Ignore the error; an error is returned only if the transaction can't be committed.
+ tr.db.waitCompaction()
+ }
+ // Only mark as done if transaction committed successfully.
+ tr.setDone()
+ return nil
+}
+
+func (tr *Transaction) discard() {
+ // Discard transaction.
+ for _, t := range tr.tables {
+ tr.db.logf("transaction@discard @%d", t.fd.Num)
+ if err1 := tr.db.s.stor.Remove(t.fd); err1 == nil {
+ tr.db.s.reuseFileNum(t.fd.Num)
+ }
+ }
+}
+
+// Discard discards the transaction.
+//
+// Other methods should not be called after transaction has been discarded.
+func (tr *Transaction) Discard() {
+ tr.lk.Lock()
+ if !tr.closed {
+ tr.discard()
+ tr.setDone()
+ }
+ tr.lk.Unlock()
+}
+
+func (db *DB) waitCompaction() error {
+ if db.s.tLen(0) >= db.s.o.GetWriteL0PauseTrigger() {
+ return db.compTriggerWait(db.tcompCmdC)
+ }
+ return nil
+}
+
+// OpenTransaction opens an atomic DB transaction. Only one transaction can be
+// opened at a time. Subsequent calls to Write and OpenTransaction will block
+// until the in-flight transaction is committed or discarded.
+// The returned transaction handle is safe for concurrent use.
+//
+// Transaction is expensive and can overwhelm compaction, especially if
+// transaction size is small. Use with caution.
+//
+// The transaction must be closed once done, either by committing or discarding
+// the transaction.
+// Closing the DB will discard any open transaction.
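+//
+// A commit/discard sketch (illustrative; assumes an opened *DB):
+//
+//	tr, err := db.OpenTransaction()
+//	if err != nil {
+//		return err
+//	}
+//	if err := tr.Put([]byte("k"), []byte("v"), nil); err != nil {
+//		tr.Discard()
+//		return err
+//	}
+//	if err := tr.Commit(); err != nil {
+//		return err
+//	}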
+func (db *DB) OpenTransaction() (*Transaction, error) {
+ if err := db.ok(); err != nil {
+ return nil, err
+ }
+
+ // Writes happen synchronously.
+ select {
+ case db.writeLockC <- struct{}{}:
+ case err := <-db.compPerErrC:
+ return nil, err
+ case <-db.closeC:
+ return nil, ErrClosed
+ }
+
+ if db.tr != nil {
+ panic("leveldb: has open transaction")
+ }
+
+ // Flush current memdb.
+ if db.mem != nil && db.mem.Len() != 0 {
+ if _, err := db.rotateMem(0, true); err != nil {
+ return nil, err
+ }
+ }
+
+ // Wait for compaction when a certain threshold is reached.
+ if err := db.waitCompaction(); err != nil {
+ return nil, err
+ }
+
+ tr := &Transaction{
+ db: db,
+ seq: db.seq,
+ mem: db.mpoolGet(0),
+ }
+ tr.mem.incref()
+ db.tr = tr
+ return tr, nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_util.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_util.go
new file mode 100644
index 000000000..7ecd960d2
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_util.go
@@ -0,0 +1,102 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// Reader is the interface that wraps basic Get and NewIterator methods.
+// This interface is implemented by both DB and Snapshot.
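+//
+// For instance, a helper can accept either a *DB or a *Snapshot
+// (a sketch; readKey is illustrative, not part of this package):
+//
+//	func readKey(r leveldb.Reader, key []byte) ([]byte, error) {
+//		return r.Get(key, nil)
+//	}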
+type Reader interface {
+ Get(key []byte, ro *opt.ReadOptions) (value []byte, err error)
+ NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator
+}
+
+// Sizes is a list of sizes.
+type Sizes []int64
+
+// Sum returns sum of the sizes.
+func (sizes Sizes) Sum() int64 {
+ var sum int64
+ for _, size := range sizes {
+ sum += size
+ }
+ return sum
+}
+
+// Logging.
+func (db *DB) log(v ...interface{}) { db.s.log(v...) }
+func (db *DB) logf(format string, v ...interface{}) { db.s.logf(format, v...) }
+
+// Check and clean files.
+func (db *DB) checkAndCleanFiles() error {
+ v := db.s.version()
+ defer v.release()
+
+ tmap := make(map[int64]bool)
+ for _, tables := range v.levels {
+ for _, t := range tables {
+ tmap[t.fd.Num] = false
+ }
+ }
+
+ fds, err := db.s.stor.List(storage.TypeAll)
+ if err != nil {
+ return err
+ }
+
+ var nt int
+ var rem []storage.FileDesc
+ for _, fd := range fds {
+ keep := true
+ switch fd.Type {
+ case storage.TypeManifest:
+ keep = fd.Num >= db.s.manifestFd.Num
+ case storage.TypeJournal:
+ if !db.frozenJournalFd.Zero() {
+ keep = fd.Num >= db.frozenJournalFd.Num
+ } else {
+ keep = fd.Num >= db.journalFd.Num
+ }
+ case storage.TypeTable:
+ _, keep = tmap[fd.Num]
+ if keep {
+ tmap[fd.Num] = true
+ nt++
+ }
+ }
+
+ if !keep {
+ rem = append(rem, fd)
+ }
+ }
+
+ if nt != len(tmap) {
+ var mfds []storage.FileDesc
+ for num, present := range tmap {
+ if !present {
+ mfds = append(mfds, storage.FileDesc{storage.TypeTable, num})
+ db.logf("db@janitor table missing @%d", num)
+ }
+ }
+ return errors.NewErrCorrupted(storage.FileDesc{}, &errors.ErrMissingFiles{Fds: mfds})
+ }
+
+ db.logf("db@janitor F·%d G·%d", len(fds), len(rem))
+ for _, fd := range rem {
+ db.logf("db@janitor removing %s-%d", fd.Type, fd.Num)
+ if err := db.s.stor.Remove(fd); err != nil {
+ return err
+ }
+ }
+ return nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_write.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_write.go
new file mode 100644
index 000000000..cc428b695
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_write.go
@@ -0,0 +1,443 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "time"
+
+ "github.com/syndtr/goleveldb/leveldb/memdb"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+func (db *DB) writeJournal(batches []*Batch, seq uint64, sync bool) error {
+ wr, err := db.journal.Next()
+ if err != nil {
+ return err
+ }
+ if err := writeBatchesWithHeader(wr, batches, seq); err != nil {
+ return err
+ }
+ if err := db.journal.Flush(); err != nil {
+ return err
+ }
+ if sync {
+ return db.journalWriter.Sync()
+ }
+ return nil
+}
+
+func (db *DB) rotateMem(n int, wait bool) (mem *memDB, err error) {
+ // Wait for pending memdb compaction.
+ err = db.compTriggerWait(db.mcompCmdC)
+ if err != nil {
+ return
+ }
+
+ // Create new memdb and journal.
+ mem, err = db.newMem(n)
+ if err != nil {
+ return
+ }
+
+ // Schedule memdb compaction.
+ if wait {
+ err = db.compTriggerWait(db.mcompCmdC)
+ } else {
+ db.compTrigger(db.mcompCmdC)
+ }
+ return
+}
+
+func (db *DB) flush(n int) (mdb *memDB, mdbFree int, err error) {
+ delayed := false
+ slowdownTrigger := db.s.o.GetWriteL0SlowdownTrigger()
+ pauseTrigger := db.s.o.GetWriteL0PauseTrigger()
+ flush := func() (retry bool) {
+ mdb = db.getEffectiveMem()
+ if mdb == nil {
+ err = ErrClosed
+ return false
+ }
+ defer func() {
+ if retry {
+ mdb.decref()
+ mdb = nil
+ }
+ }()
+ tLen := db.s.tLen(0)
+ mdbFree = mdb.Free()
+ switch {
+ case tLen >= slowdownTrigger && !delayed:
+ delayed = true
+ time.Sleep(time.Millisecond)
+ case mdbFree >= n:
+ return false
+ case tLen >= pauseTrigger:
+ delayed = true
+ err = db.compTriggerWait(db.tcompCmdC)
+ if err != nil {
+ return false
+ }
+ default:
+ // Allow memdb to grow if it has no entry.
+ if mdb.Len() == 0 {
+ mdbFree = n
+ } else {
+ mdb.decref()
+ mdb, err = db.rotateMem(n, false)
+ if err == nil {
+ mdbFree = mdb.Free()
+ } else {
+ mdbFree = 0
+ }
+ }
+ return false
+ }
+ return true
+ }
+ start := time.Now()
+ for flush() {
+ }
+ if delayed {
+ db.writeDelay += time.Since(start)
+ db.writeDelayN++
+ } else if db.writeDelayN > 0 {
+ db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay)
+ db.writeDelay = 0
+ db.writeDelayN = 0
+ }
+ return
+}
+
+type writeMerge struct {
+ sync bool
+ batch *Batch
+ keyType keyType
+ key, value []byte
+}
+
+func (db *DB) unlockWrite(overflow bool, merged int, err error) {
+ for i := 0; i < merged; i++ {
+ db.writeAckC <- err
+ }
+ if overflow {
+ // Pass lock to the next write (that failed to merge).
+ db.writeMergedC <- false
+ } else {
+ // Release lock.
+ <-db.writeLockC
+ }
+}
+
+// ourBatch, if defined, should be equal to batch.
+func (db *DB) writeLocked(batch, ourBatch *Batch, merge, sync bool) error {
+	// Try to flush the memdb. This method also tries to throttle writes
+	// if they are too fast and compaction cannot catch up.
+ mdb, mdbFree, err := db.flush(batch.internalLen)
+ if err != nil {
+ db.unlockWrite(false, 0, err)
+ return err
+ }
+ defer mdb.decref()
+
+ var (
+ overflow bool
+ merged int
+ batches = []*Batch{batch}
+ )
+
+ if merge {
+ // Merge limit.
+ var mergeLimit int
+ if batch.internalLen > 128<<10 {
+ mergeLimit = (1 << 20) - batch.internalLen
+ } else {
+ mergeLimit = 128 << 10
+ }
+ mergeCap := mdbFree - batch.internalLen
+ if mergeLimit > mergeCap {
+ mergeLimit = mergeCap
+ }
+
+ merge:
+ for mergeLimit > 0 {
+ select {
+ case incoming := <-db.writeMergeC:
+ if incoming.batch != nil {
+ // Merge batch.
+ if incoming.batch.internalLen > mergeLimit {
+ overflow = true
+ break merge
+ }
+ batches = append(batches, incoming.batch)
+ mergeLimit -= incoming.batch.internalLen
+ } else {
+ // Merge put.
+ internalLen := len(incoming.key) + len(incoming.value) + 8
+ if internalLen > mergeLimit {
+ overflow = true
+ break merge
+ }
+ if ourBatch == nil {
+ ourBatch = db.batchPool.Get().(*Batch)
+ ourBatch.Reset()
+ batches = append(batches, ourBatch)
+ }
+					// We can use the same batch since concurrent writes don't
+					// guarantee write order.
+ ourBatch.appendRec(incoming.keyType, incoming.key, incoming.value)
+ mergeLimit -= internalLen
+ }
+ sync = sync || incoming.sync
+ merged++
+ db.writeMergedC <- true
+
+ default:
+ break merge
+ }
+ }
+ }
+
+ // Seq number.
+ seq := db.seq + 1
+
+ // Write journal.
+ if err := db.writeJournal(batches, seq, sync); err != nil {
+ db.unlockWrite(overflow, merged, err)
+ return err
+ }
+
+ // Put batches.
+ for _, batch := range batches {
+ if err := batch.putMem(seq, mdb.DB); err != nil {
+ panic(err)
+ }
+ seq += uint64(batch.Len())
+ }
+
+ // Incr seq number.
+ db.addSeq(uint64(batchesLen(batches)))
+
+	// Rotate the memdb if it has reached the threshold.
+ if batch.internalLen >= mdbFree {
+ db.rotateMem(0, false)
+ }
+
+ db.unlockWrite(overflow, merged, nil)
+ return nil
+}
+
+// Write applies the given batch to the DB. The batch records will be applied
+// sequentially. Write may be used concurrently; when used concurrently and the
+// batch is small enough, Write will try to merge the batches. Set the
+// NoWriteMerge option to true to disable write merging.
+//
+// It is safe to modify the contents of the arguments after Write returns but
+// not before. Write will not modify the contents of the batch.
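+//
+// A short sketch using the NoWriteMerge option mentioned above (error
+// handling elided):
+//
+//	batch := new(leveldb.Batch)
+//	batch.Put([]byte("foo"), []byte("value"))
+//	batch.Delete([]byte("bar"))
+//	// Opt out of write merging for this call only.
+//	err := db.Write(batch, &opt.WriteOptions{NoWriteMerge: true})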
+func (db *DB) Write(batch *Batch, wo *opt.WriteOptions) error {
+ if err := db.ok(); err != nil || batch == nil || batch.Len() == 0 {
+ return err
+ }
+
+	// If the batch size is larger than the write buffer, it may be justified
+	// to write it using a transaction instead. With a transaction the batch
+	// will be written into tables directly, skipping the journaling.
+ if batch.internalLen > db.s.o.GetWriteBuffer() && !db.s.o.GetDisableLargeBatchTransaction() {
+ tr, err := db.OpenTransaction()
+ if err != nil {
+ return err
+ }
+ if err := tr.Write(batch, wo); err != nil {
+ tr.Discard()
+ return err
+ }
+ return tr.Commit()
+ }
+
+ merge := !wo.GetNoWriteMerge() && !db.s.o.GetNoWriteMerge()
+ sync := wo.GetSync() && !db.s.o.GetNoSync()
+
+ // Acquire write lock.
+ if merge {
+ select {
+ case db.writeMergeC <- writeMerge{sync: sync, batch: batch}:
+ if <-db.writeMergedC {
+ // Write is merged.
+ return <-db.writeAckC
+ }
+ // Write is not merged, the write lock is handed to us. Continue.
+ case db.writeLockC <- struct{}{}:
+ // Write lock acquired.
+ case err := <-db.compPerErrC:
+ // Compaction error.
+ return err
+ case <-db.closeC:
+ // Closed
+ return ErrClosed
+ }
+ } else {
+ select {
+ case db.writeLockC <- struct{}{}:
+ // Write lock acquired.
+ case err := <-db.compPerErrC:
+ // Compaction error.
+ return err
+ case <-db.closeC:
+ // Closed
+ return ErrClosed
+ }
+ }
+
+ return db.writeLocked(batch, nil, merge, sync)
+}
+
+func (db *DB) putRec(kt keyType, key, value []byte, wo *opt.WriteOptions) error {
+ if err := db.ok(); err != nil {
+ return err
+ }
+
+ merge := !wo.GetNoWriteMerge() && !db.s.o.GetNoWriteMerge()
+ sync := wo.GetSync() && !db.s.o.GetNoSync()
+
+ // Acquire write lock.
+ if merge {
+ select {
+ case db.writeMergeC <- writeMerge{sync: sync, keyType: kt, key: key, value: value}:
+ if <-db.writeMergedC {
+ // Write is merged.
+ return <-db.writeAckC
+ }
+ // Write is not merged, the write lock is handed to us. Continue.
+ case db.writeLockC <- struct{}{}:
+ // Write lock acquired.
+ case err := <-db.compPerErrC:
+ // Compaction error.
+ return err
+ case <-db.closeC:
+ // Closed
+ return ErrClosed
+ }
+ } else {
+ select {
+ case db.writeLockC <- struct{}{}:
+ // Write lock acquired.
+ case err := <-db.compPerErrC:
+ // Compaction error.
+ return err
+ case <-db.closeC:
+ // Closed
+ return ErrClosed
+ }
+ }
+
+ batch := db.batchPool.Get().(*Batch)
+ batch.Reset()
+ batch.appendRec(kt, key, value)
+ return db.writeLocked(batch, batch, merge, sync)
+}
+
+// Put sets the value for the given key. It overwrites any previous value
+// for that key; a DB is not a multi-map. Write merging also applies to Put;
+// see Write.
+//
+// It is safe to modify the contents of the arguments after Put returns but not
+// before.
+func (db *DB) Put(key, value []byte, wo *opt.WriteOptions) error {
+ return db.putRec(keyTypeVal, key, value, wo)
+}
+
+// Delete deletes the value for the given key. Delete will not return an error
+// if the key doesn't exist. Write merging also applies to Delete; see Write.
+//
+// It is safe to modify the contents of the arguments after Delete returns but
+// not before.
+func (db *DB) Delete(key []byte, wo *opt.WriteOptions) error {
+ return db.putRec(keyTypeDel, key, nil, wo)
+}
+
+func isMemOverlaps(icmp *iComparer, mem *memdb.DB, min, max []byte) bool {
+ iter := mem.NewIterator(nil)
+ defer iter.Release()
+ return (max == nil || (iter.First() && icmp.uCompare(max, internalKey(iter.Key()).ukey()) >= 0)) &&
+ (min == nil || (iter.Last() && icmp.uCompare(min, internalKey(iter.Key()).ukey()) <= 0))
+}
+
+// CompactRange compacts the underlying DB for the given key range.
+// In particular, deleted and overwritten versions are discarded,
+// and the data is rearranged to reduce the cost of operations
+// needed to access the data. This operation should typically only
+// be invoked by users who understand the underlying implementation.
+//
+// A nil Range.Start is treated as a key before all keys in the DB, and a nil
+// Range.Limit is treated as a key after all keys in the DB. Therefore, if both
+// are nil, the entire DB will be compacted.
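+//
+// For example, compacting the entire DB (a sketch; error handling elided):
+//
+//	err := db.CompactRange(util.Range{})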
+func (db *DB) CompactRange(r util.Range) error {
+ if err := db.ok(); err != nil {
+ return err
+ }
+
+ // Lock writer.
+ select {
+ case db.writeLockC <- struct{}{}:
+ case err := <-db.compPerErrC:
+ return err
+ case <-db.closeC:
+ return ErrClosed
+ }
+
+ // Check for overlaps in memdb.
+ mdb := db.getEffectiveMem()
+ if mdb == nil {
+ return ErrClosed
+ }
+ defer mdb.decref()
+ if isMemOverlaps(db.s.icmp, mdb.DB, r.Start, r.Limit) {
+ // Memdb compaction.
+ if _, err := db.rotateMem(0, false); err != nil {
+ <-db.writeLockC
+ return err
+ }
+ <-db.writeLockC
+ if err := db.compTriggerWait(db.mcompCmdC); err != nil {
+ return err
+ }
+ } else {
+ <-db.writeLockC
+ }
+
+ // Table compaction.
+ return db.compTriggerRange(db.tcompCmdC, -1, r.Start, r.Limit)
+}
+
+// SetReadOnly makes DB read-only. It will stay read-only until reopened.
+func (db *DB) SetReadOnly() error {
+ if err := db.ok(); err != nil {
+ return err
+ }
+
+ // Lock writer.
+ select {
+ case db.writeLockC <- struct{}{}:
+ db.compWriteLocking = true
+ case err := <-db.compPerErrC:
+ return err
+ case <-db.closeC:
+ return ErrClosed
+ }
+
+ // Set compaction read-only.
+ select {
+ case db.compErrSetC <- ErrReadOnly:
+ case perr := <-db.compPerErrC:
+ return perr
+ case <-db.closeC:
+ return ErrClosed
+ }
+
+ return nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/doc.go b/vendor/github.com/syndtr/goleveldb/leveldb/doc.go
new file mode 100644
index 000000000..53f13bb24
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/doc.go
@@ -0,0 +1,90 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package leveldb provides implementation of LevelDB key/value database.
+//
+// Create or open a database:
+//
+// db, err := leveldb.OpenFile("path/to/db", nil)
+// ...
+// defer db.Close()
+// ...
+//
+// Read or modify the database content:
+//
+// // Remember that the contents of the returned slice should not be modified.
+// data, err := db.Get([]byte("key"), nil)
+// ...
+// err = db.Put([]byte("key"), []byte("value"), nil)
+// ...
+// err = db.Delete([]byte("key"), nil)
+// ...
+//
+// Iterate over database content:
+//
+// iter := db.NewIterator(nil, nil)
+// for iter.Next() {
+// // Remember that the contents of the returned slice should not be modified, and
+// // only valid until the next call to Next.
+// key := iter.Key()
+// value := iter.Value()
+// ...
+// }
+// iter.Release()
+// err = iter.Error()
+// ...
+//
+// Iterate over subset of database content with a particular prefix:
+//
+// iter := db.NewIterator(util.BytesPrefix([]byte("foo-")), nil)
+// for iter.Next() {
+// // Use key/value.
+// ...
+// }
+// iter.Release()
+// err = iter.Error()
+// ...
+//
+// Seek-then-Iterate:
+//
+// iter := db.NewIterator(nil, nil)
+// for ok := iter.Seek(key); ok; ok = iter.Next() {
+// // Use key/value.
+// ...
+// }
+// iter.Release()
+// err = iter.Error()
+// ...
+//
+// Iterate over subset of database content:
+//
+// iter := db.NewIterator(&util.Range{Start: []byte("foo"), Limit: []byte("xoo")}, nil)
+// for iter.Next() {
+// // Use key/value.
+// ...
+// }
+// iter.Release()
+// err = iter.Error()
+// ...
+//
+// Batch writes:
+//
+// batch := new(leveldb.Batch)
+// batch.Put([]byte("foo"), []byte("value"))
+// batch.Put([]byte("bar"), []byte("another value"))
+// batch.Delete([]byte("baz"))
+// err = db.Write(batch, nil)
+// ...
+//
+// Use bloom filter:
+//
+// o := &opt.Options{
+// Filter: filter.NewBloomFilter(10),
+// }
+// db, err := leveldb.OpenFile("path/to/db", o)
+// ...
+// defer db.Close()
+// ...
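+//
+// Read using a consistent snapshot (a sketch; GetSnapshot, Get and Release
+// are assumed from this package's snapshot support):
+//
+//	snap, err := db.GetSnapshot()
+//	...
+//	data, err := snap.Get([]byte("key"), nil)
+//	...
+//	snap.Release()
+//	...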
+package leveldb
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/errors.go b/vendor/github.com/syndtr/goleveldb/leveldb/errors.go
new file mode 100644
index 000000000..de2649812
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/errors.go
@@ -0,0 +1,20 @@
+// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/errors"
+)
+
+// Common errors.
+var (
+ ErrNotFound = errors.ErrNotFound
+ ErrReadOnly = errors.New("leveldb: read-only mode")
+ ErrSnapshotReleased = errors.New("leveldb: snapshot released")
+ ErrIterReleased = errors.New("leveldb: iterator released")
+ ErrClosed = errors.New("leveldb: closed")
+)
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go b/vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go
new file mode 100644
index 000000000..8d6146b6f
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go
@@ -0,0 +1,78 @@
+// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package errors provides common error types used throughout leveldb.
+package errors
+
+import (
+ "errors"
+ "fmt"
+
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// Common errors.
+var (
+ ErrNotFound = New("leveldb: not found")
+ ErrReleased = util.ErrReleased
+ ErrHasReleaser = util.ErrHasReleaser
+)
+
+// New returns an error that formats as the given text.
+func New(text string) error {
+ return errors.New(text)
+}
+
+// ErrCorrupted is the type that wraps errors that indicate corruption in
+// the database.
+type ErrCorrupted struct {
+ Fd storage.FileDesc
+ Err error
+}
+
+func (e *ErrCorrupted) Error() string {
+ if !e.Fd.Zero() {
+ return fmt.Sprintf("%v [file=%v]", e.Err, e.Fd)
+ }
+ return e.Err.Error()
+}
+
+// NewErrCorrupted creates a new ErrCorrupted error.
+func NewErrCorrupted(fd storage.FileDesc, err error) error {
+ return &ErrCorrupted{fd, err}
+}
+
+// IsCorrupted returns a boolean indicating whether the error indicates
+// a corruption.
+func IsCorrupted(err error) bool {
+ switch err.(type) {
+ case *ErrCorrupted:
+ return true
+ case *storage.ErrCorrupted:
+ return true
+ }
+ return false
+}
+
+// ErrMissingFiles is the type indicating a corruption due to missing
+// files. ErrMissingFiles is always wrapped in ErrCorrupted.
+type ErrMissingFiles struct {
+ Fds []storage.FileDesc
+}
+
+func (e *ErrMissingFiles) Error() string { return "file missing" }
+
+// SetFd sets the 'file info' of the given error to the given file.
+// Currently only ErrCorrupted is supported; otherwise it does nothing.
+func SetFd(err error, fd storage.FileDesc) error {
+ switch x := err.(type) {
+ case *ErrCorrupted:
+ x.Fd = fd
+ return x
+ }
+ return err
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/filter.go b/vendor/github.com/syndtr/goleveldb/leveldb/filter.go
new file mode 100644
index 000000000..e961e420d
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/filter.go
@@ -0,0 +1,31 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/filter"
+)
+
+type iFilter struct {
+ filter.Filter
+}
+
+func (f iFilter) Contains(filter, key []byte) bool {
+ return f.Filter.Contains(filter, internalKey(key).ukey())
+}
+
+func (f iFilter) NewGenerator() filter.FilterGenerator {
+ return iFilterGenerator{f.Filter.NewGenerator()}
+}
+
+type iFilterGenerator struct {
+ filter.FilterGenerator
+}
+
+func (g iFilterGenerator) Add(key []byte) {
+ g.FilterGenerator.Add(internalKey(key).ukey())
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go b/vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go
new file mode 100644
index 000000000..bab0e9970
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go
@@ -0,0 +1,116 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package filter
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+func bloomHash(key []byte) uint32 {
+ return util.Hash(key, 0xbc9f1d34)
+}
+
+type bloomFilter int
+
+// The bloom filter serializes its parameters and is backward compatible
+// with respect to them. Therefore, its parameters are not added to its
+// name.
+func (bloomFilter) Name() string {
+ return "leveldb.BuiltinBloomFilter"
+}
+
+func (f bloomFilter) Contains(filter, key []byte) bool {
+ nBytes := len(filter) - 1
+ if nBytes < 1 {
+ return false
+ }
+ nBits := uint32(nBytes * 8)
+
+ // Use the encoded k so that we can read filters generated by
+ // bloom filters created using different parameters.
+ k := filter[nBytes]
+ if k > 30 {
+ // Reserved for potentially new encodings for short bloom filters.
+ // Consider it a match.
+ return true
+ }
+
+ kh := bloomHash(key)
+ delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits
+ for j := uint8(0); j < k; j++ {
+ bitpos := kh % nBits
+ if (uint32(filter[bitpos/8]) & (1 << (bitpos % 8))) == 0 {
+ return false
+ }
+ kh += delta
+ }
+ return true
+}
+
+func (f bloomFilter) NewGenerator() FilterGenerator {
+ // Round down to reduce probing cost a little bit.
+ k := uint8(f * 69 / 100) // 0.69 =~ ln(2)
+ if k < 1 {
+ k = 1
+ } else if k > 30 {
+ k = 30
+ }
+ return &bloomFilterGenerator{
+ n: int(f),
+ k: k,
+ }
+}
+
+type bloomFilterGenerator struct {
+ n int
+ k uint8
+
+ keyHashes []uint32
+}
+
+func (g *bloomFilterGenerator) Add(key []byte) {
+ // Use double-hashing to generate a sequence of hash values.
+ // See analysis in [Kirsch,Mitzenmacher 2006].
+ g.keyHashes = append(g.keyHashes, bloomHash(key))
+}
+
+func (g *bloomFilterGenerator) Generate(b Buffer) {
+ // Compute bloom filter size (in both bits and bytes)
+ nBits := uint32(len(g.keyHashes) * g.n)
+ // For small n, we can see a very high false positive rate. Fix it
+ // by enforcing a minimum bloom filter length.
+ if nBits < 64 {
+ nBits = 64
+ }
+ nBytes := (nBits + 7) / 8
+ nBits = nBytes * 8
+
+ dest := b.Alloc(int(nBytes) + 1)
+ dest[nBytes] = g.k
+ for _, kh := range g.keyHashes {
+ delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits
+ for j := uint8(0); j < g.k; j++ {
+ bitpos := kh % nBits
+ dest[bitpos/8] |= (1 << (bitpos % 8))
+ kh += delta
+ }
+ }
+
+ g.keyHashes = g.keyHashes[:0]
+}
+
+// NewBloomFilter creates a new initialized bloom filter for the given
+// bitsPerKey.
+//
+// Since bitsPerKey is persisted individually for each bloom filter
+// serialization, bloom filters are backwards compatible with respect to
+// changing bitsPerKey. This means that no big performance penalty will
+// be experienced when changing the parameter. See documentation for
+// opt.Options.Filter for more information.
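+//
+// As a rough guide, 10 bits per key gives a false positive rate of roughly
+// 1%. For example:
+//
+//	f := NewBloomFilter(10)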
+func NewBloomFilter(bitsPerKey int) Filter {
+ return bloomFilter(bitsPerKey)
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/filter/filter.go b/vendor/github.com/syndtr/goleveldb/leveldb/filter/filter.go
new file mode 100644
index 000000000..7a925c5a8
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/filter/filter.go
@@ -0,0 +1,60 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package filter provides the interface and an implementation of a
+// probabilistic data structure.
+//
+// The filter is responsible for creating a small filter from a set of keys.
+// This filter will then be used to test whether a key is a member of the set.
+// In many cases, a filter can cut down the number of disk seeks from a
+// handful to a single disk seek per DB.Get call.
+package filter
+
+// Buffer is the interface that wraps basic Alloc, Write and WriteByte methods.
+type Buffer interface {
+	// Alloc allocates an n-byte slice from the buffer. This also advances
+	// the write offset.
+ Alloc(n int) []byte
+
+ // Write appends the contents of p to the buffer.
+ Write(p []byte) (n int, err error)
+
+ // WriteByte appends the byte c to the buffer.
+ WriteByte(c byte) error
+}
+
+// Filter is the filter.
+type Filter interface {
+ // Name returns the name of this policy.
+ //
+ // Note that if the filter encoding changes in an incompatible way,
+ // the name returned by this method must be changed. Otherwise, old
+ // incompatible filters may be passed to methods of this type.
+ Name() string
+
+ // NewGenerator creates a new filter generator.
+ NewGenerator() FilterGenerator
+
+ // Contains returns true if the filter contains the given key.
+ //
+	// The filter argument is a filter generated by the filter generator.
+ Contains(filter, key []byte) bool
+}
+
+// FilterGenerator is the filter generator.
+type FilterGenerator interface {
+ // Add adds a key to the filter generator.
+ //
+	// The key may become invalid after the call to this method ends, so
+	// the key must be copied if the implementation requires keeping it for
+	// later use. The key should not be modified directly; doing so may
+	// cause undefined results.
+ Add(key []byte)
+
+	// Generate generates a filter based on the keys passed so far. After a
+	// call to Generate the filter generator may be reset, depending on the
+	// implementation.
+ Generate(b Buffer)
+}
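+
+// A round-trip sketch of the generator API (it assumes util.Buffer from this
+// repository's util package as the Buffer implementation):
+//
+//	f := NewBloomFilter(10)
+//	g := f.NewGenerator()
+//	g.Add([]byte("key"))
+//	var buf util.Buffer
+//	g.Generate(&buf)
+//	ok := f.Contains(buf.Bytes(), []byte("key")) // ok == true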
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go
new file mode 100644
index 000000000..a23ab05f7
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go
@@ -0,0 +1,184 @@
+// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package iterator
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// BasicArray is the interface that wraps the basic Len and Search methods.
+type BasicArray interface {
+ // Len returns length of the array.
+ Len() int
+
+	// Search finds the smallest index that points to a key greater
+	// than or equal to the given key.
+ Search(key []byte) int
+}
+
+// Array is the interface that wraps BasicArray and basic Index method.
+type Array interface {
+ BasicArray
+
+	// Index returns the key/value pair at index i.
+ Index(i int) (key, value []byte)
+}
+
+// ArrayIndexer is the interface that wraps BasicArray and the basic Get method.
+type ArrayIndexer interface {
+ BasicArray
+
+	// Get returns a new data iterator at index i.
+ Get(i int) Iterator
+}
+
+type basicArrayIterator struct {
+ util.BasicReleaser
+ array BasicArray
+ pos int
+ err error
+}
+
+func (i *basicArrayIterator) Valid() bool {
+ return i.pos >= 0 && i.pos < i.array.Len() && !i.Released()
+}
+
+func (i *basicArrayIterator) First() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if i.array.Len() == 0 {
+ i.pos = -1
+ return false
+ }
+ i.pos = 0
+ return true
+}
+
+func (i *basicArrayIterator) Last() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ n := i.array.Len()
+ if n == 0 {
+ i.pos = 0
+ return false
+ }
+ i.pos = n - 1
+ return true
+}
+
+func (i *basicArrayIterator) Seek(key []byte) bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ n := i.array.Len()
+ if n == 0 {
+ i.pos = 0
+ return false
+ }
+ i.pos = i.array.Search(key)
+ if i.pos >= n {
+ return false
+ }
+ return true
+}
+
+func (i *basicArrayIterator) Next() bool {
+ if i.Released() {
+				mfds = append(mfds, storage.FileDesc{Type: storage.TypeTable, Num: num})
+ return false
+ }
+
+ i.pos++
+ if n := i.array.Len(); i.pos >= n {
+ i.pos = n
+ return false
+ }
+ return true
+}
+
+func (i *basicArrayIterator) Prev() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ i.pos--
+ if i.pos < 0 {
+ i.pos = -1
+ return false
+ }
+ return true
+}
+
+func (i *basicArrayIterator) Error() error { return i.err }
+
+type arrayIterator struct {
+ basicArrayIterator
+ array Array
+ pos int
+ key, value []byte
+}
+
+func (i *arrayIterator) updateKV() {
+ if i.pos == i.basicArrayIterator.pos {
+ return
+ }
+ i.pos = i.basicArrayIterator.pos
+ if i.Valid() {
+ i.key, i.value = i.array.Index(i.pos)
+ } else {
+ i.key = nil
+ i.value = nil
+ }
+}
+
+func (i *arrayIterator) Key() []byte {
+ i.updateKV()
+ return i.key
+}
+
+func (i *arrayIterator) Value() []byte {
+ i.updateKV()
+ return i.value
+}
+
+type arrayIteratorIndexer struct {
+ basicArrayIterator
+ array ArrayIndexer
+}
+
+func (i *arrayIteratorIndexer) Get() Iterator {
+ if i.Valid() {
+ return i.array.Get(i.basicArrayIterator.pos)
+ }
+ return nil
+}
+
+// NewArrayIterator returns an iterator from the given array.
+func NewArrayIterator(array Array) Iterator {
+ return &arrayIterator{
+ basicArrayIterator: basicArrayIterator{array: array, pos: -1},
+ array: array,
+ pos: -1,
+ }
+}
+
+// NewArrayIndexer returns an index iterator from the given array.
+func NewArrayIndexer(array ArrayIndexer) IteratorIndexer {
+ return &arrayIteratorIndexer{
+ basicArrayIterator: basicArrayIterator{array: array, pos: -1},
+ array: array,
+ }
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go
new file mode 100644
index 000000000..939adbb93
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go
@@ -0,0 +1,242 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package iterator
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// IteratorIndexer is the interface that wraps CommonIterator and the basic Get
+// method. IteratorIndexer provides the index for an indexed iterator.
+type IteratorIndexer interface {
+ CommonIterator
+
+ // Get returns a new data iterator for the current position, or nil if
+ // done.
+ Get() Iterator
+}
+
+type indexedIterator struct {
+ util.BasicReleaser
+ index IteratorIndexer
+ strict bool
+
+ data Iterator
+ err error
+ errf func(err error)
+ closed bool
+}
+
+func (i *indexedIterator) setData() {
+ if i.data != nil {
+ i.data.Release()
+ }
+ i.data = i.index.Get()
+}
+
+func (i *indexedIterator) clearData() {
+ if i.data != nil {
+ i.data.Release()
+ }
+ i.data = nil
+}
+
+func (i *indexedIterator) indexErr() {
+ if err := i.index.Error(); err != nil {
+ if i.errf != nil {
+ i.errf(err)
+ }
+ i.err = err
+ }
+}
+
+func (i *indexedIterator) dataErr() bool {
+ if err := i.data.Error(); err != nil {
+ if i.errf != nil {
+ i.errf(err)
+ }
+ if i.strict || !errors.IsCorrupted(err) {
+ i.err = err
+ return true
+ }
+ }
+ return false
+}
+
+func (i *indexedIterator) Valid() bool {
+ return i.data != nil && i.data.Valid()
+}
+
+func (i *indexedIterator) First() bool {
+ if i.err != nil {
+ return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if !i.index.First() {
+ i.indexErr()
+ i.clearData()
+ return false
+ }
+ i.setData()
+ return i.Next()
+}
+
+func (i *indexedIterator) Last() bool {
+ if i.err != nil {
+ return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if !i.index.Last() {
+ i.indexErr()
+ i.clearData()
+ return false
+ }
+ i.setData()
+ if !i.data.Last() {
+ if i.dataErr() {
+ return false
+ }
+ i.clearData()
+ return i.Prev()
+ }
+ return true
+}
+
+func (i *indexedIterator) Seek(key []byte) bool {
+ if i.err != nil {
+ return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if !i.index.Seek(key) {
+ i.indexErr()
+ i.clearData()
+ return false
+ }
+ i.setData()
+ if !i.data.Seek(key) {
+ if i.dataErr() {
+ return false
+ }
+ i.clearData()
+ return i.Next()
+ }
+ return true
+}
+
+func (i *indexedIterator) Next() bool {
+ if i.err != nil {
+ return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ switch {
+ case i.data != nil && !i.data.Next():
+ if i.dataErr() {
+ return false
+ }
+ i.clearData()
+ fallthrough
+ case i.data == nil:
+ if !i.index.Next() {
+ i.indexErr()
+ return false
+ }
+ i.setData()
+ return i.Next()
+ }
+ return true
+}
+
+func (i *indexedIterator) Prev() bool {
+ if i.err != nil {
+ return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ switch {
+ case i.data != nil && !i.data.Prev():
+ if i.dataErr() {
+ return false
+ }
+ i.clearData()
+ fallthrough
+ case i.data == nil:
+ if !i.index.Prev() {
+ i.indexErr()
+ return false
+ }
+ i.setData()
+ if !i.data.Last() {
+ if i.dataErr() {
+ return false
+ }
+ i.clearData()
+ return i.Prev()
+ }
+ }
+ return true
+}
+
+func (i *indexedIterator) Key() []byte {
+ if i.data == nil {
+ return nil
+ }
+ return i.data.Key()
+}
+
+func (i *indexedIterator) Value() []byte {
+ if i.data == nil {
+ return nil
+ }
+ return i.data.Value()
+}
+
+func (i *indexedIterator) Release() {
+ i.clearData()
+ i.index.Release()
+ i.BasicReleaser.Release()
+}
+
+func (i *indexedIterator) Error() error {
+ if i.err != nil {
+ return i.err
+ }
+ if err := i.index.Error(); err != nil {
+ return err
+ }
+ return nil
+}
+
+func (i *indexedIterator) SetErrorCallback(f func(err error)) {
+ i.errf = f
+}
+
+// NewIndexedIterator returns an 'indexed iterator'. An index is an iterator
+// that returns another iterator, a 'data iterator'. A 'data iterator' is the
+// iterator that contains the actual key/value pairs.
+//
+// If strict is true then any 'corruption errors' (i.e. errors.IsCorrupted(err) == true)
+// won't be ignored and will halt the 'indexed iterator'; otherwise the iterator will
+// continue to the next 'data iterator'. Corruption of the 'index iterator' will not be
+// ignored and will halt the iterator.
+func NewIndexedIterator(index IteratorIndexer, strict bool) Iterator {
+ return &indexedIterator{index: index, strict: strict}
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/iter.go
new file mode 100644
index 000000000..3b5553274
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/iter.go
@@ -0,0 +1,132 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package iterator provides interface and implementation to traverse over
+// contents of a database.
+package iterator
+
+import (
+ "errors"
+
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+var (
+ ErrIterReleased = errors.New("leveldb/iterator: iterator released")
+)
+
+// IteratorSeeker is the interface that wraps the 'seek methods'.
+type IteratorSeeker interface {
+ // First moves the iterator to the first key/value pair. If the iterator
+	// only contains one key/value pair then First and Last move
+	// to the same key/value pair.
+	// It returns whether such a pair exists.
+ First() bool
+
+ // Last moves the iterator to the last key/value pair. If the iterator
+	// only contains one key/value pair then First and Last move
+	// to the same key/value pair.
+	// It returns whether such a pair exists.
+ Last() bool
+
+ // Seek moves the iterator to the first key/value pair whose key is greater
+ // than or equal to the given key.
+	// It returns whether such a pair exists.
+ //
+ // It is safe to modify the contents of the argument after Seek returns.
+ Seek(key []byte) bool
+
+ // Next moves the iterator to the next key/value pair.
+	// It returns false if the iterator is exhausted.
+ Next() bool
+
+ // Prev moves the iterator to the previous key/value pair.
+	// It returns false if the iterator is exhausted.
+ Prev() bool
+}
+
+// CommonIterator is the interface that wraps common iterator methods.
+type CommonIterator interface {
+ IteratorSeeker
+
+	// util.Releaser is the interface that wraps the basic Release method.
+	// When called, Release releases any resources associated with the
+	// iterator.
+ util.Releaser
+
+ // util.ReleaseSetter is the interface that wraps the basic SetReleaser
+ // method.
+ util.ReleaseSetter
+
+ // TODO: Remove this when ready.
+ Valid() bool
+
+ // Error returns any accumulated error. Exhausting all the key/value pairs
+ // is not considered to be an error.
+ Error() error
+}
+
+// Iterator iterates over a DB's key/value pairs in key order.
+//
+// When it encounters an error, any 'seek method' will return false and will
+// yield no key/value pairs. The error can be queried by calling the Error
+// method. Calling Release is still necessary.
+//
+// An iterator must be released after use, but it is not necessary to read
+// an iterator until exhaustion.
+// Also, an iterator is not necessarily safe for concurrent use, but it is
+// safe to use multiple iterators concurrently, with each in a dedicated
+// goroutine.
+type Iterator interface {
+ CommonIterator
+
+ // Key returns the key of the current key/value pair, or nil if done.
+ // The caller should not modify the contents of the returned slice, and
+	// its contents may change on the next call to any 'seek method'.
+ Key() []byte
+
+	// Value returns the value of the current key/value pair, or nil if done.
+ // The caller should not modify the contents of the returned slice, and
+	// its contents may change on the next call to any 'seek method'.
+ Value() []byte
+}
+
+// ErrorCallbackSetter is the interface that wraps the basic SetErrorCallback
+// method.
+//
+// ErrorCallbackSetter is implemented by the indexed and merged iterators.
+type ErrorCallbackSetter interface {
+	// SetErrorCallback sets an error callback for the corresponding
+	// iterator. Use nil to clear the callback.
+ SetErrorCallback(f func(err error))
+}
+
+type emptyIterator struct {
+ util.BasicReleaser
+ err error
+}
+
+func (i *emptyIterator) rErr() {
+ if i.err == nil && i.Released() {
+ i.err = ErrIterReleased
+ }
+}
+
+func (*emptyIterator) Valid() bool { return false }
+func (i *emptyIterator) First() bool { i.rErr(); return false }
+func (i *emptyIterator) Last() bool { i.rErr(); return false }
+func (i *emptyIterator) Seek(key []byte) bool { i.rErr(); return false }
+func (i *emptyIterator) Next() bool { i.rErr(); return false }
+func (i *emptyIterator) Prev() bool { i.rErr(); return false }
+func (*emptyIterator) Key() []byte { return nil }
+func (*emptyIterator) Value() []byte { return nil }
+func (i *emptyIterator) Error() error { return i.err }
+
+// NewEmptyIterator creates an empty iterator. The err parameter can be
+// nil, but if not nil the given err will be returned by the Error method.
+func NewEmptyIterator(err error) Iterator {
+ return &emptyIterator{err: err}
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go
new file mode 100644
index 000000000..1a7e29df8
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go
@@ -0,0 +1,304 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package iterator
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/comparer"
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+type dir int
+
+const (
+ dirReleased dir = iota - 1
+ dirSOI
+ dirEOI
+ dirBackward
+ dirForward
+)
+
+type mergedIterator struct {
+ cmp comparer.Comparer
+ iters []Iterator
+ strict bool
+
+ keys [][]byte
+ index int
+ dir dir
+ err error
+ errf func(err error)
+ releaser util.Releaser
+}
+
+func assertKey(key []byte) []byte {
+ if key == nil {
+ panic("leveldb/iterator: nil key")
+ }
+ return key
+}
+
+func (i *mergedIterator) iterErr(iter Iterator) bool {
+ if err := iter.Error(); err != nil {
+ if i.errf != nil {
+ i.errf(err)
+ }
+ if i.strict || !errors.IsCorrupted(err) {
+ i.err = err
+ return true
+ }
+ }
+ return false
+}
+
+func (i *mergedIterator) Valid() bool {
+ return i.err == nil && i.dir > dirEOI
+}
+
+func (i *mergedIterator) First() bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ for x, iter := range i.iters {
+ switch {
+ case iter.First():
+ i.keys[x] = assertKey(iter.Key())
+ case i.iterErr(iter):
+ return false
+ default:
+ i.keys[x] = nil
+ }
+ }
+ i.dir = dirSOI
+ return i.next()
+}
+
+func (i *mergedIterator) Last() bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ for x, iter := range i.iters {
+ switch {
+ case iter.Last():
+ i.keys[x] = assertKey(iter.Key())
+ case i.iterErr(iter):
+ return false
+ default:
+ i.keys[x] = nil
+ }
+ }
+ i.dir = dirEOI
+ return i.prev()
+}
+
+func (i *mergedIterator) Seek(key []byte) bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ for x, iter := range i.iters {
+ switch {
+ case iter.Seek(key):
+ i.keys[x] = assertKey(iter.Key())
+ case i.iterErr(iter):
+ return false
+ default:
+ i.keys[x] = nil
+ }
+ }
+ i.dir = dirSOI
+ return i.next()
+}
+
+func (i *mergedIterator) next() bool {
+ var key []byte
+ if i.dir == dirForward {
+ key = i.keys[i.index]
+ }
+ for x, tkey := range i.keys {
+ if tkey != nil && (key == nil || i.cmp.Compare(tkey, key) < 0) {
+ key = tkey
+ i.index = x
+ }
+ }
+ if key == nil {
+ i.dir = dirEOI
+ return false
+ }
+ i.dir = dirForward
+ return true
+}
+
+func (i *mergedIterator) Next() bool {
+ if i.dir == dirEOI || i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ switch i.dir {
+ case dirSOI:
+ return i.First()
+ case dirBackward:
+ key := append([]byte{}, i.keys[i.index]...)
+ if !i.Seek(key) {
+ return false
+ }
+ return i.Next()
+ }
+
+ x := i.index
+ iter := i.iters[x]
+ switch {
+ case iter.Next():
+ i.keys[x] = assertKey(iter.Key())
+ case i.iterErr(iter):
+ return false
+ default:
+ i.keys[x] = nil
+ }
+ return i.next()
+}
+
+func (i *mergedIterator) prev() bool {
+ var key []byte
+ if i.dir == dirBackward {
+ key = i.keys[i.index]
+ }
+ for x, tkey := range i.keys {
+ if tkey != nil && (key == nil || i.cmp.Compare(tkey, key) > 0) {
+ key = tkey
+ i.index = x
+ }
+ }
+ if key == nil {
+ i.dir = dirSOI
+ return false
+ }
+ i.dir = dirBackward
+ return true
+}
+
+func (i *mergedIterator) Prev() bool {
+ if i.dir == dirSOI || i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ switch i.dir {
+ case dirEOI:
+ return i.Last()
+ case dirForward:
+ key := append([]byte{}, i.keys[i.index]...)
+ for x, iter := range i.iters {
+ if x == i.index {
+ continue
+ }
+ seek := iter.Seek(key)
+ switch {
+ case seek && iter.Prev(), !seek && iter.Last():
+ i.keys[x] = assertKey(iter.Key())
+ case i.iterErr(iter):
+ return false
+ default:
+ i.keys[x] = nil
+ }
+ }
+ }
+
+ x := i.index
+ iter := i.iters[x]
+ switch {
+ case iter.Prev():
+ i.keys[x] = assertKey(iter.Key())
+ case i.iterErr(iter):
+ return false
+ default:
+ i.keys[x] = nil
+ }
+ return i.prev()
+}
+
+func (i *mergedIterator) Key() []byte {
+ if i.err != nil || i.dir <= dirEOI {
+ return nil
+ }
+ return i.keys[i.index]
+}
+
+func (i *mergedIterator) Value() []byte {
+ if i.err != nil || i.dir <= dirEOI {
+ return nil
+ }
+ return i.iters[i.index].Value()
+}
+
+func (i *mergedIterator) Release() {
+ if i.dir != dirReleased {
+ i.dir = dirReleased
+ for _, iter := range i.iters {
+ iter.Release()
+ }
+ i.iters = nil
+ i.keys = nil
+ if i.releaser != nil {
+ i.releaser.Release()
+ i.releaser = nil
+ }
+ }
+}
+
+func (i *mergedIterator) SetReleaser(releaser util.Releaser) {
+ if i.dir == dirReleased {
+ panic(util.ErrReleased)
+ }
+ if i.releaser != nil && releaser != nil {
+ panic(util.ErrHasReleaser)
+ }
+ i.releaser = releaser
+}
+
+func (i *mergedIterator) Error() error {
+ return i.err
+}
+
+func (i *mergedIterator) SetErrorCallback(f func(err error)) {
+ i.errf = f
+}
+
+// NewMergedIterator returns an iterator that merges its input. Walking the
+// resultant iterator will return all key/value pairs of all input iterators
+// in strictly increasing key order, as defined by cmp.
+// The input's key ranges may overlap, but there are assumed to be no duplicate
+// keys: if iters[i] contains a key k then iters[j] will not contain that key k.
+// None of the iters may be nil.
+//
+// If strict is true then any 'corruption errors' (i.e. errors.IsCorrupted(err) == true)
+// won't be ignored and will halt the 'merged iterator'; otherwise the iterator will
+// continue to the next 'input iterator'.
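+//
+// A minimal sketch (itA and itB are placeholder input iterators; cmp would
+// typically be comparer.DefaultComparer from this repository):
+//
+//	merged := NewMergedIterator([]Iterator{itA, itB}, cmp, true)
+//	for merged.Next() {
+//		// Keys arrive in strictly increasing order across all inputs.
+//	}
+//	merged.Release()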
+func NewMergedIterator(iters []Iterator, cmp comparer.Comparer, strict bool) Iterator {
+ return &mergedIterator{
+ iters: iters,
+ cmp: cmp,
+ strict: strict,
+ keys: make([][]byte, len(iters)),
+ }
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go b/vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go
new file mode 100644
index 000000000..d094c3d0f
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go
@@ -0,0 +1,524 @@
+// Copyright 2011 The LevelDB-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Taken from: https://code.google.com/p/leveldb-go/source/browse/leveldb/record/record.go?r=1d5ccbe03246da926391ee12d1c6caae054ff4b0
+// License, author and contributor information can be found at the URLs below, respectively:
+// https://code.google.com/p/leveldb-go/source/browse/LICENSE
+// https://code.google.com/p/leveldb-go/source/browse/AUTHORS
+// https://code.google.com/p/leveldb-go/source/browse/CONTRIBUTORS
+
+// Package journal reads and writes sequences of journals. Each journal is a stream
+// of bytes that completes before the next journal starts.
+//
+// When reading, call Next to obtain an io.Reader for the next journal. Next will
+// return io.EOF when there are no more journals. It is valid to call Next
+// without reading the current journal to exhaustion.
+//
+// When writing, call Next to obtain an io.Writer for the next journal. Calling
+// Next finishes the current journal. Call Close to finish the final journal.
+//
+// Optionally, call Flush to finish the current journal and flush the underlying
+// writer without starting a new journal. To start a new journal after flushing,
+// call Next.
+//
+// Neither Readers nor Writers are safe for concurrent use.
+//
+// Example code:
+// func read(r io.Reader) ([]string, error) {
+// var ss []string
+// journals := journal.NewReader(r, nil, true, true)
+// for {
+// j, err := journals.Next()
+// if err == io.EOF {
+// break
+// }
+// if err != nil {
+// return nil, err
+// }
+// s, err := ioutil.ReadAll(j)
+// if err != nil {
+// return nil, err
+// }
+// ss = append(ss, string(s))
+// }
+// return ss, nil
+// }
+//
+// func write(w io.Writer, ss []string) error {
+// journals := journal.NewWriter(w)
+// for _, s := range ss {
+// j, err := journals.Next()
+// if err != nil {
+// return err
+// }
+//		if _, err := j.Write([]byte(s)); err != nil {
+// return err
+// }
+// }
+// return journals.Close()
+// }
+//
+// The wire format is that the stream is divided into 32KiB blocks, and each
+// block contains a number of tightly packed chunks. Chunks cannot cross block
+// boundaries. The last block may be shorter than 32 KiB. Any unused bytes in a
+// block must be zero.
+//
+// A journal maps to one or more chunks. Each chunk has a 7 byte header (a 4
+// byte checksum, a 2 byte little-endian uint16 length, and a 1 byte chunk type)
+// followed by a payload. The checksum is over the chunk type and the payload.
+//
+// There are four chunk types: whether the chunk is the full journal, or the
+// first, middle or last chunk of a multi-chunk journal. A multi-chunk journal
+// has one first chunk, zero or more middle chunks, and one last chunk.
+//
+// The wire format allows for limited recovery in the face of data corruption:
+// on a format error (such as a checksum mismatch), the reader moves to the
+// next block and looks for the next full or first chunk.
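+//
+// For illustration, decoding the header of a chunk at offset off within a
+// block follows directly from the layout above:
+//
+//	checksum := binary.LittleEndian.Uint32(buf[off+0 : off+4])
+//	length := binary.LittleEndian.Uint16(buf[off+4 : off+6])
+//	chunkType := buf[off+6]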
+package journal
+
+import (
+ "encoding/binary"
+ "fmt"
+ "io"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// These constants are part of the wire format and should not be changed.
+const (
+ fullChunkType = 1
+ firstChunkType = 2
+ middleChunkType = 3
+ lastChunkType = 4
+)
+
+const (
+ blockSize = 32 * 1024
+ headerSize = 7
+)
+
+type flusher interface {
+ Flush() error
+}
+
+// ErrCorrupted is the error type generated by a corrupted block or chunk.
+type ErrCorrupted struct {
+ Size int
+ Reason string
+}
+
+func (e *ErrCorrupted) Error() string {
+ return fmt.Sprintf("leveldb/journal: block/chunk corrupted: %s (%d bytes)", e.Reason, e.Size)
+}
+
+// Dropper is the interface that wraps the simple Drop method. The Drop
+// method is called when the journal reader drops a block or chunk.
+type Dropper interface {
+ Drop(err error)
+}
+
+// Reader reads journals from an underlying io.Reader.
+type Reader struct {
+ // r is the underlying reader.
+ r io.Reader
+ // the dropper.
+ dropper Dropper
+ // strict flag.
+ strict bool
+ // checksum flag.
+ checksum bool
+ // seq is the sequence number of the current journal.
+ seq int
+ // buf[i:j] is the unread portion of the current chunk's payload.
+ // The low bound, i, excludes the chunk header.
+ i, j int
+ // n is the number of bytes of buf that are valid. Once reading has started,
+ // only the final block can have n < blockSize.
+ n int
+ // last is whether the current chunk is the last chunk of the journal.
+ last bool
+ // err is any accumulated error.
+ err error
+ // buf is the buffer.
+ buf [blockSize]byte
+}
+
+// NewReader returns a new reader. The dropper may be nil, and if
+// strict is true then a corrupted or invalid chunk will halt the journal
+// reader entirely.
+func NewReader(r io.Reader, dropper Dropper, strict, checksum bool) *Reader {
+ return &Reader{
+ r: r,
+ dropper: dropper,
+ strict: strict,
+ checksum: checksum,
+ last: true,
+ }
+}
+
+var errSkip = errors.New("leveldb/journal: skipped")
+
+func (r *Reader) corrupt(n int, reason string, skip bool) error {
+ if r.dropper != nil {
+ r.dropper.Drop(&ErrCorrupted{n, reason})
+ }
+ if r.strict && !skip {
+ r.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrCorrupted{n, reason})
+ return r.err
+ }
+ return errSkip
+}
+
+// nextChunk sets r.buf[r.i:r.j] to hold the next chunk's payload, reading the
+// next block into the buffer if necessary.
+func (r *Reader) nextChunk(first bool) error {
+ for {
+ if r.j+headerSize <= r.n {
+ checksum := binary.LittleEndian.Uint32(r.buf[r.j+0 : r.j+4])
+ length := binary.LittleEndian.Uint16(r.buf[r.j+4 : r.j+6])
+ chunkType := r.buf[r.j+6]
+ unprocBlock := r.n - r.j
+ if checksum == 0 && length == 0 && chunkType == 0 {
+ // Drop entire block.
+ r.i = r.n
+ r.j = r.n
+ return r.corrupt(unprocBlock, "zero header", false)
+ }
+ if chunkType < fullChunkType || chunkType > lastChunkType {
+ // Drop entire block.
+ r.i = r.n
+ r.j = r.n
+ return r.corrupt(unprocBlock, fmt.Sprintf("invalid chunk type %#x", chunkType), false)
+ }
+ r.i = r.j + headerSize
+ r.j = r.j + headerSize + int(length)
+ if r.j > r.n {
+ // Drop entire block.
+ r.i = r.n
+ r.j = r.n
+ return r.corrupt(unprocBlock, "chunk length overflows block", false)
+ } else if r.checksum && checksum != util.NewCRC(r.buf[r.i-1:r.j]).Value() {
+ // Drop entire block.
+ r.i = r.n
+ r.j = r.n
+ return r.corrupt(unprocBlock, "checksum mismatch", false)
+ }
+ if first && chunkType != fullChunkType && chunkType != firstChunkType {
+ chunkLength := (r.j - r.i) + headerSize
+ r.i = r.j
+ // Report the error, but skip it.
+ return r.corrupt(chunkLength, "orphan chunk", true)
+ }
+ r.last = chunkType == fullChunkType || chunkType == lastChunkType
+ return nil
+ }
+
+ // The last block.
+ if r.n < blockSize && r.n > 0 {
+ if !first {
+ return r.corrupt(0, "missing chunk part", false)
+ }
+ r.err = io.EOF
+ return r.err
+ }
+
+ // Read block.
+ n, err := io.ReadFull(r.r, r.buf[:])
+ if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
+ return err
+ }
+ if n == 0 {
+ if !first {
+ return r.corrupt(0, "missing chunk part", false)
+ }
+ r.err = io.EOF
+ return r.err
+ }
+ r.i, r.j, r.n = 0, 0, n
+ }
+}
+
+// Next returns a reader for the next journal. It returns io.EOF if there are no
+// more journals. The reader returned becomes stale after the next Next call,
+// and should no longer be used. If strict is false, the reader will return an
+// io.ErrUnexpectedEOF error when it finds a corrupted journal.
+func (r *Reader) Next() (io.Reader, error) {
+ r.seq++
+ if r.err != nil {
+ return nil, r.err
+ }
+ r.i = r.j
+ for {
+ if err := r.nextChunk(true); err == nil {
+ break
+ } else if err != errSkip {
+ return nil, err
+ }
+ }
+ return &singleReader{r, r.seq, nil}, nil
+}
+
+// Reset resets the journal reader, allowing reuse of the journal reader. Reset
+// returns the last accumulated error.
+func (r *Reader) Reset(reader io.Reader, dropper Dropper, strict, checksum bool) error {
+ r.seq++
+ err := r.err
+ r.r = reader
+ r.dropper = dropper
+ r.strict = strict
+ r.checksum = checksum
+ r.i = 0
+ r.j = 0
+ r.n = 0
+ r.last = true
+ r.err = nil
+ return err
+}
+
+type singleReader struct {
+ r *Reader
+ seq int
+ err error
+}
+
+func (x *singleReader) Read(p []byte) (int, error) {
+ r := x.r
+ if r.seq != x.seq {
+ return 0, errors.New("leveldb/journal: stale reader")
+ }
+ if x.err != nil {
+ return 0, x.err
+ }
+ if r.err != nil {
+ return 0, r.err
+ }
+ for r.i == r.j {
+ if r.last {
+ return 0, io.EOF
+ }
+ x.err = r.nextChunk(false)
+ if x.err != nil {
+ if x.err == errSkip {
+ x.err = io.ErrUnexpectedEOF
+ }
+ return 0, x.err
+ }
+ }
+ n := copy(p, r.buf[r.i:r.j])
+ r.i += n
+ return n, nil
+}
+
+func (x *singleReader) ReadByte() (byte, error) {
+ r := x.r
+ if r.seq != x.seq {
+ return 0, errors.New("leveldb/journal: stale reader")
+ }
+ if x.err != nil {
+ return 0, x.err
+ }
+ if r.err != nil {
+ return 0, r.err
+ }
+ for r.i == r.j {
+ if r.last {
+ return 0, io.EOF
+ }
+ x.err = r.nextChunk(false)
+ if x.err != nil {
+ if x.err == errSkip {
+ x.err = io.ErrUnexpectedEOF
+ }
+ return 0, x.err
+ }
+ }
+ c := r.buf[r.i]
+ r.i++
+ return c, nil
+}
+
+// Writer writes journals to an underlying io.Writer.
+type Writer struct {
+ // w is the underlying writer.
+ w io.Writer
+ // seq is the sequence number of the current journal.
+ seq int
+ // f is w as a flusher.
+ f flusher
+ // buf[i:j] is the bytes that will become the current chunk.
+ // The low bound, i, includes the chunk header.
+ i, j int
+ // buf[:written] has already been written to w.
+ // written is zero unless Flush has been called.
+ written int
+ // first is whether the current chunk is the first chunk of the journal.
+ first bool
+ // pending is whether a chunk is buffered but not yet written.
+ pending bool
+ // err is any accumulated error.
+ err error
+ // buf is the buffer.
+ buf [blockSize]byte
+}
+
+// NewWriter returns a new Writer.
+func NewWriter(w io.Writer) *Writer {
+ f, _ := w.(flusher)
+ return &Writer{
+ w: w,
+ f: f,
+ }
+}
+
+// fillHeader fills in the header for the pending chunk.
+func (w *Writer) fillHeader(last bool) {
+ if w.i+headerSize > w.j || w.j > blockSize {
+ panic("leveldb/journal: bad writer state")
+ }
+ if last {
+ if w.first {
+ w.buf[w.i+6] = fullChunkType
+ } else {
+ w.buf[w.i+6] = lastChunkType
+ }
+ } else {
+ if w.first {
+ w.buf[w.i+6] = firstChunkType
+ } else {
+ w.buf[w.i+6] = middleChunkType
+ }
+ }
+ binary.LittleEndian.PutUint32(w.buf[w.i+0:w.i+4], util.NewCRC(w.buf[w.i+6:w.j]).Value())
+ binary.LittleEndian.PutUint16(w.buf[w.i+4:w.i+6], uint16(w.j-w.i-headerSize))
+}
+
+// writeBlock writes the buffered block to the underlying writer, and reserves
+// space for the next chunk's header.
+func (w *Writer) writeBlock() {
+ _, w.err = w.w.Write(w.buf[w.written:])
+ w.i = 0
+ w.j = headerSize
+ w.written = 0
+}
+
+// writePending finishes the current journal and writes the buffer to the
+// underlying writer.
+func (w *Writer) writePending() {
+ if w.err != nil {
+ return
+ }
+ if w.pending {
+ w.fillHeader(true)
+ w.pending = false
+ }
+ _, w.err = w.w.Write(w.buf[w.written:w.j])
+ w.written = w.j
+}
+
+// Close finishes the current journal and closes the writer.
+func (w *Writer) Close() error {
+ w.seq++
+ w.writePending()
+ if w.err != nil {
+ return w.err
+ }
+ w.err = errors.New("leveldb/journal: closed Writer")
+ return nil
+}
+
+// Flush finishes the current journal, writes to the underlying writer, and
+// flushes it if that writer implements interface{ Flush() error }.
+func (w *Writer) Flush() error {
+ w.seq++
+ w.writePending()
+ if w.err != nil {
+ return w.err
+ }
+ if w.f != nil {
+ w.err = w.f.Flush()
+ return w.err
+ }
+ return nil
+}
+
+// Reset resets the journal writer, allowing reuse of the journal writer. Reset
+// also closes the journal writer if it is not already closed.
+func (w *Writer) Reset(writer io.Writer) (err error) {
+ w.seq++
+ if w.err == nil {
+ w.writePending()
+ err = w.err
+ }
+ w.w = writer
+ w.f, _ = writer.(flusher)
+ w.i = 0
+ w.j = 0
+ w.written = 0
+ w.first = false
+ w.pending = false
+ w.err = nil
+ return
+}
+
+// Next returns a writer for the next journal. The writer returned becomes stale
+// after the next Close, Flush or Next call, and should no longer be used.
+func (w *Writer) Next() (io.Writer, error) {
+ w.seq++
+ if w.err != nil {
+ return nil, w.err
+ }
+ if w.pending {
+ w.fillHeader(true)
+ }
+ w.i = w.j
+ w.j = w.j + headerSize
+ // Check if there is room in the block for the header.
+ if w.j > blockSize {
+ // Fill in the rest of the block with zeroes.
+ for k := w.i; k < blockSize; k++ {
+ w.buf[k] = 0
+ }
+ w.writeBlock()
+ if w.err != nil {
+ return nil, w.err
+ }
+ }
+ w.first = true
+ w.pending = true
+ return singleWriter{w, w.seq}, nil
+}
+
+type singleWriter struct {
+ w *Writer
+ seq int
+}
+
+func (x singleWriter) Write(p []byte) (int, error) {
+ w := x.w
+ if w.seq != x.seq {
+ return 0, errors.New("leveldb/journal: stale writer")
+ }
+ if w.err != nil {
+ return 0, w.err
+ }
+ n0 := len(p)
+ for len(p) > 0 {
+ // Write a block, if it is full.
+ if w.j == blockSize {
+ w.fillHeader(false)
+ w.writeBlock()
+ if w.err != nil {
+ return 0, w.err
+ }
+ w.first = false
+ }
+ // Copy bytes into the buffer.
+ n := copy(w.buf[w.j:], p)
+ w.j += n
+ p = p[n:]
+ }
+ return n0, nil
+}
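+
+// Chunking sketch (derived from Write above, assuming the record starts at a
+// block boundary): with 32 KiB blocks each chunk carries at most
+// 32768-7 = 32761 payload bytes, so a 70000-byte record becomes a
+// firstChunkType chunk of 32761 bytes, a middleChunkType chunk of 32761
+// bytes, and a lastChunkType chunk of the remaining 4478 bytes.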
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/key.go b/vendor/github.com/syndtr/goleveldb/leveldb/key.go
new file mode 100644
index 000000000..ad8f51ec8
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/key.go
@@ -0,0 +1,143 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "encoding/binary"
+ "fmt"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+// ErrInternalKeyCorrupted records internal key corruption.
+type ErrInternalKeyCorrupted struct {
+ Ikey []byte
+ Reason string
+}
+
+func (e *ErrInternalKeyCorrupted) Error() string {
+ return fmt.Sprintf("leveldb: internal key %q corrupted: %s", e.Ikey, e.Reason)
+}
+
+func newErrInternalKeyCorrupted(ikey []byte, reason string) error {
+ return errors.NewErrCorrupted(storage.FileDesc{}, &ErrInternalKeyCorrupted{append([]byte{}, ikey...), reason})
+}
+
+type keyType uint
+
+func (kt keyType) String() string {
+ switch kt {
+ case keyTypeDel:
+ return "d"
+ case keyTypeVal:
+ return "v"
+ }
+ return fmt.Sprintf("<invalid:%#x>", uint(kt))
+}
+
+// Value types encoded as the last component of internal keys.
+// Don't modify; these values are saved to disk.
+const (
+ keyTypeDel = keyType(0)
+ keyTypeVal = keyType(1)
+)
+
+// keyTypeSeek defines the keyType that should be passed when constructing an
+// internal key for seeking to a particular sequence number (since we
+// sort sequence numbers in decreasing order and the value type is
+// embedded as the low 8 bits in the sequence number in internal keys,
+// we need to use the highest-numbered ValueType, not the lowest).
+const keyTypeSeek = keyTypeVal
+
+const (
+	// Maximum possible value for a sequence number; the low 8 bits are
+	// used by the value type, so both can be packed together into a single
+	// 64-bit integer.
+ keyMaxSeq = (uint64(1) << 56) - 1
+ // Maximum value possible for packed sequence number and type.
+ keyMaxNum = (keyMaxSeq << 8) | uint64(keyTypeSeek)
+)
+
+// Maximum number encoded in bytes.
+var keyMaxNumBytes = make([]byte, 8)
+
+func init() {
+ binary.LittleEndian.PutUint64(keyMaxNumBytes, keyMaxNum)
+}
+
+type internalKey []byte
+
+func makeInternalKey(dst, ukey []byte, seq uint64, kt keyType) internalKey {
+ if seq > keyMaxSeq {
+ panic("leveldb: invalid sequence number")
+ } else if kt > keyTypeVal {
+ panic("leveldb: invalid type")
+ }
+
+ dst = ensureBuffer(dst, len(ukey)+8)
+ copy(dst, ukey)
+ binary.LittleEndian.PutUint64(dst[len(ukey):], (seq<<8)|uint64(kt))
+ return internalKey(dst)
+}
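+
+// Encoding sketch (editor's illustration): an internal key is the user key
+// followed by 8 bytes holding (seq<<8)|keyType, little-endian. For example:
+//
+//	ik := makeInternalKey(nil, []byte("foo"), 42, keyTypeVal)
+//	// len(ik) == 3+8; the trailing uint64 is 42<<8|1 == 0x2a01
+//	ukey, seq, kt, _ := parseInternalKey(ik)
+//	// ukey == "foo", seq == 42, kt == keyTypeVal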
+
+func parseInternalKey(ik []byte) (ukey []byte, seq uint64, kt keyType, err error) {
+ if len(ik) < 8 {
+ return nil, 0, 0, newErrInternalKeyCorrupted(ik, "invalid length")
+ }
+ num := binary.LittleEndian.Uint64(ik[len(ik)-8:])
+ seq, kt = uint64(num>>8), keyType(num&0xff)
+ if kt > keyTypeVal {
+ return nil, 0, 0, newErrInternalKeyCorrupted(ik, "invalid type")
+ }
+ ukey = ik[:len(ik)-8]
+ return
+}
+
+func validInternalKey(ik []byte) bool {
+ _, _, _, err := parseInternalKey(ik)
+ return err == nil
+}
+
+func (ik internalKey) assert() {
+ if ik == nil {
+ panic("leveldb: nil internalKey")
+ }
+ if len(ik) < 8 {
+ panic(fmt.Sprintf("leveldb: internal key %q, len=%d: invalid length", []byte(ik), len(ik)))
+ }
+}
+
+func (ik internalKey) ukey() []byte {
+ ik.assert()
+ return ik[:len(ik)-8]
+}
+
+func (ik internalKey) num() uint64 {
+ ik.assert()
+ return binary.LittleEndian.Uint64(ik[len(ik)-8:])
+}
+
+func (ik internalKey) parseNum() (seq uint64, kt keyType) {
+ num := ik.num()
+ seq, kt = uint64(num>>8), keyType(num&0xff)
+ if kt > keyTypeVal {
+ panic(fmt.Sprintf("leveldb: internal key %q, len=%d: invalid type %#x", []byte(ik), len(ik), kt))
+ }
+ return
+}
+
+func (ik internalKey) String() string {
+ if ik == nil {
+ return "<nil>"
+ }
+
+ if ukey, seq, kt, err := parseInternalKey(ik); err == nil {
+ return fmt.Sprintf("%s,%s%d", shorten(string(ukey)), kt, seq)
+ }
+ return fmt.Sprintf("<invalid:%#x>", []byte(ik))
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go b/vendor/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go
new file mode 100644
index 000000000..18a19ed42
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go
@@ -0,0 +1,475 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package memdb provides an in-memory key/value database implementation.
+package memdb
+
+import (
+ "math/rand"
+ "sync"
+
+ "github.com/syndtr/goleveldb/leveldb/comparer"
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// Common errors.
+var (
+ ErrNotFound = errors.ErrNotFound
+ ErrIterReleased = errors.New("leveldb/memdb: iterator released")
+)
+
+const tMaxHeight = 12
+
+type dbIter struct {
+ util.BasicReleaser
+ p *DB
+ slice *util.Range
+ node int
+ forward bool
+ key, value []byte
+ err error
+}
+
+func (i *dbIter) fill(checkStart, checkLimit bool) bool {
+ if i.node != 0 {
+ n := i.p.nodeData[i.node]
+ m := n + i.p.nodeData[i.node+nKey]
+ i.key = i.p.kvData[n:m]
+ if i.slice != nil {
+ switch {
+ case checkLimit && i.slice.Limit != nil && i.p.cmp.Compare(i.key, i.slice.Limit) >= 0:
+ fallthrough
+ case checkStart && i.slice.Start != nil && i.p.cmp.Compare(i.key, i.slice.Start) < 0:
+ i.node = 0
+ goto bail
+ }
+ }
+ i.value = i.p.kvData[m : m+i.p.nodeData[i.node+nVal]]
+ return true
+ }
+bail:
+ i.key = nil
+ i.value = nil
+ return false
+}
+
+func (i *dbIter) Valid() bool {
+ return i.node != 0
+}
+
+func (i *dbIter) First() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ i.forward = true
+ i.p.mu.RLock()
+ defer i.p.mu.RUnlock()
+ if i.slice != nil && i.slice.Start != nil {
+ i.node, _ = i.p.findGE(i.slice.Start, false)
+ } else {
+ i.node = i.p.nodeData[nNext]
+ }
+ return i.fill(false, true)
+}
+
+func (i *dbIter) Last() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ i.forward = false
+ i.p.mu.RLock()
+ defer i.p.mu.RUnlock()
+ if i.slice != nil && i.slice.Limit != nil {
+ i.node = i.p.findLT(i.slice.Limit)
+ } else {
+ i.node = i.p.findLast()
+ }
+ return i.fill(true, false)
+}
+
+func (i *dbIter) Seek(key []byte) bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ i.forward = true
+ i.p.mu.RLock()
+ defer i.p.mu.RUnlock()
+ if i.slice != nil && i.slice.Start != nil && i.p.cmp.Compare(key, i.slice.Start) < 0 {
+ key = i.slice.Start
+ }
+ i.node, _ = i.p.findGE(key, false)
+ return i.fill(false, true)
+}
+
+func (i *dbIter) Next() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if i.node == 0 {
+ if !i.forward {
+ return i.First()
+ }
+ return false
+ }
+ i.forward = true
+ i.p.mu.RLock()
+ defer i.p.mu.RUnlock()
+ i.node = i.p.nodeData[i.node+nNext]
+ return i.fill(false, true)
+}
+
+func (i *dbIter) Prev() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if i.node == 0 {
+ if i.forward {
+ return i.Last()
+ }
+ return false
+ }
+ i.forward = false
+ i.p.mu.RLock()
+ defer i.p.mu.RUnlock()
+ i.node = i.p.findLT(i.key)
+ return i.fill(true, false)
+}
+
+func (i *dbIter) Key() []byte {
+ return i.key
+}
+
+func (i *dbIter) Value() []byte {
+ return i.value
+}
+
+func (i *dbIter) Error() error { return i.err }
+
+func (i *dbIter) Release() {
+ if !i.Released() {
+ i.p = nil
+ i.node = 0
+ i.key = nil
+ i.value = nil
+ i.BasicReleaser.Release()
+ }
+}
+
+const (
+ nKV = iota
+ nKey
+ nVal
+ nHeight
+ nNext
+)
+
+// DB is an in-memory key/value database.
+type DB struct {
+ cmp comparer.BasicComparer
+ rnd *rand.Rand
+
+ mu sync.RWMutex
+ kvData []byte
+ // Node data:
+ // [0] : KV offset
+ // [1] : Key length
+ // [2] : Value length
+ // [3] : Height
+	// [4..4+height) : Next nodes
+ nodeData []int
+ prevNode [tMaxHeight]int
+ maxHeight int
+ n int
+ kvSize int
+}
+
+func (p *DB) randHeight() (h int) {
+ const branching = 4
+ h = 1
+ for h < tMaxHeight && p.rnd.Int()%branching == 0 {
+ h++
+ }
+ return
+}
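+
+// With branching == 4, P(height >= k) = (1/4)^(k-1), so the expected height
+// is 1/(1-1/4) = 4/3 and each node carries 4/3 next pointers on average;
+// tMaxHeight = 12 comfortably covers very large tables.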
+
+// Must hold the RW-lock if prev == true, as it uses the shared prevNode slice.
+func (p *DB) findGE(key []byte, prev bool) (int, bool) {
+ node := 0
+ h := p.maxHeight - 1
+ for {
+ next := p.nodeData[node+nNext+h]
+ cmp := 1
+ if next != 0 {
+ o := p.nodeData[next]
+ cmp = p.cmp.Compare(p.kvData[o:o+p.nodeData[next+nKey]], key)
+ }
+ if cmp < 0 {
+ // Keep searching in this list
+ node = next
+ } else {
+ if prev {
+ p.prevNode[h] = node
+ } else if cmp == 0 {
+ return next, true
+ }
+ if h == 0 {
+ return next, cmp == 0
+ }
+ h--
+ }
+ }
+}
+
+func (p *DB) findLT(key []byte) int {
+ node := 0
+ h := p.maxHeight - 1
+ for {
+ next := p.nodeData[node+nNext+h]
+ o := p.nodeData[next]
+ if next == 0 || p.cmp.Compare(p.kvData[o:o+p.nodeData[next+nKey]], key) >= 0 {
+ if h == 0 {
+ break
+ }
+ h--
+ } else {
+ node = next
+ }
+ }
+ return node
+}
+
+func (p *DB) findLast() int {
+ node := 0
+ h := p.maxHeight - 1
+ for {
+ next := p.nodeData[node+nNext+h]
+ if next == 0 {
+ if h == 0 {
+ break
+ }
+ h--
+ } else {
+ node = next
+ }
+ }
+ return node
+}
+
+// Put sets the value for the given key. It overwrites any previous value
+// for that key; a DB is not a multi-map.
+//
+// It is safe to modify the contents of the arguments after Put returns.
+func (p *DB) Put(key []byte, value []byte) error {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+
+ if node, exact := p.findGE(key, true); exact {
+ kvOffset := len(p.kvData)
+ p.kvData = append(p.kvData, key...)
+ p.kvData = append(p.kvData, value...)
+ p.nodeData[node] = kvOffset
+ m := p.nodeData[node+nVal]
+ p.nodeData[node+nVal] = len(value)
+ p.kvSize += len(value) - m
+ return nil
+ }
+
+ h := p.randHeight()
+ if h > p.maxHeight {
+ for i := p.maxHeight; i < h; i++ {
+ p.prevNode[i] = 0
+ }
+ p.maxHeight = h
+ }
+
+ kvOffset := len(p.kvData)
+ p.kvData = append(p.kvData, key...)
+ p.kvData = append(p.kvData, value...)
+ // Node
+ node := len(p.nodeData)
+ p.nodeData = append(p.nodeData, kvOffset, len(key), len(value), h)
+ for i, n := range p.prevNode[:h] {
+ m := n + nNext + i
+ p.nodeData = append(p.nodeData, p.nodeData[m])
+ p.nodeData[m] = node
+ }
+
+ p.kvSize += len(key) + len(value)
+ p.n++
+ return nil
+}
+
+// Delete deletes the value for the given key. It returns ErrNotFound if
+// the DB does not contain the key.
+//
+// It is safe to modify the contents of the arguments after Delete returns.
+func (p *DB) Delete(key []byte) error {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+
+ node, exact := p.findGE(key, true)
+ if !exact {
+ return ErrNotFound
+ }
+
+ h := p.nodeData[node+nHeight]
+ for i, n := range p.prevNode[:h] {
+		m := n + nNext + i
+ p.nodeData[m] = p.nodeData[p.nodeData[m]+nNext+i]
+ }
+
+ p.kvSize -= p.nodeData[node+nKey] + p.nodeData[node+nVal]
+ p.n--
+ return nil
+}
+
+// Contains returns true if the given key is in the DB.
+//
+// It is safe to modify the contents of the arguments after Contains returns.
+func (p *DB) Contains(key []byte) bool {
+ p.mu.RLock()
+ _, exact := p.findGE(key, false)
+ p.mu.RUnlock()
+ return exact
+}
+
+// Get gets the value for the given key. It returns ErrNotFound if the
+// DB does not contain the key.
+//
+// The caller should not modify the contents of the returned slice, but
+// it is safe to modify the contents of the argument after Get returns.
+func (p *DB) Get(key []byte) (value []byte, err error) {
+ p.mu.RLock()
+ if node, exact := p.findGE(key, false); exact {
+ o := p.nodeData[node] + p.nodeData[node+nKey]
+ value = p.kvData[o : o+p.nodeData[node+nVal]]
+ } else {
+ err = ErrNotFound
+ }
+ p.mu.RUnlock()
+ return
+}
+
+// Find finds the key/value pair whose key is greater than or equal to the
+// given key. It returns ErrNotFound if the table doesn't contain
+// such pair.
+//
+// The caller should not modify the contents of the returned slice, but
+// it is safe to modify the contents of the argument after Find returns.
+func (p *DB) Find(key []byte) (rkey, value []byte, err error) {
+ p.mu.RLock()
+ if node, _ := p.findGE(key, false); node != 0 {
+ n := p.nodeData[node]
+ m := n + p.nodeData[node+nKey]
+ rkey = p.kvData[n:m]
+ value = p.kvData[m : m+p.nodeData[node+nVal]]
+ } else {
+ err = ErrNotFound
+ }
+ p.mu.RUnlock()
+ return
+}
+
+// NewIterator returns an iterator of the DB.
+// The returned iterator is not safe for concurrent use, but it is safe to use
+// multiple iterators concurrently, with each in a dedicated goroutine.
+// It is also safe to use an iterator concurrently while modifying its
+// underlying DB. However, the resultant key/value pairs are not guaranteed
+// to be a consistent snapshot of the DB at a particular point in time.
+//
+// Slice allows restricting the iterator to keys in a given
+// range. A nil Range.Start is treated as a key before all keys in the
+// DB, and a nil Range.Limit is treated as a key after all keys in
+// the DB.
+//
+// The iterator must be released after use, by calling the Release method.
+//
+// Also read Iterator documentation of the leveldb/iterator package.
+func (p *DB) NewIterator(slice *util.Range) iterator.Iterator {
+ return &dbIter{p: p, slice: slice}
+}
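+
+// Usage sketch (editor's illustration): iterating keys in ["a", "m").
+// Per util.Range semantics, Start is inclusive and Limit is exclusive;
+// process is a hypothetical callback.
+//
+//	iter := db.NewIterator(&util.Range{Start: []byte("a"), Limit: []byte("m")})
+//	for iter.Next() {
+//		process(iter.Key(), iter.Value())
+//	}
+//	iter.Release()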
+
+// Capacity returns the key/value buffer capacity.
+func (p *DB) Capacity() int {
+ p.mu.RLock()
+ defer p.mu.RUnlock()
+ return cap(p.kvData)
+}
+
+// Size returns the total length of all keys and values. Note that deleted
+// key/value pairs are not accounted for, but they still consume buffer
+// space, since the buffer is append-only.
+func (p *DB) Size() int {
+ p.mu.RLock()
+ defer p.mu.RUnlock()
+ return p.kvSize
+}
+
+// Free returns the remaining key/value buffer capacity before the buffer
+// needs to grow.
+func (p *DB) Free() int {
+ p.mu.RLock()
+ defer p.mu.RUnlock()
+ return cap(p.kvData) - len(p.kvData)
+}
+
+// Len returns the number of entries in the DB.
+func (p *DB) Len() int {
+ p.mu.RLock()
+ defer p.mu.RUnlock()
+ return p.n
+}
+
+// Reset resets the DB to its initial empty state, allowing the buffer to be reused.
+func (p *DB) Reset() {
+ p.mu.Lock()
+ p.rnd = rand.New(rand.NewSource(0xdeadbeef))
+ p.maxHeight = 1
+ p.n = 0
+ p.kvSize = 0
+ p.kvData = p.kvData[:0]
+ p.nodeData = p.nodeData[:nNext+tMaxHeight]
+ p.nodeData[nKV] = 0
+ p.nodeData[nKey] = 0
+ p.nodeData[nVal] = 0
+ p.nodeData[nHeight] = tMaxHeight
+ for n := 0; n < tMaxHeight; n++ {
+ p.nodeData[nNext+n] = 0
+ p.prevNode[n] = 0
+ }
+ p.mu.Unlock()
+}
+
+// New creates a new initialized in-memory key/value DB. The capacity
+// is the initial key/value buffer capacity. The capacity is advisory,
+// not enforced.
+//
+// The DB is append-only: deleting an entry removes its node but does not
+// reclaim the KV buffer space.
+//
+// The returned DB instance is safe for concurrent use.
+func New(cmp comparer.BasicComparer, capacity int) *DB {
+ p := &DB{
+ cmp: cmp,
+ rnd: rand.New(rand.NewSource(0xdeadbeef)),
+ maxHeight: 1,
+ kvData: make([]byte, 0, capacity),
+ nodeData: make([]int, 4+tMaxHeight),
+ }
+ p.nodeData[nHeight] = tMaxHeight
+ return p
+}
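+
+// Usage sketch (editor's illustration), using the bytewise
+// comparer.DefaultComparer:
+//
+//	db := memdb.New(comparer.DefaultComparer, 4096)
+//	db.Put([]byte("k"), []byte("v"))
+//	v, _ := db.Get([]byte("k")) // v == "v"
+//	db.Delete([]byte("k"))
+//	_, err := db.Get([]byte("k")) // err == ErrNotFound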
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go b/vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go
new file mode 100644
index 000000000..44e7d9adc
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go
@@ -0,0 +1,684 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package opt provides sets of options used by LevelDB.
+package opt
+
+import (
+ "math"
+
+ "github.com/syndtr/goleveldb/leveldb/cache"
+ "github.com/syndtr/goleveldb/leveldb/comparer"
+ "github.com/syndtr/goleveldb/leveldb/filter"
+)
+
+const (
+ KiB = 1024
+ MiB = KiB * 1024
+ GiB = MiB * 1024
+)
+
+var (
+ DefaultBlockCacher = LRUCacher
+ DefaultBlockCacheCapacity = 8 * MiB
+ DefaultBlockRestartInterval = 16
+ DefaultBlockSize = 4 * KiB
+ DefaultCompactionExpandLimitFactor = 25
+ DefaultCompactionGPOverlapsFactor = 10
+ DefaultCompactionL0Trigger = 4
+ DefaultCompactionSourceLimitFactor = 1
+ DefaultCompactionTableSize = 2 * MiB
+ DefaultCompactionTableSizeMultiplier = 1.0
+ DefaultCompactionTotalSize = 10 * MiB
+ DefaultCompactionTotalSizeMultiplier = 10.0
+ DefaultCompressionType = SnappyCompression
+ DefaultIteratorSamplingRate = 1 * MiB
+ DefaultOpenFilesCacher = LRUCacher
+ DefaultOpenFilesCacheCapacity = 500
+ DefaultWriteBuffer = 4 * MiB
+ DefaultWriteL0PauseTrigger = 12
+ DefaultWriteL0SlowdownTrigger = 8
+)
+
+// Cacher is a caching algorithm.
+type Cacher interface {
+ New(capacity int) cache.Cacher
+}
+
+type CacherFunc struct {
+ NewFunc func(capacity int) cache.Cacher
+}
+
+func (f *CacherFunc) New(capacity int) cache.Cacher {
+ if f.NewFunc != nil {
+ return f.NewFunc(capacity)
+ }
+ return nil
+}
+
+func noCacher(int) cache.Cacher { return nil }
+
+var (
+ // LRUCacher is the LRU-cache algorithm.
+ LRUCacher = &CacherFunc{cache.NewLRU}
+
+	// NoCacher is the value used to disable the caching algorithm.
+ NoCacher = &CacherFunc{}
+)
+
+// Compression is the 'sorted table' block compression algorithm to use.
+type Compression uint
+
+func (c Compression) String() string {
+ switch c {
+ case DefaultCompression:
+ return "default"
+ case NoCompression:
+ return "none"
+ case SnappyCompression:
+ return "snappy"
+ }
+ return "invalid"
+}
+
+const (
+ DefaultCompression Compression = iota
+ NoCompression
+ SnappyCompression
+ nCompression
+)
+
+// Strict is the DB 'strict level'.
+type Strict uint
+
+const (
+	// If present then a corrupted or invalid chunk or block in the manifest
+	// journal will cause an error instead of being dropped.
+	// This prevents a database with a corrupted manifest from being opened.
+ StrictManifest Strict = 1 << iota
+
+ // If present then journal chunk checksum will be verified.
+ StrictJournalChecksum
+
+	// If present then a corrupted or invalid chunk or block in the journal
+	// will cause an error instead of being dropped.
+	// This prevents a database with a corrupted journal from being opened.
+ StrictJournal
+
+ // If present then 'sorted table' block checksum will be verified.
+ // This has effect on both 'read operation' and compaction.
+ StrictBlockChecksum
+
+	// If present then a corrupted 'sorted table' will fail compaction.
+ // The database will enter read-only mode.
+ StrictCompaction
+
+	// If present then a corrupted 'sorted table' will halt 'read operation'.
+ StrictReader
+
+ // If present then leveldb.Recover will drop corrupted 'sorted table'.
+ StrictRecovery
+
+	// This is only applicable to ReadOptions; if present, the ReadOptions
+	// 'strict level' will override the global one.
+ StrictOverride
+
+ // StrictAll enables all strict flags.
+ StrictAll = StrictManifest | StrictJournalChecksum | StrictJournal | StrictBlockChecksum | StrictCompaction | StrictReader | StrictRecovery
+
+	// DefaultStrict is the default set of strict flags. Specifying any strict
+	// flag will override the default strict flags as a whole (i.e. they are
+	// not OR'ed).
+ DefaultStrict = StrictJournalChecksum | StrictBlockChecksum | StrictCompaction | StrictReader
+
+	// NoStrict disables all strict flags and overrides the default strict flags.
+ NoStrict = ^StrictAll
+)
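+
+// Combination sketch (editor's illustration): strict flags form a bitmask
+// and compose with OR. Note that setting any flag replaces DefaultStrict as
+// a whole:
+//
+//	o := &Options{Strict: StrictJournalChecksum | StrictBlockChecksum}
+//	o.GetStrict(StrictReader) // false: DefaultStrict no longer applies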
+
+// Options holds the optional parameters for the DB at large.
+type Options struct {
+ // AltFilters defines one or more 'alternative filters'.
+ // 'alternative filters' will be used during reads if a filter block
+ // does not match with the 'effective filter'.
+ //
+	// The default value is nil.
+ AltFilters []filter.Filter
+
+	// BlockCacher provides the cache algorithm for LevelDB 'sorted table'
+	// block caching. Specify NoCacher to disable caching.
+ //
+ // The default value is LRUCacher.
+ BlockCacher Cacher
+
+ // BlockCacheCapacity defines the capacity of the 'sorted table' block caching.
+	// Use -1 for zero capacity, which has the same effect as specifying
+	// NoCacher for BlockCacher.
+ //
+ // The default value is 8MiB.
+ BlockCacheCapacity int
+
+ // BlockRestartInterval is the number of keys between restart points for
+ // delta encoding of keys.
+ //
+ // The default value is 16.
+ BlockRestartInterval int
+
+ // BlockSize is the minimum uncompressed size in bytes of each 'sorted table'
+ // block.
+ //
+ // The default value is 4KiB.
+ BlockSize int
+
+ // CompactionExpandLimitFactor limits compaction size after expanded.
+ // This will be multiplied by table size limit at compaction target level.
+ //
+ // The default value is 25.
+ CompactionExpandLimitFactor int
+
+	// CompactionGPOverlapsFactor limits the overlaps in the grandparent level
+	// (level + 2) that a single 'sorted table' generates.
+ // This will be multiplied by table size limit at grandparent level.
+ //
+ // The default value is 10.
+ CompactionGPOverlapsFactor int
+
+	// CompactionL0Trigger defines the number of 'sorted tables' at level-0
+	// that will trigger compaction.
+ //
+ // The default value is 4.
+ CompactionL0Trigger int
+
+ // CompactionSourceLimitFactor limits compaction source size. This doesn't apply to
+ // level-0.
+ // This will be multiplied by table size limit at compaction target level.
+ //
+ // The default value is 1.
+ CompactionSourceLimitFactor int
+
+	// CompactionTableSize limits the size of the 'sorted tables' that
+	// compaction generates. The limit for each level will be calculated as:
+	//	CompactionTableSize * (CompactionTableSizeMultiplier ^ Level)
+	// The multiplier for each level can also be fine-tuned using
+	// CompactionTableSizeMultiplierPerLevel.
+ //
+ // The default value is 2MiB.
+ CompactionTableSize int
+
+ // CompactionTableSizeMultiplier defines multiplier for CompactionTableSize.
+ //
+ // The default value is 1.
+ CompactionTableSizeMultiplier float64
+
+ // CompactionTableSizeMultiplierPerLevel defines per-level multiplier for
+ // CompactionTableSize.
+ // Use zero to skip a level.
+ //
+ // The default value is nil.
+ CompactionTableSizeMultiplierPerLevel []float64
+
+	// CompactionTotalSize limits the total size of 'sorted tables' for each
+	// level. The limit for each level will be calculated as:
+	//	CompactionTotalSize * (CompactionTotalSizeMultiplier ^ Level)
+	// The multiplier for each level can also be fine-tuned using
+	// CompactionTotalSizeMultiplierPerLevel.
+ //
+ // The default value is 10MiB.
+ CompactionTotalSize int
+
+ // CompactionTotalSizeMultiplier defines multiplier for CompactionTotalSize.
+ //
+ // The default value is 10.
+ CompactionTotalSizeMultiplier float64
+
+ // CompactionTotalSizeMultiplierPerLevel defines per-level multiplier for
+ // CompactionTotalSize.
+ // Use zero to skip a level.
+ //
+ // The default value is nil.
+ CompactionTotalSizeMultiplierPerLevel []float64
+
+ // Comparer defines a total ordering over the space of []byte keys: a 'less
+ // than' relationship. The same comparison algorithm must be used for reads
+ // and writes over the lifetime of the DB.
+ //
+ // The default value uses the same ordering as bytes.Compare.
+ Comparer comparer.Comparer
+
+ // Compression defines the 'sorted table' block compression to use.
+ //
+ // The default value (DefaultCompression) uses snappy compression.
+ Compression Compression
+
+	// DisableBufferPool allows disabling use of the util.BufferPool functionality.
+ //
+ // The default value is false.
+ DisableBufferPool bool
+
+	// DisableBlockCache allows disabling use of the cache.Cache functionality
+	// for 'sorted table' blocks.
+ //
+ // The default value is false.
+ DisableBlockCache bool
+
+	// DisableCompactionBackoff allows disabling compaction retry backoff.
+ //
+ // The default value is false.
+ DisableCompactionBackoff bool
+
+	// DisableLargeBatchTransaction allows disabling the switch-to-transaction
+	// mode on large batch writes. When not disabled, batch writes larger than
+	// WriteBuffer will use a transaction.
+ //
+ // The default is false.
+ DisableLargeBatchTransaction bool
+
+	// ErrorIfExist defines whether an error should be returned if the DB
+	// already exists.
+ //
+ // The default value is false.
+ ErrorIfExist bool
+
+	// ErrorIfMissing defines whether an error should be returned if the DB is
+ // missing. If false then the database will be created if missing, otherwise
+ // an error will be returned.
+ //
+ // The default value is false.
+ ErrorIfMissing bool
+
+	// Filter defines an 'effective filter' to use. An 'effective filter',
+	// if defined, will be used to generate per-table filter blocks.
+ // The filter name will be stored on disk.
+ // During reads LevelDB will try to find matching filter from
+ // 'effective filter' and 'alternative filters'.
+ //
+	// Filter can be changed after a DB has been created. It is recommended
+	// to put the old filter into the 'alternative filters' to mitigate the
+	// lack of a matching filter during the transition period.
+ //
+ // A filter is used to reduce disk reads when looking for a specific key.
+ //
+ // The default value is nil.
+ Filter filter.Filter
+
+	// IteratorSamplingRate defines the approximate gap (in bytes) between
+	// read samples taken by an iterator. The samples are used to determine
+	// when compaction should be triggered.
+ //
+ // The default is 1MiB.
+ IteratorSamplingRate int
+
+	// NoSync allows completely disabling fsync.
+ //
+ // The default is false.
+ NoSync bool
+
+ // NoWriteMerge allows disabling write merge.
+ //
+ // The default is false.
+ NoWriteMerge bool
+
+	// OpenFilesCacher provides the cache algorithm for open files caching.
+	// Specify NoCacher to disable caching.
+ //
+ // The default value is LRUCacher.
+ OpenFilesCacher Cacher
+
+ // OpenFilesCacheCapacity defines the capacity of the open files caching.
+	// Use -1 for zero capacity, which has the same effect as specifying
+	// NoCacher for OpenFilesCacher.
+ //
+ // The default value is 500.
+ OpenFilesCacheCapacity int
+
+	// If true then the DB is opened in read-only mode.
+ //
+ // The default value is false.
+ ReadOnly bool
+
+ // Strict defines the DB strict level.
+ Strict Strict
+
+	// WriteBuffer defines the maximum size of a 'memdb' before it is flushed
+	// to a 'sorted table'. 'memdb' is an in-memory DB backed by an on-disk
+	// unsorted journal.
+	//
+	// LevelDB may hold up to two 'memdb' instances at the same time.
+ //
+ // The default value is 4MiB.
+ WriteBuffer int
+
+	// WriteL0PauseTrigger defines the number of 'sorted tables' at level-0
+	// that will pause writes.
+ //
+ // The default value is 12.
+ WriteL0PauseTrigger int
+
+	// WriteL0SlowdownTrigger defines the number of 'sorted tables' at level-0
+	// that will trigger write slowdown.
+ //
+ // The default value is 8.
+ WriteL0SlowdownTrigger int
+}
+
+func (o *Options) GetAltFilters() []filter.Filter {
+ if o == nil {
+ return nil
+ }
+ return o.AltFilters
+}
+
+func (o *Options) GetBlockCacher() Cacher {
+ if o == nil || o.BlockCacher == nil {
+ return DefaultBlockCacher
+ } else if o.BlockCacher == NoCacher {
+ return nil
+ }
+ return o.BlockCacher
+}
+
+func (o *Options) GetBlockCacheCapacity() int {
+ if o == nil || o.BlockCacheCapacity == 0 {
+ return DefaultBlockCacheCapacity
+ } else if o.BlockCacheCapacity < 0 {
+ return 0
+ }
+ return o.BlockCacheCapacity
+}
+
+func (o *Options) GetBlockRestartInterval() int {
+ if o == nil || o.BlockRestartInterval <= 0 {
+ return DefaultBlockRestartInterval
+ }
+ return o.BlockRestartInterval
+}
+
+func (o *Options) GetBlockSize() int {
+ if o == nil || o.BlockSize <= 0 {
+ return DefaultBlockSize
+ }
+ return o.BlockSize
+}
+
+func (o *Options) GetCompactionExpandLimit(level int) int {
+ factor := DefaultCompactionExpandLimitFactor
+ if o != nil && o.CompactionExpandLimitFactor > 0 {
+ factor = o.CompactionExpandLimitFactor
+ }
+ return o.GetCompactionTableSize(level+1) * factor
+}
+
+func (o *Options) GetCompactionGPOverlaps(level int) int {
+ factor := DefaultCompactionGPOverlapsFactor
+ if o != nil && o.CompactionGPOverlapsFactor > 0 {
+ factor = o.CompactionGPOverlapsFactor
+ }
+ return o.GetCompactionTableSize(level+2) * factor
+}
+
+func (o *Options) GetCompactionL0Trigger() int {
+ if o == nil || o.CompactionL0Trigger == 0 {
+ return DefaultCompactionL0Trigger
+ }
+ return o.CompactionL0Trigger
+}
+
+func (o *Options) GetCompactionSourceLimit(level int) int {
+ factor := DefaultCompactionSourceLimitFactor
+ if o != nil && o.CompactionSourceLimitFactor > 0 {
+ factor = o.CompactionSourceLimitFactor
+ }
+ return o.GetCompactionTableSize(level+1) * factor
+}
+
+func (o *Options) GetCompactionTableSize(level int) int {
+ var (
+ base = DefaultCompactionTableSize
+ mult float64
+ )
+ if o != nil {
+ if o.CompactionTableSize > 0 {
+ base = o.CompactionTableSize
+ }
+ if level < len(o.CompactionTableSizeMultiplierPerLevel) && o.CompactionTableSizeMultiplierPerLevel[level] > 0 {
+ mult = o.CompactionTableSizeMultiplierPerLevel[level]
+ } else if o.CompactionTableSizeMultiplier > 0 {
+ mult = math.Pow(o.CompactionTableSizeMultiplier, float64(level))
+ }
+ }
+ if mult == 0 {
+ mult = math.Pow(DefaultCompactionTableSizeMultiplier, float64(level))
+ }
+ return int(float64(base) * mult)
+}
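+
+// Worked example: with the defaults (base 2 MiB, multiplier 1.0) every level
+// has a 2 MiB table size limit; with CompactionTableSizeMultiplier = 2.0 the
+// limits would instead be 2 MiB * 2^level, i.e. 2 MiB at level 0, 4 MiB at
+// level 1, 8 MiB at level 2, and so on.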
+
+func (o *Options) GetCompactionTotalSize(level int) int64 {
+ var (
+ base = DefaultCompactionTotalSize
+ mult float64
+ )
+ if o != nil {
+ if o.CompactionTotalSize > 0 {
+ base = o.CompactionTotalSize
+ }
+ if level < len(o.CompactionTotalSizeMultiplierPerLevel) && o.CompactionTotalSizeMultiplierPerLevel[level] > 0 {
+ mult = o.CompactionTotalSizeMultiplierPerLevel[level]
+ } else if o.CompactionTotalSizeMultiplier > 0 {
+ mult = math.Pow(o.CompactionTotalSizeMultiplier, float64(level))
+ }
+ }
+ if mult == 0 {
+ mult = math.Pow(DefaultCompactionTotalSizeMultiplier, float64(level))
+ }
+ return int64(float64(base) * mult)
+}
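+
+// Worked example: with the defaults (base 10 MiB, multiplier 10.0) the total
+// size limits grow as 10 MiB * 10^level, i.e. 100 MiB at level 1, 1000 MiB
+// at level 2, and so on.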
+
+func (o *Options) GetComparer() comparer.Comparer {
+ if o == nil || o.Comparer == nil {
+ return comparer.DefaultComparer
+ }
+ return o.Comparer
+}
+
+func (o *Options) GetCompression() Compression {
+ if o == nil || o.Compression <= DefaultCompression || o.Compression >= nCompression {
+ return DefaultCompressionType
+ }
+ return o.Compression
+}
+
+func (o *Options) GetDisableBufferPool() bool {
+ if o == nil {
+ return false
+ }
+ return o.DisableBufferPool
+}
+
+func (o *Options) GetDisableBlockCache() bool {
+ if o == nil {
+ return false
+ }
+ return o.DisableBlockCache
+}
+
+func (o *Options) GetDisableCompactionBackoff() bool {
+ if o == nil {
+ return false
+ }
+ return o.DisableCompactionBackoff
+}
+
+func (o *Options) GetDisableLargeBatchTransaction() bool {
+ if o == nil {
+ return false
+ }
+ return o.DisableLargeBatchTransaction
+}
+
+func (o *Options) GetErrorIfExist() bool {
+ if o == nil {
+ return false
+ }
+ return o.ErrorIfExist
+}
+
+func (o *Options) GetErrorIfMissing() bool {
+ if o == nil {
+ return false
+ }
+ return o.ErrorIfMissing
+}
+
+func (o *Options) GetFilter() filter.Filter {
+ if o == nil {
+ return nil
+ }
+ return o.Filter
+}
+
+func (o *Options) GetIteratorSamplingRate() int {
+ if o == nil || o.IteratorSamplingRate <= 0 {
+ return DefaultIteratorSamplingRate
+ }
+ return o.IteratorSamplingRate
+}
+
+func (o *Options) GetNoSync() bool {
+ if o == nil {
+ return false
+ }
+ return o.NoSync
+}
+
+func (o *Options) GetNoWriteMerge() bool {
+ if o == nil {
+ return false
+ }
+ return o.NoWriteMerge
+}
+
+func (o *Options) GetOpenFilesCacher() Cacher {
+ if o == nil || o.OpenFilesCacher == nil {
+ return DefaultOpenFilesCacher
+ }
+ if o.OpenFilesCacher == NoCacher {
+ return nil
+ }
+ return o.OpenFilesCacher
+}
+
+func (o *Options) GetOpenFilesCacheCapacity() int {
+ if o == nil || o.OpenFilesCacheCapacity == 0 {
+ return DefaultOpenFilesCacheCapacity
+ } else if o.OpenFilesCacheCapacity < 0 {
+ return 0
+ }
+ return o.OpenFilesCacheCapacity
+}
+
+func (o *Options) GetReadOnly() bool {
+ if o == nil {
+ return false
+ }
+ return o.ReadOnly
+}
+
+func (o *Options) GetStrict(strict Strict) bool {
+ if o == nil || o.Strict == 0 {
+ return DefaultStrict&strict != 0
+ }
+ return o.Strict&strict != 0
+}
+
+func (o *Options) GetWriteBuffer() int {
+ if o == nil || o.WriteBuffer <= 0 {
+ return DefaultWriteBuffer
+ }
+ return o.WriteBuffer
+}
+
+func (o *Options) GetWriteL0PauseTrigger() int {
+ if o == nil || o.WriteL0PauseTrigger == 0 {
+ return DefaultWriteL0PauseTrigger
+ }
+ return o.WriteL0PauseTrigger
+}
+
+func (o *Options) GetWriteL0SlowdownTrigger() int {
+ if o == nil || o.WriteL0SlowdownTrigger == 0 {
+ return DefaultWriteL0SlowdownTrigger
+ }
+ return o.WriteL0SlowdownTrigger
+}
+
+// ReadOptions holds the optional parameters for 'read operation'. The
+// 'read operation' includes Get, Find and NewIterator.
+type ReadOptions struct {
+ // DontFillCache defines whether block reads for this 'read operation'
+ // should be cached. If false then the block will be cached. This does
+	// not affect already cached blocks.
+ //
+ // The default value is false.
+ DontFillCache bool
+
+	// Strict will be OR'ed with the global DB 'strict level' unless
+	// StrictOverride is present. Currently only StrictReader has an effect
+	// here.
+ Strict Strict
+}
+
+func (ro *ReadOptions) GetDontFillCache() bool {
+ if ro == nil {
+ return false
+ }
+ return ro.DontFillCache
+}
+
+func (ro *ReadOptions) GetStrict(strict Strict) bool {
+ if ro == nil {
+ return false
+ }
+ return ro.Strict&strict != 0
+}
+
+// WriteOptions holds the optional parameters for 'write operation'. The
+// 'write operation' includes Write, Put and Delete.
+type WriteOptions struct {
+ // NoWriteMerge allows disabling write merge.
+ //
+ // The default is false.
+ NoWriteMerge bool
+
+ // Sync is whether to sync underlying writes from the OS buffer cache
+ // through to actual disk, if applicable. Setting Sync can result in
+ // slower writes.
+ //
+ // If false, and the machine crashes, then some recent writes may be lost.
+ // Note that if it is just the process that crashes (and the machine does
+ // not) then no writes will be lost.
+ //
+ // In other words, Sync being false has the same semantics as a write
+ // system call. Sync being true means write followed by fsync.
+ //
+ // The default value is false.
+ Sync bool
+}
+
+func (wo *WriteOptions) GetNoWriteMerge() bool {
+ if wo == nil {
+ return false
+ }
+ return wo.NoWriteMerge
+}
+
+func (wo *WriteOptions) GetSync() bool {
+ if wo == nil {
+ return false
+ }
+ return wo.Sync
+}
+
+func GetStrict(o *Options, ro *ReadOptions, strict Strict) bool {
+	if ro.GetStrict(StrictOverride) {
+		return ro.GetStrict(strict)
+	}
+	return o.GetStrict(strict) || ro.GetStrict(strict)
+}
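+
+// Precedence sketch (editor's illustration): the per-read StrictOverride
+// flag wins; otherwise global and per-read flags are OR'ed.
+//
+//	o := &Options{Strict: StrictReader}
+//	ro := &ReadOptions{Strict: StrictOverride} // override set, StrictReader not
+//	GetStrict(o, ro, StrictReader)             // false
+//	GetStrict(o, &ReadOptions{}, StrictReader) // true, from o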
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/options.go b/vendor/github.com/syndtr/goleveldb/leveldb/options.go
new file mode 100644
index 000000000..b072b1ac4
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/options.go
@@ -0,0 +1,107 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/filter"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+)
+
+func dupOptions(o *opt.Options) *opt.Options {
+ newo := &opt.Options{}
+ if o != nil {
+ *newo = *o
+ }
+ if newo.Strict == 0 {
+ newo.Strict = opt.DefaultStrict
+ }
+ return newo
+}
+
+func (s *session) setOptions(o *opt.Options) {
+ no := dupOptions(o)
+ // Alternative filters.
+ if filters := o.GetAltFilters(); len(filters) > 0 {
+ no.AltFilters = make([]filter.Filter, len(filters))
+ for i, filter := range filters {
+ no.AltFilters[i] = &iFilter{filter}
+ }
+ }
+ // Comparer.
+ s.icmp = &iComparer{o.GetComparer()}
+ no.Comparer = s.icmp
+ // Filter.
+ if filter := o.GetFilter(); filter != nil {
+ no.Filter = &iFilter{filter}
+ }
+
+ s.o = &cachedOptions{Options: no}
+ s.o.cache()
+}
+
+const optCachedLevel = 7
+
+type cachedOptions struct {
+ *opt.Options
+
+ compactionExpandLimit []int
+ compactionGPOverlaps []int
+ compactionSourceLimit []int
+ compactionTableSize []int
+ compactionTotalSize []int64
+}
+
+func (co *cachedOptions) cache() {
+ co.compactionExpandLimit = make([]int, optCachedLevel)
+ co.compactionGPOverlaps = make([]int, optCachedLevel)
+ co.compactionSourceLimit = make([]int, optCachedLevel)
+ co.compactionTableSize = make([]int, optCachedLevel)
+ co.compactionTotalSize = make([]int64, optCachedLevel)
+
+ for level := 0; level < optCachedLevel; level++ {
+ co.compactionExpandLimit[level] = co.Options.GetCompactionExpandLimit(level)
+ co.compactionGPOverlaps[level] = co.Options.GetCompactionGPOverlaps(level)
+ co.compactionSourceLimit[level] = co.Options.GetCompactionSourceLimit(level)
+ co.compactionTableSize[level] = co.Options.GetCompactionTableSize(level)
+ co.compactionTotalSize[level] = co.Options.GetCompactionTotalSize(level)
+ }
+}
+
+func (co *cachedOptions) GetCompactionExpandLimit(level int) int {
+ if level < optCachedLevel {
+ return co.compactionExpandLimit[level]
+ }
+ return co.Options.GetCompactionExpandLimit(level)
+}
+
+func (co *cachedOptions) GetCompactionGPOverlaps(level int) int {
+ if level < optCachedLevel {
+ return co.compactionGPOverlaps[level]
+ }
+ return co.Options.GetCompactionGPOverlaps(level)
+}
+
+func (co *cachedOptions) GetCompactionSourceLimit(level int) int {
+ if level < optCachedLevel {
+ return co.compactionSourceLimit[level]
+ }
+ return co.Options.GetCompactionSourceLimit(level)
+}
+
+func (co *cachedOptions) GetCompactionTableSize(level int) int {
+ if level < optCachedLevel {
+ return co.compactionTableSize[level]
+ }
+ return co.Options.GetCompactionTableSize(level)
+}
+
+func (co *cachedOptions) GetCompactionTotalSize(level int) int64 {
+ if level < optCachedLevel {
+ return co.compactionTotalSize[level]
+ }
+ return co.Options.GetCompactionTotalSize(level)
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session.go b/vendor/github.com/syndtr/goleveldb/leveldb/session.go
new file mode 100644
index 000000000..f3e747701
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/session.go
@@ -0,0 +1,208 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "sync"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/journal"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+// ErrManifestCorrupted records manifest corruption. This error will be
+// wrapped with errors.ErrCorrupted.
+type ErrManifestCorrupted struct {
+ Field string
+ Reason string
+}
+
+func (e *ErrManifestCorrupted) Error() string {
+ return fmt.Sprintf("leveldb: manifest corrupted (field '%s'): %s", e.Field, e.Reason)
+}
+
+func newErrManifestCorrupted(fd storage.FileDesc, field, reason string) error {
+ return errors.NewErrCorrupted(fd, &ErrManifestCorrupted{field, reason})
+}
+
+// session represents a persistent database session.
+type session struct {
+ // Need 64-bit alignment.
+ stNextFileNum int64 // current unused file number
+ stJournalNum int64 // current journal file number; need external synchronization
+ stPrevJournalNum int64 // prev journal file number; no longer used; for compatibility with older version of leveldb
+ stTempFileNum int64
+ stSeqNum uint64 // last mem compacted seq; need external synchronization
+
+ stor storage.Storage
+ storLock storage.Locker
+ o *cachedOptions
+ icmp *iComparer
+ tops *tOps
+
+ manifest *journal.Writer
+ manifestWriter storage.Writer
+ manifestFd storage.FileDesc
+
+ stCompPtrs []internalKey // compaction pointers; need external synchronization
+ stVersion *version // current version
+ vmu sync.Mutex
+}
+
+// newSession creates a new initialized session instance.
+func newSession(stor storage.Storage, o *opt.Options) (s *session, err error) {
+ if stor == nil {
+ return nil, os.ErrInvalid
+ }
+ storLock, err := stor.Lock()
+ if err != nil {
+ return
+ }
+ s = &session{
+ stor: stor,
+ storLock: storLock,
+ }
+ s.setOptions(o)
+ s.tops = newTableOps(s)
+ s.setVersion(newVersion(s))
+ s.log("log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed")
+ return
+}
+
+// Close session.
+func (s *session) close() {
+ s.tops.close()
+ if s.manifest != nil {
+ s.manifest.Close()
+ }
+ if s.manifestWriter != nil {
+ s.manifestWriter.Close()
+ }
+ s.manifest = nil
+ s.manifestWriter = nil
+ s.setVersion(&version{s: s, closing: true})
+}
+
+// Release session lock.
+func (s *session) release() {
+ s.storLock.Unlock()
+}
+
+// Create a new database session; need external synchronization.
+func (s *session) create() error {
+ // create manifest
+ return s.newManifest(nil, nil)
+}
+
+// Recover a database session; need external synchronization.
+func (s *session) recover() (err error) {
+ defer func() {
+ if os.IsNotExist(err) {
+			// Don't return os.ErrNotExist if the underlying storage contains
+			// other files that belong to LevelDB, so the DB won't get trashed.
+ if fds, _ := s.stor.List(storage.TypeAll); len(fds) > 0 {
+ err = &errors.ErrCorrupted{Fd: storage.FileDesc{Type: storage.TypeManifest}, Err: &errors.ErrMissingFiles{}}
+ }
+ }
+ }()
+
+ fd, err := s.stor.GetMeta()
+ if err != nil {
+ return
+ }
+
+ reader, err := s.stor.Open(fd)
+ if err != nil {
+ return
+ }
+ defer reader.Close()
+
+ var (
+ // Options.
+ strict = s.o.GetStrict(opt.StrictManifest)
+
+ jr = journal.NewReader(reader, dropper{s, fd}, strict, true)
+ rec = &sessionRecord{}
+ staging = s.stVersion.newStaging()
+ )
+ for {
+ var r io.Reader
+ r, err = jr.Next()
+ if err != nil {
+ if err == io.EOF {
+ err = nil
+ break
+ }
+ return errors.SetFd(err, fd)
+ }
+
+ err = rec.decode(r)
+ if err == nil {
+ // save compact pointers
+ for _, r := range rec.compPtrs {
+ s.setCompPtr(r.level, internalKey(r.ikey))
+ }
+ // commit record to version staging
+ staging.commit(rec)
+ } else {
+ err = errors.SetFd(err, fd)
+ if strict || !errors.IsCorrupted(err) {
+ return
+ }
+ s.logf("manifest error: %v (skipped)", errors.SetFd(err, fd))
+ }
+ rec.resetCompPtrs()
+ rec.resetAddedTables()
+ rec.resetDeletedTables()
+ }
+
+ switch {
+ case !rec.has(recComparer):
+ return newErrManifestCorrupted(fd, "comparer", "missing")
+ case rec.comparer != s.icmp.uName():
+ return newErrManifestCorrupted(fd, "comparer", fmt.Sprintf("mismatch: want '%s', got '%s'", s.icmp.uName(), rec.comparer))
+ case !rec.has(recNextFileNum):
+ return newErrManifestCorrupted(fd, "next-file-num", "missing")
+ case !rec.has(recJournalNum):
+ return newErrManifestCorrupted(fd, "journal-file-num", "missing")
+ case !rec.has(recSeqNum):
+ return newErrManifestCorrupted(fd, "seq-num", "missing")
+ }
+
+ s.manifestFd = fd
+ s.setVersion(staging.finish())
+ s.setNextFileNum(rec.nextFileNum)
+ s.recordCommited(rec)
+ return nil
+}
+
+// Commit session; need external synchronization.
+func (s *session) commit(r *sessionRecord) (err error) {
+ v := s.version()
+ defer v.release()
+
+ // spawn new version based on current version
+ nv := v.spawn(r)
+
+ if s.manifest == nil {
+ // manifest journal writer not yet created, create one
+ err = s.newManifest(r, nv)
+ } else {
+ err = s.flushManifest(r)
+ }
+
+	// finally, apply the new version if no error arose
+ if err == nil {
+ s.setVersion(nv)
+ }
+
+ return
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go b/vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go
new file mode 100644
index 000000000..089cd00b2
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go
@@ -0,0 +1,302 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "sync/atomic"
+
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/memdb"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+)
+
+func (s *session) pickMemdbLevel(umin, umax []byte, maxLevel int) int {
+ v := s.version()
+ defer v.release()
+ return v.pickMemdbLevel(umin, umax, maxLevel)
+}
+
+func (s *session) flushMemdb(rec *sessionRecord, mdb *memdb.DB, maxLevel int) (int, error) {
+ // Create sorted table.
+ iter := mdb.NewIterator(nil)
+ defer iter.Release()
+ t, n, err := s.tops.createFrom(iter)
+ if err != nil {
+ return 0, err
+ }
+
+	// Picking a level other than zero can cause compaction issues with large
+	// bulk inserts and deletes on a strictly incrementing key-space. The
+	// problem is that small deletion markers get trapped at a lower level,
+	// while key/value entries keep growing at a higher level. Since the
+	// key-space is strictly incrementing, it does not overlap with the
+	// higher level, so the maximum possible level is always picked, while
+	// overlapping deletion markers are pushed into a lower level.
+ // See: https://github.com/syndtr/goleveldb/issues/127.
+ flushLevel := s.pickMemdbLevel(t.imin.ukey(), t.imax.ukey(), maxLevel)
+ rec.addTableFile(flushLevel, t)
+
+ s.logf("memdb@flush created L%d@%d N·%d S·%s %q:%q", flushLevel, t.fd.Num, n, shortenb(int(t.size)), t.imin, t.imax)
+ return flushLevel, nil
+}
+
+// Pick a compaction based on current state; need external synchronization.
+func (s *session) pickCompaction() *compaction {
+ v := s.version()
+
+ var sourceLevel int
+ var t0 tFiles
+ if v.cScore >= 1 {
+ sourceLevel = v.cLevel
+ cptr := s.getCompPtr(sourceLevel)
+ tables := v.levels[sourceLevel]
+ for _, t := range tables {
+ if cptr == nil || s.icmp.Compare(t.imax, cptr) > 0 {
+ t0 = append(t0, t)
+ break
+ }
+ }
+ if len(t0) == 0 {
+ t0 = append(t0, tables[0])
+ }
+ } else {
+ if p := atomic.LoadPointer(&v.cSeek); p != nil {
+ ts := (*tSet)(p)
+ sourceLevel = ts.level
+ t0 = append(t0, ts.table)
+ } else {
+ v.release()
+ return nil
+ }
+ }
+
+ return newCompaction(s, v, sourceLevel, t0)
+}
+
+// Create compaction from given level and range; need external synchronization.
+func (s *session) getCompactionRange(sourceLevel int, umin, umax []byte, noLimit bool) *compaction {
+ v := s.version()
+
+ if sourceLevel >= len(v.levels) {
+ v.release()
+ return nil
+ }
+
+ t0 := v.levels[sourceLevel].getOverlaps(nil, s.icmp, umin, umax, sourceLevel == 0)
+ if len(t0) == 0 {
+ v.release()
+ return nil
+ }
+
+ // Avoid compacting too much in one shot in case the range is large.
+ // But we cannot do this for level-0 since level-0 files can overlap
+ // and we must not pick one file and drop another older file if the
+ // two files overlap.
+ if !noLimit && sourceLevel > 0 {
+ limit := int64(v.s.o.GetCompactionSourceLimit(sourceLevel))
+ total := int64(0)
+ for i, t := range t0 {
+ total += t.size
+ if total >= limit {
+ s.logf("table@compaction limiting F·%d -> F·%d", len(t0), i+1)
+ t0 = t0[:i+1]
+ break
+ }
+ }
+ }
+
+ return newCompaction(s, v, sourceLevel, t0)
+}
+
+func newCompaction(s *session, v *version, sourceLevel int, t0 tFiles) *compaction {
+ c := &compaction{
+ s: s,
+ v: v,
+ sourceLevel: sourceLevel,
+ levels: [2]tFiles{t0, nil},
+ maxGPOverlaps: int64(s.o.GetCompactionGPOverlaps(sourceLevel)),
+ tPtrs: make([]int, len(v.levels)),
+ }
+ c.expand()
+ c.save()
+ return c
+}
+
+// compaction represents a compaction state.
+type compaction struct {
+ s *session
+ v *version
+
+ sourceLevel int
+ levels [2]tFiles
+ maxGPOverlaps int64
+
+ gp tFiles
+ gpi int
+ seenKey bool
+ gpOverlappedBytes int64
+ imin, imax internalKey
+ tPtrs []int
+ released bool
+
+ snapGPI int
+ snapSeenKey bool
+ snapGPOverlappedBytes int64
+ snapTPtrs []int
+}
+
+func (c *compaction) save() {
+ c.snapGPI = c.gpi
+ c.snapSeenKey = c.seenKey
+ c.snapGPOverlappedBytes = c.gpOverlappedBytes
+ c.snapTPtrs = append(c.snapTPtrs[:0], c.tPtrs...)
+}
+
+func (c *compaction) restore() {
+ c.gpi = c.snapGPI
+ c.seenKey = c.snapSeenKey
+ c.gpOverlappedBytes = c.snapGPOverlappedBytes
+ c.tPtrs = append(c.tPtrs[:0], c.snapTPtrs...)
+}
+
+func (c *compaction) release() {
+ if !c.released {
+ c.released = true
+ c.v.release()
+ }
+}
+
+// Expand compacted tables; need external synchronization.
+func (c *compaction) expand() {
+ limit := int64(c.s.o.GetCompactionExpandLimit(c.sourceLevel))
+ vt0 := c.v.levels[c.sourceLevel]
+ vt1 := tFiles{}
+ if level := c.sourceLevel + 1; level < len(c.v.levels) {
+ vt1 = c.v.levels[level]
+ }
+
+ t0, t1 := c.levels[0], c.levels[1]
+ imin, imax := t0.getRange(c.s.icmp)
+	// We expand t0 here just in case a ukey hops across tables.
+ t0 = vt0.getOverlaps(t0, c.s.icmp, imin.ukey(), imax.ukey(), c.sourceLevel == 0)
+ if len(t0) != len(c.levels[0]) {
+ imin, imax = t0.getRange(c.s.icmp)
+ }
+ t1 = vt1.getOverlaps(t1, c.s.icmp, imin.ukey(), imax.ukey(), false)
+ // Get entire range covered by compaction.
+ amin, amax := append(t0, t1...).getRange(c.s.icmp)
+
+ // See if we can grow the number of inputs in "sourceLevel" without
+ // changing the number of "sourceLevel+1" files we pick up.
+ if len(t1) > 0 {
+ exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), c.sourceLevel == 0)
+ if len(exp0) > len(t0) && t1.size()+exp0.size() < limit {
+ xmin, xmax := exp0.getRange(c.s.icmp)
+ exp1 := vt1.getOverlaps(nil, c.s.icmp, xmin.ukey(), xmax.ukey(), false)
+ if len(exp1) == len(t1) {
+ c.s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)",
+ c.sourceLevel, c.sourceLevel+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())),
+ len(exp0), shortenb(int(exp0.size())), len(exp1), shortenb(int(exp1.size())))
+ imin, imax = xmin, xmax
+ t0, t1 = exp0, exp1
+ amin, amax = append(t0, t1...).getRange(c.s.icmp)
+ }
+ }
+ }
+
+ // Compute the set of grandparent files that overlap this compaction
+ // (parent == sourceLevel+1; grandparent == sourceLevel+2)
+ if level := c.sourceLevel + 2; level < len(c.v.levels) {
+ c.gp = c.v.levels[level].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false)
+ }
+
+ c.levels[0], c.levels[1] = t0, t1
+ c.imin, c.imax = imin, imax
+}
+
+// Check whether compaction is trivial.
+func (c *compaction) trivial() bool {
+ return len(c.levels[0]) == 1 && len(c.levels[1]) == 0 && c.gp.size() <= c.maxGPOverlaps
+}
+
+func (c *compaction) baseLevelForKey(ukey []byte) bool {
+ for level := c.sourceLevel + 2; level < len(c.v.levels); level++ {
+ tables := c.v.levels[level]
+ for c.tPtrs[level] < len(tables) {
+ t := tables[c.tPtrs[level]]
+ if c.s.icmp.uCompare(ukey, t.imax.ukey()) <= 0 {
+ // We've advanced far enough.
+ if c.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 {
+ // Key falls in this file's range, so definitely not base level.
+ return false
+ }
+ break
+ }
+ c.tPtrs[level]++
+ }
+ }
+ return true
+}
+
+func (c *compaction) shouldStopBefore(ikey internalKey) bool {
+ for ; c.gpi < len(c.gp); c.gpi++ {
+ gp := c.gp[c.gpi]
+ if c.s.icmp.Compare(ikey, gp.imax) <= 0 {
+ break
+ }
+ if c.seenKey {
+ c.gpOverlappedBytes += gp.size
+ }
+ }
+ c.seenKey = true
+
+ if c.gpOverlappedBytes > c.maxGPOverlaps {
+ // Too much overlap for current output; start new output.
+ c.gpOverlappedBytes = 0
+ return true
+ }
+ return false
+}
+
+// Creates an iterator.
+func (c *compaction) newIterator() iterator.Iterator {
+ // Creates iterator slice.
+ icap := len(c.levels)
+ if c.sourceLevel == 0 {
+ // Special case for level-0.
+ icap = len(c.levels[0]) + 1
+ }
+ its := make([]iterator.Iterator, 0, icap)
+
+ // Options.
+ ro := &opt.ReadOptions{
+ DontFillCache: true,
+ Strict: opt.StrictOverride,
+ }
+ strict := c.s.o.GetStrict(opt.StrictCompaction)
+ if strict {
+ ro.Strict |= opt.StrictReader
+ }
+
+ for i, tables := range c.levels {
+ if len(tables) == 0 {
+ continue
+ }
+
+		// Level-0 tables are not sorted and may overlap each other.
+ if c.sourceLevel+i == 0 {
+ for _, t := range tables {
+ its = append(its, c.s.tops.newIterator(t, nil, ro))
+ }
+ } else {
+ it := iterator.NewIndexedIterator(tables.newIndexIterator(c.s.tops, c.s.icmp, nil, ro), strict)
+ its = append(its, it)
+ }
+ }
+
+ return iterator.NewMergedIterator(its, c.s.icmp, strict)
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session_record.go b/vendor/github.com/syndtr/goleveldb/leveldb/session_record.go
new file mode 100644
index 000000000..854e1aa6f
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/session_record.go
@@ -0,0 +1,323 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "bufio"
+ "encoding/binary"
+ "io"
+ "strings"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+type byteReader interface {
+ io.Reader
+ io.ByteReader
+}
+
+// These numbers are written to disk and should not be changed.
+const (
+ recComparer = 1
+ recJournalNum = 2
+ recNextFileNum = 3
+ recSeqNum = 4
+ recCompPtr = 5
+ recDelTable = 6
+ recAddTable = 7
+ // 8 was used for large value refs
+ recPrevJournalNum = 9
+)
+
+type cpRecord struct {
+ level int
+ ikey internalKey
+}
+
+type atRecord struct {
+ level int
+ num int64
+ size int64
+ imin internalKey
+ imax internalKey
+}
+
+type dtRecord struct {
+ level int
+ num int64
+}
+
+type sessionRecord struct {
+ hasRec int
+ comparer string
+ journalNum int64
+ prevJournalNum int64
+ nextFileNum int64
+ seqNum uint64
+ compPtrs []cpRecord
+ addedTables []atRecord
+ deletedTables []dtRecord
+
+ scratch [binary.MaxVarintLen64]byte
+ err error
+}
+
+func (p *sessionRecord) has(rec int) bool {
+ return p.hasRec&(1<<uint(rec)) != 0
+}
+
+func (p *sessionRecord) setComparer(name string) {
+ p.hasRec |= 1 << recComparer
+ p.comparer = name
+}
+
+func (p *sessionRecord) setJournalNum(num int64) {
+ p.hasRec |= 1 << recJournalNum
+ p.journalNum = num
+}
+
+func (p *sessionRecord) setPrevJournalNum(num int64) {
+ p.hasRec |= 1 << recPrevJournalNum
+ p.prevJournalNum = num
+}
+
+func (p *sessionRecord) setNextFileNum(num int64) {
+ p.hasRec |= 1 << recNextFileNum
+ p.nextFileNum = num
+}
+
+func (p *sessionRecord) setSeqNum(num uint64) {
+ p.hasRec |= 1 << recSeqNum
+ p.seqNum = num
+}
+
+func (p *sessionRecord) addCompPtr(level int, ikey internalKey) {
+ p.hasRec |= 1 << recCompPtr
+ p.compPtrs = append(p.compPtrs, cpRecord{level, ikey})
+}
+
+func (p *sessionRecord) resetCompPtrs() {
+ p.hasRec &= ^(1 << recCompPtr)
+ p.compPtrs = p.compPtrs[:0]
+}
+
+func (p *sessionRecord) addTable(level int, num, size int64, imin, imax internalKey) {
+ p.hasRec |= 1 << recAddTable
+ p.addedTables = append(p.addedTables, atRecord{level, num, size, imin, imax})
+}
+
+func (p *sessionRecord) addTableFile(level int, t *tFile) {
+ p.addTable(level, t.fd.Num, t.size, t.imin, t.imax)
+}
+
+func (p *sessionRecord) resetAddedTables() {
+ p.hasRec &= ^(1 << recAddTable)
+ p.addedTables = p.addedTables[:0]
+}
+
+func (p *sessionRecord) delTable(level int, num int64) {
+ p.hasRec |= 1 << recDelTable
+ p.deletedTables = append(p.deletedTables, dtRecord{level, num})
+}
+
+func (p *sessionRecord) resetDeletedTables() {
+ p.hasRec &= ^(1 << recDelTable)
+ p.deletedTables = p.deletedTables[:0]
+}
+
+func (p *sessionRecord) putUvarint(w io.Writer, x uint64) {
+ if p.err != nil {
+ return
+ }
+ n := binary.PutUvarint(p.scratch[:], x)
+ _, p.err = w.Write(p.scratch[:n])
+}
+
+func (p *sessionRecord) putVarint(w io.Writer, x int64) {
+ if x < 0 {
+ panic("invalid negative value")
+ }
+ p.putUvarint(w, uint64(x))
+}
+
+func (p *sessionRecord) putBytes(w io.Writer, x []byte) {
+ if p.err != nil {
+ return
+ }
+ p.putUvarint(w, uint64(len(x)))
+ if p.err != nil {
+ return
+ }
+ _, p.err = w.Write(x)
+}
+
+func (p *sessionRecord) encode(w io.Writer) error {
+ p.err = nil
+ if p.has(recComparer) {
+ p.putUvarint(w, recComparer)
+ p.putBytes(w, []byte(p.comparer))
+ }
+ if p.has(recJournalNum) {
+ p.putUvarint(w, recJournalNum)
+ p.putVarint(w, p.journalNum)
+ }
+ if p.has(recNextFileNum) {
+ p.putUvarint(w, recNextFileNum)
+ p.putVarint(w, p.nextFileNum)
+ }
+ if p.has(recSeqNum) {
+ p.putUvarint(w, recSeqNum)
+ p.putUvarint(w, p.seqNum)
+ }
+ for _, r := range p.compPtrs {
+ p.putUvarint(w, recCompPtr)
+ p.putUvarint(w, uint64(r.level))
+ p.putBytes(w, r.ikey)
+ }
+ for _, r := range p.deletedTables {
+ p.putUvarint(w, recDelTable)
+ p.putUvarint(w, uint64(r.level))
+ p.putVarint(w, r.num)
+ }
+ for _, r := range p.addedTables {
+ p.putUvarint(w, recAddTable)
+ p.putUvarint(w, uint64(r.level))
+ p.putVarint(w, r.num)
+ p.putVarint(w, r.size)
+ p.putBytes(w, r.imin)
+ p.putBytes(w, r.imax)
+ }
+ return p.err
+}
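+
+// For illustration, a record that only sets journalNum = 5 encodes to
+// the two uvarint bytes {0x02, 0x05}: the recJournalNum tag followed by
+// the value. A minimal sketch (any io.Writer will do; here bytes.Buffer):
+//
+//	var rec sessionRecord
+//	rec.setJournalNum(5)
+//	var buf bytes.Buffer
+//	_ = rec.encode(&buf) // buf.Bytes() == []byte{0x02, 0x05}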
+
+func (p *sessionRecord) readUvarintMayEOF(field string, r io.ByteReader, mayEOF bool) uint64 {
+ if p.err != nil {
+ return 0
+ }
+ x, err := binary.ReadUvarint(r)
+ if err != nil {
+ if err == io.ErrUnexpectedEOF || (!mayEOF && err == io.EOF) {
+ p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, "short read"})
+ } else if strings.HasPrefix(err.Error(), "binary:") {
+ p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, err.Error()})
+ } else {
+ p.err = err
+ }
+ return 0
+ }
+ return x
+}
+
+func (p *sessionRecord) readUvarint(field string, r io.ByteReader) uint64 {
+ return p.readUvarintMayEOF(field, r, false)
+}
+
+func (p *sessionRecord) readVarint(field string, r io.ByteReader) int64 {
+ x := int64(p.readUvarintMayEOF(field, r, false))
+ if x < 0 {
+ p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, "invalid negative value"})
+ }
+ return x
+}
+
+func (p *sessionRecord) readBytes(field string, r byteReader) []byte {
+ if p.err != nil {
+ return nil
+ }
+ n := p.readUvarint(field, r)
+ if p.err != nil {
+ return nil
+ }
+ x := make([]byte, n)
+ _, p.err = io.ReadFull(r, x)
+ if p.err != nil {
+ if p.err == io.ErrUnexpectedEOF {
+ p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, "short read"})
+ }
+ return nil
+ }
+ return x
+}
+
+func (p *sessionRecord) readLevel(field string, r io.ByteReader) int {
+ if p.err != nil {
+ return 0
+ }
+ x := p.readUvarint(field, r)
+ if p.err != nil {
+ return 0
+ }
+ return int(x)
+}
+
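+// decode reads tagged fields from r until EOF. For illustration, a
+// manifest is recovered by layering decode over the journal reader (a
+// sketch, not the exact recovery code; error handling mostly elided):
+//
+//	jr := journal.NewReader(r, nil, true, true)
+//	for {
+//		chunk, err := jr.Next()
+//		if err == io.EOF {
+//			break
+//		}
+//		rec := &sessionRecord{}
+//		if err := rec.decode(chunk); err != nil {
+//			return err
+//		}
+//	}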
+func (p *sessionRecord) decode(r io.Reader) error {
+ br, ok := r.(byteReader)
+ if !ok {
+ br = bufio.NewReader(r)
+ }
+ p.err = nil
+ for p.err == nil {
+ rec := p.readUvarintMayEOF("field-header", br, true)
+ if p.err != nil {
+ if p.err == io.EOF {
+ return nil
+ }
+ return p.err
+ }
+ switch rec {
+ case recComparer:
+ x := p.readBytes("comparer", br)
+ if p.err == nil {
+ p.setComparer(string(x))
+ }
+ case recJournalNum:
+ x := p.readVarint("journal-num", br)
+ if p.err == nil {
+ p.setJournalNum(x)
+ }
+ case recPrevJournalNum:
+ x := p.readVarint("prev-journal-num", br)
+ if p.err == nil {
+ p.setPrevJournalNum(x)
+ }
+ case recNextFileNum:
+ x := p.readVarint("next-file-num", br)
+ if p.err == nil {
+ p.setNextFileNum(x)
+ }
+ case recSeqNum:
+ x := p.readUvarint("seq-num", br)
+ if p.err == nil {
+ p.setSeqNum(x)
+ }
+ case recCompPtr:
+ level := p.readLevel("comp-ptr.level", br)
+ ikey := p.readBytes("comp-ptr.ikey", br)
+ if p.err == nil {
+ p.addCompPtr(level, internalKey(ikey))
+ }
+ case recAddTable:
+ level := p.readLevel("add-table.level", br)
+ num := p.readVarint("add-table.num", br)
+ size := p.readVarint("add-table.size", br)
+ imin := p.readBytes("add-table.imin", br)
+ imax := p.readBytes("add-table.imax", br)
+ if p.err == nil {
+ p.addTable(level, num, size, imin, imax)
+ }
+ case recDelTable:
+ level := p.readLevel("del-table.level", br)
+ num := p.readVarint("del-table.num", br)
+ if p.err == nil {
+ p.delTable(level, num)
+ }
+ }
+ }
+
+ return p.err
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session_util.go b/vendor/github.com/syndtr/goleveldb/leveldb/session_util.go
new file mode 100644
index 000000000..34ad61798
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/session_util.go
@@ -0,0 +1,258 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "fmt"
+ "sync/atomic"
+
+ "github.com/syndtr/goleveldb/leveldb/journal"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+// Logging.
+
+type dropper struct {
+ s *session
+ fd storage.FileDesc
+}
+
+func (d dropper) Drop(err error) {
+ if e, ok := err.(*journal.ErrCorrupted); ok {
+ d.s.logf("journal@drop %s-%d S·%s %q", d.fd.Type, d.fd.Num, shortenb(e.Size), e.Reason)
+ } else {
+ d.s.logf("journal@drop %s-%d %q", d.fd.Type, d.fd.Num, err)
+ }
+}
+
+func (s *session) log(v ...interface{}) { s.stor.Log(fmt.Sprint(v...)) }
+func (s *session) logf(format string, v ...interface{}) { s.stor.Log(fmt.Sprintf(format, v...)) }
+
+// File utils.
+
+func (s *session) newTemp() storage.FileDesc {
+ num := atomic.AddInt64(&s.stTempFileNum, 1) - 1
+ return storage.FileDesc{storage.TypeTemp, num}
+}
+
+// Session state.
+
+// Gets the current version. This increments the version ref count; the
+// caller must call version.release (exactly once) after use.
+func (s *session) version() *version {
+ s.vmu.Lock()
+ defer s.vmu.Unlock()
+ s.stVersion.ref++
+ return s.stVersion
+}
+
+func (s *session) tLen(level int) int {
+ s.vmu.Lock()
+ defer s.vmu.Unlock()
+ return s.stVersion.tLen(level)
+}
+
+// Set current version to v.
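+// The session holds one reference on the current version, and the old
+// version keeps a reference on its successor until the old version
+// itself is released.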
+func (s *session) setVersion(v *version) {
+ s.vmu.Lock()
+ v.ref = 1 // Held by the session.
+ if old := s.stVersion; old != nil {
+ v.ref++ // Held by the old version.
+ old.next = v
+ old.releaseNB()
+ }
+ s.stVersion = v
+ s.vmu.Unlock()
+}
+
+// Get current unused file number.
+func (s *session) nextFileNum() int64 {
+ return atomic.LoadInt64(&s.stNextFileNum)
+}
+
+// Set current unused file number to num.
+func (s *session) setNextFileNum(num int64) {
+ atomic.StoreInt64(&s.stNextFileNum, num)
+}
+
+// Mark file number as used.
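+// The CAS loop raises stNextFileNum to num+1 unless a concurrent caller
+// has already moved it past that point.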
+func (s *session) markFileNum(num int64) {
+ nextFileNum := num + 1
+ for {
+ old, x := s.stNextFileNum, nextFileNum
+ if old > x {
+ x = old
+ }
+ if atomic.CompareAndSwapInt64(&s.stNextFileNum, old, x) {
+ break
+ }
+ }
+}
+
+// Allocate a file number.
+func (s *session) allocFileNum() int64 {
+ return atomic.AddInt64(&s.stNextFileNum, 1) - 1
+}
+
+// Reuses the given file number, but only if it is the most recently allocated one.
+func (s *session) reuseFileNum(num int64) {
+ for {
+ old, x := s.stNextFileNum, num
+ if old != x+1 {
+ x = old
+ }
+ if atomic.CompareAndSwapInt64(&s.stNextFileNum, old, x) {
+ break
+ }
+ }
+}
+
+// Sets the compaction pointer at the given level; needs external synchronization.
+func (s *session) setCompPtr(level int, ik internalKey) {
+ if level >= len(s.stCompPtrs) {
+ newCompPtrs := make([]internalKey, level+1)
+ copy(newCompPtrs, s.stCompPtrs)
+ s.stCompPtrs = newCompPtrs
+ }
+ s.stCompPtrs[level] = append(internalKey{}, ik...)
+}
+
+// Gets the compaction pointer at the given level; needs external synchronization.
+func (s *session) getCompPtr(level int) internalKey {
+ if level >= len(s.stCompPtrs) {
+ return nil
+ }
+ return s.stCompPtrs[level]
+}
+
+// Manifest related utils.
+
+// Fills the given session record with the current session state; needs
+// external synchronization.
+func (s *session) fillRecord(r *sessionRecord, snapshot bool) {
+ r.setNextFileNum(s.nextFileNum())
+
+ if snapshot {
+ if !r.has(recJournalNum) {
+ r.setJournalNum(s.stJournalNum)
+ }
+
+ if !r.has(recSeqNum) {
+ r.setSeqNum(s.stSeqNum)
+ }
+
+ for level, ik := range s.stCompPtrs {
+ if ik != nil {
+ r.addCompPtr(level, ik)
+ }
+ }
+
+ r.setComparer(s.icmp.uName())
+ }
+}
+
+// Marks the record as committed and updates the session state
+// accordingly; needs external synchronization.
+func (s *session) recordCommited(rec *sessionRecord) {
+ if rec.has(recJournalNum) {
+ s.stJournalNum = rec.journalNum
+ }
+
+ if rec.has(recPrevJournalNum) {
+ s.stPrevJournalNum = rec.prevJournalNum
+ }
+
+ if rec.has(recSeqNum) {
+ s.stSeqNum = rec.seqNum
+ }
+
+ for _, r := range rec.compPtrs {
+ s.setCompPtr(r.level, internalKey(r.ikey))
+ }
+}
+
+// Creates a new manifest file; needs external synchronization.
+func (s *session) newManifest(rec *sessionRecord, v *version) (err error) {
+ fd := storage.FileDesc{storage.TypeManifest, s.allocFileNum()}
+ writer, err := s.stor.Create(fd)
+ if err != nil {
+ return
+ }
+ jw := journal.NewWriter(writer)
+
+ if v == nil {
+ v = s.version()
+ defer v.release()
+ }
+ if rec == nil {
+ rec = &sessionRecord{}
+ }
+ s.fillRecord(rec, true)
+ v.fillRecord(rec)
+
+ defer func() {
+ if err == nil {
+ s.recordCommited(rec)
+ if s.manifest != nil {
+ s.manifest.Close()
+ }
+ if s.manifestWriter != nil {
+ s.manifestWriter.Close()
+ }
+ if !s.manifestFd.Zero() {
+ s.stor.Remove(s.manifestFd)
+ }
+ s.manifestFd = fd
+ s.manifestWriter = writer
+ s.manifest = jw
+ } else {
+ writer.Close()
+ s.stor.Remove(fd)
+ s.reuseFileNum(fd.Num)
+ }
+ }()
+
+ w, err := jw.Next()
+ if err != nil {
+ return
+ }
+ err = rec.encode(w)
+ if err != nil {
+ return
+ }
+ err = jw.Flush()
+ if err != nil {
+ return
+ }
+ err = s.stor.SetMeta(fd)
+ return
+}
+
+// Flushes the record to the current manifest file on disk.
+func (s *session) flushManifest(rec *sessionRecord) (err error) {
+ s.fillRecord(rec, false)
+ w, err := s.manifest.Next()
+ if err != nil {
+ return
+ }
+ err = rec.encode(w)
+ if err != nil {
+ return
+ }
+ err = s.manifest.Flush()
+ if err != nil {
+ return
+ }
+ if !s.o.GetNoSync() {
+ err = s.manifestWriter.Sync()
+ if err != nil {
+ return
+ }
+ }
+ s.recordCommited(rec)
+ return
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go
new file mode 100644
index 000000000..e53434cab
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go
@@ -0,0 +1,583 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package storage
+
+import (
+ "errors"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "runtime"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+)
+
+var (
+ errFileOpen = errors.New("leveldb/storage: file still open")
+ errReadOnly = errors.New("leveldb/storage: storage is read-only")
+)
+
+type fileLock interface {
+ release() error
+}
+
+type fileStorageLock struct {
+ fs *fileStorage
+}
+
+func (lock *fileStorageLock) Unlock() {
+ if lock.fs != nil {
+ lock.fs.mu.Lock()
+ defer lock.fs.mu.Unlock()
+ if lock.fs.slock == lock {
+ lock.fs.slock = nil
+ }
+ }
+}
+
+const logSizeThreshold = 1024 * 1024 // 1 MiB
+
+// fileStorage is a file-system backed storage.
+type fileStorage struct {
+ path string
+ readOnly bool
+
+ mu sync.Mutex
+ flock fileLock
+ slock *fileStorageLock
+ logw *os.File
+ logSize int64
+ buf []byte
+ // Opened file counter; open < 0 means the storage is closed.
+ open int
+ day int
+}
+
+// OpenFile returns a new filesystem-backed storage implementation with the
+// given path. This also acquires a file lock, so any subsequent attempt to
+// open the same path will fail.
+//
+// The storage must be closed after use, by calling the Close method.
+func OpenFile(path string, readOnly bool) (Storage, error) {
+ if fi, err := os.Stat(path); err == nil {
+ if !fi.IsDir() {
+ return nil, fmt.Errorf("leveldb/storage: open %s: not a directory", path)
+ }
+ } else if os.IsNotExist(err) && !readOnly {
+ if err := os.MkdirAll(path, 0755); err != nil {
+ return nil, err
+ }
+ } else {
+ return nil, err
+ }
+
+ flock, err := newFileLock(filepath.Join(path, "LOCK"), readOnly)
+ if err != nil {
+ return nil, err
+ }
+
+ defer func() {
+ if err != nil {
+ flock.release()
+ }
+ }()
+
+ var (
+ logw *os.File
+ logSize int64
+ )
+ if !readOnly {
+ logw, err = os.OpenFile(filepath.Join(path, "LOG"), os.O_WRONLY|os.O_CREATE, 0644)
+ if err != nil {
+ return nil, err
+ }
+ logSize, err = logw.Seek(0, os.SEEK_END)
+ if err != nil {
+ logw.Close()
+ return nil, err
+ }
+ }
+
+ fs := &fileStorage{
+ path: path,
+ readOnly: readOnly,
+ flock: flock,
+ logw: logw,
+ logSize: logSize,
+ }
+ runtime.SetFinalizer(fs, (*fileStorage).Close)
+ return fs, nil
+}
+
+func (fs *fileStorage) Lock() (Locker, error) {
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return nil, ErrClosed
+ }
+ if fs.readOnly {
+ return &fileStorageLock{}, nil
+ }
+ if fs.slock != nil {
+ return nil, ErrLocked
+ }
+ fs.slock = &fileStorageLock{fs: fs}
+ return fs.slock, nil
+}
+
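+// itoa appends the decimal representation of i to buf, zero-padded to
+// at least wid digits. For example, itoa(nil, 7, 2) yields "07".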
+func itoa(buf []byte, i int, wid int) []byte {
+ u := uint(i)
+ if u == 0 && wid <= 1 {
+ return append(buf, '0')
+ }
+
+ // Assemble decimal in reverse order.
+ var b [32]byte
+ bp := len(b)
+ for ; u > 0 || wid > 0; u /= 10 {
+ bp--
+ wid--
+ b[bp] = byte(u%10) + '0'
+ }
+ return append(buf, b[bp:]...)
+}
+
+func (fs *fileStorage) printDay(t time.Time) {
+ if fs.day == t.Day() {
+ return
+ }
+ fs.day = t.Day()
+ fs.logw.Write([]byte("=============== " + t.Format("Jan 2, 2006 (MST)") + " ===============\n"))
+}
+
+func (fs *fileStorage) doLog(t time.Time, str string) {
+ if fs.logSize > logSizeThreshold {
+ // Rotate log file.
+ fs.logw.Close()
+ fs.logw = nil
+ fs.logSize = 0
+ rename(filepath.Join(fs.path, "LOG"), filepath.Join(fs.path, "LOG.old"))
+ }
+ if fs.logw == nil {
+ var err error
+ fs.logw, err = os.OpenFile(filepath.Join(fs.path, "LOG"), os.O_WRONLY|os.O_CREATE, 0644)
+ if err != nil {
+ return
+ }
+ // Force printDay on new log file.
+ fs.day = 0
+ }
+ fs.printDay(t)
+ hour, min, sec := t.Clock()
+ msec := t.Nanosecond() / 1e3
+ // time
+ fs.buf = itoa(fs.buf[:0], hour, 2)
+ fs.buf = append(fs.buf, ':')
+ fs.buf = itoa(fs.buf, min, 2)
+ fs.buf = append(fs.buf, ':')
+ fs.buf = itoa(fs.buf, sec, 2)
+ fs.buf = append(fs.buf, '.')
+ fs.buf = itoa(fs.buf, msec, 6)
+ fs.buf = append(fs.buf, ' ')
+ // write
+ fs.buf = append(fs.buf, []byte(str)...)
+ fs.buf = append(fs.buf, '\n')
+ fs.logw.Write(fs.buf)
+}
+
+func (fs *fileStorage) Log(str string) {
+ if !fs.readOnly {
+ t := time.Now()
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return
+ }
+ fs.doLog(t, str)
+ }
+}
+
+func (fs *fileStorage) log(str string) {
+ if !fs.readOnly {
+ fs.doLog(time.Now(), str)
+ }
+}
+
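+// SetMeta points the CURRENT file at the given manifest 'file descriptor'.
+// The name is first written to a temporary CURRENT.<num> file, which is
+// then renamed over CURRENT so that the switch appears atomic.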
+func (fs *fileStorage) SetMeta(fd FileDesc) (err error) {
+ if !FileDescOk(fd) {
+ return ErrInvalidFile
+ }
+ if fs.readOnly {
+ return errReadOnly
+ }
+
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return ErrClosed
+ }
+ defer func() {
+ if err != nil {
+ fs.log(fmt.Sprintf("CURRENT: %v", err))
+ }
+ }()
+ path := fmt.Sprintf("%s.%d", filepath.Join(fs.path, "CURRENT"), fd.Num)
+ w, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+ if err != nil {
+ return
+ }
+ _, err = fmt.Fprintln(w, fsGenName(fd))
+ // Close the file first.
+ if cerr := w.Close(); cerr != nil {
+ fs.log(fmt.Sprintf("close CURRENT.%d: %v", fd.Num, cerr))
+ }
+ if err != nil {
+ return
+ }
+ return rename(path, filepath.Join(fs.path, "CURRENT"))
+}
+
+func (fs *fileStorage) GetMeta() (fd FileDesc, err error) {
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return FileDesc{}, ErrClosed
+ }
+ dir, err := os.Open(fs.path)
+ if err != nil {
+ return
+ }
+ names, err := dir.Readdirnames(0)
+ // Close the dir first before checking for Readdirnames error.
+ if ce := dir.Close(); ce != nil {
+ fs.log(fmt.Sprintf("close dir: %v", ce))
+ }
+ if err != nil {
+ return
+ }
+ // Find latest CURRENT file.
+ var rem []string
+ var pend bool
+ var cerr error
+ for _, name := range names {
+ if strings.HasPrefix(name, "CURRENT") {
+ pend1 := len(name) > 7
+ var pendNum int64
+ // Make sure it is a valid name for a CURRENT file, otherwise skip it.
+ if pend1 {
+ if name[7] != '.' || len(name) < 9 {
+ fs.log(fmt.Sprintf("skipping %s: invalid file name", name))
+ continue
+ }
+ var e1 error
+ if pendNum, e1 = strconv.ParseInt(name[8:], 10, 0); e1 != nil {
+ fs.log(fmt.Sprintf("skipping %s: invalid file num: %v", name, e1))
+ continue
+ }
+ }
+ path := filepath.Join(fs.path, name)
+ r, e1 := os.OpenFile(path, os.O_RDONLY, 0)
+ if e1 != nil {
+ return FileDesc{}, e1
+ }
+ b, e1 := ioutil.ReadAll(r)
+ if e1 != nil {
+ r.Close()
+ return FileDesc{}, e1
+ }
+ var fd1 FileDesc
+ if len(b) < 1 || b[len(b)-1] != '\n' || !fsParseNamePtr(string(b[:len(b)-1]), &fd1) {
+ fs.log(fmt.Sprintf("skipping %s: corrupted or incomplete", name))
+ if pend1 {
+ rem = append(rem, name)
+ }
+ if !pend1 || cerr == nil {
+ metaFd, _ := fsParseName(name)
+ cerr = &ErrCorrupted{
+ Fd: metaFd,
+ Err: errors.New("leveldb/storage: corrupted or incomplete meta file"),
+ }
+ }
+ } else if pend1 && pendNum != fd1.Num {
+ fs.log(fmt.Sprintf("skipping %s: inconsistent pending-file num: %d vs %d", name, pendNum, fd1.Num))
+ rem = append(rem, name)
+ } else if fd1.Num < fd.Num {
+ fs.log(fmt.Sprintf("skipping %s: obsolete", name))
+ if pend1 {
+ rem = append(rem, name)
+ }
+ } else {
+ fd = fd1
+ pend = pend1
+ }
+ if err := r.Close(); err != nil {
+ fs.log(fmt.Sprintf("close %s: %v", name, err))
+ }
+ }
+ }
+ // Don't remove any files if there is no valid CURRENT file.
+ if fd.Zero() {
+ if cerr != nil {
+ err = cerr
+ } else {
+ err = os.ErrNotExist
+ }
+ return
+ }
+ if !fs.readOnly {
+ // Rename pending CURRENT file to an effective CURRENT.
+ if pend {
+ path := fmt.Sprintf("%s.%d", filepath.Join(fs.path, "CURRENT"), fd.Num)
+ if err := rename(path, filepath.Join(fs.path, "CURRENT")); err != nil {
+ fs.log(fmt.Sprintf("CURRENT.%d -> CURRENT: %v", fd.Num, err))
+ }
+ }
+ // Remove obsolete or incomplete pending CURRENT files.
+ for _, name := range rem {
+ path := filepath.Join(fs.path, name)
+ if err := os.Remove(path); err != nil {
+ fs.log(fmt.Sprintf("remove %s: %v", name, err))
+ }
+ }
+ }
+ return
+}
+
+func (fs *fileStorage) List(ft FileType) (fds []FileDesc, err error) {
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return nil, ErrClosed
+ }
+ dir, err := os.Open(fs.path)
+ if err != nil {
+ return
+ }
+ names, err := dir.Readdirnames(0)
+ // Close the dir first before checking for Readdirnames error.
+ if cerr := dir.Close(); cerr != nil {
+ fs.log(fmt.Sprintf("close dir: %v", cerr))
+ }
+ if err == nil {
+ for _, name := range names {
+ if fd, ok := fsParseName(name); ok && fd.Type&ft != 0 {
+ fds = append(fds, fd)
+ }
+ }
+ }
+ return
+}
+
+func (fs *fileStorage) Open(fd FileDesc) (Reader, error) {
+ if !FileDescOk(fd) {
+ return nil, ErrInvalidFile
+ }
+
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return nil, ErrClosed
+ }
+ of, err := os.OpenFile(filepath.Join(fs.path, fsGenName(fd)), os.O_RDONLY, 0)
+ if err != nil {
+ if fsHasOldName(fd) && os.IsNotExist(err) {
+ of, err = os.OpenFile(filepath.Join(fs.path, fsGenOldName(fd)), os.O_RDONLY, 0)
+ if err == nil {
+ goto ok
+ }
+ }
+ return nil, err
+ }
+ok:
+ fs.open++
+ return &fileWrap{File: of, fs: fs, fd: fd}, nil
+}
+
+func (fs *fileStorage) Create(fd FileDesc) (Writer, error) {
+ if !FileDescOk(fd) {
+ return nil, ErrInvalidFile
+ }
+ if fs.readOnly {
+ return nil, errReadOnly
+ }
+
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return nil, ErrClosed
+ }
+ of, err := os.OpenFile(filepath.Join(fs.path, fsGenName(fd)), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+ if err != nil {
+ return nil, err
+ }
+ fs.open++
+ return &fileWrap{File: of, fs: fs, fd: fd}, nil
+}
+
+func (fs *fileStorage) Remove(fd FileDesc) error {
+ if !FileDescOk(fd) {
+ return ErrInvalidFile
+ }
+ if fs.readOnly {
+ return errReadOnly
+ }
+
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return ErrClosed
+ }
+ err := os.Remove(filepath.Join(fs.path, fsGenName(fd)))
+ if err != nil {
+ if fsHasOldName(fd) && os.IsNotExist(err) {
+ if e1 := os.Remove(filepath.Join(fs.path, fsGenOldName(fd))); !os.IsNotExist(e1) {
+ fs.log(fmt.Sprintf("remove %s: %v (old name)", fd, err))
+ err = e1
+ }
+ } else {
+ fs.log(fmt.Sprintf("remove %s: %v", fd, err))
+ }
+ }
+ return err
+}
+
+func (fs *fileStorage) Rename(oldfd, newfd FileDesc) error {
+ if !FileDescOk(oldfd) || !FileDescOk(newfd) {
+ return ErrInvalidFile
+ }
+ if oldfd == newfd {
+ return nil
+ }
+ if fs.readOnly {
+ return errReadOnly
+ }
+
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return ErrClosed
+ }
+ return rename(filepath.Join(fs.path, fsGenName(oldfd)), filepath.Join(fs.path, fsGenName(newfd)))
+}
+
+func (fs *fileStorage) Close() error {
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return ErrClosed
+ }
+ // Clear the finalizer.
+ runtime.SetFinalizer(fs, nil)
+
+ if fs.open > 0 {
+ fs.log(fmt.Sprintf("close: warning, %d files still open", fs.open))
+ }
+ fs.open = -1
+ if fs.logw != nil {
+ fs.logw.Close()
+ }
+ return fs.flock.release()
+}
+
+type fileWrap struct {
+ *os.File
+ fs *fileStorage
+ fd FileDesc
+ closed bool
+}
+
+func (fw *fileWrap) Sync() error {
+ if err := fw.File.Sync(); err != nil {
+ return err
+ }
+ if fw.fd.Type == TypeManifest {
+ // Also sync parent directory if file type is manifest.
+ // See: https://code.google.com/p/leveldb/issues/detail?id=190.
+ if err := syncDir(fw.fs.path); err != nil {
+ fw.fs.log(fmt.Sprintf("syncDir: %v", err))
+ return err
+ }
+ }
+ return nil
+}
+
+func (fw *fileWrap) Close() error {
+ fw.fs.mu.Lock()
+ defer fw.fs.mu.Unlock()
+ if fw.closed {
+ return ErrClosed
+ }
+ fw.closed = true
+ fw.fs.open--
+ err := fw.File.Close()
+ if err != nil {
+ fw.fs.log(fmt.Sprintf("close %s: %v", fw.fd, err))
+ }
+ return err
+}
+
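+// fsGenName generates the on-disk name for a 'file descriptor', e.g.
+// MANIFEST-000001, 000001.log, 000001.ldb or 000001.tmp.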
+func fsGenName(fd FileDesc) string {
+ switch fd.Type {
+ case TypeManifest:
+ return fmt.Sprintf("MANIFEST-%06d", fd.Num)
+ case TypeJournal:
+ return fmt.Sprintf("%06d.log", fd.Num)
+ case TypeTable:
+ return fmt.Sprintf("%06d.ldb", fd.Num)
+ case TypeTemp:
+ return fmt.Sprintf("%06d.tmp", fd.Num)
+ default:
+ panic("invalid file type")
+ }
+}
+
+func fsHasOldName(fd FileDesc) bool {
+ return fd.Type == TypeTable
+}
+
+func fsGenOldName(fd FileDesc) string {
+ switch fd.Type {
+ case TypeTable:
+ return fmt.Sprintf("%06d.sst", fd.Num)
+ }
+ return fsGenName(fd)
+}
+
+func fsParseName(name string) (fd FileDesc, ok bool) {
+ var tail string
+ _, err := fmt.Sscanf(name, "%d.%s", &fd.Num, &tail)
+ if err == nil {
+ switch tail {
+ case "log":
+ fd.Type = TypeJournal
+ case "ldb", "sst":
+ fd.Type = TypeTable
+ case "tmp":
+ fd.Type = TypeTemp
+ default:
+ return
+ }
+ return fd, true
+ }
+ n, _ := fmt.Sscanf(name, "MANIFEST-%d%s", &fd.Num, &tail)
+ if n == 1 {
+ fd.Type = TypeManifest
+ return fd, true
+ }
+ return
+}
+
+func fsParseNamePtr(name string, fd *FileDesc) bool {
+ _fd, ok := fsParseName(name)
+ if fd != nil {
+ *fd = _fd
+ }
+ return ok
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go
new file mode 100644
index 000000000..5545aeef2
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go
@@ -0,0 +1,34 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// +build nacl
+
+package storage
+
+import (
+ "os"
+ "syscall"
+)
+
+func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
+ return nil, syscall.ENOTSUP
+}
+
+func setFileLock(f *os.File, readOnly, lock bool) error {
+ return syscall.ENOTSUP
+}
+
+func rename(oldpath, newpath string) error {
+ return syscall.ENOTSUP
+}
+
+func isErrInvalid(err error) bool {
+ return false
+}
+
+func syncDir(name string) error {
+ return syscall.ENOTSUP
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go
new file mode 100644
index 000000000..bab62bfce
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go
@@ -0,0 +1,65 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package storage
+
+import (
+ "os"
+ "path/filepath"
+)
+
+type plan9FileLock struct {
+ f *os.File
+}
+
+func (fl *plan9FileLock) release() error {
+ return fl.f.Close()
+}
+
+func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
+ var (
+ flag int
+ perm os.FileMode
+ )
+ if readOnly {
+ flag = os.O_RDONLY
+ } else {
+ flag = os.O_RDWR
+ perm = os.ModeExclusive
+ }
+ f, err := os.OpenFile(path, flag, perm)
+ if os.IsNotExist(err) {
+ f, err = os.OpenFile(path, flag|os.O_CREATE, perm|0644)
+ }
+ if err != nil {
+ return
+ }
+ fl = &plan9FileLock{f: f}
+ return
+}
+
+func rename(oldpath, newpath string) error {
+ if _, err := os.Stat(newpath); err == nil {
+ if err := os.Remove(newpath); err != nil {
+ return err
+ }
+ }
+
+ _, fname := filepath.Split(newpath)
+ return os.Rename(oldpath, fname)
+}
+
+func syncDir(name string) error {
+ f, err := os.Open(name)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+ if err := f.Sync(); err != nil {
+ return err
+ }
+ return nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go
new file mode 100644
index 000000000..79901ee4a
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go
@@ -0,0 +1,81 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// +build solaris
+
+package storage
+
+import (
+ "os"
+ "syscall"
+)
+
+type unixFileLock struct {
+ f *os.File
+}
+
+func (fl *unixFileLock) release() error {
+ if err := setFileLock(fl.f, false, false); err != nil {
+ return err
+ }
+ return fl.f.Close()
+}
+
+func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
+ var flag int
+ if readOnly {
+ flag = os.O_RDONLY
+ } else {
+ flag = os.O_RDWR
+ }
+ f, err := os.OpenFile(path, flag, 0)
+ if os.IsNotExist(err) {
+ f, err = os.OpenFile(path, flag|os.O_CREATE, 0644)
+ }
+ if err != nil {
+ return
+ }
+ err = setFileLock(f, readOnly, true)
+ if err != nil {
+ f.Close()
+ return
+ }
+ fl = &unixFileLock{f: f}
+ return
+}
+
+func setFileLock(f *os.File, readOnly, lock bool) error {
+ flock := syscall.Flock_t{
+ Type: syscall.F_UNLCK,
+ Start: 0,
+ Len: 0,
+ Whence: 1,
+ }
+ if lock {
+ if readOnly {
+ flock.Type = syscall.F_RDLCK
+ } else {
+ flock.Type = syscall.F_WRLCK
+ }
+ }
+ return syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &flock)
+}
+
+func rename(oldpath, newpath string) error {
+ return os.Rename(oldpath, newpath)
+}
+
+func syncDir(name string) error {
+ f, err := os.Open(name)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+ if err := f.Sync(); err != nil {
+ return err
+ }
+ return nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go
new file mode 100644
index 000000000..7e2991537
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go
@@ -0,0 +1,86 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux netbsd openbsd
+
+package storage
+
+import (
+ "os"
+ "syscall"
+)
+
+type unixFileLock struct {
+ f *os.File
+}
+
+func (fl *unixFileLock) release() error {
+ if err := setFileLock(fl.f, false, false); err != nil {
+ return err
+ }
+ return fl.f.Close()
+}
+
+func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
+ var flag int
+ if readOnly {
+ flag = os.O_RDONLY
+ } else {
+ flag = os.O_RDWR
+ }
+ f, err := os.OpenFile(path, flag, 0)
+ if os.IsNotExist(err) {
+ f, err = os.OpenFile(path, flag|os.O_CREATE, 0644)
+ }
+ if err != nil {
+ return
+ }
+ err = setFileLock(f, readOnly, true)
+ if err != nil {
+ f.Close()
+ return
+ }
+ fl = &unixFileLock{f: f}
+ return
+}
+
+func setFileLock(f *os.File, readOnly, lock bool) error {
+ how := syscall.LOCK_UN
+ if lock {
+ if readOnly {
+ how = syscall.LOCK_SH
+ } else {
+ how = syscall.LOCK_EX
+ }
+ }
+ return syscall.Flock(int(f.Fd()), how|syscall.LOCK_NB)
+}
+
+func rename(oldpath, newpath string) error {
+ return os.Rename(oldpath, newpath)
+}
+
+func isErrInvalid(err error) bool {
+ if err == os.ErrInvalid {
+ return true
+ }
+ if syserr, ok := err.(*os.SyscallError); ok && syserr.Err == syscall.EINVAL {
+ return true
+ }
+ return false
+}
+
+func syncDir(name string) error {
+ f, err := os.Open(name)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+ if err := f.Sync(); err != nil && !isErrInvalid(err) {
+ return err
+ }
+ return nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go
new file mode 100644
index 000000000..899335fd7
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go
@@ -0,0 +1,78 @@
+// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package storage
+
+import (
+ "syscall"
+ "unsafe"
+)
+
+var (
+ modkernel32 = syscall.NewLazyDLL("kernel32.dll")
+
+ procMoveFileExW = modkernel32.NewProc("MoveFileExW")
+)
+
+const (
+ _MOVEFILE_REPLACE_EXISTING = 1
+)
+
+type windowsFileLock struct {
+ fd syscall.Handle
+}
+
+func (fl *windowsFileLock) release() error {
+ return syscall.Close(fl.fd)
+}
+
+func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
+ pathp, err := syscall.UTF16PtrFromString(path)
+ if err != nil {
+ return
+ }
+ var access, shareMode uint32
+ if readOnly {
+ access = syscall.GENERIC_READ
+ shareMode = syscall.FILE_SHARE_READ
+ } else {
+ access = syscall.GENERIC_READ | syscall.GENERIC_WRITE
+ }
+ fd, err := syscall.CreateFile(pathp, access, shareMode, nil, syscall.OPEN_EXISTING, syscall.FILE_ATTRIBUTE_NORMAL, 0)
+ if err == syscall.ERROR_FILE_NOT_FOUND {
+ fd, err = syscall.CreateFile(pathp, access, shareMode, nil, syscall.OPEN_ALWAYS, syscall.FILE_ATTRIBUTE_NORMAL, 0)
+ }
+ if err != nil {
+ return
+ }
+ fl = &windowsFileLock{fd: fd}
+ return
+}
+
+func moveFileEx(from *uint16, to *uint16, flags uint32) error {
+ r1, _, e1 := syscall.Syscall(procMoveFileExW.Addr(), 3, uintptr(unsafe.Pointer(from)), uintptr(unsafe.Pointer(to)), uintptr(flags))
+ if r1 == 0 {
+ if e1 != 0 {
+ return error(e1)
+ }
+ return syscall.EINVAL
+ }
+ return nil
+}
+
+func rename(oldpath, newpath string) error {
+ from, err := syscall.UTF16PtrFromString(oldpath)
+ if err != nil {
+ return err
+ }
+ to, err := syscall.UTF16PtrFromString(newpath)
+ if err != nil {
+ return err
+ }
+ return moveFileEx(from, to, _MOVEFILE_REPLACE_EXISTING)
+}
+
+func syncDir(name string) error { return nil }
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go
new file mode 100644
index 000000000..9b0421f03
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go
@@ -0,0 +1,218 @@
+// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package storage
+
+import (
+ "bytes"
+ "os"
+ "sync"
+)
+
+const typeShift = 3
+
+type memStorageLock struct {
+ ms *memStorage
+}
+
+func (lock *memStorageLock) Unlock() {
+ ms := lock.ms
+ ms.mu.Lock()
+ defer ms.mu.Unlock()
+ if ms.slock == lock {
+ ms.slock = nil
+ }
+ return
+}
+
+// memStorage is a memory-backed storage.
+type memStorage struct {
+ mu sync.Mutex
+ slock *memStorageLock
+ files map[uint64]*memFile
+ meta FileDesc
+}
+
+// NewMemStorage returns a new memory-backed storage implementation.
+func NewMemStorage() Storage {
+ return &memStorage{
+ files: make(map[uint64]*memFile),
+ }
+}
+
+func (ms *memStorage) Lock() (Locker, error) {
+ ms.mu.Lock()
+ defer ms.mu.Unlock()
+ if ms.slock != nil {
+ return nil, ErrLocked
+ }
+ ms.slock = &memStorageLock{ms: ms}
+ return ms.slock, nil
+}
+
+func (*memStorage) Log(str string) {}
+
+func (ms *memStorage) SetMeta(fd FileDesc) error {
+ if !FileDescOk(fd) {
+ return ErrInvalidFile
+ }
+
+ ms.mu.Lock()
+ ms.meta = fd
+ ms.mu.Unlock()
+ return nil
+}
+
+func (ms *memStorage) GetMeta() (FileDesc, error) {
+ ms.mu.Lock()
+ defer ms.mu.Unlock()
+ if ms.meta.Zero() {
+ return FileDesc{}, os.ErrNotExist
+ }
+ return ms.meta, nil
+}
+
+func (ms *memStorage) List(ft FileType) ([]FileDesc, error) {
+ ms.mu.Lock()
+ var fds []FileDesc
+ for x := range ms.files {
+ fd := unpackFile(x)
+ if fd.Type&ft != 0 {
+ fds = append(fds, fd)
+ }
+ }
+ ms.mu.Unlock()
+ return fds, nil
+}
+
+func (ms *memStorage) Open(fd FileDesc) (Reader, error) {
+ if !FileDescOk(fd) {
+ return nil, ErrInvalidFile
+ }
+
+ ms.mu.Lock()
+ defer ms.mu.Unlock()
+ if m, exist := ms.files[packFile(fd)]; exist {
+ if m.open {
+ return nil, errFileOpen
+ }
+ m.open = true
+ return &memReader{Reader: bytes.NewReader(m.Bytes()), ms: ms, m: m}, nil
+ }
+ return nil, os.ErrNotExist
+}
+
+func (ms *memStorage) Create(fd FileDesc) (Writer, error) {
+ if !FileDescOk(fd) {
+ return nil, ErrInvalidFile
+ }
+
+ x := packFile(fd)
+ ms.mu.Lock()
+ defer ms.mu.Unlock()
+ m, exist := ms.files[x]
+ if exist {
+ if m.open {
+ return nil, errFileOpen
+ }
+ m.Reset()
+ } else {
+ m = &memFile{}
+ ms.files[x] = m
+ }
+ m.open = true
+ return &memWriter{memFile: m, ms: ms}, nil
+}
+
+func (ms *memStorage) Remove(fd FileDesc) error {
+ if !FileDescOk(fd) {
+ return ErrInvalidFile
+ }
+
+ x := packFile(fd)
+ ms.mu.Lock()
+ defer ms.mu.Unlock()
+ if _, exist := ms.files[x]; exist {
+ delete(ms.files, x)
+ return nil
+ }
+ return os.ErrNotExist
+}
+
+func (ms *memStorage) Rename(oldfd, newfd FileDesc) error {
+ if !FileDescOk(oldfd) || !FileDescOk(newfd) {
+ return ErrInvalidFile
+ }
+ if oldfd == newfd {
+ return nil
+ }
+
+ oldx := packFile(oldfd)
+ newx := packFile(newfd)
+ ms.mu.Lock()
+ defer ms.mu.Unlock()
+ oldm, exist := ms.files[oldx]
+ if !exist {
+ return os.ErrNotExist
+ }
+ newm, exist := ms.files[newx]
+ if (exist && newm.open) || oldm.open {
+ return errFileOpen
+ }
+ delete(ms.files, oldx)
+ ms.files[newx] = oldm
+ return nil
+}
+
+func (*memStorage) Close() error { return nil }
+
+type memFile struct {
+ bytes.Buffer
+ open bool
+}
+
+type memReader struct {
+ *bytes.Reader
+ ms *memStorage
+ m *memFile
+ closed bool
+}
+
+func (mr *memReader) Close() error {
+ mr.ms.mu.Lock()
+ defer mr.ms.mu.Unlock()
+ if mr.closed {
+ return ErrClosed
+ }
+ mr.m.open = false
+ return nil
+}
+
+type memWriter struct {
+ *memFile
+ ms *memStorage
+ closed bool
+}
+
+func (*memWriter) Sync() error { return nil }
+
+func (mw *memWriter) Close() error {
+ mw.ms.mu.Lock()
+ defer mw.ms.mu.Unlock()
+ if mw.closed {
+ return ErrClosed
+ }
+ mw.memFile.open = false
+ return nil
+}
+
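+// packFile packs a 'file descriptor' into a map key: the file type
+// occupies the low typeShift bits and the file number the rest. For
+// example, FileDesc{TypeTable, 7} packs to 7<<3 | 4 == 60.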
+func packFile(fd FileDesc) uint64 {
+ return uint64(fd.Num)<<typeShift | uint64(fd.Type)
+}
+
+func unpackFile(x uint64) FileDesc {
+ return FileDesc{FileType(x) & TypeAll, int64(x >> typeShift)}
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go
new file mode 100644
index 000000000..c16bce6b6
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go
@@ -0,0 +1,179 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package storage provides storage abstraction for LevelDB.
+package storage
+
+import (
+ "errors"
+ "fmt"
+ "io"
+)
+
+// FileType represent a file type.
+type FileType int
+
+// File types.
+const (
+ TypeManifest FileType = 1 << iota
+ TypeJournal
+ TypeTable
+ TypeTemp
+
+ TypeAll = TypeManifest | TypeJournal | TypeTable | TypeTemp
+)
+
+func (t FileType) String() string {
+ switch t {
+ case TypeManifest:
+ return "manifest"
+ case TypeJournal:
+ return "journal"
+ case TypeTable:
+ return "table"
+ case TypeTemp:
+ return "temp"
+ }
+ return fmt.Sprintf("<unknown:%d>", t)
+}
+
+// Common errors.
+var (
+ ErrInvalidFile = errors.New("leveldb/storage: invalid file for argument")
+ ErrLocked = errors.New("leveldb/storage: already locked")
+ ErrClosed = errors.New("leveldb/storage: closed")
+)
+
+// ErrCorrupted is the type that wraps errors that indicate corruption of
+// a file. Package storage has its own type instead of using
+// errors.ErrCorrupted to prevent circular import.
+type ErrCorrupted struct {
+ Fd FileDesc
+ Err error
+}
+
+func (e *ErrCorrupted) Error() string {
+ if !e.Fd.Zero() {
+ return fmt.Sprintf("%v [file=%v]", e.Err, e.Fd)
+ }
+ return e.Err.Error()
+}
+
+// Syncer is the interface that wraps basic Sync method.
+type Syncer interface {
+ // Sync commits the current contents of the file to stable storage.
+ Sync() error
+}
+
+// Reader is the interface that groups the basic Read, Seek, ReadAt and Close
+// methods.
+type Reader interface {
+ io.ReadSeeker
+ io.ReaderAt
+ io.Closer
+}
+
+// Writer is the interface that groups the basic Write, Sync and Close
+// methods.
+type Writer interface {
+ io.WriteCloser
+ Syncer
+}
+
+// Locker is the interface that wraps Unlock method.
+type Locker interface {
+ Unlock()
+}
+
+// FileDesc is a 'file descriptor'.
+type FileDesc struct {
+ Type FileType
+ Num int64
+}
+
+func (fd FileDesc) String() string {
+ switch fd.Type {
+ case TypeManifest:
+ return fmt.Sprintf("MANIFEST-%06d", fd.Num)
+ case TypeJournal:
+ return fmt.Sprintf("%06d.log", fd.Num)
+ case TypeTable:
+ return fmt.Sprintf("%06d.ldb", fd.Num)
+ case TypeTemp:
+ return fmt.Sprintf("%06d.tmp", fd.Num)
+ default:
+ return fmt.Sprintf("%#x-%d", fd.Type, fd.Num)
+ }
+}
+
+// Zero returns true if fd == (FileDesc{}).
+func (fd FileDesc) Zero() bool {
+ return fd == (FileDesc{})
+}
+
+// FileDescOk returns true if fd is a valid 'file descriptor'.
+func FileDescOk(fd FileDesc) bool {
+ switch fd.Type {
+ case TypeManifest:
+ case TypeJournal:
+ case TypeTable:
+ case TypeTemp:
+ default:
+ return false
+ }
+ return fd.Num >= 0
+}
+
+// Storage is the storage. A storage instance must be safe for concurrent use.
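+//
+// For illustration, a typical lifecycle looks like this (a sketch with
+// error handling elided; "demo-db" stands in for an arbitrary path):
+//
+//	stor, _ := OpenFile("demo-db", false)
+//	defer stor.Close()
+//	lock, _ := stor.Lock()
+//	defer lock.Unlock()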
+type Storage interface {
+ // Lock locks the storage. Any subsequent attempt to call Lock will fail
+ // until the last lock is released.
+ // The caller should call the Unlock method after use.
+ Lock() (Locker, error)
+
+ // Log logs a string. This is used for logging.
+ // An implementation may write to a file, stdout or simply do nothing.
+ Log(str string)
+
+ // SetMeta stores the 'file descriptor' so that it can later be retrieved
+ // using the GetMeta method. The 'file descriptor' should point to a valid
+ // file. SetMeta should be implemented in such a way that the change
+ // happens atomically.
+ SetMeta(fd FileDesc) error
+
+ // GetMeta returns the 'file descriptor' stored in meta. The 'file
+ // descriptor' can be updated using the SetMeta method.
+ // Returns os.ErrNotExist if meta doesn't store any 'file descriptor', or
+ // if the 'file descriptor' points to a nonexistent file.
+ GetMeta() (FileDesc, error)
+
+ // List returns file descriptors that match the given file types.
+ // The file types may be OR'ed together.
+ List(ft FileType) ([]FileDesc, error)
+
+ // Open opens file with the given 'file descriptor' read-only.
+ // Returns os.ErrNotExist error if the file does not exist.
+ // Returns ErrClosed if the underlying storage is closed.
+ Open(fd FileDesc) (Reader, error)
+
+ // Create creates a file with the given 'file descriptor', truncates it if
+ // it already exists, and opens it write-only.
+ // Returns ErrClosed if the underlying storage is closed.
+ Create(fd FileDesc) (Writer, error)
+
+ // Remove removes file with the given 'file descriptor'.
+ // Returns ErrClosed if the underlying storage is closed.
+ Remove(fd FileDesc) error
+
+ // Rename renames file from oldfd to newfd.
+ // Returns ErrClosed if the underlying storage is closed.
+ Rename(oldfd, newfd FileDesc) error
+
+ // Close closes the storage.
+ // It is valid to call Close multiple times. Other methods should not be
+ // called after the storage has been closed.
+ Close() error
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table.go b/vendor/github.com/syndtr/goleveldb/leveldb/table.go
new file mode 100644
index 000000000..81d18a531
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/table.go
@@ -0,0 +1,529 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "fmt"
+ "sort"
+ "sync/atomic"
+
+ "github.com/syndtr/goleveldb/leveldb/cache"
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/table"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// tFile holds basic information about a table.
+type tFile struct {
+ fd storage.FileDesc
+ seekLeft int32
+ size int64
+ imin, imax internalKey
+}
+
+// Returns true if given key is after largest key of this table.
+func (t *tFile) after(icmp *iComparer, ukey []byte) bool {
+ return ukey != nil && icmp.uCompare(ukey, t.imax.ukey()) > 0
+}
+
+// Returns true if given key is before smallest key of this table.
+func (t *tFile) before(icmp *iComparer, ukey []byte) bool {
+ return ukey != nil && icmp.uCompare(ukey, t.imin.ukey()) < 0
+}
+
+// Returns true if given key range overlaps with this table key range.
+func (t *tFile) overlaps(icmp *iComparer, umin, umax []byte) bool {
+ return !t.after(icmp, umin) && !t.before(icmp, umax)
+}
+
+// Consumes one seek and returns the number of seeks left.
+func (t *tFile) consumeSeek() int32 {
+ return atomic.AddInt32(&t.seekLeft, -1)
+}
+
+// Creates new tFile.
+func newTableFile(fd storage.FileDesc, size int64, imin, imax internalKey) *tFile {
+ f := &tFile{
+ fd: fd,
+ size: size,
+ imin: imin,
+ imax: imax,
+ }
+
+ // We arrange to automatically compact this file after
+ // a certain number of seeks. Let's assume:
+ // (1) One seek costs 10ms
+ // (2) Writing or reading 1MB costs 10ms (100MB/s)
+ // (3) A compaction of 1MB does 25MB of IO:
+ // 1MB read from this level
+ // 10-12MB read from next level (boundaries may be misaligned)
+ // 10-12MB written to next level
+ // This implies that 25 seeks cost the same as the compaction
+ // of 1MB of data. I.e., one seek costs approximately the
+ // same as the compaction of 40KB of data. We are a little
+ // conservative and allow approximately one seek for every 16KB
+ // of data before triggering a compaction.
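+ // For example, a 1 MiB table gets 1048576/16384 = 64 seeks, which the
+ // floor below then raises to the 100-seek minimum.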
+ f.seekLeft = int32(size / 16384)
+ if f.seekLeft < 100 {
+ f.seekLeft = 100
+ }
+
+ return f
+}
+
+func tableFileFromRecord(r atRecord) *tFile {
+ return newTableFile(storage.FileDesc{storage.TypeTable, r.num}, r.size, r.imin, r.imax)
+}
+
+// tFiles holds multiple tFile instances.
+type tFiles []*tFile
+
+func (tf tFiles) Len() int { return len(tf) }
+func (tf tFiles) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] }
+
+func (tf tFiles) nums() string {
+ x := "[ "
+ for i, f := range tf {
+ if i != 0 {
+ x += ", "
+ }
+ x += fmt.Sprint(f.fd.Num)
+ }
+ x += " ]"
+ return x
+}
+
+// Returns true if table i's smallest key is less than table j's.
+// This is used to sort tables by key in ascending order.
+func (tf tFiles) lessByKey(icmp *iComparer, i, j int) bool {
+ a, b := tf[i], tf[j]
+ n := icmp.Compare(a.imin, b.imin)
+ if n == 0 {
+ return a.fd.Num < b.fd.Num
+ }
+ return n < 0
+}
+
+// Returns true if table i's file number is greater than table j's.
+// This is used to sort tables by file number in descending order.
+func (tf tFiles) lessByNum(i, j int) bool {
+ return tf[i].fd.Num > tf[j].fd.Num
+}
+
+// Sorts tables by key in ascending order.
+func (tf tFiles) sortByKey(icmp *iComparer) {
+ sort.Sort(&tFilesSortByKey{tFiles: tf, icmp: icmp})
+}
+
+// Sorts tables by file number in descending order.
+func (tf tFiles) sortByNum() {
+ sort.Sort(&tFilesSortByNum{tFiles: tf})
+}
+
+// Returns the total size of all tables.
+func (tf tFiles) size() (sum int64) {
+ for _, t := range tf {
+ sum += t.size
+ }
+ return sum
+}
+
+// Searches for the smallest index of the table whose smallest key
+// is greater than or equal to the given key.
+func (tf tFiles) searchMin(icmp *iComparer, ikey internalKey) int {
+ return sort.Search(len(tf), func(i int) bool {
+ return icmp.Compare(tf[i].imin, ikey) >= 0
+ })
+}
+
+// Searches for the smallest index of the table whose largest key
+// is greater than or equal to the given key.
+func (tf tFiles) searchMax(icmp *iComparer, ikey internalKey) int {
+ return sort.Search(len(tf), func(i int) bool {
+ return icmp.Compare(tf[i].imax, ikey) >= 0
+ })
+}
+
+// Returns true if the given key range overlaps one or more tables'
+// key ranges. If unsorted is true then binary search will not be used.
+func (tf tFiles) overlaps(icmp *iComparer, umin, umax []byte, unsorted bool) bool {
+ if unsorted {
+ // Check against all files.
+ for _, t := range tf {
+ if t.overlaps(icmp, umin, umax) {
+ return true
+ }
+ }
+ return false
+ }
+
+ i := 0
+ if len(umin) > 0 {
+ // Find the earliest possible internal key for min.
+ i = tf.searchMax(icmp, makeInternalKey(nil, umin, keyMaxSeq, keyTypeSeek))
+ }
+ if i >= len(tf) {
+ // Beginning of range is after all files, so no overlap.
+ return false
+ }
+ return !tf[i].before(icmp, umax)
+}
+
+// Returns tables whose key range overlaps the given key range.
+// The range will be expanded if a user key spans multiple tables.
+// If overlapped is true then the search will be restarted whenever
+// umax is expanded.
+// The dst content will be overwritten.
+func (tf tFiles) getOverlaps(dst tFiles, icmp *iComparer, umin, umax []byte, overlapped bool) tFiles {
+ dst = dst[:0]
+ for i := 0; i < len(tf); {
+ t := tf[i]
+ if t.overlaps(icmp, umin, umax) {
+ if umin != nil && icmp.uCompare(t.imin.ukey(), umin) < 0 {
+ umin = t.imin.ukey()
+ dst = dst[:0]
+ i = 0
+ continue
+ } else if umax != nil && icmp.uCompare(t.imax.ukey(), umax) > 0 {
+ umax = t.imax.ukey()
+ // Restart search if it is overlapped.
+ if overlapped {
+ dst = dst[:0]
+ i = 0
+ continue
+ }
+ }
+
+ dst = append(dst, t)
+ }
+ i++
+ }
+
+ return dst
+}
+
+// Returns tables key range.
+func (tf tFiles) getRange(icmp *iComparer) (imin, imax internalKey) {
+ for i, t := range tf {
+ if i == 0 {
+ imin, imax = t.imin, t.imax
+ continue
+ }
+ if icmp.Compare(t.imin, imin) < 0 {
+ imin = t.imin
+ }
+ if icmp.Compare(t.imax, imax) > 0 {
+ imax = t.imax
+ }
+ }
+
+ return
+}
+
+// Creates iterator index from tables.
+func (tf tFiles) newIndexIterator(tops *tOps, icmp *iComparer, slice *util.Range, ro *opt.ReadOptions) iterator.IteratorIndexer {
+ if slice != nil {
+ var start, limit int
+ if slice.Start != nil {
+ start = tf.searchMax(icmp, internalKey(slice.Start))
+ }
+ if slice.Limit != nil {
+ limit = tf.searchMin(icmp, internalKey(slice.Limit))
+ } else {
+ limit = tf.Len()
+ }
+ tf = tf[start:limit]
+ }
+ return iterator.NewArrayIndexer(&tFilesArrayIndexer{
+ tFiles: tf,
+ tops: tops,
+ icmp: icmp,
+ slice: slice,
+ ro: ro,
+ })
+}
+
+// Tables iterator index.
+type tFilesArrayIndexer struct {
+ tFiles
+ tops *tOps
+ icmp *iComparer
+ slice *util.Range
+ ro *opt.ReadOptions
+}
+
+func (a *tFilesArrayIndexer) Search(key []byte) int {
+ return a.searchMax(a.icmp, internalKey(key))
+}
+
+func (a *tFilesArrayIndexer) Get(i int) iterator.Iterator {
+ if i == 0 || i == a.Len()-1 {
+ return a.tops.newIterator(a.tFiles[i], a.slice, a.ro)
+ }
+ return a.tops.newIterator(a.tFiles[i], nil, a.ro)
+}
+
+// Helper type for sortByKey.
+type tFilesSortByKey struct {
+ tFiles
+ icmp *iComparer
+}
+
+func (x *tFilesSortByKey) Less(i, j int) bool {
+ return x.lessByKey(x.icmp, i, j)
+}
+
+// Helper type for sortByNum.
+type tFilesSortByNum struct {
+ tFiles
+}
+
+func (x *tFilesSortByNum) Less(i, j int) bool {
+ return x.lessByNum(i, j)
+}
+
+// Table operations.
+type tOps struct {
+ s *session
+ noSync bool
+ cache *cache.Cache
+ bcache *cache.Cache
+ bpool *util.BufferPool
+}
+
+// Creates an empty table and returns table writer.
+func (t *tOps) create() (*tWriter, error) {
+ fd := storage.FileDesc{storage.TypeTable, t.s.allocFileNum()}
+ fw, err := t.s.stor.Create(fd)
+ if err != nil {
+ return nil, err
+ }
+ return &tWriter{
+ t: t,
+ fd: fd,
+ w: fw,
+ tw: table.NewWriter(fw, t.s.o.Options),
+ }, nil
+}
+
+// Builds table from src iterator.
+func (t *tOps) createFrom(src iterator.Iterator) (f *tFile, n int, err error) {
+ w, err := t.create()
+ if err != nil {
+ return
+ }
+
+ defer func() {
+ if err != nil {
+ w.drop()
+ }
+ }()
+
+ for src.Next() {
+ err = w.append(src.Key(), src.Value())
+ if err != nil {
+ return
+ }
+ }
+ err = src.Error()
+ if err != nil {
+ return
+ }
+
+ n = w.tw.EntriesLen()
+ f, err = w.finish()
+ return
+}
+
+// Opens table. It returns a cache handle, which should
+// be released after use.
+func (t *tOps) open(f *tFile) (ch *cache.Handle, err error) {
+ ch = t.cache.Get(0, uint64(f.fd.Num), func() (size int, value cache.Value) {
+ var r storage.Reader
+ r, err = t.s.stor.Open(f.fd)
+ if err != nil {
+ return 0, nil
+ }
+
+ var bcache *cache.NamespaceGetter
+ if t.bcache != nil {
+ bcache = &cache.NamespaceGetter{Cache: t.bcache, NS: uint64(f.fd.Num)}
+ }
+
+ var tr *table.Reader
+ tr, err = table.NewReader(r, f.size, f.fd, bcache, t.bpool, t.s.o.Options)
+ if err != nil {
+ r.Close()
+ return 0, nil
+ }
+ return 1, tr
+
+ })
+ if ch == nil && err == nil {
+ err = ErrClosed
+ }
+ return
+}
+
+// Finds key/value pair whose key is greater than or equal to the
+// given key.
+func (t *tOps) find(f *tFile, key []byte, ro *opt.ReadOptions) (rkey, rvalue []byte, err error) {
+ ch, err := t.open(f)
+ if err != nil {
+ return nil, nil, err
+ }
+ defer ch.Release()
+ return ch.Value().(*table.Reader).Find(key, true, ro)
+}
+
+// Finds key that is greater than or equal to the given key.
+func (t *tOps) findKey(f *tFile, key []byte, ro *opt.ReadOptions) (rkey []byte, err error) {
+ ch, err := t.open(f)
+ if err != nil {
+ return nil, err
+ }
+ defer ch.Release()
+ return ch.Value().(*table.Reader).FindKey(key, true, ro)
+}
+
+// Returns approximate offset of the given key.
+func (t *tOps) offsetOf(f *tFile, key []byte) (offset int64, err error) {
+ ch, err := t.open(f)
+ if err != nil {
+ return
+ }
+ defer ch.Release()
+ return ch.Value().(*table.Reader).OffsetOf(key)
+}
+
+// Creates an iterator from the given table.
+func (t *tOps) newIterator(f *tFile, slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+ ch, err := t.open(f)
+ if err != nil {
+ return iterator.NewEmptyIterator(err)
+ }
+ iter := ch.Value().(*table.Reader).NewIterator(slice, ro)
+ iter.SetReleaser(ch)
+ return iter
+}
+
+// Removes the table from persistent storage. It waits until
+// no one uses the table.
+func (t *tOps) remove(f *tFile) {
+ t.cache.Delete(0, uint64(f.fd.Num), func() {
+ if err := t.s.stor.Remove(f.fd); err != nil {
+ t.s.logf("table@remove removing @%d %q", f.fd.Num, err)
+ } else {
+ t.s.logf("table@remove removed @%d", f.fd.Num)
+ }
+ if t.bcache != nil {
+ t.bcache.EvictNS(uint64(f.fd.Num))
+ }
+ })
+}
+
+// Closes the table ops instance. It will close all tables,
+// regardless of whether they are still in use.
+func (t *tOps) close() {
+ t.bpool.Close()
+ t.cache.Close()
+ if t.bcache != nil {
+ t.bcache.CloseWeak()
+ }
+}
+
+// Creates new initialized table ops instance.
+func newTableOps(s *session) *tOps {
+ var (
+ cacher cache.Cacher
+ bcache *cache.Cache
+ bpool *util.BufferPool
+ )
+ if s.o.GetOpenFilesCacheCapacity() > 0 {
+ cacher = cache.NewLRU(s.o.GetOpenFilesCacheCapacity())
+ }
+ if !s.o.GetDisableBlockCache() {
+ var bcacher cache.Cacher
+ if s.o.GetBlockCacheCapacity() > 0 {
+ bcacher = cache.NewLRU(s.o.GetBlockCacheCapacity())
+ }
+ bcache = cache.NewCache(bcacher)
+ }
+ if !s.o.GetDisableBufferPool() {
+ bpool = util.NewBufferPool(s.o.GetBlockSize() + 5)
+ }
+ return &tOps{
+ s: s,
+ noSync: s.o.GetNoSync(),
+ cache: cache.NewCache(cacher),
+ bcache: bcache,
+ bpool: bpool,
+ }
+}
+
+// tWriter wraps the table writer. It keeps track of the file descriptor
+// and the added key range.
+type tWriter struct {
+ t *tOps
+
+ fd storage.FileDesc
+ w storage.Writer
+ tw *table.Writer
+
+ first, last []byte
+}
+
+// Append key/value pair to the table.
+func (w *tWriter) append(key, value []byte) error {
+ if w.first == nil {
+ w.first = append([]byte{}, key...)
+ }
+ w.last = append(w.last[:0], key...)
+ return w.tw.Append(key, value)
+}
+
+// Returns true if the table is empty.
+func (w *tWriter) empty() bool {
+ return w.first == nil
+}
+
+// Closes the storage.Writer.
+func (w *tWriter) close() {
+ if w.w != nil {
+ w.w.Close()
+ w.w = nil
+ }
+}
+
+// Finalizes the table and returns table file.
+func (w *tWriter) finish() (f *tFile, err error) {
+ defer w.close()
+ err = w.tw.Close()
+ if err != nil {
+ return
+ }
+ if !w.t.noSync {
+ err = w.w.Sync()
+ if err != nil {
+ return
+ }
+ }
+ f = newTableFile(w.fd, int64(w.tw.BytesLen()), internalKey(w.first), internalKey(w.last))
+ return
+}
+
+// Drops the table.
+func (w *tWriter) drop() {
+ w.close()
+ w.t.s.stor.Remove(w.fd)
+ w.t.s.reuseFileNum(w.fd.Num)
+ w.tw = nil
+ w.first = nil
+ w.last = nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go b/vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go
new file mode 100644
index 000000000..c5be420b3
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go
@@ -0,0 +1,1134 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package table
+
+import (
+ "encoding/binary"
+ "fmt"
+ "io"
+ "sort"
+ "strings"
+ "sync"
+
+ "github.com/golang/snappy"
+
+ "github.com/syndtr/goleveldb/leveldb/cache"
+ "github.com/syndtr/goleveldb/leveldb/comparer"
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/filter"
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// Reader errors.
+var (
+ ErrNotFound = errors.ErrNotFound
+ ErrReaderReleased = errors.New("leveldb/table: reader released")
+ ErrIterReleased = errors.New("leveldb/table: iterator released")
+)
+
+// ErrCorrupted describes error due to corruption. This error will be wrapped
+// with errors.ErrCorrupted.
+type ErrCorrupted struct {
+ Pos int64
+ Size int64
+ Kind string
+ Reason string
+}
+
+func (e *ErrCorrupted) Error() string {
+ return fmt.Sprintf("leveldb/table: corruption on %s (pos=%d): %s", e.Kind, e.Pos, e.Reason)
+}
+
+func max(x, y int) int {
+ if x > y {
+ return x
+ }
+ return y
+}
+
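+// block holds the raw contents of a single table block: a sequence of
+// prefix-compressed entries followed by an array of 4-byte restart
+// offsets. Each entry is laid out as follows (a sketch of the LevelDB
+// block format, matching what entry parses below):
+//
+//	shared key length   (uvarint)
+//	unshared key length (uvarint)
+//	value length        (uvarint)
+//	key suffix          (unshared bytes)
+//	value               (bytes)
+//
+// At a restart point the shared key length is always zero.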
+type block struct {
+ bpool *util.BufferPool
+ bh blockHandle
+ data []byte
+ restartsLen int
+ restartsOffset int
+}
+
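+// seek binary-searches the restart array within [rstart, rlimit) and
+// returns the last restart point whose key is less than or equal to the
+// given key, clamped to rstart.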
+func (b *block) seek(cmp comparer.Comparer, rstart, rlimit int, key []byte) (index, offset int, err error) {
+ index = sort.Search(b.restartsLen-rstart-(b.restartsLen-rlimit), func(i int) bool {
+ offset := int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*(rstart+i):]))
+ offset++ // shared always zero, since this is a restart point
+ v1, n1 := binary.Uvarint(b.data[offset:]) // key length
+ _, n2 := binary.Uvarint(b.data[offset+n1:]) // value length
+ m := offset + n1 + n2
+ return cmp.Compare(b.data[m:m+int(v1)], key) > 0
+ }) + rstart - 1
+ if index < rstart {
+ // The smallest key is greater than the key sought.
+ index = rstart
+ }
+ offset = int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*index:]))
+ return
+}
+
+func (b *block) restartIndex(rstart, rlimit, offset int) int {
+ return sort.Search(b.restartsLen-rstart-(b.restartsLen-rlimit), func(i int) bool {
+ return int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*(rstart+i):])) > offset
+ }) + rstart - 1
+}
+
+func (b *block) restartOffset(index int) int {
+ return int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*index:]))
+}
+
+func (b *block) entry(offset int) (key, value []byte, nShared, n int, err error) {
+ if offset >= b.restartsOffset {
+ if offset != b.restartsOffset {
+ err = &ErrCorrupted{Reason: "entries offset not aligned"}
+ }
+ return
+ }
+ v0, n0 := binary.Uvarint(b.data[offset:]) // Shared prefix length
+ v1, n1 := binary.Uvarint(b.data[offset+n0:]) // Key length
+ v2, n2 := binary.Uvarint(b.data[offset+n0+n1:]) // Value length
+ m := n0 + n1 + n2
+ n = m + int(v1) + int(v2)
+ if n0 <= 0 || n1 <= 0 || n2 <= 0 || offset+n > b.restartsOffset {
+ err = &ErrCorrupted{Reason: "entries corrupted"}
+ return
+ }
+ key = b.data[offset+m : offset+m+int(v1)]
+ value = b.data[offset+m+int(v1) : offset+n]
+ nShared = int(v0)
+ return
+}
+
+func (b *block) Release() {
+ b.bpool.Put(b.data)
+ b.bpool = nil
+ b.data = nil
+}
+
+type dir int
+
+const (
+ dirReleased dir = iota - 1
+ dirSOI
+ dirEOI
+ dirBackward
+ dirForward
+)
+
+type blockIter struct {
+ tr *Reader
+ block *block
+ blockReleaser util.Releaser
+ releaser util.Releaser
+ key, value []byte
+ offset int
+ // Previous offset, only filled by Next.
+ prevOffset int
+ prevNode []int
+ prevKeys []byte
+ restartIndex int
+ // Iterator direction.
+ dir dir
+ // Restart index slice range.
+ riStart int
+ riLimit int
+ // Offset slice range.
+ offsetStart int
+ offsetRealStart int
+ offsetLimit int
+ // Error.
+ err error
+}
+
+func (i *blockIter) sErr(err error) {
+ i.err = err
+ i.key = nil
+ i.value = nil
+ i.prevNode = nil
+ i.prevKeys = nil
+}
+
+func (i *blockIter) reset() {
+ if i.dir == dirBackward {
+ i.prevNode = i.prevNode[:0]
+ i.prevKeys = i.prevKeys[:0]
+ }
+ i.restartIndex = i.riStart
+ i.offset = i.offsetStart
+ i.dir = dirSOI
+ i.key = i.key[:0]
+ i.value = nil
+}
+
+func (i *blockIter) isFirst() bool {
+ switch i.dir {
+ case dirForward:
+ return i.prevOffset == i.offsetRealStart
+ case dirBackward:
+ return len(i.prevNode) == 1 && i.restartIndex == i.riStart
+ }
+ return false
+}
+
+func (i *blockIter) isLast() bool {
+ switch i.dir {
+ case dirForward, dirBackward:
+ return i.offset == i.offsetLimit
+ }
+ return false
+}
+
+func (i *blockIter) First() bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if i.dir == dirBackward {
+ i.prevNode = i.prevNode[:0]
+ i.prevKeys = i.prevKeys[:0]
+ }
+ i.dir = dirSOI
+ return i.Next()
+}
+
+func (i *blockIter) Last() bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if i.dir == dirBackward {
+ i.prevNode = i.prevNode[:0]
+ i.prevKeys = i.prevKeys[:0]
+ }
+ i.dir = dirEOI
+ return i.Prev()
+}
+
+func (i *blockIter) Seek(key []byte) bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ ri, offset, err := i.block.seek(i.tr.cmp, i.riStart, i.riLimit, key)
+ if err != nil {
+ i.sErr(err)
+ return false
+ }
+ i.restartIndex = ri
+ i.offset = max(i.offsetStart, offset)
+ if i.dir == dirSOI || i.dir == dirEOI {
+ i.dir = dirForward
+ }
+ for i.Next() {
+ if i.tr.cmp.Compare(i.key, key) >= 0 {
+ return true
+ }
+ }
+ return false
+}
+
+func (i *blockIter) Next() bool {
+ if i.dir == dirEOI || i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if i.dir == dirSOI {
+ i.restartIndex = i.riStart
+ i.offset = i.offsetStart
+ } else if i.dir == dirBackward {
+ i.prevNode = i.prevNode[:0]
+ i.prevKeys = i.prevKeys[:0]
+ }
+ for i.offset < i.offsetRealStart {
+ key, value, nShared, n, err := i.block.entry(i.offset)
+ if err != nil {
+ i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err))
+ return false
+ }
+ if n == 0 {
+ i.dir = dirEOI
+ return false
+ }
+ i.key = append(i.key[:nShared], key...)
+ i.value = value
+ i.offset += n
+ }
+ if i.offset >= i.offsetLimit {
+ i.dir = dirEOI
+ if i.offset != i.offsetLimit {
+ i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned"))
+ }
+ return false
+ }
+ key, value, nShared, n, err := i.block.entry(i.offset)
+ if err != nil {
+ i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err))
+ return false
+ }
+ if n == 0 {
+ i.dir = dirEOI
+ return false
+ }
+ i.key = append(i.key[:nShared], key...)
+ i.value = value
+ i.prevOffset = i.offset
+ i.offset += n
+ i.dir = dirForward
+ return true
+}
+
+func (i *blockIter) Prev() bool {
+ if i.dir == dirSOI || i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ var ri int
+ if i.dir == dirForward {
+ // Change direction.
+ i.offset = i.prevOffset
+ if i.offset == i.offsetRealStart {
+ i.dir = dirSOI
+ return false
+ }
+ ri = i.block.restartIndex(i.restartIndex, i.riLimit, i.offset)
+ i.dir = dirBackward
+ } else if i.dir == dirEOI {
+ // At the end of iterator.
+ i.restartIndex = i.riLimit
+ i.offset = i.offsetLimit
+ if i.offset == i.offsetRealStart {
+ i.dir = dirSOI
+ return false
+ }
+ ri = i.riLimit - 1
+ i.dir = dirBackward
+ } else if len(i.prevNode) == 1 {
+ // This is the end of a restart range.
+ i.offset = i.prevNode[0]
+ i.prevNode = i.prevNode[:0]
+ if i.restartIndex == i.riStart {
+ i.dir = dirSOI
+ return false
+ }
+ i.restartIndex--
+ ri = i.restartIndex
+ } else {
+ // In the middle of restart range, get from cache.
+ n := len(i.prevNode) - 3
+ node := i.prevNode[n:]
+ i.prevNode = i.prevNode[:n]
+ // Get the key.
+ ko := node[0]
+ i.key = append(i.key[:0], i.prevKeys[ko:]...)
+ i.prevKeys = i.prevKeys[:ko]
+ // Get the value.
+ vo := node[1]
+ vl := vo + node[2]
+ i.value = i.block.data[vo:vl]
+ i.offset = vl
+ return true
+ }
+ // Build entries cache.
+ i.key = i.key[:0]
+ i.value = nil
+ offset := i.block.restartOffset(ri)
+ if offset == i.offset {
+ ri--
+ if ri < 0 {
+ i.dir = dirSOI
+ return false
+ }
+ offset = i.block.restartOffset(ri)
+ }
+ i.prevNode = append(i.prevNode, offset)
+ for {
+ key, value, nShared, n, err := i.block.entry(offset)
+ if err != nil {
+ i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err))
+ return false
+ }
+ if offset >= i.offsetRealStart {
+ if i.value != nil {
+ // Appends 3 variables:
+ // 1. Previous keys offset
+ // 2. Value offset in the data block
+ // 3. Value length
+ i.prevNode = append(i.prevNode, len(i.prevKeys), offset-len(i.value), len(i.value))
+ i.prevKeys = append(i.prevKeys, i.key...)
+ }
+ i.value = value
+ }
+ i.key = append(i.key[:nShared], key...)
+ offset += n
+ // Stop if target offset reached.
+ if offset >= i.offset {
+ if offset != i.offset {
+ i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned"))
+ return false
+ }
+
+ break
+ }
+ }
+ i.restartIndex = ri
+ i.offset = offset
+ return true
+}
+
+func (i *blockIter) Key() []byte {
+ if i.err != nil || i.dir <= dirEOI {
+ return nil
+ }
+ return i.key
+}
+
+func (i *blockIter) Value() []byte {
+ if i.err != nil || i.dir <= dirEOI {
+ return nil
+ }
+ return i.value
+}
+
+func (i *blockIter) Release() {
+ if i.dir != dirReleased {
+ i.tr = nil
+ i.block = nil
+ i.prevNode = nil
+ i.prevKeys = nil
+ i.key = nil
+ i.value = nil
+ i.dir = dirReleased
+ if i.blockReleaser != nil {
+ i.blockReleaser.Release()
+ i.blockReleaser = nil
+ }
+ if i.releaser != nil {
+ i.releaser.Release()
+ i.releaser = nil
+ }
+ }
+}
+
+func (i *blockIter) SetReleaser(releaser util.Releaser) {
+ if i.dir == dirReleased {
+ panic(util.ErrReleased)
+ }
+ if i.releaser != nil && releaser != nil {
+ panic(util.ErrHasReleaser)
+ }
+ i.releaser = releaser
+}
+
+func (i *blockIter) Valid() bool {
+ return i.err == nil && (i.dir == dirBackward || i.dir == dirForward)
+}
+
+func (i *blockIter) Error() error {
+ return i.err
+}
+
+type filterBlock struct {
+ bpool *util.BufferPool
+ data []byte
+ oOffset int
+ baseLg uint
+ filtersNum int
+}
+
+func (b *filterBlock) contains(filter filter.Filter, offset uint64, key []byte) bool {
+ i := int(offset >> b.baseLg)
+ if i < b.filtersNum {
+ o := b.data[b.oOffset+i*4:]
+ n := int(binary.LittleEndian.Uint32(o))
+ m := int(binary.LittleEndian.Uint32(o[4:]))
+ if n < m && m <= b.oOffset {
+ return filter.Contains(b.data[n:m], key)
+ } else if n == m {
+ return false
+ }
+ }
+ return true
+}
+
+func (b *filterBlock) Release() {
+ b.bpool.Put(b.data)
+ b.bpool = nil
+ b.data = nil
+}
+
+type indexIter struct {
+ *blockIter
+ tr *Reader
+ slice *util.Range
+ // Options
+ fillCache bool
+}
+
+func (i *indexIter) Get() iterator.Iterator {
+ value := i.Value()
+ if value == nil {
+ return nil
+ }
+ dataBH, n := decodeBlockHandle(value)
+ if n == 0 {
+ return iterator.NewEmptyIterator(i.tr.newErrCorruptedBH(i.tr.indexBH, "bad data block handle"))
+ }
+
+ var slice *util.Range
+ if i.slice != nil && (i.blockIter.isFirst() || i.blockIter.isLast()) {
+ slice = i.slice
+ }
+ return i.tr.getDataIterErr(dataBH, slice, i.tr.verifyChecksum, i.fillCache)
+}
+
+// Reader is a table reader.
+type Reader struct {
+ mu sync.RWMutex
+ fd storage.FileDesc
+ reader io.ReaderAt
+ cache *cache.NamespaceGetter
+ err error
+ bpool *util.BufferPool
+ // Options
+ o *opt.Options
+ cmp comparer.Comparer
+ filter filter.Filter
+ verifyChecksum bool
+
+ dataEnd int64
+ metaBH, indexBH, filterBH blockHandle
+ indexBlock *block
+ filterBlock *filterBlock
+}
+
+func (r *Reader) blockKind(bh blockHandle) string {
+ switch bh.offset {
+ case r.metaBH.offset:
+ return "meta-block"
+ case r.indexBH.offset:
+ return "index-block"
+ case r.filterBH.offset:
+ if r.filterBH.length > 0 {
+ return "filter-block"
+ }
+ }
+ return "data-block"
+}
+
+func (r *Reader) newErrCorrupted(pos, size int64, kind, reason string) error {
+ return &errors.ErrCorrupted{Fd: r.fd, Err: &ErrCorrupted{Pos: pos, Size: size, Kind: kind, Reason: reason}}
+}
+
+func (r *Reader) newErrCorruptedBH(bh blockHandle, reason string) error {
+ return r.newErrCorrupted(int64(bh.offset), int64(bh.length), r.blockKind(bh), reason)
+}
+
+func (r *Reader) fixErrCorruptedBH(bh blockHandle, err error) error {
+ if cerr, ok := err.(*ErrCorrupted); ok {
+ cerr.Pos = int64(bh.offset)
+ cerr.Size = int64(bh.length)
+ cerr.Kind = r.blockKind(bh)
+ return &errors.ErrCorrupted{Fd: r.fd, Err: cerr}
+ }
+ return err
+}
+
+func (r *Reader) readRawBlock(bh blockHandle, verifyChecksum bool) ([]byte, error) {
+ data := r.bpool.Get(int(bh.length + blockTrailerLen))
+ if _, err := r.reader.ReadAt(data, int64(bh.offset)); err != nil && err != io.EOF {
+ return nil, err
+ }
+
+ if verifyChecksum {
+ n := bh.length + 1
+ checksum0 := binary.LittleEndian.Uint32(data[n:])
+ checksum1 := util.NewCRC(data[:n]).Value()
+ if checksum0 != checksum1 {
+ r.bpool.Put(data)
+ return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("checksum mismatch, want=%#x got=%#x", checksum0, checksum1))
+ }
+ }
+
+ switch data[bh.length] {
+ case blockTypeNoCompression:
+ data = data[:bh.length]
+ case blockTypeSnappyCompression:
+ decLen, err := snappy.DecodedLen(data[:bh.length])
+ if err != nil {
+ return nil, r.newErrCorruptedBH(bh, err.Error())
+ }
+ decData := r.bpool.Get(decLen)
+ decData, err = snappy.Decode(decData, data[:bh.length])
+ r.bpool.Put(data)
+ if err != nil {
+ r.bpool.Put(decData)
+ return nil, r.newErrCorruptedBH(bh, err.Error())
+ }
+ data = decData
+ default:
+ r.bpool.Put(data)
+ return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("unknown compression type %#x", data[bh.length]))
+ }
+ return data, nil
+}
+
+func (r *Reader) readBlock(bh blockHandle, verifyChecksum bool) (*block, error) {
+ data, err := r.readRawBlock(bh, verifyChecksum)
+ if err != nil {
+ return nil, err
+ }
+ restartsLen := int(binary.LittleEndian.Uint32(data[len(data)-4:]))
+ b := &block{
+ bpool: r.bpool,
+ bh: bh,
+ data: data,
+ restartsLen: restartsLen,
+ restartsOffset: len(data) - (restartsLen+1)*4,
+ }
+ return b, nil
+}
+
+func (r *Reader) readBlockCached(bh blockHandle, verifyChecksum, fillCache bool) (*block, util.Releaser, error) {
+ if r.cache != nil {
+ var (
+ err error
+ ch *cache.Handle
+ )
+ if fillCache {
+ ch = r.cache.Get(bh.offset, func() (size int, value cache.Value) {
+ var b *block
+ b, err = r.readBlock(bh, verifyChecksum)
+ if err != nil {
+ return 0, nil
+ }
+ return cap(b.data), b
+ })
+ } else {
+ ch = r.cache.Get(bh.offset, nil)
+ }
+ if ch != nil {
+ b, ok := ch.Value().(*block)
+ if !ok {
+ ch.Release()
+ return nil, nil, errors.New("leveldb/table: inconsistent block type")
+ }
+ return b, ch, err
+ } else if err != nil {
+ return nil, nil, err
+ }
+ }
+
+ b, err := r.readBlock(bh, verifyChecksum)
+ return b, b, err
+}
+
+func (r *Reader) readFilterBlock(bh blockHandle) (*filterBlock, error) {
+ data, err := r.readRawBlock(bh, true)
+ if err != nil {
+ return nil, err
+ }
+ n := len(data)
+ if n < 5 {
+ return nil, r.newErrCorruptedBH(bh, "too short")
+ }
+ m := n - 5
+ oOffset := int(binary.LittleEndian.Uint32(data[m:]))
+ if oOffset > m {
+ return nil, r.newErrCorruptedBH(bh, "invalid data-offsets offset")
+ }
+ b := &filterBlock{
+ bpool: r.bpool,
+ data: data,
+ oOffset: oOffset,
+ baseLg: uint(data[n-1]),
+ filtersNum: (m - oOffset) / 4,
+ }
+ return b, nil
+}
+
+func (r *Reader) readFilterBlockCached(bh blockHandle, fillCache bool) (*filterBlock, util.Releaser, error) {
+ if r.cache != nil {
+ var (
+ err error
+ ch *cache.Handle
+ )
+ if fillCache {
+ ch = r.cache.Get(bh.offset, func() (size int, value cache.Value) {
+ var b *filterBlock
+ b, err = r.readFilterBlock(bh)
+ if err != nil {
+ return 0, nil
+ }
+ return cap(b.data), b
+ })
+ } else {
+ ch = r.cache.Get(bh.offset, nil)
+ }
+ if ch != nil {
+ b, ok := ch.Value().(*filterBlock)
+ if !ok {
+ ch.Release()
+ return nil, nil, errors.New("leveldb/table: inconsistent block type")
+ }
+ return b, ch, err
+ } else if err != nil {
+ return nil, nil, err
+ }
+ }
+
+ b, err := r.readFilterBlock(bh)
+ return b, b, err
+}
+
+func (r *Reader) getIndexBlock(fillCache bool) (b *block, rel util.Releaser, err error) {
+ if r.indexBlock == nil {
+ return r.readBlockCached(r.indexBH, true, fillCache)
+ }
+ return r.indexBlock, util.NoopReleaser{}, nil
+}
+
+func (r *Reader) getFilterBlock(fillCache bool) (*filterBlock, util.Releaser, error) {
+ if r.filterBlock == nil {
+ return r.readFilterBlockCached(r.filterBH, fillCache)
+ }
+ return r.filterBlock, util.NoopReleaser{}, nil
+}
+
+func (r *Reader) newBlockIter(b *block, bReleaser util.Releaser, slice *util.Range, inclLimit bool) *blockIter {
+ bi := &blockIter{
+ tr: r,
+ block: b,
+ blockReleaser: bReleaser,
+ // Valid key should never be nil.
+ key: make([]byte, 0),
+ dir: dirSOI,
+ riStart: 0,
+ riLimit: b.restartsLen,
+ offsetStart: 0,
+ offsetRealStart: 0,
+ offsetLimit: b.restartsOffset,
+ }
+ if slice != nil {
+ if slice.Start != nil {
+ if bi.Seek(slice.Start) {
+ bi.riStart = b.restartIndex(bi.restartIndex, b.restartsLen, bi.prevOffset)
+ bi.offsetStart = b.restartOffset(bi.riStart)
+ bi.offsetRealStart = bi.prevOffset
+ } else {
+ bi.riStart = b.restartsLen
+ bi.offsetStart = b.restartsOffset
+ bi.offsetRealStart = b.restartsOffset
+ }
+ }
+ if slice.Limit != nil {
+ if bi.Seek(slice.Limit) && (!inclLimit || bi.Next()) {
+ bi.offsetLimit = bi.prevOffset
+ bi.riLimit = bi.restartIndex + 1
+ }
+ }
+ bi.reset()
+ if bi.offsetStart > bi.offsetLimit {
+ bi.sErr(errors.New("leveldb/table: invalid slice range"))
+ }
+ }
+ return bi
+}
+
+func (r *Reader) getDataIter(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator {
+ b, rel, err := r.readBlockCached(dataBH, verifyChecksum, fillCache)
+ if err != nil {
+ return iterator.NewEmptyIterator(err)
+ }
+ return r.newBlockIter(b, rel, slice, false)
+}
+
+func (r *Reader) getDataIterErr(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+
+ if r.err != nil {
+ return iterator.NewEmptyIterator(r.err)
+ }
+
+ return r.getDataIter(dataBH, slice, verifyChecksum, fillCache)
+}
+
+// NewIterator creates an iterator from the table.
+//
+// Slice allows slicing the iterator to only contain keys in the given
+// range. A nil Range.Start is treated as a key before all keys in the
+// table, and a nil Range.Limit is treated as a key after all keys in
+// the table.
+//
+// The returned iterator is not safe for concurrent use and should be released
+// after use.
+//
+// Also read Iterator documentation of the leveldb/iterator package.
+func (r *Reader) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+
+ if r.err != nil {
+ return iterator.NewEmptyIterator(r.err)
+ }
+
+ fillCache := !ro.GetDontFillCache()
+ indexBlock, rel, err := r.getIndexBlock(fillCache)
+ if err != nil {
+ return iterator.NewEmptyIterator(err)
+ }
+ index := &indexIter{
+ blockIter: r.newBlockIter(indexBlock, rel, slice, true),
+ tr: r,
+ slice: slice,
+ fillCache: !ro.GetDontFillCache(),
+ }
+ return iterator.NewIndexedIterator(index, opt.GetStrict(r.o, ro, opt.StrictReader))
+}
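+
+// A minimal iteration sketch (editor's illustration; tr is a *Reader
+// obtained from NewReader):
+//
+//	it := tr.NewIterator(nil, nil)
+//	for it.Next() {
+//		_ = it.Key() // key and value are valid only until the next call
+//	}
+//	it.Release()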
+
+func (r *Reader) find(key []byte, filtered bool, ro *opt.ReadOptions, noValue bool) (rkey, value []byte, err error) {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+
+ if r.err != nil {
+ err = r.err
+ return
+ }
+
+ indexBlock, rel, err := r.getIndexBlock(true)
+ if err != nil {
+ return
+ }
+ defer rel.Release()
+
+ index := r.newBlockIter(indexBlock, nil, nil, true)
+ defer index.Release()
+
+ if !index.Seek(key) {
+ if err = index.Error(); err == nil {
+ err = ErrNotFound
+ }
+ return
+ }
+
+ dataBH, n := decodeBlockHandle(index.Value())
+ if n == 0 {
+ r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle")
+ return nil, nil, r.err
+ }
+
+ // The filter should only be used for exact matches.
+ if filtered && r.filter != nil {
+ filterBlock, frel, ferr := r.getFilterBlock(true)
+ if ferr == nil {
+ if !filterBlock.contains(r.filter, dataBH.offset, key) {
+ frel.Release()
+ return nil, nil, ErrNotFound
+ }
+ frel.Release()
+ } else if !errors.IsCorrupted(ferr) {
+ return nil, nil, ferr
+ }
+ }
+
+ data := r.getDataIter(dataBH, nil, r.verifyChecksum, !ro.GetDontFillCache())
+ if !data.Seek(key) {
+ data.Release()
+ if err = data.Error(); err != nil {
+ return
+ }
+
+ // The nearest greater-than key is the first key of the next block.
+ if !index.Next() {
+ if err = index.Error(); err == nil {
+ err = ErrNotFound
+ }
+ return
+ }
+
+ dataBH, n = decodeBlockHandle(index.Value())
+ if n == 0 {
+ r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle")
+ return nil, nil, r.err
+ }
+
+ data = r.getDataIter(dataBH, nil, r.verifyChecksum, !ro.GetDontFillCache())
+ if !data.Next() {
+ data.Release()
+ if err = data.Error(); err == nil {
+ err = ErrNotFound
+ }
+ return
+ }
+ }
+
+ // The key doesn't use the block buffer, so there is no need to copy it.
+ rkey = data.Key()
+ if !noValue {
+ if r.bpool == nil {
+ value = data.Value()
+ } else {
+ // The value does use the block buffer, and since the buffer will be
+ // recycled, it needs to be copied.
+ value = append([]byte{}, data.Value()...)
+ }
+ }
+ data.Release()
+ return
+}
+
+// Find finds the key/value pair whose key is greater than or equal to
+// the given key. It returns ErrNotFound if the table doesn't contain
+// such a pair.
+// If filtered is true then the nearest 'block' will be checked against
+// 'filter data' (if present) and will immediately return ErrNotFound if
+// 'filter data' indicates that such pair doesn't exist.
+//
+// The caller may modify the contents of the returned slice as it is its
+// own copy.
+// It is safe to modify the contents of the argument after Find returns.
+func (r *Reader) Find(key []byte, filtered bool, ro *opt.ReadOptions) (rkey, value []byte, err error) {
+ return r.find(key, filtered, ro, false)
+}
+
+// FindKey finds the key that is greater than or equal to the given key.
+// It returns ErrNotFound if the table doesn't contain such a key.
+// If filtered is true then the nearest 'block' will be checked against
+// 'filter data' (if present) and will immediately return ErrNotFound if
+// 'filter data' indicates that such key doesn't exist.
+//
+// The caller may modify the contents of the returned slice as it is its
+// own copy.
+// It is safe to modify the contents of the argument after FindKey returns.
+func (r *Reader) FindKey(key []byte, filtered bool, ro *opt.ReadOptions) (rkey []byte, err error) {
+ rkey, _, err = r.find(key, filtered, ro, true)
+ return
+}
+
+// Get gets the value for the given key. It returns errors.ErrNotFound
+// if the table does not contain the key.
+//
+// The caller may modify the contents of the returned slice as it is its
+// own copy.
+// It is safe to modify the contents of the argument after Get returns.
+func (r *Reader) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+
+ if r.err != nil {
+ err = r.err
+ return
+ }
+
+ rkey, value, err := r.find(key, false, ro, false)
+ if err == nil && r.cmp.Compare(rkey, key) != 0 {
+ value = nil
+ err = ErrNotFound
+ }
+ return
+}
+
+// OffsetOf returns approximate offset for the given key.
+//
+// It is safe to modify the contents of the argument after OffsetOf returns.
+func (r *Reader) OffsetOf(key []byte) (offset int64, err error) {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+
+ if r.err != nil {
+ err = r.err
+ return
+ }
+
+ indexBlock, rel, err := r.readBlockCached(r.indexBH, true, true)
+ if err != nil {
+ return
+ }
+ defer rel.Release()
+
+ index := r.newBlockIter(indexBlock, nil, nil, true)
+ defer index.Release()
+ if index.Seek(key) {
+ dataBH, n := decodeBlockHandle(index.Value())
+ if n == 0 {
+ r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle")
+ return
+ }
+ offset = int64(dataBH.offset)
+ return
+ }
+ err = index.Error()
+ if err == nil {
+ offset = r.dataEnd
+ }
+ return
+}
+
+// Release implements util.Releaser.
+// It also closes the file if it is an io.Closer.
+func (r *Reader) Release() {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+
+ if closer, ok := r.reader.(io.Closer); ok {
+ closer.Close()
+ }
+ if r.indexBlock != nil {
+ r.indexBlock.Release()
+ r.indexBlock = nil
+ }
+ if r.filterBlock != nil {
+ r.filterBlock.Release()
+ r.filterBlock = nil
+ }
+ r.reader = nil
+ r.cache = nil
+ r.bpool = nil
+ r.err = ErrReaderReleased
+}
+
+// NewReader creates a new initialized table reader for the file.
+// The cache and bpool arguments are optional and can be nil.
+//
+// The returned table reader instance is safe for concurrent use.
+func NewReader(f io.ReaderAt, size int64, fd storage.FileDesc, cache *cache.NamespaceGetter, bpool *util.BufferPool, o *opt.Options) (*Reader, error) {
+ if f == nil {
+ return nil, errors.New("leveldb/table: nil file")
+ }
+
+ r := &Reader{
+ fd: fd,
+ reader: f,
+ cache: cache,
+ bpool: bpool,
+ o: o,
+ cmp: o.GetComparer(),
+ verifyChecksum: o.GetStrict(opt.StrictBlockChecksum),
+ }
+
+ if size < footerLen {
+ r.err = r.newErrCorrupted(0, size, "table", "too small")
+ return r, nil
+ }
+
+ footerPos := size - footerLen
+ var footer [footerLen]byte
+ if _, err := r.reader.ReadAt(footer[:], footerPos); err != nil && err != io.EOF {
+ return nil, err
+ }
+ if string(footer[footerLen-len(magic):footerLen]) != magic {
+ r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad magic number")
+ return r, nil
+ }
+
+ var n int
+ // Decode the metaindex block handle.
+ r.metaBH, n = decodeBlockHandle(footer[:])
+ if n == 0 {
+ r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad metaindex block handle")
+ return r, nil
+ }
+
+ // Decode the index block handle.
+ r.indexBH, n = decodeBlockHandle(footer[n:])
+ if n == 0 {
+ r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad index block handle")
+ return r, nil
+ }
+
+ // Read metaindex block.
+ metaBlock, err := r.readBlock(r.metaBH, true)
+ if err != nil {
+ if errors.IsCorrupted(err) {
+ r.err = err
+ return r, nil
+ }
+ return nil, err
+ }
+
+ // Set data end.
+ r.dataEnd = int64(r.metaBH.offset)
+
+ // Read metaindex.
+ metaIter := r.newBlockIter(metaBlock, nil, nil, true)
+ for metaIter.Next() {
+ key := string(metaIter.Key())
+ if !strings.HasPrefix(key, "filter.") {
+ continue
+ }
+ fn := key[7:]
+ if f0 := o.GetFilter(); f0 != nil && f0.Name() == fn {
+ r.filter = f0
+ } else {
+ for _, f0 := range o.GetAltFilters() {
+ if f0.Name() == fn {
+ r.filter = f0
+ break
+ }
+ }
+ }
+ if r.filter != nil {
+ filterBH, n := decodeBlockHandle(metaIter.Value())
+ if n == 0 {
+ continue
+ }
+ r.filterBH = filterBH
+ // Update data end.
+ r.dataEnd = int64(filterBH.offset)
+ break
+ }
+ }
+ metaIter.Release()
+ metaBlock.Release()
+
+ // Cache the index and filter blocks locally, since there is no global cache.
+ if cache == nil {
+ r.indexBlock, err = r.readBlock(r.indexBH, true)
+ if err != nil {
+ if errors.IsCorrupted(err) {
+ r.err = err
+ return r, nil
+ }
+ return nil, err
+ }
+ if r.filter != nil {
+ r.filterBlock, err = r.readFilterBlock(r.filterBH)
+ if err != nil {
+ if !errors.IsCorrupted(err) {
+ return nil, err
+ }
+
+ // Don't use filter then.
+ r.filter = nil
+ }
+ }
+ }
+
+ return r, nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table/table.go b/vendor/github.com/syndtr/goleveldb/leveldb/table/table.go
new file mode 100644
index 000000000..beacdc1f0
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/table/table.go
@@ -0,0 +1,177 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package table allows reading and writing sorted key/value pairs.
+package table
+
+import (
+ "encoding/binary"
+)
+
+/*
+Table:
+
+A table consists of one or more data blocks, an optional filter block,
+a metaindex block, an index block and a table footer. The metaindex block
+is a special block used to keep parameters of the table, such as the
+filter block name and its block handle. The index block is a special block
+used to keep a record of the data blocks' offsets and lengths; the index
+block uses one as its restart interval. The keys used by the index block
+are the last key of the preceding block, a shorter separator between
+adjacent blocks, or a shorter successor of the last key of the last block.
+The filter block is an optional block containing a sequence of filter data
+generated by a filter generator.
+
+Table data structure:
+ + optional
+ /
+ +--------------+--------------+--------------+------+-------+-----------------+-------------+--------+
+ | data block 1 | ... | data block n | filter block | metaindex block | index block | footer |
+ +--------------+--------------+--------------+--------------+-----------------+-------------+--------+
+
+ Each block is followed by a 5-byte trailer containing the compression type and a checksum.
+
+Table block trailer:
+
+ +---------------------------+-------------------+
+ | compression type (1-byte) | checksum (4-byte) |
+ +---------------------------+-------------------+
+
+ The checksum is a CRC-32 computed using Castagnoli's polynomial. The
+ compression type is also included in the checksum.
+
+Table footer:
+
+ +------------------- 40-bytes -------------------+
+ / \
+ +------------------------+--------------------+------+-----------------+
+ | metaindex block handle / index block handle / ---- | magic (8-bytes) |
+ +------------------------+--------------------+------+-----------------+
+
+ The magic is the first 64 bits of the SHA-1 sum of "http://code.google.com/p/leveldb/".
+
+NOTE: All fixed-length integers are little-endian.
+*/
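+
+// To make the footer concrete, a sketch of what NewReader in reader.go does
+// after validating the magic bytes (editor's illustration):
+//
+//	footerPos := size - footerLen // footerLen == 48
+//	f.ReadAt(footer[:], footerPos)
+//	metaBH, n := decodeBlockHandle(footer[:])   // metaindex block handle
+//	indexBH, _ := decodeBlockHandle(footer[n:]) // index block handle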
+
+/*
+Block:
+
+A block consists of one or more key/value entries and a block trailer.
+A block entry shares a key prefix with its preceding key until a restart
+point is reached. A block should contain at least one restart point.
+The first restart point is always zero.
+
+Block data structure:
+
+ + restart point + restart point (depends on restart interval)
+ / /
+ +---------------+---------------+---------------+---------------+---------+
+ | block entry 1 | block entry 2 | ... | block entry n | trailer |
+ +---------------+---------------+---------------+---------------+---------+
+
+Key/value entry:
+
+ +---- key len ----+
+ / \
+ +-------+---------+-----------+---------+--------------------+--------------+----------------+
+ | shared (varint) | not shared (varint) | value len (varint) | key (varlen) | value (varlen) |
+ +-----------------+---------------------+--------------------+--------------+----------------+
+
+ A block entry shares a key prefix with its preceding key:
+ Conditions:
+ restart_interval=2
+ entry one : key=deck,value=v1
+ entry two : key=dock,value=v2
+ entry three: key=duck,value=v3
+ The entries will be encoded as follows:
+
+ + restart point (offset=0) + restart point (offset=16)
+ / /
+ +-----+-----+-----+----------+--------+-----+-----+-----+---------+--------+-----+-----+-----+----------+--------+
+ | 0 | 4 | 2 | "deck" | "v1" | 1 | 3 | 2 | "ock" | "v2" | 0 | 4 | 2 | "duck" | "v3" |
+ +-----+-----+-----+----------+--------+-----+-----+-----+---------+--------+-----+-----+-----+----------+--------+
+ \ / \ / \ /
+ +----------- entry one -----------+ +----------- entry two ----------+ +---------- entry three ----------+
+
+ The block trailer will contain two restart points:
+
+ +------------+-----------+--------+
+ | 0 | 16 | 2 |
+ +------------+-----------+---+----+
+ \ / \
+ +-- restart points --+ + restart points length
+
+Block trailer:
+
+ +-- 4-bytes --+
+ / \
+ +-----------------+-----------------+-----------------+------------------------------+
+ | restart point 1 | .... | restart point n | restart points len (4-bytes) |
+ +-----------------+-----------------+-----------------+------------------------------+
+
+
+NOTE: All fixed-length integers are little-endian.
+*/
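+
+// As a concrete decoding sketch (editor's note): for entry two above, a
+// reader sees shared=1, not-shared=3, value-len=2 and rebuilds the key by
+// appending the stored suffix to the shared prefix of the previous key:
+//
+//	key = append(prevKey[:nShared], suffix...) // "d" + "ock" == "dock"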
+
+/*
+Filter block:
+
+A filter block consists of one or more filter data and a filter block trailer.
+The trailer contains the filter data offsets, a trailer offset and a 1-byte base Lg.
+
+Filter block data structure:
+
+ + offset 1 + offset 2 + offset n + trailer offset
+ / / / /
+ +---------------+---------------+---------------+---------+
+ | filter data 1 | ... | filter data n | trailer |
+ +---------------+---------------+---------------+---------+
+
+Filter block trailer:
+
+ +- 4-bytes -+
+ / \
+ +---------------+---------------+---------------+-------------------------------+------------------+
+ | data 1 offset | .... | data n offset | data-offsets offset (4-bytes) | base Lg (1-byte) |
+ +-------------- +---------------+---------------+-------------------------------+------------------+
+
+
+NOTE: All fixed-length integers are little-endian.
+*/
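+
+// Mapping a data block offset to its filter index is a single shift
+// (editor's sketch; compare filterBlock.contains in reader.go):
+//
+//	i := int(dataBlockOffset >> baseLg) // baseLg is filterBaseLg == 11 here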
+
+const (
+ blockTrailerLen = 5
+ footerLen = 48
+
+ magic = "\x57\xfb\x80\x8b\x24\x75\x47\xdb"
+
+ // The block type gives the per-block compression format.
+ // These constants are part of the file format and should not be changed.
+ blockTypeNoCompression = 0
+ blockTypeSnappyCompression = 1
+
+ // Generate a new filter every 2KB of data.
+ filterBaseLg = 11
+ filterBase = 1 << filterBaseLg
+)
+
+type blockHandle struct {
+ offset, length uint64
+}
+
+func decodeBlockHandle(src []byte) (blockHandle, int) {
+ offset, n := binary.Uvarint(src)
+ length, m := binary.Uvarint(src[n:])
+ if n == 0 || m == 0 {
+ return blockHandle{}, 0
+ }
+ return blockHandle{offset, length}, n + m
+}
+
+func encodeBlockHandle(dst []byte, b blockHandle) int {
+ n := binary.PutUvarint(dst, b.offset)
+ m := binary.PutUvarint(dst[n:], b.length)
+ return n + m
+}
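+
+// Round-trip sketch (editor's illustration): a handle is just two uvarints,
+// so small offsets and lengths encode very compactly.
+//
+//	var buf [2 * binary.MaxVarintLen64]byte
+//	n := encodeBlockHandle(buf[:], blockHandle{offset: 0, length: 16})
+//	bh, m := decodeBlockHandle(buf[:n]) // bh == blockHandle{0, 16}, m == n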
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table/writer.go b/vendor/github.com/syndtr/goleveldb/leveldb/table/writer.go
new file mode 100644
index 000000000..b96b271d8
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/table/writer.go
@@ -0,0 +1,375 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package table
+
+import (
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "io"
+
+ "github.com/golang/snappy"
+
+ "github.com/syndtr/goleveldb/leveldb/comparer"
+ "github.com/syndtr/goleveldb/leveldb/filter"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+func sharedPrefixLen(a, b []byte) int {
+ i, n := 0, len(a)
+ if n > len(b) {
+ n = len(b)
+ }
+ for i < n && a[i] == b[i] {
+ i++
+ }
+ return i
+}
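+
+// For example (editor's note): sharedPrefixLen([]byte("deck"), []byte("dock"))
+// returns 1, which is why the second entry in the example block of table.go
+// stores only the suffix "ock".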
+
+type blockWriter struct {
+ restartInterval int
+ buf util.Buffer
+ nEntries int
+ prevKey []byte
+ restarts []uint32
+ scratch []byte
+}
+
+func (w *blockWriter) append(key, value []byte) {
+ nShared := 0
+ if w.nEntries%w.restartInterval == 0 {
+ w.restarts = append(w.restarts, uint32(w.buf.Len()))
+ } else {
+ nShared = sharedPrefixLen(w.prevKey, key)
+ }
+ n := binary.PutUvarint(w.scratch[0:], uint64(nShared))
+ n += binary.PutUvarint(w.scratch[n:], uint64(len(key)-nShared))
+ n += binary.PutUvarint(w.scratch[n:], uint64(len(value)))
+ w.buf.Write(w.scratch[:n])
+ w.buf.Write(key[nShared:])
+ w.buf.Write(value)
+ w.prevKey = append(w.prevKey[:0], key...)
+ w.nEntries++
+}
+
+func (w *blockWriter) finish() {
+ // Write restarts entry.
+ if w.nEntries == 0 {
+ // Must have at least one restart entry.
+ w.restarts = append(w.restarts, 0)
+ }
+ w.restarts = append(w.restarts, uint32(len(w.restarts)))
+ for _, x := range w.restarts {
+ buf4 := w.buf.Alloc(4)
+ binary.LittleEndian.PutUint32(buf4, x)
+ }
+}
+
+func (w *blockWriter) reset() {
+ w.buf.Reset()
+ w.nEntries = 0
+ w.restarts = w.restarts[:0]
+}
+
+func (w *blockWriter) bytesLen() int {
+ restartsLen := len(w.restarts)
+ if restartsLen == 0 {
+ restartsLen = 1
+ }
+ return w.buf.Len() + 4*restartsLen + 4
+}
+
+type filterWriter struct {
+ generator filter.FilterGenerator
+ buf util.Buffer
+ nKeys int
+ offsets []uint32
+}
+
+func (w *filterWriter) add(key []byte) {
+ if w.generator == nil {
+ return
+ }
+ w.generator.Add(key)
+ w.nKeys++
+}
+
+func (w *filterWriter) flush(offset uint64) {
+ if w.generator == nil {
+ return
+ }
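+ // One filter covers filterBase (2KB) of table file offset; keep
+ // generating until the filter covering the given offset exists.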
+ for x := int(offset / filterBase); x > len(w.offsets); {
+ w.generate()
+ }
+}
+
+func (w *filterWriter) finish() {
+ if w.generator == nil {
+ return
+ }
+ // Generate the filter for any remaining keys.
+ if w.nKeys > 0 {
+ w.generate()
+ }
+ w.offsets = append(w.offsets, uint32(w.buf.Len()))
+ for _, x := range w.offsets {
+ buf4 := w.buf.Alloc(4)
+ binary.LittleEndian.PutUint32(buf4, x)
+ }
+ w.buf.WriteByte(filterBaseLg)
+}
+
+func (w *filterWriter) generate() {
+ // Record offset.
+ w.offsets = append(w.offsets, uint32(w.buf.Len()))
+ // Generate filters.
+ if w.nKeys > 0 {
+ w.generator.Generate(&w.buf)
+ w.nKeys = 0
+ }
+}
+
+// Writer is a table writer.
+type Writer struct {
+ writer io.Writer
+ err error
+ // Options
+ cmp comparer.Comparer
+ filter filter.Filter
+ compression opt.Compression
+ blockSize int
+
+ dataBlock blockWriter
+ indexBlock blockWriter
+ filterBlock filterWriter
+ pendingBH blockHandle
+ offset uint64
+ nEntries int
+ // Scratch is allocated large enough for 5 uvarints. The block writers
+ // must not use the first 20 bytes, since those are reserved for encoding
+ // a block handle, which is then appended through the block writer itself.
+ scratch [50]byte
+ comparerScratch []byte
+ compressionScratch []byte
+}
+
+func (w *Writer) writeBlock(buf *util.Buffer, compression opt.Compression) (bh blockHandle, err error) {
+ // Compress the buffer if necessary.
+ var b []byte
+ if compression == opt.SnappyCompression {
+ // Allocate scratch space large enough for the compressed output and the block trailer.
+ if n := snappy.MaxEncodedLen(buf.Len()) + blockTrailerLen; len(w.compressionScratch) < n {
+ w.compressionScratch = make([]byte, n)
+ }
+ compressed := snappy.Encode(w.compressionScratch, buf.Bytes())
+ n := len(compressed)
+ b = compressed[:n+blockTrailerLen]
+ b[n] = blockTypeSnappyCompression
+ } else {
+ tmp := buf.Alloc(blockTrailerLen)
+ tmp[0] = blockTypeNoCompression
+ b = buf.Bytes()
+ }
+
+ // Calculate the checksum.
+ n := len(b) - 4
+ checksum := util.NewCRC(b[:n]).Value()
+ binary.LittleEndian.PutUint32(b[n:], checksum)
+
+ // Write the buffer to the file.
+ _, err = w.writer.Write(b)
+ if err != nil {
+ return
+ }
+ bh = blockHandle{w.offset, uint64(len(b) - blockTrailerLen)}
+ w.offset += uint64(len(b))
+ return
+}
+
+func (w *Writer) flushPendingBH(key []byte) {
+ if w.pendingBH.length == 0 {
+ return
+ }
+ var separator []byte
+ if len(key) == 0 {
+ separator = w.cmp.Successor(w.comparerScratch[:0], w.dataBlock.prevKey)
+ } else {
+ separator = w.cmp.Separator(w.comparerScratch[:0], w.dataBlock.prevKey, key)
+ }
+ if separator == nil {
+ separator = w.dataBlock.prevKey
+ } else {
+ w.comparerScratch = separator
+ }
+ n := encodeBlockHandle(w.scratch[:20], w.pendingBH)
+ // Append the block handle to the index block.
+ w.indexBlock.append(separator, w.scratch[:n])
+ // Reset prev key of the data block.
+ w.dataBlock.prevKey = w.dataBlock.prevKey[:0]
+ // Clear pending block handle.
+ w.pendingBH = blockHandle{}
+}
+
+func (w *Writer) finishBlock() error {
+ w.dataBlock.finish()
+ bh, err := w.writeBlock(&w.dataBlock.buf, w.compression)
+ if err != nil {
+ return err
+ }
+ w.pendingBH = bh
+ // Reset the data block.
+ w.dataBlock.reset()
+ // Flush the filter block.
+ w.filterBlock.flush(w.offset)
+ return nil
+}
+
+// Append appends a key/value pair to the table. The keys passed must
+// be in increasing order.
+//
+// It is safe to modify the contents of the arguments after Append returns.
+func (w *Writer) Append(key, value []byte) error {
+ if w.err != nil {
+ return w.err
+ }
+ if w.nEntries > 0 && w.cmp.Compare(w.dataBlock.prevKey, key) >= 0 {
+ w.err = fmt.Errorf("leveldb/table: Writer: keys are not in increasing order: %q, %q", w.dataBlock.prevKey, key)
+ return w.err
+ }
+
+ w.flushPendingBH(key)
+ // Append key/value pair to the data block.
+ w.dataBlock.append(key, value)
+ // Add key to the filter block.
+ w.filterBlock.add(key)
+
+ // Finish the data block if block size target reached.
+ if w.dataBlock.bytesLen() >= w.blockSize {
+ if err := w.finishBlock(); err != nil {
+ w.err = err
+ return w.err
+ }
+ }
+ w.nEntries++
+ return nil
+}
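+
+// Typical call pattern (editor's sketch; keys must be appended in increasing
+// order under the table's comparer):
+//
+//	_ = w.Append([]byte("alpha"), []byte("v1"))
+//	_ = w.Append([]byte("beta"), []byte("v2"))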
+
+// BlocksLen returns number of blocks written so far.
+func (w *Writer) BlocksLen() int {
+ n := w.indexBlock.nEntries
+ if w.pendingBH.length > 0 {
+ // Includes the pending block.
+ n++
+ }
+ return n
+}
+
+// EntriesLen returns number of entries added so far.
+func (w *Writer) EntriesLen() int {
+ return w.nEntries
+}
+
+// BytesLen returns number of bytes written so far.
+func (w *Writer) BytesLen() int {
+ return int(w.offset)
+}
+
+// Close will finalize the table. Calling Append is not possible
+// after Close, but calling BlocksLen, EntriesLen and BytesLen
+// is still possible.
+func (w *Writer) Close() error {
+ if w.err != nil {
+ return w.err
+ }
+
+ // Write the last data block, or an empty data block if there
+ // aren't any data blocks at all.
+ if w.dataBlock.nEntries > 0 || w.nEntries == 0 {
+ if err := w.finishBlock(); err != nil {
+ w.err = err
+ return w.err
+ }
+ }
+ w.flushPendingBH(nil)
+
+ // Write the filter block.
+ var filterBH blockHandle
+ w.filterBlock.finish()
+ if buf := &w.filterBlock.buf; buf.Len() > 0 {
+ filterBH, w.err = w.writeBlock(buf, opt.NoCompression)
+ if w.err != nil {
+ return w.err
+ }
+ }
+
+ // Write the metaindex block.
+ if filterBH.length > 0 {
+ key := []byte("filter." + w.filter.Name())
+ n := encodeBlockHandle(w.scratch[:20], filterBH)
+ w.dataBlock.append(key, w.scratch[:n])
+ }
+ w.dataBlock.finish()
+ metaindexBH, err := w.writeBlock(&w.dataBlock.buf, w.compression)
+ if err != nil {
+ w.err = err
+ return w.err
+ }
+
+ // Write the index block.
+ w.indexBlock.finish()
+ indexBH, err := w.writeBlock(&w.indexBlock.buf, w.compression)
+ if err != nil {
+ w.err = err
+ return w.err
+ }
+
+ // Write the table footer.
+ footer := w.scratch[:footerLen]
+ for i := range footer {
+ footer[i] = 0
+ }
+ n := encodeBlockHandle(footer, metaindexBH)
+ encodeBlockHandle(footer[n:], indexBH)
+ copy(footer[footerLen-len(magic):], magic)
+ if _, err := w.writer.Write(footer); err != nil {
+ w.err = err
+ return w.err
+ }
+ w.offset += footerLen
+
+ w.err = errors.New("leveldb/table: writer is closed")
+ return nil
+}
+
+// NewWriter creates a new initialized table writer for the file.
+//
+// Table writer is not safe for concurrent use.
+func NewWriter(f io.Writer, o *opt.Options) *Writer {
+ w := &Writer{
+ writer: f,
+ cmp: o.GetComparer(),
+ filter: o.GetFilter(),
+ compression: o.GetCompression(),
+ blockSize: o.GetBlockSize(),
+ comparerScratch: make([]byte, 0),
+ }
+ // data block
+ w.dataBlock.restartInterval = o.GetBlockRestartInterval()
+ // The first 20 bytes are reserved for encoding a block handle.
+ w.dataBlock.scratch = w.scratch[20:]
+ // index block
+ w.indexBlock.restartInterval = 1
+ w.indexBlock.scratch = w.scratch[20:]
+ // filter block
+ if w.filter != nil {
+ w.filterBlock.generator = w.filter.NewGenerator()
+ w.filterBlock.flush(0)
+ }
+ return w
+}
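+
+// A minimal end-to-end sketch (editor's illustration; f is the destination
+// io.Writer and o carries the options used above):
+//
+//	w := table.NewWriter(f, o)
+//	if err := w.Append([]byte("k"), []byte("v")); err != nil {
+//		// handle error
+//	}
+//	// Close writes the filter, metaindex and index blocks plus the footer.
+//	if err := w.Close(); err != nil {
+//		// handle error
+//	}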
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util.go b/vendor/github.com/syndtr/goleveldb/leveldb/util.go
new file mode 100644
index 000000000..e572a329e
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util.go
@@ -0,0 +1,98 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "fmt"
+ "sort"
+
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+func shorten(str string) string {
+ if len(str) <= 8 {
+ return str
+ }
+ return str[:3] + ".." + str[len(str)-3:]
+}
+
+// Five units are needed: shortenb and sshortenb may divide up to four times.
+var bunits = [...]string{"", "Ki", "Mi", "Gi", "Ti"}
+
+func shortenb(bytes int) string {
+ i := 0
+ for ; bytes > 1024 && i < 4; i++ {
+ bytes /= 1024
+ }
+ return fmt.Sprintf("%d%sB", bytes, bunits[i])
+}
+
+func sshortenb(bytes int) string {
+ if bytes == 0 {
+ return "~"
+ }
+ sign := "+"
+ if bytes < 0 {
+ sign = "-"
+ bytes *= -1
+ }
+ i := 0
+ for ; bytes > 1024 && i < 4; i++ {
+ bytes /= 1024
+ }
+ return fmt.Sprintf("%s%d%sB", sign, bytes, bunits[i])
+}
+
+func sint(x int) string {
+ if x == 0 {
+ return "~"
+ }
+ sign := "+"
+ if x < 0 {
+ sign = "-"
+ x *= -1
+ }
+ return fmt.Sprintf("%s%d", sign, x)
+}
+
+func minInt(a, b int) int {
+ if a < b {
+ return a
+ }
+ return b
+}
+
+func maxInt(a, b int) int {
+ if a > b {
+ return a
+ }
+ return b
+}
+
+type fdSorter []storage.FileDesc
+
+func (p fdSorter) Len() int {
+ return len(p)
+}
+
+func (p fdSorter) Less(i, j int) bool {
+ return p[i].Num < p[j].Num
+}
+
+func (p fdSorter) Swap(i, j int) {
+ p[i], p[j] = p[j], p[i]
+}
+
+func sortFds(fds []storage.FileDesc) {
+ sort.Sort(fdSorter(fds))
+}
+
+func ensureBuffer(b []byte, n int) []byte {
+ if cap(b) < n {
+ return make([]byte, n)
+ }
+ return b[:n]
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer.go
new file mode 100644
index 000000000..21de24255
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer.go
@@ -0,0 +1,293 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package util
+
+// This is a copy of the Go std bytes.Buffer with some modifications
+// and some features stripped.
+
+import (
+ "bytes"
+ "io"
+)
+
+// A Buffer is a variable-sized buffer of bytes with Read and Write methods.
+// The zero value for Buffer is an empty buffer ready to use.
+type Buffer struct {
+ buf []byte // contents are the bytes buf[off : len(buf)]
+ off int // read at &buf[off], write at &buf[len(buf)]
+ bootstrap [64]byte // memory to hold first slice; helps small buffers (Printf) avoid allocation.
+}
+
+// Bytes returns a slice of the contents of the unread portion of the buffer;
+// len(b.Bytes()) == b.Len(). If the caller changes the contents of the
+// returned slice, the contents of the buffer will change provided there
+// are no intervening method calls on the Buffer.
+func (b *Buffer) Bytes() []byte { return b.buf[b.off:] }
+
+// String returns the contents of the unread portion of the buffer
+// as a string. If the Buffer is a nil pointer, it returns "<nil>".
+func (b *Buffer) String() string {
+ if b == nil {
+ // Special case, useful in debugging.
+ return "<nil>"
+ }
+ return string(b.buf[b.off:])
+}
+
+// Len returns the number of bytes of the unread portion of the buffer;
+// b.Len() == len(b.Bytes()).
+func (b *Buffer) Len() int { return len(b.buf) - b.off }
+
+// Truncate discards all but the first n unread bytes from the buffer.
+// It panics if n is negative or greater than the length of the buffer.
+func (b *Buffer) Truncate(n int) {
+ switch {
+ case n < 0 || n > b.Len():
+ panic("leveldb/util.Buffer: truncation out of range")
+ case n == 0:
+ // Reuse buffer space.
+ b.off = 0
+ }
+ b.buf = b.buf[0 : b.off+n]
+}
+
+// Reset resets the buffer so it has no content.
+// b.Reset() is the same as b.Truncate(0).
+func (b *Buffer) Reset() { b.Truncate(0) }
+
+// grow grows the buffer to guarantee space for n more bytes.
+// It returns the index where bytes should be written.
+// If the buffer can't grow it will panic with bytes.ErrTooLarge.
+func (b *Buffer) grow(n int) int {
+ m := b.Len()
+ // If buffer is empty, reset to recover space.
+ if m == 0 && b.off != 0 {
+ b.Truncate(0)
+ }
+ if len(b.buf)+n > cap(b.buf) {
+ var buf []byte
+ if b.buf == nil && n <= len(b.bootstrap) {
+ buf = b.bootstrap[0:]
+ } else if m+n <= cap(b.buf)/2 {
+ // We can slide things down instead of allocating a new
+ // slice. We only need m+n <= cap(b.buf) to slide, but
+ // we instead let capacity get twice as large so we
+ // don't spend all our time copying.
+ copy(b.buf[:], b.buf[b.off:])
+ buf = b.buf[:m]
+ } else {
+ // not enough space anywhere
+ buf = makeSlice(2*cap(b.buf) + n)
+ copy(buf, b.buf[b.off:])
+ }
+ b.buf = buf
+ b.off = 0
+ }
+ b.buf = b.buf[0 : b.off+m+n]
+ return b.off + m
+}
+
+// Alloc allocates an n-byte slice from the buffer, growing the buffer as
+// needed. If n is negative, Alloc will panic.
+// If the buffer can't grow it will panic with bytes.ErrTooLarge.
+func (b *Buffer) Alloc(n int) []byte {
+ if n < 0 {
+ panic("leveldb/util.Buffer.Alloc: negative count")
+ }
+ m := b.grow(n)
+ return b.buf[m:]
+}
+
+// Grow grows the buffer's capacity, if necessary, to guarantee space for
+// another n bytes. After Grow(n), at least n bytes can be written to the
+// buffer without another allocation.
+// If n is negative, Grow will panic.
+// If the buffer can't grow it will panic with bytes.ErrTooLarge.
+func (b *Buffer) Grow(n int) {
+ if n < 0 {
+ panic("leveldb/util.Buffer.Grow: negative count")
+ }
+ m := b.grow(n)
+ b.buf = b.buf[0:m]
+}
+
+// Write appends the contents of p to the buffer, growing the buffer as
+// needed. The return value n is the length of p; err is always nil. If the
+// buffer becomes too large, Write will panic with bytes.ErrTooLarge.
+func (b *Buffer) Write(p []byte) (n int, err error) {
+ m := b.grow(len(p))
+ return copy(b.buf[m:], p), nil
+}
+
+// MinRead is the minimum slice size passed to a Read call by
+// Buffer.ReadFrom. As long as the Buffer has at least MinRead bytes beyond
+// what is required to hold the contents of r, ReadFrom will not grow the
+// underlying buffer.
+const MinRead = 512
+
+// ReadFrom reads data from r until EOF and appends it to the buffer, growing
+// the buffer as needed. The return value n is the number of bytes read. Any
+// error except io.EOF encountered during the read is also returned. If the
+// buffer becomes too large, ReadFrom will panic with bytes.ErrTooLarge.
+func (b *Buffer) ReadFrom(r io.Reader) (n int64, err error) {
+ // If buffer is empty, reset to recover space.
+ if b.off >= len(b.buf) {
+ b.Truncate(0)
+ }
+ for {
+ if free := cap(b.buf) - len(b.buf); free < MinRead {
+ // not enough space at end
+ newBuf := b.buf
+ if b.off+free < MinRead {
+ // not enough space using beginning of buffer;
+ // double buffer capacity
+ newBuf = makeSlice(2*cap(b.buf) + MinRead)
+ }
+ copy(newBuf, b.buf[b.off:])
+ b.buf = newBuf[:len(b.buf)-b.off]
+ b.off = 0
+ }
+ m, e := r.Read(b.buf[len(b.buf):cap(b.buf)])
+ b.buf = b.buf[0 : len(b.buf)+m]
+ n += int64(m)
+ if e == io.EOF {
+ break
+ }
+ if e != nil {
+ return n, e
+ }
+ }
+ return n, nil // err is EOF, so return nil explicitly
+}
+
+// makeSlice allocates a slice of size n. If the allocation fails, it panics
+// with bytes.ErrTooLarge.
+func makeSlice(n int) []byte {
+ // If the make fails, give a known error.
+ defer func() {
+ if recover() != nil {
+ panic(bytes.ErrTooLarge)
+ }
+ }()
+ return make([]byte, n)
+}
+
+// WriteTo writes data to w until the buffer is drained or an error occurs.
+// The return value n is the number of bytes written; it always fits into an
+// int, but it is int64 to match the io.WriterTo interface. Any error
+// encountered during the write is also returned.
+func (b *Buffer) WriteTo(w io.Writer) (n int64, err error) {
+ if b.off < len(b.buf) {
+ nBytes := b.Len()
+ m, e := w.Write(b.buf[b.off:])
+ if m > nBytes {
+ panic("leveldb/util.Buffer.WriteTo: invalid Write count")
+ }
+ b.off += m
+ n = int64(m)
+ if e != nil {
+ return n, e
+ }
+ // all bytes should have been written, by definition of
+ // Write method in io.Writer
+ if m != nBytes {
+ return n, io.ErrShortWrite
+ }
+ }
+ // Buffer is now empty; reset.
+ b.Truncate(0)
+ return
+}
+
+// WriteByte appends the byte c to the buffer, growing the buffer as needed.
+// The returned error is always nil, but is included to match bufio.Writer's
+// WriteByte. If the buffer becomes too large, WriteByte will panic with
+// bytes.ErrTooLarge.
+func (b *Buffer) WriteByte(c byte) error {
+ m := b.grow(1)
+ b.buf[m] = c
+ return nil
+}
+
+// Read reads the next len(p) bytes from the buffer or until the buffer
+// is drained. The return value n is the number of bytes read. If the
+// buffer has no data to return, err is io.EOF (unless len(p) is zero);
+// otherwise it is nil.
+func (b *Buffer) Read(p []byte) (n int, err error) {
+ if b.off >= len(b.buf) {
+ // Buffer is empty, reset to recover space.
+ b.Truncate(0)
+ if len(p) == 0 {
+ return
+ }
+ return 0, io.EOF
+ }
+ n = copy(p, b.buf[b.off:])
+ b.off += n
+ return
+}
+
+// Next returns a slice containing the next n bytes from the buffer,
+// advancing the buffer as if the bytes had been returned by Read.
+// If there are fewer than n bytes in the buffer, Next returns the entire buffer.
+// The slice is only valid until the next call to a read or write method.
+func (b *Buffer) Next(n int) []byte {
+ m := b.Len()
+ if n > m {
+ n = m
+ }
+ data := b.buf[b.off : b.off+n]
+ b.off += n
+ return data
+}
+
+// ReadByte reads and returns the next byte from the buffer.
+// If no byte is available, it returns error io.EOF.
+func (b *Buffer) ReadByte() (c byte, err error) {
+ if b.off >= len(b.buf) {
+ // Buffer is empty, reset to recover space.
+ b.Truncate(0)
+ return 0, io.EOF
+ }
+ c = b.buf[b.off]
+ b.off++
+ return c, nil
+}
+
+// ReadBytes reads until the first occurrence of delim in the input,
+// returning a slice containing the data up to and including the delimiter.
+// If ReadBytes encounters an error before finding a delimiter,
+// it returns the data read before the error and the error itself (often io.EOF).
+// ReadBytes returns err != nil if and only if the returned data does not end in
+// delim.
+func (b *Buffer) ReadBytes(delim byte) (line []byte, err error) {
+ slice, err := b.readSlice(delim)
+ // return a copy of slice. The buffer's backing array may
+ // be overwritten by later calls.
+ line = append(line, slice...)
+ return
+}
+
+// readSlice is like ReadBytes but returns a reference to internal buffer data.
+func (b *Buffer) readSlice(delim byte) (line []byte, err error) {
+ i := bytes.IndexByte(b.buf[b.off:], delim)
+ end := b.off + i + 1
+ if i < 0 {
+ end = len(b.buf)
+ err = io.EOF
+ }
+ line = b.buf[b.off:end]
+ b.off = end
+ return line, err
+}
+
+// NewBuffer creates and initializes a new Buffer using buf as its initial
+// contents. It is intended to prepare a Buffer to read existing data. It
+// can also be used to size the internal buffer for writing. To do that,
+// buf should have the desired capacity but a length of zero.
+//
+// In most cases, new(Buffer) (or just declaring a Buffer variable) is
+// sufficient to initialize a Buffer.
+func NewBuffer(buf []byte) *Buffer { return &Buffer{buf: buf} }
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go
new file mode 100644
index 000000000..2f3db974a
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go
@@ -0,0 +1,239 @@
+// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package util
+
+import (
+ "fmt"
+ "sync"
+ "sync/atomic"
+ "time"
+)
+
+type buffer struct {
+ b []byte
+ miss int
+}
+
+// BufferPool is a pool of byte buffers, bucketed by capacity around a
+// baseline size.
+type BufferPool struct {
+ pool [6]chan []byte
+ size [5]uint32
+ sizeMiss [5]uint32
+ sizeHalf [5]uint32
+ baseline [4]int
+ baseline0 int
+
+ mu sync.RWMutex
+ closed bool
+ closeC chan struct{}
+
+ get uint32
+ put uint32
+ half uint32
+ less uint32
+ equal uint32
+ greater uint32
+ miss uint32
+}
+
+func (p *BufferPool) poolNum(n int) int {
+ if n <= p.baseline0 && n > p.baseline0/2 {
+ return 0
+ }
+ for i, x := range p.baseline {
+ if n <= x {
+ return i + 1
+ }
+ }
+ return len(p.baseline) + 1
+}
+
+// Get returns a buffer with length n.
+func (p *BufferPool) Get(n int) []byte {
+ if p == nil {
+ return make([]byte, n)
+ }
+
+ p.mu.RLock()
+ defer p.mu.RUnlock()
+
+ if p.closed {
+ return make([]byte, n)
+ }
+
+ atomic.AddUint32(&p.get, 1)
+
+ poolNum := p.poolNum(n)
+ pool := p.pool[poolNum]
+ if poolNum == 0 {
+ // Fast path.
+ select {
+ case b := <-pool:
+ switch {
+ case cap(b) > n:
+ if cap(b)-n >= n {
+ atomic.AddUint32(&p.half, 1)
+ select {
+ case pool <- b:
+ default:
+ }
+ return make([]byte, n)
+ } else {
+ atomic.AddUint32(&p.less, 1)
+ return b[:n]
+ }
+ case cap(b) == n:
+ atomic.AddUint32(&p.equal, 1)
+ return b[:n]
+ default:
+ atomic.AddUint32(&p.greater, 1)
+ }
+ default:
+ atomic.AddUint32(&p.miss, 1)
+ }
+
+ return make([]byte, n, p.baseline0)
+ } else {
+ sizePtr := &p.size[poolNum-1]
+
+ select {
+ case b := <-pool:
+ switch {
+ case cap(b) > n:
+ if cap(b)-n >= n {
+ atomic.AddUint32(&p.half, 1)
+ sizeHalfPtr := &p.sizeHalf[poolNum-1]
+ if atomic.AddUint32(sizeHalfPtr, 1) == 20 {
+ atomic.StoreUint32(sizePtr, uint32(cap(b)/2))
+ atomic.StoreUint32(sizeHalfPtr, 0)
+ } else {
+ select {
+ case pool <- b:
+ default:
+ }
+ }
+ return make([]byte, n)
+ } else {
+ atomic.AddUint32(&p.less, 1)
+ return b[:n]
+ }
+ case cap(b) == n:
+ atomic.AddUint32(&p.equal, 1)
+ return b[:n]
+ default:
+ atomic.AddUint32(&p.greater, 1)
+ if uint32(cap(b)) >= atomic.LoadUint32(sizePtr) {
+ select {
+ case pool <- b:
+ default:
+ }
+ }
+ }
+ default:
+ atomic.AddUint32(&p.miss, 1)
+ }
+
+ if size := atomic.LoadUint32(sizePtr); uint32(n) > size {
+ if size == 0 {
+ atomic.CompareAndSwapUint32(sizePtr, 0, uint32(n))
+ } else {
+ sizeMissPtr := &p.sizeMiss[poolNum-1]
+ if atomic.AddUint32(sizeMissPtr, 1) == 20 {
+ atomic.StoreUint32(sizePtr, uint32(n))
+ atomic.StoreUint32(sizeMissPtr, 0)
+ }
+ }
+ return make([]byte, n)
+ } else {
+ return make([]byte, n, size)
+ }
+ }
+}
+
+// Put adds the given buffer to the pool.
+func (p *BufferPool) Put(b []byte) {
+ if p == nil {
+ return
+ }
+
+ p.mu.RLock()
+ defer p.mu.RUnlock()
+
+ if p.closed {
+ return
+ }
+
+ atomic.AddUint32(&p.put, 1)
+
+ pool := p.pool[p.poolNum(cap(b))]
+ select {
+ case pool <- b:
+ default:
+ }
+}
+
+func (p *BufferPool) Close() {
+ if p == nil {
+ return
+ }
+
+ p.mu.Lock()
+ if !p.closed {
+ p.closed = true
+ p.closeC <- struct{}{}
+ }
+ p.mu.Unlock()
+}
+
+func (p *BufferPool) String() string {
+ if p == nil {
+ return "<nil>"
+ }
+
+ return fmt.Sprintf("BufferPool{B·%d Z·%v Zm·%v Zh·%v G·%d P·%d H·%d <·%d =·%d >·%d M·%d}",
+ p.baseline0, p.size, p.sizeMiss, p.sizeHalf, p.get, p.put, p.half, p.less, p.equal, p.greater, p.miss)
+}
+
+func (p *BufferPool) drain() {
+ ticker := time.NewTicker(2 * time.Second)
+ defer ticker.Stop()
+ for {
+ select {
+ case <-ticker.C:
+ for _, ch := range p.pool {
+ select {
+ case <-ch:
+ default:
+ }
+ }
+ case <-p.closeC:
+ close(p.closeC)
+ for _, ch := range p.pool {
+ close(ch)
+ }
+ return
+ }
+ }
+}
+
+// NewBufferPool creates a new initialized 'buffer pool'.
+func NewBufferPool(baseline int) *BufferPool {
+ if baseline <= 0 {
+ panic("baseline can't be <= 0")
+ }
+ p := &BufferPool{
+ baseline0: baseline,
+ baseline: [...]int{baseline / 4, baseline / 2, baseline * 2, baseline * 4},
+ closeC: make(chan struct{}, 1),
+ }
+ for i, cap := range []int{2, 2, 4, 4, 2, 1} {
+ p.pool[i] = make(chan []byte, cap)
+ }
+ go p.drain()
+ return p
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/crc32.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/crc32.go
new file mode 100644
index 000000000..631c9d610
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/crc32.go
@@ -0,0 +1,30 @@
+// Copyright 2011 The LevelDB-Go Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package util
+
+import (
+ "hash/crc32"
+)
+
+var table = crc32.MakeTable(crc32.Castagnoli)
+
+// CRC is a CRC-32 checksum computed using Castagnoli's polynomial.
+type CRC uint32
+
+// NewCRC creates a new CRC based on the given bytes.
+func NewCRC(b []byte) CRC {
+ return CRC(0).Update(b)
+}
+
+// Update updates the CRC with the given bytes.
+func (c CRC) Update(b []byte) CRC {
+ return CRC(crc32.Update(uint32(c), table, b))
+}
+
+// Value returns the masked CRC.
+func (c CRC) Value() uint32 {
+ return uint32(c>>15|c<<17) + 0xa282ead8
+}
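
A short usage sketch of the checksum type above, assuming the package is
imported as util. Value applies LevelDB's standard masking (rotate right by
15 bits, then add 0xa282ead8) so a stored checksum is not itself a plain CRC
of the data:

	c := util.NewCRC([]byte("hello "))
	c = c.Update([]byte("world")) // same result as checksumming the concatenation
	masked := c.Value()           // masked form, as persisted alongside blocks
	_ = masked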
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go
new file mode 100644
index 000000000..7f3fa4e2c
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go
@@ -0,0 +1,48 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package util
+
+import (
+ "encoding/binary"
+)
+
+// Hash returns the hash of the given data.
+func Hash(data []byte, seed uint32) uint32 {
+ // Similar to MurmurHash.
+ const (
+ m = uint32(0xc6a4a793)
+ r = uint32(24)
+ )
+ var (
+ h = seed ^ (uint32(len(data)) * m)
+ i int
+ )
+
+ for n := len(data) - len(data)%4; i < n; i += 4 {
+ h += binary.LittleEndian.Uint32(data[i:])
+ h *= m
+ h ^= (h >> 16)
+ }
+
+ switch len(data) - i {
+ default:
+ panic("not reached")
+ case 3:
+ h += uint32(data[i+2]) << 16
+ fallthrough
+ case 2:
+ h += uint32(data[i+1]) << 8
+ fallthrough
+ case 1:
+ h += uint32(data[i])
+ h *= m
+ h ^= (h >> r)
+ case 0:
+ }
+
+ return h
+}
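
A sketch of calling Hash directly, assuming the package is imported as util;
the seed value here is arbitrary (callers in this tree pass their own fixed
seeds):

	h := util.Hash([]byte("some key"), 0xbc9f1d34)
	// Deterministic: equal (data, seed) pairs always produce equal hashes.
	_ = h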
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/range.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/range.go
new file mode 100644
index 000000000..85159583d
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/range.go
@@ -0,0 +1,32 @@
+// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package util
+
+// Range is a key range.
+type Range struct {
+ // Start of the key range, included in the range.
+ Start []byte
+
+ // Limit of the key range, not included in the range.
+ Limit []byte
+}
+
+// BytesPrefix returns a key range that satisfies the given prefix.
+// This is only applicable to the standard 'bytes comparer'.
+func BytesPrefix(prefix []byte) *Range {
+ var limit []byte
+ for i := len(prefix) - 1; i >= 0; i-- {
+ c := prefix[i]
+ if c < 0xff {
+ limit = make([]byte, i+1)
+ copy(limit, prefix)
+ limit[i] = c + 1
+ break
+ }
+ }
+ return &Range{prefix, limit}
+}
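
BytesPrefix builds the limit by copying the prefix up to its last byte below
0xff and incrementing that byte, yielding a half-open range [Start, Limit)
that covers exactly the keys beginning with the prefix. A small sketch,
assuming the package is imported as util:

	r := util.BytesPrefix([]byte("foo"))
	// r.Start == []byte("foo"), r.Limit == []byte("fop")
	// A prefix made solely of 0xff bytes leaves Limit nil,
	// i.e. the range is unbounded above.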
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/util.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/util.go
new file mode 100644
index 000000000..f35976865
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/util.go
@@ -0,0 +1,73 @@
+// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package util provides utilities used throughout leveldb.
+package util
+
+import (
+ "errors"
+)
+
+var (
+ ErrReleased = errors.New("leveldb: resource already released")
+ ErrHasReleaser = errors.New("leveldb: releaser already defined")
+)
+
+// Releaser is the interface that wraps the basic Release method.
+type Releaser interface {
+ // Release releases associated resources. Release should always succeed
+ // and can be called multiple times without causing an error.
+ Release()
+}
+
+// ReleaseSetter is the interface that wraps the basic SetReleaser method.
+type ReleaseSetter interface {
+ // SetReleaser associates the given releaser with the resource. The
+ // releaser will be called once the corresponding resource is released.
+ // Calling SetReleaser with nil will clear the releaser.
+ //
+ // This will panic if a releaser is already present or the corresponding
+ // resource is already released. The current releaser should be cleared
+ // first before assigning a new one.
+ SetReleaser(releaser Releaser)
+}
+
+// BasicReleaser provides a basic implementation of Releaser and ReleaseSetter.
+type BasicReleaser struct {
+ releaser Releaser
+ released bool
+}
+
+// Released reports whether the Release method has already been called.
+func (r *BasicReleaser) Released() bool {
+ return r.released
+}
+
+// Release implements Releaser.Release.
+func (r *BasicReleaser) Release() {
+ if !r.released {
+ if r.releaser != nil {
+ r.releaser.Release()
+ r.releaser = nil
+ }
+ r.released = true
+ }
+}
+
+// SetReleaser implements ReleaseSetter.SetReleaser.
+func (r *BasicReleaser) SetReleaser(releaser Releaser) {
+ if r.released {
+ panic(ErrReleased)
+ }
+ if r.releaser != nil && releaser != nil {
+ panic(ErrHasReleaser)
+ }
+ r.releaser = releaser
+}
+
+// NoopReleaser is a Releaser whose Release method does nothing.
+type NoopReleaser struct{}
+
+func (NoopReleaser) Release() {}
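
BasicReleaser is meant to be embedded into types that hand out resources; a
minimal sketch, assuming the package is imported as util (the handle type is
hypothetical):

	type handle struct {
		util.BasicReleaser
	}

	h := new(handle)
	h.SetReleaser(util.NoopReleaser{}) // any Releaser; invoked on first Release
	h.Release()                        // fires the releaser and marks h as released
	h.Release()                        // later calls are no-ops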
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/version.go b/vendor/github.com/syndtr/goleveldb/leveldb/version.go
new file mode 100644
index 000000000..c60f12c20
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/version.go
@@ -0,0 +1,524 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "fmt"
+ "sync/atomic"
+ "unsafe"
+
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+type tSet struct {
+ level int
+ table *tFile
+}
+
+type version struct {
+ s *session
+
+ levels []tFiles
+
+ // Level that should be compacted next and its compaction score.
+ // Score < 1 means compaction is not strictly needed. These fields
+ // are initialized by computeCompaction()
+ cLevel int
+ cScore float64
+
+ cSeek unsafe.Pointer
+
+ closing bool
+ ref int
+ // Succeeding version.
+ next *version
+}
+
+func newVersion(s *session) *version {
+ return &version{s: s}
+}
+
+func (v *version) releaseNB() {
+ v.ref--
+ if v.ref > 0 {
+ return
+ }
+ if v.ref < 0 {
+ panic("negative version ref")
+ }
+
+ nextTables := make(map[int64]bool)
+ for _, tt := range v.next.levels {
+ for _, t := range tt {
+ num := t.fd.Num
+ nextTables[num] = true
+ }
+ }
+
+ for _, tt := range v.levels {
+ for _, t := range tt {
+ num := t.fd.Num
+ if _, ok := nextTables[num]; !ok {
+ v.s.tops.remove(t)
+ }
+ }
+ }
+
+ v.next.releaseNB()
+ v.next = nil
+}
+
+func (v *version) release() {
+ v.s.vmu.Lock()
+ v.releaseNB()
+ v.s.vmu.Unlock()
+}
+
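+// walkOverlapping calls f for every table that may contain ikey, starting
+// with the auxiliary tables (level -1) and then each on-disk level in
+// order; lf, when non-nil, runs after each finished level. Returning false
+// from either callback stops the walk.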
+func (v *version) walkOverlapping(aux tFiles, ikey internalKey, f func(level int, t *tFile) bool, lf func(level int) bool) {
+ ukey := ikey.ukey()
+
+ // Aux level.
+ if aux != nil {
+ for _, t := range aux {
+ if t.overlaps(v.s.icmp, ukey, ukey) {
+ if !f(-1, t) {
+ return
+ }
+ }
+ }
+
+ if lf != nil && !lf(-1) {
+ return
+ }
+ }
+
+ // Walk tables level-by-level.
+ for level, tables := range v.levels {
+ if len(tables) == 0 {
+ continue
+ }
+
+ if level == 0 {
+ // Level-0 files may overlap each other. Find all files that
+ // overlap ukey.
+ for _, t := range tables {
+ if t.overlaps(v.s.icmp, ukey, ukey) {
+ if !f(level, t) {
+ return
+ }
+ }
+ }
+ } else {
+ if i := tables.searchMax(v.s.icmp, ikey); i < len(tables) {
+ t := tables[i]
+ if v.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 {
+ if !f(level, t) {
+ return
+ }
+ }
+ }
+ }
+
+ if lf != nil && !lf(level) {
+ return
+ }
+ }
+}
+
+func (v *version) get(aux tFiles, ikey internalKey, ro *opt.ReadOptions, noValue bool) (value []byte, tcomp bool, err error) {
+ if v.closing {
+ return nil, false, ErrClosed
+ }
+
+ ukey := ikey.ukey()
+
+ var (
+ tset *tSet
+ tseek bool
+
+ // Level-0.
+ zfound bool
+ zseq uint64
+ zkt keyType
+ zval []byte
+ )
+
+ err = ErrNotFound
+
+ // Since entries never hop across levels, finding the key/value
+ // in a smaller level makes later levels irrelevant.
+ v.walkOverlapping(aux, ikey, func(level int, t *tFile) bool {
+ if level >= 0 && !tseek {
+ if tset == nil {
+ tset = &tSet{level, t}
+ } else {
+ tseek = true
+ }
+ }
+
+ var (
+ fikey, fval []byte
+ ferr error
+ )
+ if noValue {
+ fikey, ferr = v.s.tops.findKey(t, ikey, ro)
+ } else {
+ fikey, fval, ferr = v.s.tops.find(t, ikey, ro)
+ }
+
+ switch ferr {
+ case nil:
+ case ErrNotFound:
+ return true
+ default:
+ err = ferr
+ return false
+ }
+
+ if fukey, fseq, fkt, fkerr := parseInternalKey(fikey); fkerr == nil {
+ if v.s.icmp.uCompare(ukey, fukey) == 0 {
+ // Tables at level <= 0 may overlap each other.
+ if level <= 0 {
+ if fseq >= zseq {
+ zfound = true
+ zseq = fseq
+ zkt = fkt
+ zval = fval
+ }
+ } else {
+ switch fkt {
+ case keyTypeVal:
+ value = fval
+ err = nil
+ case keyTypeDel:
+ default:
+ panic("leveldb: invalid internalKey type")
+ }
+ return false
+ }
+ }
+ } else {
+ err = fkerr
+ return false
+ }
+
+ return true
+ }, func(level int) bool {
+ if zfound {
+ switch zkt {
+ case keyTypeVal:
+ value = zval
+ err = nil
+ case keyTypeDel:
+ default:
+ panic("leveldb: invalid internalKey type")
+ }
+ return false
+ }
+
+ return true
+ })
+
+ if tseek && tset.table.consumeSeek() <= 0 {
+ tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset))
+ }
+
+ return
+}
+
+func (v *version) sampleSeek(ikey internalKey) (tcomp bool) {
+ var tset *tSet
+
+ v.walkOverlapping(nil, ikey, func(level int, t *tFile) bool {
+ if tset == nil {
+ tset = &tSet{level, t}
+ return true
+ }
+ if tset.table.consumeSeek() <= 0 {
+ tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset))
+ }
+ return false
+ }, nil)
+
+ return
+}
+
+func (v *version) getIterators(slice *util.Range, ro *opt.ReadOptions) (its []iterator.Iterator) {
+ strict := opt.GetStrict(v.s.o.Options, ro, opt.StrictReader)
+ for level, tables := range v.levels {
+ if level == 0 {
+ // Merge all level zero files together since they may overlap.
+ for _, t := range tables {
+ its = append(its, v.s.tops.newIterator(t, slice, ro))
+ }
+ } else if len(tables) != 0 {
+ its = append(its, iterator.NewIndexedIterator(tables.newIndexIterator(v.s.tops, v.s.icmp, slice, ro), strict))
+ }
+ }
+ return
+}
+
+func (v *version) newStaging() *versionStaging {
+ return &versionStaging{base: v}
+}
+
+// spawn creates a new version based on this version with the given record applied.
+func (v *version) spawn(r *sessionRecord) *version {
+ staging := v.newStaging()
+ staging.commit(r)
+ return staging.finish()
+}
+
+func (v *version) fillRecord(r *sessionRecord) {
+ for level, tables := range v.levels {
+ for _, t := range tables {
+ r.addTableFile(level, t)
+ }
+ }
+}
+
+func (v *version) tLen(level int) int {
+ if level < len(v.levels) {
+ return len(v.levels[level])
+ }
+ return 0
+}
+
+func (v *version) offsetOf(ikey internalKey) (n int64, err error) {
+ for level, tables := range v.levels {
+ for _, t := range tables {
+ if v.s.icmp.Compare(t.imax, ikey) <= 0 {
+ // Entire file is before "ikey", so just add the file size
+ n += t.size
+ } else if v.s.icmp.Compare(t.imin, ikey) > 0 {
+ // Entire file is after "ikey", so ignore
+ if level > 0 {
+ // Files other than level 0 are sorted by meta->min, so
+ // no further files in this level will contain data for
+ // "ikey".
+ break
+ }
+ } else {
+ // "ikey" falls in the range for this table. Add the
+ // approximate offset of "ikey" within the table.
+ if m, err := v.s.tops.offsetOf(t, ikey); err == nil {
+ n += m
+ } else {
+ return 0, err
+ }
+ }
+ }
+ }
+
+ return
+}
+
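+// pickMemdbLevel picks the level at which a flushed memdb covering
+// [umin, umax] should be placed: when level-0 does not overlap the range,
+// the memdb is pushed down while the next level stays non-overlapping and
+// the grandparent overlap remains under the configured limit, capped at
+// maxLevel.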
+func (v *version) pickMemdbLevel(umin, umax []byte, maxLevel int) (level int) {
+ if maxLevel > 0 {
+ if len(v.levels) == 0 {
+ return maxLevel
+ }
+ if !v.levels[0].overlaps(v.s.icmp, umin, umax, true) {
+ var overlaps tFiles
+ for ; level < maxLevel; level++ {
+ if pLevel := level + 1; pLevel >= len(v.levels) {
+ return maxLevel
+ } else if v.levels[pLevel].overlaps(v.s.icmp, umin, umax, false) {
+ break
+ }
+ if gpLevel := level + 2; gpLevel < len(v.levels) {
+ overlaps = v.levels[gpLevel].getOverlaps(overlaps, v.s.icmp, umin, umax, false)
+ if overlaps.size() > int64(v.s.o.GetCompactionGPOverlaps(level)) {
+ break
+ }
+ }
+ }
+ }
+ }
+ return
+}
+
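+// computeCompaction scores each level and records the best candidate in
+// cLevel/cScore: level-0 is scored by file count against the level-0
+// trigger, higher levels by byte size against their size target. For
+// example, with a trigger of 4, six level-0 tables score 6/4 = 1.5; a
+// level-1 of 15 MiB against a 10 MiB target also scores 1.5, and since a
+// later level must strictly beat the best score, level-0 would win.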
+func (v *version) computeCompaction() {
+ // Precompute the best level for the next compaction.
+ bestLevel := int(-1)
+ bestScore := float64(-1)
+
+ statFiles := make([]int, len(v.levels))
+ statSizes := make([]string, len(v.levels))
+ statScore := make([]string, len(v.levels))
+ statTotSize := int64(0)
+
+ for level, tables := range v.levels {
+ var score float64
+ size := tables.size()
+ if level == 0 {
+ // We treat level-0 specially by bounding the number of files
+ // instead of number of bytes for two reasons:
+ //
+ // (1) With larger write-buffer sizes, it is nice not to do too
+ // many level-0 compactions.
+ //
+ // (2) The files in level-0 are merged on every read and
+ // therefore we wish to avoid too many files when the individual
+ // file size is small (perhaps because of a small write-buffer
+ // setting, or very high compression ratios, or lots of
+ // overwrites/deletions).
+ score = float64(len(tables)) / float64(v.s.o.GetCompactionL0Trigger())
+ } else {
+ score = float64(size) / float64(v.s.o.GetCompactionTotalSize(level))
+ }
+
+ if score > bestScore {
+ bestLevel = level
+ bestScore = score
+ }
+
+ statFiles[level] = len(tables)
+ statSizes[level] = shortenb(int(size))
+ statScore[level] = fmt.Sprintf("%.2f", score)
+ statTotSize += size
+ }
+
+ v.cLevel = bestLevel
+ v.cScore = bestScore
+
+ v.s.logf("version@stat F·%v S·%s%v Sc·%v", statFiles, shortenb(int(statTotSize)), statSizes, statScore)
+}
+
+func (v *version) needCompaction() bool {
+ return v.cScore >= 1 || atomic.LoadPointer(&v.cSeek) != nil
+}
+
+type tablesScratch struct {
+ added map[int64]atRecord
+ deleted map[int64]struct{}
+}
+
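+// versionStaging accumulates table additions and deletions from session
+// records on top of a base version, to be materialized by finish.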
+type versionStaging struct {
+ base *version
+ levels []tablesScratch
+}
+
+func (p *versionStaging) getScratch(level int) *tablesScratch {
+ if level >= len(p.levels) {
+ newLevels := make([]tablesScratch, level+1)
+ copy(newLevels, p.levels)
+ p.levels = newLevels
+ }
+ return &(p.levels[level])
+}
+
+func (p *versionStaging) commit(r *sessionRecord) {
+ // Deleted tables.
+ for _, r := range r.deletedTables {
+ scratch := p.getScratch(r.level)
+ if r.level < len(p.base.levels) && len(p.base.levels[r.level]) > 0 {
+ if scratch.deleted == nil {
+ scratch.deleted = make(map[int64]struct{})
+ }
+ scratch.deleted[r.num] = struct{}{}
+ }
+ if scratch.added != nil {
+ delete(scratch.added, r.num)
+ }
+ }
+
+ // New tables.
+ for _, r := range r.addedTables {
+ scratch := p.getScratch(r.level)
+ if scratch.added == nil {
+ scratch.added = make(map[int64]atRecord)
+ }
+ scratch.added[r.num] = r
+ if scratch.deleted != nil {
+ delete(scratch.deleted, r.num)
+ }
+ }
+}
+
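+// finish builds the new version: it drops base tables that were deleted or
+// re-added, appends the staged new tables, sorts level-0 by file number and
+// higher levels by key, and trims empty trailing levels.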
+func (p *versionStaging) finish() *version {
+ // Build new version.
+ nv := newVersion(p.base.s)
+ numLevel := len(p.levels)
+ if len(p.base.levels) > numLevel {
+ numLevel = len(p.base.levels)
+ }
+ nv.levels = make([]tFiles, numLevel)
+ for level := 0; level < numLevel; level++ {
+ var baseTables tFiles
+ if level < len(p.base.levels) {
+ baseTables = p.base.levels[level]
+ }
+
+ if level < len(p.levels) {
+ scratch := p.levels[level]
+
+ var nt tFiles
+ // Preallocate the list if possible.
+ if n := len(baseTables) + len(scratch.added) - len(scratch.deleted); n > 0 {
+ nt = make(tFiles, 0, n)
+ }
+
+ // Base tables.
+ for _, t := range baseTables {
+ if _, ok := scratch.deleted[t.fd.Num]; ok {
+ continue
+ }
+ if _, ok := scratch.added[t.fd.Num]; ok {
+ continue
+ }
+ nt = append(nt, t)
+ }
+
+ // New tables.
+ for _, r := range scratch.added {
+ nt = append(nt, tableFileFromRecord(r))
+ }
+
+ if len(nt) != 0 {
+ // Sort tables.
+ if level == 0 {
+ nt.sortByNum()
+ } else {
+ nt.sortByKey(p.base.s.icmp)
+ }
+
+ nv.levels[level] = nt
+ }
+ } else {
+ nv.levels[level] = baseTables
+ }
+ }
+
+ // Trim levels.
+ n := len(nv.levels)
+ for ; n > 0 && nv.levels[n-1] == nil; n-- {
+ }
+ nv.levels = nv.levels[:n]
+
+ // Compute compaction score for new version.
+ nv.computeCompaction()
+
+ return nv
+}
+
+type versionReleaser struct {
+ v *version
+ once bool
+}
+
+func (vr *versionReleaser) Release() {
+ v := vr.v
+ v.s.vmu.Lock()
+ if !vr.once {
+ v.releaseNB()
+ vr.once = true
+ }
+ v.s.vmu.Unlock()
+}