aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/syndtr
diff options
context:
space:
mode:
authorPéter Szilágyi <peterke@gmail.com>2016-10-29 01:05:01 +0800
committerFelix Lange <fjl@twurst.com>2016-10-29 01:05:01 +0800
commit289b30715d097edafd5562f66cb3567a70b2d330 (patch)
tree7eaaa6da97c84727469303b986e364606ece57ce /vendor/github.com/syndtr
parent77703045765343c489ded2f43e3ed0f332c5f148 (diff)
downloaddexon-289b30715d097edafd5562f66cb3567a70b2d330.tar
dexon-289b30715d097edafd5562f66cb3567a70b2d330.tar.gz
dexon-289b30715d097edafd5562f66cb3567a70b2d330.tar.bz2
dexon-289b30715d097edafd5562f66cb3567a70b2d330.tar.lz
dexon-289b30715d097edafd5562f66cb3567a70b2d330.tar.xz
dexon-289b30715d097edafd5562f66cb3567a70b2d330.tar.zst
dexon-289b30715d097edafd5562f66cb3567a70b2d330.zip
Godeps, vendor: convert dependency management to trash (#3198)
This commit converts the dependency management from Godeps to the vendor folder, also switching the tool from godep to trash. Since the upstream tool lacks a few features proposed via a few PRs, until those PRs are merged in (if), use github.com/karalabe/trash. You can update dependencies via trash --update. All dependencies have been updated to their latest version. Parts of the build system are reworked to drop old notions of Godeps and invocation of the go vet command so that it doesn't run against the vendor folder, as that will just blow up during vetting. The conversion drops OpenCL (and hence GPU mining support) from ethash and our codebase. The short reasoning is that there's noone to maintain and having opencl libs in our deps messes up builds as go install ./... tries to build them, failing with unsatisfied link errors for the C OpenCL deps. golang.org/x/net/context is not vendored in. We expect it to be fetched by the user (i.e. using go get). To keep ci.go builds reproducible the package is "vendored" in build/_vendor.
Diffstat (limited to 'vendor/github.com/syndtr')
-rw-r--r--vendor/github.com/syndtr/goleveldb/.travis.yml12
-rw-r--r--vendor/github.com/syndtr/goleveldb/LICENSE24
-rw-r--r--vendor/github.com/syndtr/goleveldb/README.md105
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/batch.go349
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go705
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/cache/lru.go195
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/comparer.go67
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go51
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go57
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/db.go1091
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go826
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go360
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go183
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/db_state.go234
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go325
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/db_util.go102
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/db_write.go443
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/doc.go90
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/errors.go20
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go78
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/filter.go31
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go116
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/filter/filter.go60
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go184
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go242
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/iterator/iter.go132
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go304
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go524
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/key.go143
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go475
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go684
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/options.go107
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/session.go208
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go302
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/session_record.go323
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/session_util.go258
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go583
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go34
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go65
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go81
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go86
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go78
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go218
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go179
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/table.go529
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go1134
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/table/table.go177
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/table/writer.go375
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/util.go98
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/util/buffer.go293
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go239
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/util/crc32.go30
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go48
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/util/range.go32
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/util/util.go73
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/version.go524
56 files changed, 14286 insertions, 0 deletions
diff --git a/vendor/github.com/syndtr/goleveldb/.travis.yml b/vendor/github.com/syndtr/goleveldb/.travis.yml
new file mode 100644
index 000000000..82de37735
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/.travis.yml
@@ -0,0 +1,12 @@
+language: go
+
+go:
+ - 1.4
+ - 1.5
+ - 1.6
+ - 1.7
+ - tip
+
+script:
+ - go test -timeout 1h ./...
+ - go test -timeout 30m -race -run "TestDB_(Concurrent|GoleveldbIssue74)" ./leveldb
diff --git a/vendor/github.com/syndtr/goleveldb/LICENSE b/vendor/github.com/syndtr/goleveldb/LICENSE
new file mode 100644
index 000000000..4a772d1ab
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/LICENSE
@@ -0,0 +1,24 @@
+Copyright 2012 Suryandaru Triandana <syndtr@gmail.com>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/syndtr/goleveldb/README.md b/vendor/github.com/syndtr/goleveldb/README.md
new file mode 100644
index 000000000..259286f55
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/README.md
@@ -0,0 +1,105 @@
+This is an implementation of the [LevelDB key/value database](http://code.google.com/p/leveldb) in the [Go programming language](http://golang.org).
+
+[![Build Status](https://travis-ci.org/syndtr/goleveldb.png?branch=master)](https://travis-ci.org/syndtr/goleveldb)
+
+Installation
+-----------
+
+ go get github.com/syndtr/goleveldb/leveldb
+
+Requirements
+-----------
+
+* Requires `go1.4` or newer.
+
+Usage
+-----------
+
+Create or open a database:
+```go
+db, err := leveldb.OpenFile("path/to/db", nil)
+...
+defer db.Close()
+...
+```
+Read or modify the database content:
+```go
+// Remember that the contents of the returned slice should not be modified.
+data, err := db.Get([]byte("key"), nil)
+...
+err = db.Put([]byte("key"), []byte("value"), nil)
+...
+err = db.Delete([]byte("key"), nil)
+...
+```
+
+Iterate over database content:
+```go
+iter := db.NewIterator(nil, nil)
+for iter.Next() {
+ // Remember that the contents of the returned slices should not be modified, and
+ // are only valid until the next call to Next.
+ key := iter.Key()
+ value := iter.Value()
+ ...
+}
+iter.Release()
+err = iter.Error()
+...
+```
+Seek-then-Iterate:
+```go
+iter := db.NewIterator(nil, nil)
+for ok := iter.Seek(key); ok; ok = iter.Next() {
+ // Use key/value.
+ ...
+}
+iter.Release()
+err = iter.Error()
+...
+```
+Iterate over subset of database content:
+```go
+iter := db.NewIterator(&util.Range{Start: []byte("foo"), Limit: []byte("xoo")}, nil)
+for iter.Next() {
+ // Use key/value.
+ ...
+}
+iter.Release()
+err = iter.Error()
+...
+```
+Iterate over subset of database content with a particular prefix:
+```go
+iter := db.NewIterator(util.BytesPrefix([]byte("foo-")), nil)
+for iter.Next() {
+ // Use key/value.
+ ...
+}
+iter.Release()
+err = iter.Error()
+...
+```
+Batch writes:
+```go
+batch := new(leveldb.Batch)
+batch.Put([]byte("foo"), []byte("value"))
+batch.Put([]byte("bar"), []byte("another value"))
+batch.Delete([]byte("baz"))
+err = db.Write(batch, nil)
+...
+```
+Use bloom filter:
+```go
+o := &opt.Options{
+ Filter: filter.NewBloomFilter(10),
+}
+db, err := leveldb.OpenFile("path/to/db", o)
+...
+defer db.Close()
+...
+```
+Documentation
+-----------
+
+You can read the package documentation [here](http://godoc.org/github.com/syndtr/goleveldb).
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/batch.go b/vendor/github.com/syndtr/goleveldb/leveldb/batch.go
new file mode 100644
index 000000000..225920002
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/batch.go
@@ -0,0 +1,349 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "encoding/binary"
+ "fmt"
+ "io"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/memdb"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+// ErrBatchCorrupted records the reason for batch corruption. This error will be
+// wrapped with errors.ErrCorrupted.
+type ErrBatchCorrupted struct {
+ Reason string
+}
+
+func (e *ErrBatchCorrupted) Error() string {
+ return fmt.Sprintf("leveldb: batch corrupted: %s", e.Reason)
+}
+
+func newErrBatchCorrupted(reason string) error {
+ return errors.NewErrCorrupted(storage.FileDesc{}, &ErrBatchCorrupted{reason})
+}
+
+const (
+ batchHeaderLen = 8 + 4
+ batchGrowRec = 3000
+ batchBufioSize = 16
+)
+
+// BatchReplay wraps basic batch operations.
+type BatchReplay interface {
+ Put(key, value []byte)
+ Delete(key []byte)
+}
+
+type batchIndex struct {
+ keyType keyType
+ keyPos, keyLen int
+ valuePos, valueLen int
+}
+
+func (index batchIndex) k(data []byte) []byte {
+ return data[index.keyPos : index.keyPos+index.keyLen]
+}
+
+func (index batchIndex) v(data []byte) []byte {
+ if index.valueLen != 0 {
+ return data[index.valuePos : index.valuePos+index.valueLen]
+ }
+ return nil
+}
+
+func (index batchIndex) kv(data []byte) (key, value []byte) {
+ return index.k(data), index.v(data)
+}
+
+// Batch is a write batch.
+type Batch struct {
+ data []byte
+ index []batchIndex
+
+ // internalLen is the sum of key/value pair lengths plus 8 bytes of internal key overhead per record.
+ internalLen int
+}
+
+func (b *Batch) grow(n int) {
+ o := len(b.data)
+ if cap(b.data)-o < n {
+ div := 1
+ if len(b.index) > batchGrowRec {
+ div = len(b.index) / batchGrowRec
+ }
+ ndata := make([]byte, o, o+n+o/div)
+ copy(ndata, b.data)
+ b.data = ndata
+ }
+}
+
+func (b *Batch) appendRec(kt keyType, key, value []byte) {
+ n := 1 + binary.MaxVarintLen32 + len(key)
+ if kt == keyTypeVal {
+ n += binary.MaxVarintLen32 + len(value)
+ }
+ b.grow(n)
+ index := batchIndex{keyType: kt}
+ o := len(b.data)
+ data := b.data[:o+n]
+ data[o] = byte(kt)
+ o++
+ o += binary.PutUvarint(data[o:], uint64(len(key)))
+ index.keyPos = o
+ index.keyLen = len(key)
+ o += copy(data[o:], key)
+ if kt == keyTypeVal {
+ o += binary.PutUvarint(data[o:], uint64(len(value)))
+ index.valuePos = o
+ index.valueLen = len(value)
+ o += copy(data[o:], value)
+ }
+ b.data = data[:o]
+ b.index = append(b.index, index)
+ b.internalLen += index.keyLen + index.valueLen + 8
+}
+
+// Put appends 'put operation' of the given key/value pair to the batch.
+// It is safe to modify the contents of the argument after Put returns but not
+// before.
+func (b *Batch) Put(key, value []byte) {
+ b.appendRec(keyTypeVal, key, value)
+}
+
+// Delete appends 'delete operation' of the given key to the batch.
+// It is safe to modify the contents of the argument after Delete returns but
+// not before.
+func (b *Batch) Delete(key []byte) {
+ b.appendRec(keyTypeDel, key, nil)
+}
+
+// Dump dumps batch contents. The returned slice can be loaded into the
+// batch using Load method.
+// The returned slice is not a copy of the batch's internal buffer, so its
+// contents should not be modified.
+func (b *Batch) Dump() []byte {
+ return b.data
+}
+
+// Load loads given slice into the batch. Previous contents of the batch
+// will be discarded.
+// The given slice will not be copied and will be used as batch buffer, so
+// it is not safe to modify the contents of the slice.
+func (b *Batch) Load(data []byte) error {
+ return b.decode(data, -1)
+}
+
+// Replay replays batch contents.
+func (b *Batch) Replay(r BatchReplay) error {
+ for _, index := range b.index {
+ switch index.keyType {
+ case keyTypeVal:
+ r.Put(index.k(b.data), index.v(b.data))
+ case keyTypeDel:
+ r.Delete(index.k(b.data))
+ }
+ }
+ return nil
+}
+
+// Len returns number of records in the batch.
+func (b *Batch) Len() int {
+ return len(b.index)
+}
+
+// Reset resets the batch.
+func (b *Batch) Reset() {
+ b.data = b.data[:0]
+ b.index = b.index[:0]
+ b.internalLen = 0
+}
+
+func (b *Batch) replayInternal(fn func(i int, kt keyType, k, v []byte) error) error {
+ for i, index := range b.index {
+ if err := fn(i, index.keyType, index.k(b.data), index.v(b.data)); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (b *Batch) append(p *Batch) {
+ ob := len(b.data)
+ oi := len(b.index)
+ b.data = append(b.data, p.data...)
+ b.index = append(b.index, p.index...)
+ b.internalLen += p.internalLen
+
+ // Updating index offset.
+ if ob != 0 {
+ for ; oi < len(b.index); oi++ {
+ index := &b.index[oi]
+ index.keyPos += ob
+ if index.valueLen != 0 {
+ index.valuePos += ob
+ }
+ }
+ }
+}
+
+func (b *Batch) decode(data []byte, expectedLen int) error {
+ b.data = data
+ b.index = b.index[:0]
+ b.internalLen = 0
+ err := decodeBatch(data, func(i int, index batchIndex) error {
+ b.index = append(b.index, index)
+ b.internalLen += index.keyLen + index.valueLen + 8
+ return nil
+ })
+ if err != nil {
+ return err
+ }
+ if expectedLen >= 0 && len(b.index) != expectedLen {
+ return newErrBatchCorrupted(fmt.Sprintf("invalid records length: %d vs %d", expectedLen, len(b.index)))
+ }
+ return nil
+}
+
+func (b *Batch) putMem(seq uint64, mdb *memdb.DB) error {
+ var ik []byte
+ for i, index := range b.index {
+ ik = makeInternalKey(ik, index.k(b.data), seq+uint64(i), index.keyType)
+ if err := mdb.Put(ik, index.v(b.data)); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (b *Batch) revertMem(seq uint64, mdb *memdb.DB) error {
+ var ik []byte
+ for i, index := range b.index {
+ ik = makeInternalKey(ik, index.k(b.data), seq+uint64(i), index.keyType)
+ if err := mdb.Delete(ik); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func newBatch() interface{} {
+ return &Batch{}
+}
+
+func decodeBatch(data []byte, fn func(i int, index batchIndex) error) error {
+ var index batchIndex
+ for i, o := 0, 0; o < len(data); i++ {
+ // Key type.
+ index.keyType = keyType(data[o])
+ if index.keyType > keyTypeVal {
+ return newErrBatchCorrupted(fmt.Sprintf("bad record: invalid type %#x", uint(index.keyType)))
+ }
+ o++
+
+ // Key.
+ x, n := binary.Uvarint(data[o:])
+ o += n
+ if n <= 0 || o+int(x) > len(data) {
+ return newErrBatchCorrupted("bad record: invalid key length")
+ }
+ index.keyPos = o
+ index.keyLen = int(x)
+ o += index.keyLen
+
+ // Value.
+ if index.keyType == keyTypeVal {
+ x, n = binary.Uvarint(data[o:])
+ o += n
+ if n <= 0 || o+int(x) > len(data) {
+ return newErrBatchCorrupted("bad record: invalid value length")
+ }
+ index.valuePos = o
+ index.valueLen = int(x)
+ o += index.valueLen
+ } else {
+ index.valuePos = 0
+ index.valueLen = 0
+ }
+
+ if err := fn(i, index); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func decodeBatchToMem(data []byte, expectSeq uint64, mdb *memdb.DB) (seq uint64, batchLen int, err error) {
+ seq, batchLen, err = decodeBatchHeader(data)
+ if err != nil {
+ return 0, 0, err
+ }
+ if seq < expectSeq {
+ return 0, 0, newErrBatchCorrupted("invalid sequence number")
+ }
+ data = data[batchHeaderLen:]
+ var ik []byte
+ var decodedLen int
+ err = decodeBatch(data, func(i int, index batchIndex) error {
+ if i >= batchLen {
+ return newErrBatchCorrupted("invalid records length")
+ }
+ ik = makeInternalKey(ik, index.k(data), seq+uint64(i), index.keyType)
+ if err := mdb.Put(ik, index.v(data)); err != nil {
+ return err
+ }
+ decodedLen++
+ return nil
+ })
+ if err == nil && decodedLen != batchLen {
+ err = newErrBatchCorrupted(fmt.Sprintf("invalid records length: %d vs %d", batchLen, decodedLen))
+ }
+ return
+}
+
+func encodeBatchHeader(dst []byte, seq uint64, batchLen int) []byte {
+ dst = ensureBuffer(dst, batchHeaderLen)
+ binary.LittleEndian.PutUint64(dst, seq)
+ binary.LittleEndian.PutUint32(dst[8:], uint32(batchLen))
+ return dst
+}
+
+func decodeBatchHeader(data []byte) (seq uint64, batchLen int, err error) {
+ if len(data) < batchHeaderLen {
+ return 0, 0, newErrBatchCorrupted("too short")
+ }
+
+ seq = binary.LittleEndian.Uint64(data)
+ batchLen = int(binary.LittleEndian.Uint32(data[8:]))
+ if batchLen < 0 {
+ return 0, 0, newErrBatchCorrupted("invalid records length")
+ }
+ return
+}
+
+func batchesLen(batches []*Batch) int {
+ batchLen := 0
+ for _, batch := range batches {
+ batchLen += batch.Len()
+ }
+ return batchLen
+}
+
+func writeBatchesWithHeader(wr io.Writer, batches []*Batch, seq uint64) error {
+ if _, err := wr.Write(encodeBatchHeader(nil, seq, batchesLen(batches))); err != nil {
+ return err
+ }
+ for _, batch := range batches {
+ if _, err := wr.Write(batch.data); err != nil {
+ return err
+ }
+ }
+ return nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go b/vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go
new file mode 100644
index 000000000..c5940b232
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go
@@ -0,0 +1,705 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package cache provides the interface and an implementation of caching algorithms.
+package cache
+
+import (
+ "sync"
+ "sync/atomic"
+ "unsafe"
+
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// Cacher provides an interface for implementing caching functionality.
+// An implementation must be safe for concurrent use.
+type Cacher interface {
+ // Capacity returns cache capacity.
+ Capacity() int
+
+ // SetCapacity sets cache capacity.
+ SetCapacity(capacity int)
+
+ // Promote promotes the 'cache node'.
+ Promote(n *Node)
+
+ // Ban evicts the 'cache node' and prevents subsequent 'promote'.
+ Ban(n *Node)
+
+ // Evict evicts the 'cache node'.
+ Evict(n *Node)
+
+ // EvictNS evicts 'cache node' with the given namespace.
+ EvictNS(ns uint64)
+
+ // EvictAll evicts all 'cache node'.
+ EvictAll()
+
+ // Close closes the 'cache tree'
+ Close() error
+}
+
+// Value is a 'cacheable object'. It may implement util.Releaser; if
+// so, the Release method will be called once the object is released.
+type Value interface{}
+
+// NamespaceGetter provides a convenient wrapper for a namespace.
+type NamespaceGetter struct {
+ Cache *Cache
+ NS uint64
+}
+
+// Get simply calls Cache.Get() method.
+func (g *NamespaceGetter) Get(key uint64, setFunc func() (size int, value Value)) *Handle {
+ return g.Cache.Get(g.NS, key, setFunc)
+}
+
+// The hash table implementation is based on:
+// "Dynamic-Sized Nonblocking Hash Tables", by Yujie Liu,
+// Kunlong Zhang, and Michael Spear.
+// ACM Symposium on Principles of Distributed Computing, Jul 2014.
+
+const (
+ mInitialSize = 1 << 4
+ mOverflowThreshold = 1 << 5
+ mOverflowGrowThreshold = 1 << 7
+)
+
+type mBucket struct {
+ mu sync.Mutex
+ node []*Node
+ frozen bool
+}
+
+func (b *mBucket) freeze() []*Node {
+ b.mu.Lock()
+ defer b.mu.Unlock()
+ if !b.frozen {
+ b.frozen = true
+ }
+ return b.node
+}
+
+func (b *mBucket) get(r *Cache, h *mNode, hash uint32, ns, key uint64, noset bool) (done, added bool, n *Node) {
+ b.mu.Lock()
+
+ if b.frozen {
+ b.mu.Unlock()
+ return
+ }
+
+ // Scan the node.
+ for _, n := range b.node {
+ if n.hash == hash && n.ns == ns && n.key == key {
+ atomic.AddInt32(&n.ref, 1)
+ b.mu.Unlock()
+ return true, false, n
+ }
+ }
+
+ // Get only.
+ if noset {
+ b.mu.Unlock()
+ return true, false, nil
+ }
+
+ // Create node.
+ n = &Node{
+ r: r,
+ hash: hash,
+ ns: ns,
+ key: key,
+ ref: 1,
+ }
+ // Add node to bucket.
+ b.node = append(b.node, n)
+ bLen := len(b.node)
+ b.mu.Unlock()
+
+ // Update counter.
+ grow := atomic.AddInt32(&r.nodes, 1) >= h.growThreshold
+ if bLen > mOverflowThreshold {
+ grow = grow || atomic.AddInt32(&h.overflow, 1) >= mOverflowGrowThreshold
+ }
+
+ // Grow.
+ if grow && atomic.CompareAndSwapInt32(&h.resizeInProgess, 0, 1) {
+ nhLen := len(h.buckets) << 1
+ nh := &mNode{
+ buckets: make([]unsafe.Pointer, nhLen),
+ mask: uint32(nhLen) - 1,
+ pred: unsafe.Pointer(h),
+ growThreshold: int32(nhLen * mOverflowThreshold),
+ shrinkThreshold: int32(nhLen >> 1),
+ }
+ ok := atomic.CompareAndSwapPointer(&r.mHead, unsafe.Pointer(h), unsafe.Pointer(nh))
+ if !ok {
+ panic("BUG: failed swapping head")
+ }
+ go nh.initBuckets()
+ }
+
+ return true, true, n
+}
+
+func (b *mBucket) delete(r *Cache, h *mNode, hash uint32, ns, key uint64) (done, deleted bool) {
+ b.mu.Lock()
+
+ if b.frozen {
+ b.mu.Unlock()
+ return
+ }
+
+ // Scan the node.
+ var (
+ n *Node
+ bLen int
+ )
+ for i := range b.node {
+ n = b.node[i]
+ if n.ns == ns && n.key == key {
+ if atomic.LoadInt32(&n.ref) == 0 {
+ deleted = true
+
+ // Call releaser.
+ if n.value != nil {
+ if r, ok := n.value.(util.Releaser); ok {
+ r.Release()
+ }
+ n.value = nil
+ }
+
+ // Remove node from bucket.
+ b.node = append(b.node[:i], b.node[i+1:]...)
+ bLen = len(b.node)
+ }
+ break
+ }
+ }
+ b.mu.Unlock()
+
+ if deleted {
+ // Call OnDel.
+ for _, f := range n.onDel {
+ f()
+ }
+
+ // Update counter.
+ atomic.AddInt32(&r.size, int32(n.size)*-1)
+ shrink := atomic.AddInt32(&r.nodes, -1) < h.shrinkThreshold
+ if bLen >= mOverflowThreshold {
+ atomic.AddInt32(&h.overflow, -1)
+ }
+
+ // Shrink.
+ if shrink && len(h.buckets) > mInitialSize && atomic.CompareAndSwapInt32(&h.resizeInProgess, 0, 1) {
+ nhLen := len(h.buckets) >> 1
+ nh := &mNode{
+ buckets: make([]unsafe.Pointer, nhLen),
+ mask: uint32(nhLen) - 1,
+ pred: unsafe.Pointer(h),
+ growThreshold: int32(nhLen * mOverflowThreshold),
+ shrinkThreshold: int32(nhLen >> 1),
+ }
+ ok := atomic.CompareAndSwapPointer(&r.mHead, unsafe.Pointer(h), unsafe.Pointer(nh))
+ if !ok {
+ panic("BUG: failed swapping head")
+ }
+ go nh.initBuckets()
+ }
+ }
+
+ return true, deleted
+}
+
+type mNode struct {
+ buckets []unsafe.Pointer // []*mBucket
+ mask uint32
+ pred unsafe.Pointer // *mNode
+ resizeInProgess int32
+
+ overflow int32
+ growThreshold int32
+ shrinkThreshold int32
+}
+
+func (n *mNode) initBucket(i uint32) *mBucket {
+ if b := (*mBucket)(atomic.LoadPointer(&n.buckets[i])); b != nil {
+ return b
+ }
+
+ p := (*mNode)(atomic.LoadPointer(&n.pred))
+ if p != nil {
+ var node []*Node
+ if n.mask > p.mask {
+ // Grow.
+ pb := (*mBucket)(atomic.LoadPointer(&p.buckets[i&p.mask]))
+ if pb == nil {
+ pb = p.initBucket(i & p.mask)
+ }
+ m := pb.freeze()
+ // Split nodes.
+ for _, x := range m {
+ if x.hash&n.mask == i {
+ node = append(node, x)
+ }
+ }
+ } else {
+ // Shrink.
+ pb0 := (*mBucket)(atomic.LoadPointer(&p.buckets[i]))
+ if pb0 == nil {
+ pb0 = p.initBucket(i)
+ }
+ pb1 := (*mBucket)(atomic.LoadPointer(&p.buckets[i+uint32(len(n.buckets))]))
+ if pb1 == nil {
+ pb1 = p.initBucket(i + uint32(len(n.buckets)))
+ }
+ m0 := pb0.freeze()
+ m1 := pb1.freeze()
+ // Merge nodes.
+ node = make([]*Node, 0, len(m0)+len(m1))
+ node = append(node, m0...)
+ node = append(node, m1...)
+ }
+ b := &mBucket{node: node}
+ if atomic.CompareAndSwapPointer(&n.buckets[i], nil, unsafe.Pointer(b)) {
+ if len(node) > mOverflowThreshold {
+ atomic.AddInt32(&n.overflow, int32(len(node)-mOverflowThreshold))
+ }
+ return b
+ }
+ }
+
+ return (*mBucket)(atomic.LoadPointer(&n.buckets[i]))
+}
+
+func (n *mNode) initBuckets() {
+ for i := range n.buckets {
+ n.initBucket(uint32(i))
+ }
+ atomic.StorePointer(&n.pred, nil)
+}
+
+// Cache is a 'cache map'.
+type Cache struct {
+ mu sync.RWMutex
+ mHead unsafe.Pointer // *mNode
+ nodes int32
+ size int32
+ cacher Cacher
+ closed bool
+}
+
+// NewCache creates a new 'cache map'. The cacher is optional and
+// may be nil.
+func NewCache(cacher Cacher) *Cache {
+ h := &mNode{
+ buckets: make([]unsafe.Pointer, mInitialSize),
+ mask: mInitialSize - 1,
+ growThreshold: int32(mInitialSize * mOverflowThreshold),
+ shrinkThreshold: 0,
+ }
+ for i := range h.buckets {
+ h.buckets[i] = unsafe.Pointer(&mBucket{})
+ }
+ r := &Cache{
+ mHead: unsafe.Pointer(h),
+ cacher: cacher,
+ }
+ return r
+}
+
+func (r *Cache) getBucket(hash uint32) (*mNode, *mBucket) {
+ h := (*mNode)(atomic.LoadPointer(&r.mHead))
+ i := hash & h.mask
+ b := (*mBucket)(atomic.LoadPointer(&h.buckets[i]))
+ if b == nil {
+ b = h.initBucket(i)
+ }
+ return h, b
+}
+
+func (r *Cache) delete(n *Node) bool {
+ for {
+ h, b := r.getBucket(n.hash)
+ done, deleted := b.delete(r, h, n.hash, n.ns, n.key)
+ if done {
+ return deleted
+ }
+ }
+ return false
+}
+
+// Nodes returns number of 'cache node' in the map.
+func (r *Cache) Nodes() int {
+ return int(atomic.LoadInt32(&r.nodes))
+}
+
+// Size returns the sum of 'cache node' sizes in the map.
+func (r *Cache) Size() int {
+ return int(atomic.LoadInt32(&r.size))
+}
+
+// Capacity returns cache capacity.
+func (r *Cache) Capacity() int {
+ if r.cacher == nil {
+ return 0
+ }
+ return r.cacher.Capacity()
+}
+
+// SetCapacity sets cache capacity.
+func (r *Cache) SetCapacity(capacity int) {
+ if r.cacher != nil {
+ r.cacher.SetCapacity(capacity)
+ }
+}
+
+// Get gets 'cache node' with the given namespace and key.
+// If the cache node is not found and setFunc is not nil, Get will atomically create
+// the 'cache node' by calling setFunc. Otherwise Get will return nil.
+//
+// The returned 'cache handle' should be released after use by calling Release
+// method.
+func (r *Cache) Get(ns, key uint64, setFunc func() (size int, value Value)) *Handle {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed {
+ return nil
+ }
+
+ hash := murmur32(ns, key, 0xf00)
+ for {
+ h, b := r.getBucket(hash)
+ done, _, n := b.get(r, h, hash, ns, key, setFunc == nil)
+ if done {
+ if n != nil {
+ n.mu.Lock()
+ if n.value == nil {
+ if setFunc == nil {
+ n.mu.Unlock()
+ n.unref()
+ return nil
+ }
+
+ n.size, n.value = setFunc()
+ if n.value == nil {
+ n.size = 0
+ n.mu.Unlock()
+ n.unref()
+ return nil
+ }
+ atomic.AddInt32(&r.size, int32(n.size))
+ }
+ n.mu.Unlock()
+ if r.cacher != nil {
+ r.cacher.Promote(n)
+ }
+ return &Handle{unsafe.Pointer(n)}
+ }
+
+ break
+ }
+ }
+ return nil
+}
+
+// Delete removes and bans the 'cache node' with the given namespace and key.
+// A banned 'cache node' will never be inserted into the 'cache tree'. The ban
+// applies only to that particular 'cache node', so when a 'cache node'
+// is recreated it will not be banned.
+//
+// If onDel is not nil, then it will be executed if such a 'cache node'
+// doesn't exist or once the 'cache node' is released.
+//
+// Delete returns true if such a 'cache node' exists.
+func (r *Cache) Delete(ns, key uint64, onDel func()) bool {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed {
+ return false
+ }
+
+ hash := murmur32(ns, key, 0xf00)
+ for {
+ h, b := r.getBucket(hash)
+ done, _, n := b.get(r, h, hash, ns, key, true)
+ if done {
+ if n != nil {
+ if onDel != nil {
+ n.mu.Lock()
+ n.onDel = append(n.onDel, onDel)
+ n.mu.Unlock()
+ }
+ if r.cacher != nil {
+ r.cacher.Ban(n)
+ }
+ n.unref()
+ return true
+ }
+
+ break
+ }
+ }
+
+ if onDel != nil {
+ onDel()
+ }
+
+ return false
+}
+
+// Evict evicts 'cache node' with the given namespace and key. This will
+// simply call Cacher.Evict.
+//
+// Evict returns true if such a 'cache node' exists.
+func (r *Cache) Evict(ns, key uint64) bool {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed {
+ return false
+ }
+
+ hash := murmur32(ns, key, 0xf00)
+ for {
+ h, b := r.getBucket(hash)
+ done, _, n := b.get(r, h, hash, ns, key, true)
+ if done {
+ if n != nil {
+ if r.cacher != nil {
+ r.cacher.Evict(n)
+ }
+ n.unref()
+ return true
+ }
+
+ break
+ }
+ }
+
+ return false
+}
+
+// EvictNS evicts 'cache node' with the given namespace. This will
+// simply call Cacher.EvictNS.
+func (r *Cache) EvictNS(ns uint64) {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed {
+ return
+ }
+
+ if r.cacher != nil {
+ r.cacher.EvictNS(ns)
+ }
+}
+
+// EvictAll evicts all 'cache node'. This will simply call Cacher.EvictAll.
+func (r *Cache) EvictAll() {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed {
+ return
+ }
+
+ if r.cacher != nil {
+ r.cacher.EvictAll()
+ }
+}
+
+// Close closes the 'cache map' and forcefully releases all 'cache node'.
+func (r *Cache) Close() error {
+ r.mu.Lock()
+ if !r.closed {
+ r.closed = true
+
+ h := (*mNode)(r.mHead)
+ h.initBuckets()
+
+ for i := range h.buckets {
+ b := (*mBucket)(h.buckets[i])
+ for _, n := range b.node {
+ // Call releaser.
+ if n.value != nil {
+ if r, ok := n.value.(util.Releaser); ok {
+ r.Release()
+ }
+ n.value = nil
+ }
+
+ // Call OnDel.
+ for _, f := range n.onDel {
+ f()
+ }
+ n.onDel = nil
+ }
+ }
+ }
+ r.mu.Unlock()
+
+ // Avoid deadlock.
+ if r.cacher != nil {
+ if err := r.cacher.Close(); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// CloseWeak closes the 'cache map' and evicts all 'cache node' from the cacher, but
+// unlike Close it doesn't forcefully release 'cache node'.
+func (r *Cache) CloseWeak() error {
+ r.mu.Lock()
+ if !r.closed {
+ r.closed = true
+ }
+ r.mu.Unlock()
+
+ // Avoid deadlock.
+ if r.cacher != nil {
+ r.cacher.EvictAll()
+ if err := r.cacher.Close(); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// Node is a 'cache node'.
+type Node struct {
+ r *Cache
+
+ hash uint32
+ ns, key uint64
+
+ mu sync.Mutex
+ size int
+ value Value
+
+ ref int32
+ onDel []func()
+
+ CacheData unsafe.Pointer
+}
+
+// NS returns this 'cache node' namespace.
+func (n *Node) NS() uint64 {
+ return n.ns
+}
+
+// Key returns this 'cache node' key.
+func (n *Node) Key() uint64 {
+ return n.key
+}
+
+// Size returns this 'cache node' size.
+func (n *Node) Size() int {
+ return n.size
+}
+
+// Value returns this 'cache node' value.
+func (n *Node) Value() Value {
+ return n.value
+}
+
+// Ref returns this 'cache node' ref counter.
+func (n *Node) Ref() int32 {
+ return atomic.LoadInt32(&n.ref)
+}
+
+// GetHandle returns a handle for this 'cache node'.
+func (n *Node) GetHandle() *Handle {
+ if atomic.AddInt32(&n.ref, 1) <= 1 {
+ panic("BUG: Node.GetHandle on zero ref")
+ }
+ return &Handle{unsafe.Pointer(n)}
+}
+
+func (n *Node) unref() {
+ if atomic.AddInt32(&n.ref, -1) == 0 {
+ n.r.delete(n)
+ }
+}
+
+func (n *Node) unrefLocked() {
+ if atomic.AddInt32(&n.ref, -1) == 0 {
+ n.r.mu.RLock()
+ if !n.r.closed {
+ n.r.delete(n)
+ }
+ n.r.mu.RUnlock()
+ }
+}
+
+// Handle is a 'cache handle' of a 'cache node'.
+type Handle struct {
+ n unsafe.Pointer // *Node
+}
+
+// Value returns the value of the 'cache node'.
+func (h *Handle) Value() Value {
+ n := (*Node)(atomic.LoadPointer(&h.n))
+ if n != nil {
+ return n.value
+ }
+ return nil
+}
+
+// Release releases this 'cache handle'.
+// It is safe to call release multiple times.
+func (h *Handle) Release() {
+ nPtr := atomic.LoadPointer(&h.n)
+ if nPtr != nil && atomic.CompareAndSwapPointer(&h.n, nPtr, nil) {
+ n := (*Node)(nPtr)
+ n.unrefLocked()
+ }
+}
+
+func murmur32(ns, key uint64, seed uint32) uint32 {
+ const (
+ m = uint32(0x5bd1e995)
+ r = 24
+ )
+
+ k1 := uint32(ns >> 32)
+ k2 := uint32(ns)
+ k3 := uint32(key >> 32)
+ k4 := uint32(key)
+
+ k1 *= m
+ k1 ^= k1 >> r
+ k1 *= m
+
+ k2 *= m
+ k2 ^= k2 >> r
+ k2 *= m
+
+ k3 *= m
+ k3 ^= k3 >> r
+ k3 *= m
+
+ k4 *= m
+ k4 ^= k4 >> r
+ k4 *= m
+
+ h := seed
+
+ h *= m
+ h ^= k1
+ h *= m
+ h ^= k2
+ h *= m
+ h ^= k3
+ h *= m
+ h ^= k4
+
+ h ^= h >> 13
+ h *= m
+ h ^= h >> 15
+
+ return h
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/cache/lru.go b/vendor/github.com/syndtr/goleveldb/leveldb/cache/lru.go
new file mode 100644
index 000000000..d9a84cde1
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/cache/lru.go
@@ -0,0 +1,195 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package cache
+
+import (
+ "sync"
+ "unsafe"
+)
+
+// lruNode is an entry of the LRU's doubly linked recency list. It pairs a
+// cache node with the handle the LRU holds on it; ban marks an entry that
+// must not be linked into the list.
+type lruNode struct {
+	n   *Node
+	h   *Handle
+	ban bool
+
+	next, prev *lruNode
+}
+
+// insert links n into the list immediately after at.
+func (n *lruNode) insert(at *lruNode) {
+	after := at.next
+	n.prev, n.next = at, after
+	at.next = n
+	after.prev = n
+}
+
+// remove unlinks n from the list and clears its links. It panics if n is
+// not currently linked (its prev pointer is nil).
+func (n *lruNode) remove() {
+	if n.prev == nil {
+		panic("BUG: removing removed node")
+	}
+	n.prev.next = n.next
+	n.next.prev = n.prev
+	n.prev, n.next = nil, nil
+}
+
+// lru is an LRU cacher. All fields are guarded by mu.
+type lru struct {
+ mu sync.Mutex
+ capacity int // Maximum total size of the linked nodes.
+ used int // Sum of Size() of the nodes currently linked in the list.
+ recent lruNode // List sentinel: recent.next is the most recently promoted entry, recent.prev the eviction candidate.
+}
+
+// reset empties the list by pointing the sentinel at itself and zeroes the
+// usage counter. It does not take r.mu.
+func (r *lru) reset() {
+ r.recent.next = &r.recent
+ r.recent.prev = &r.recent
+ r.used = 0
+}
+
+// Capacity returns the current capacity, read under r.mu.
+func (r *lru) Capacity() int {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+ return r.capacity
+}
+
+// SetCapacity changes the capacity and evicts entries from the tail of the
+// recency list until the used size fits. The handles of the evicted entries
+// are released after r.mu has been unlocked.
+func (r *lru) SetCapacity(capacity int) {
+	var victims []*lruNode
+
+	r.mu.Lock()
+	r.capacity = capacity
+	for r.used > r.capacity {
+		tail := r.recent.prev
+		if tail == nil {
+			panic("BUG: invalid LRU used or capacity counter")
+		}
+		tail.remove()
+		tail.n.CacheData = nil
+		r.used -= tail.n.Size()
+		victims = append(victims, tail)
+	}
+	r.mu.Unlock()
+
+	for i := range victims {
+		victims[i].h.Release()
+	}
+}
+
+// Promote marks n as most recently used.
+//
+// A node without CacheData is not tracked yet: if its size fits within the
+// capacity, a handle is taken on it, it is linked at the front of the list,
+// and entries are evicted from the tail until the used size fits again. A
+// node larger than the capacity is left untracked. A node that is already
+// tracked is moved to the front, unless it is banned.
+//
+// Handles of evicted entries are released after r.mu has been unlocked.
+func (r *lru) Promote(n *Node) {
+ var evicted []*lruNode
+
+ r.mu.Lock()
+ if n.CacheData == nil {
+ if n.Size() <= r.capacity {
+ rn := &lruNode{n: n, h: n.GetHandle()}
+ rn.insert(&r.recent)
+ n.CacheData = unsafe.Pointer(rn)
+ r.used += n.Size()
+
+ // Evict from the tail (least recently promoted) until we fit.
+ for r.used > r.capacity {
+ rn := r.recent.prev
+ if rn == nil {
+ panic("BUG: invalid LRU used or capacity counter")
+ }
+ rn.remove()
+ rn.n.CacheData = nil
+ r.used -= rn.n.Size()
+ evicted = append(evicted, rn)
+ }
+ }
+ } else {
+ rn := (*lruNode)(n.CacheData)
+ if !rn.ban {
+ // Move to the front of the list.
+ rn.remove()
+ rn.insert(&r.recent)
+ }
+ }
+ r.mu.Unlock()
+
+ for _, rn := range evicted {
+ rn.h.Release()
+ }
+}
+
+// Ban marks n as banned. A banned node keeps a non-nil CacheData whose ban
+// flag is set, which makes Promote and Evict leave it alone.
+//
+// If n is currently linked in the list it is unlinked, its size is
+// subtracted from the used counter and the LRU's handle on it is released
+// after r.mu has been unlocked.
+func (r *lru) Ban(n *Node) {
+ r.mu.Lock()
+ if n.CacheData == nil {
+ // Not tracked: attach a banned placeholder that holds no handle.
+ n.CacheData = unsafe.Pointer(&lruNode{n: n, ban: true})
+ } else {
+ rn := (*lruNode)(n.CacheData)
+ if !rn.ban {
+ rn.remove()
+ rn.ban = true
+ r.used -= rn.n.Size()
+ r.mu.Unlock()
+
+ // NOTE(review): rn.h is cleared after the lock has been dropped.
+ rn.h.Release()
+ rn.h = nil
+ return
+ }
+ }
+ r.mu.Unlock()
+}
+
+// Evict stops tracking n and releases the handle the LRU holds on it. It is
+// a no-op when n is not tracked or is banned.
+//
+// The entry is unlinked from the recency list and its size is subtracted
+// from the used counter before CacheData is cleared. Without that, the stale
+// entry would keep counting against the capacity, and a later capacity
+// eviction of it could clear the CacheData of a node that has been promoted
+// again in the meantime.
+func (r *lru) Evict(n *Node) {
+	r.mu.Lock()
+	rn := (*lruNode)(n.CacheData)
+	if rn == nil || rn.ban {
+		r.mu.Unlock()
+		return
+	}
+	// A tracked, non-banned entry is always linked (see Promote), so remove
+	// cannot hit its "removing removed node" panic here.
+	rn.remove()
+	r.used -= n.Size()
+	n.CacheData = nil
+	r.mu.Unlock()
+
+	// Release outside the lock, as the other eviction paths do.
+	rn.h.Release()
+}
+
+// EvictNS evicts every linked entry whose node belongs to namespace ns. The
+// list is walked from the tail towards the front, and the handles of the
+// evicted entries are released after r.mu has been unlocked.
+func (r *lru) EvictNS(ns uint64) {
+	var victims []*lruNode
+
+	r.mu.Lock()
+	cur := r.recent.prev
+	for cur != &r.recent {
+		rn := cur
+		// Step first: remove() clears the links of rn.
+		cur = cur.prev
+		if rn.n.NS() != ns {
+			continue
+		}
+		rn.remove()
+		rn.n.CacheData = nil
+		r.used -= rn.n.Size()
+		victims = append(victims, rn)
+	}
+	r.mu.Unlock()
+
+	for i := range victims {
+		victims[i].h.Release()
+	}
+}
+
+// EvictAll evicts every linked entry: it clears the CacheData of each node,
+// resets the list, and then releases the handles after r.mu has been
+// unlocked.
+func (r *lru) EvictAll() {
+ r.mu.Lock()
+ back := r.recent.prev
+ for rn := back; rn != &r.recent; rn = rn.prev {
+ rn.n.CacheData = nil
+ }
+ r.reset()
+ r.mu.Unlock()
+
+ // reset only rewires the sentinel; the prev links of the detached entries
+ // are untouched, so the old chain can still be walked from back until it
+ // reaches the sentinel.
+ for rn := back; rn != &r.recent; rn = rn.prev {
+ rn.h.Release()
+ }
+}
+
+// Close does nothing and always returns nil.
+func (r *lru) Close() error {
+ return nil
+}
+
+// NewLRU creates a new, empty LRU-cache with the given capacity.
+func NewLRU(capacity int) Cacher {
+	r := new(lru)
+	r.capacity = capacity
+	r.reset()
+	return r
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/comparer.go b/vendor/github.com/syndtr/goleveldb/leveldb/comparer.go
new file mode 100644
index 000000000..448402b82
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/comparer.go
@@ -0,0 +1,67 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/comparer"
+)
+
+// iComparer compares internal keys. It wraps a user-supplied comparer, which
+// orders the user-key part, and breaks ties on the key number.
+type iComparer struct {
+	ucmp comparer.Comparer
+}
+
+// uName returns the name of the user comparer.
+func (icmp *iComparer) uName() string { return icmp.ucmp.Name() }
+
+// uCompare compares two user keys with the user comparer.
+func (icmp *iComparer) uCompare(a, b []byte) int { return icmp.ucmp.Compare(a, b) }
+
+// uSeparator forwards to the user comparer's Separator.
+func (icmp *iComparer) uSeparator(dst, a, b []byte) []byte {
+	return icmp.ucmp.Separator(dst, a, b)
+}
+
+// uSuccessor forwards to the user comparer's Successor.
+func (icmp *iComparer) uSuccessor(dst, b []byte) []byte {
+	return icmp.ucmp.Successor(dst, b)
+}
+
+// Name returns the name of the user comparer.
+func (icmp *iComparer) Name() string { return icmp.uName() }
+
+// Compare orders two internal keys: first by user key using the user
+// comparer, then, for equal user keys, by key number in descending order
+// (the key with the larger number sorts first).
+func (icmp *iComparer) Compare(a, b []byte) int {
+	ika, ikb := internalKey(a), internalKey(b)
+	if x := icmp.uCompare(ika.ukey(), ikb.ukey()); x != 0 {
+		return x
+	}
+	switch m, n := ika.num(), ikb.num(); {
+	case m > n:
+		return -1
+	case m < n:
+		return 1
+	}
+	return 0
+}
+
+// Separator asks the user comparer for a separator between the user keys of
+// a and b. The result is only used when it is shorter than a's user key and
+// sorts after it; keyMaxNumBytes is then appended to form an internal key.
+// In every other case nil is returned.
+func (icmp *iComparer) Separator(dst, a, b []byte) []byte {
+	ua, ub := internalKey(a).ukey(), internalKey(b).ukey()
+	dst = icmp.uSeparator(dst, ua, ub)
+	if dst == nil || len(dst) >= len(ua) || icmp.uCompare(ua, dst) >= 0 {
+		return nil
+	}
+	// Append earliest possible number.
+	return append(dst, keyMaxNumBytes...)
+}
+
+// Successor asks the user comparer for a successor of b's user key. The
+// result is only used when it is shorter than that user key and sorts after
+// it; keyMaxNumBytes is then appended to form an internal key. In every
+// other case nil is returned.
+func (icmp *iComparer) Successor(dst, b []byte) []byte {
+	ub := internalKey(b).ukey()
+	dst = icmp.uSuccessor(dst, ub)
+	if dst == nil || len(dst) >= len(ub) || icmp.uCompare(ub, dst) >= 0 {
+		return nil
+	}
+	// Append earliest possible number.
+	return append(dst, keyMaxNumBytes...)
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go
new file mode 100644
index 000000000..14dddf88d
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go
@@ -0,0 +1,51 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package comparer
+
+import "bytes"
+
+// bytesComparer orders keys lexicographically by byte value.
+type bytesComparer struct{}
+
+// Compare returns bytes.Compare(a, b).
+func (bytesComparer) Compare(a, b []byte) int {
+	return bytes.Compare(a, b)
+}
+
+// Name returns the name of this comparer.
+func (bytesComparer) Name() string {
+	return "leveldb.BytewiseComparator"
+}
+
+// Separator appends to dst a short key x such that a < x < b and returns the
+// extended slice. x is the common prefix of a and b followed by the first
+// differing byte of a incremented by one.
+//
+// It returns nil, leaving dst unused, when one key is a prefix of the other
+// or when the incremented byte would not stay below the corresponding byte
+// of b.
+func (bytesComparer) Separator(dst, a, b []byte) []byte {
+	i, n := 0, len(a)
+	if n > len(b) {
+		n = len(b)
+	}
+	for ; i < n && a[i] == b[i]; i++ {
+	}
+	if i >= n {
+		// Do not shorten if one string is a prefix of the other
+		return nil
+	}
+	if c := a[i]; c < 0xff && c+1 < b[i] {
+		dst = append(dst, a[:i+1]...)
+		// Bump the last appended byte. Indexing from the end keeps this
+		// correct when dst already held data on entry.
+		dst[len(dst)-1]++
+		return dst
+	}
+	return nil
+}
+
+// Successor appends to dst a short key x >= b and returns the extended
+// slice. x is b truncated after its first byte that is not 0xff, with that
+// byte incremented by one.
+//
+// It returns nil, leaving dst unused, when b is empty or consists only of
+// 0xff bytes.
+func (bytesComparer) Successor(dst, b []byte) []byte {
+	for i, c := range b {
+		if c != 0xff {
+			dst = append(dst, b[:i+1]...)
+			// Bump the last appended byte. Indexing from the end keeps
+			// this correct when dst already held data on entry.
+			dst[len(dst)-1]++
+			return dst
+		}
+	}
+	return nil
+}
+
+// DefaultComparer is the default implementation of the Comparer interface.
+// It uses the natural ordering, consistent with bytes.Compare.
+var DefaultComparer = bytesComparer{}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go
new file mode 100644
index 000000000..14a28f16f
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go
@@ -0,0 +1,57 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package comparer provides interface and implementation for ordering
+// sets of data.
+package comparer
+
+// BasicComparer is the interface that wraps the basic Compare method.
+// It is embedded by Comparer.
+type BasicComparer interface {
+ // Compare returns -1, 0, or +1 depending on whether a is 'less than',
+ // 'equal to' or 'greater than' b. The two arguments can only be 'equal'
+ // if their contents are exactly equal. Furthermore, the empty slice
+ // must be 'less than' any non-empty slice.
+ Compare(a, b []byte) int
+}
+
+// Comparer defines a total ordering over the space of []byte keys: a 'less
+// than' relationship.
+type Comparer interface {
+ BasicComparer
+
+ // Name returns name of the comparer.
+ //
+ // The Level-DB on-disk format stores the comparer name, and opening a
+ // database with a different comparer from the one it was created with
+ // will result in an error.
+ //
+ // An implementation should switch to a new name whenever the comparer
+ // implementation changes in a way that will cause the relative ordering
+ // of any two keys to change.
+ //
+ // Names starting with "leveldb." are reserved and should not be used
+ // by any users of this package.
+ Name() string
+
+ // Below are advanced functions used to reduce the space requirements
+ // for internal data structures such as index blocks.
+
+ // Separator appends a sequence of bytes x to dst such that a <= x && x < b,
+ // where 'less than' is consistent with Compare. An implementation should
+ // return nil if x is equal to a.
+ //
+ // The contents of a and b must not be modified by any means. Doing so
+ // may cause corruption of the internal state.
+ Separator(dst, a, b []byte) []byte
+
+ // Successor appends a sequence of bytes x to dst such that x >= b, where
+ // 'less than' is consistent with Compare. An implementation should return
+ // nil if x is equal to b.
+ //
+ // The contents of b must not be modified by any means. Doing so may cause
+ // corruption of the internal state.
+ Successor(dst, b []byte) []byte
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db.go b/vendor/github.com/syndtr/goleveldb/leveldb/db.go
new file mode 100644
index 000000000..a02cb2c50
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db.go
@@ -0,0 +1,1091 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "container/list"
+ "fmt"
+ "io"
+ "os"
+ "runtime"
+ "strings"
+ "sync"
+ "sync/atomic"
+ "time"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/journal"
+ "github.com/syndtr/goleveldb/leveldb/memdb"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/table"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// DB is a LevelDB database.
+//
+// Instances are created by openDB, which is reached through Open, OpenFile,
+// Recover and RecoverFile.
+type DB struct {
+ // Need 64-bit alignment.
+ seq uint64 // Sequence number; initialised from the session and advanced during journal recovery.
+
+ // Session.
+ s *session
+
+ // MemDB.
+ memMu sync.RWMutex
+ memPool chan *memdb.DB
+ mem, frozenMem *memDB
+ journal *journal.Writer
+ journalWriter storage.Writer
+ journalFd storage.FileDesc
+ frozenJournalFd storage.FileDesc
+ frozenSeq uint64
+
+ // Snapshot.
+ snapsMu sync.Mutex
+ snapsList *list.List
+
+ // Stats.
+ aliveSnaps, aliveIters int32 // Read atomically by GetProperty.
+
+ // Write.
+ batchPool sync.Pool
+ writeMergeC chan writeMerge
+ writeMergedC chan bool
+ writeLockC chan struct{} // Buffered with capacity 1; Close sends on it to acquire the writer lock.
+ writeAckC chan error
+ writeDelay time.Duration
+ writeDelayN int
+ tr *Transaction // Open transaction, if any; discarded by Close.
+
+ // Compaction.
+ compCommitLk sync.Mutex
+ tcompCmdC chan cCmd
+ tcompPauseC chan chan<- struct{}
+ mcompCmdC chan cCmd
+ compErrC chan error
+ compPerErrC chan error
+ compErrSetC chan error
+ compWriteLocking bool
+ compStats cStats
+ memdbMaxLevel int // For testing.
+
+ // Close.
+ closeW sync.WaitGroup // Counts the compaction goroutines; waited on by Close.
+ closeC chan struct{} // Closed by Close to signal all goroutines.
+ closed uint32
+ closer io.Closer // Storage opened by OpenFile/RecoverFile; closed by Close when set.
+}
+
+// openDB builds a DB on top of an already recovered or created session.
+//
+// It recovers the journals (using the read-only variant when the session
+// options ask for read-only mode), removes obsolete files in read-write
+// mode, and starts the background goroutines. In read-write mode the two
+// compaction goroutines are registered in db.closeW. A finalizer that calls
+// Close is installed on the returned DB.
+func openDB(s *session) (*DB, error) {
+ s.log("db@open opening")
+ start := time.Now()
+ db := &DB{
+ s: s,
+ // Initial sequence
+ seq: s.stSeqNum,
+ // MemDB
+ memPool: make(chan *memdb.DB, 1),
+ // Snapshot
+ snapsList: list.New(),
+ // Write
+ batchPool: sync.Pool{New: newBatch},
+ writeMergeC: make(chan writeMerge),
+ writeMergedC: make(chan bool),
+ writeLockC: make(chan struct{}, 1),
+ writeAckC: make(chan error),
+ // Compaction
+ tcompCmdC: make(chan cCmd),
+ tcompPauseC: make(chan chan<- struct{}),
+ mcompCmdC: make(chan cCmd),
+ compErrC: make(chan error),
+ compPerErrC: make(chan error),
+ compErrSetC: make(chan error),
+ // Close
+ closeC: make(chan struct{}),
+ }
+
+ // Read-only mode.
+ readOnly := s.o.GetReadOnly()
+
+ if readOnly {
+ // Recover journals (read-only mode).
+ if err := db.recoverJournalRO(); err != nil {
+ return nil, err
+ }
+ } else {
+ // Recover journals.
+ if err := db.recoverJournal(); err != nil {
+ return nil, err
+ }
+
+ // Remove any obsolete files.
+ if err := db.checkAndCleanFiles(); err != nil {
+ // Close journal.
+ if db.journal != nil {
+ db.journal.Close()
+ db.journalWriter.Close()
+ }
+ return nil, err
+ }
+
+ }
+
+ // Doesn't need to be included in the wait group.
+ go db.compactionError()
+ go db.mpoolDrain()
+
+ if readOnly {
+ db.SetReadOnly()
+ } else {
+ // Two goroutines are started below, hence Add(2).
+ db.closeW.Add(2)
+ go db.tCompaction()
+ go db.mCompaction()
+ // go db.jWriter()
+ }
+
+ s.logf("db@open done T·%v", time.Since(start))
+
+ runtime.SetFinalizer(db, (*DB).Close)
+ return db, nil
+}
+
+// Open opens or creates a DB for the given storage.
+// The DB will be created if it does not exist, unless ErrorIfMissing is true.
+// Also, if ErrorIfExist is true and the DB exists, Open will return
+// os.ErrExist error.
+//
+// Open will return an error with type of ErrCorrupted if corruption is
+// detected in the DB. Use errors.IsCorrupted to test whether an error is
+// due to corruption. Corrupted DB can be recovered with Recover function.
+//
+// The returned DB instance is safe for concurrent use.
+// The DB must be closed after use, by calling Close method.
+func Open(stor storage.Storage, o *opt.Options) (db *DB, err error) {
+ s, err := newSession(stor, o)
+ if err != nil {
+ return
+ }
+ // Tear the session down again if anything below fails; the closure reads
+ // the named result err at return time.
+ defer func() {
+ if err != nil {
+ s.close()
+ s.release()
+ }
+ }()
+
+ err = s.recover()
+ if err != nil {
+ // Only a "does not exist" error may be answered by creating the DB.
+ if !os.IsNotExist(err) || s.o.GetErrorIfMissing() {
+ return
+ }
+ err = s.create()
+ if err != nil {
+ return
+ }
+ } else if s.o.GetErrorIfExist() {
+ err = os.ErrExist
+ return
+ }
+
+ return openDB(s)
+}
+
+// OpenFile opens or creates a DB for the given path.
+// The DB will be created if it does not exist, unless ErrorIfMissing is true.
+// Also, if ErrorIfExist is true and the DB exists, OpenFile will return
+// os.ErrExist error.
+//
+// OpenFile uses the standard file-system backed storage implementation as
+// described in the leveldb/storage package. The storage is opened read-only
+// when the options ask for read-only mode. It is closed again if opening the
+// DB fails; otherwise the DB takes ownership of it and closes it in Close.
+//
+// OpenFile will return an error with type of ErrCorrupted if corruption is
+// detected in the DB. Use errors.IsCorrupted to test whether an error is
+// due to corruption. Corrupted DB can be recovered with Recover function.
+//
+// The returned DB instance is safe for concurrent use.
+// The DB must be closed after use, by calling Close method.
+func OpenFile(path string, o *opt.Options) (db *DB, err error) {
+	stor, err := storage.OpenFile(path, o.GetReadOnly())
+	if err != nil {
+		return
+	}
+	if db, err = Open(stor, o); err != nil {
+		stor.Close()
+		return
+	}
+	db.closer = stor
+	return
+}
+
+// Recover recovers and opens a DB with missing or corrupted manifest files
+// for the given storage. It will ignore any manifest files, valid or not.
+// The DB must already exist or it will return an error.
+// Also, Recover will ignore ErrorIfMissing and ErrorIfExist options.
+//
+// The returned DB instance is safe for concurrent use.
+// The DB must be closed after use, by calling Close method.
+func Recover(stor storage.Storage, o *opt.Options) (db *DB, err error) {
+ s, err := newSession(stor, o)
+ if err != nil {
+ return
+ }
+ // Tear the session down again if anything below fails; the closure reads
+ // the named result err at return time.
+ defer func() {
+ if err != nil {
+ s.close()
+ s.release()
+ }
+ }()
+
+ err = recoverTable(s, o)
+ if err != nil {
+ return
+ }
+ return openDB(s)
+}
+
+// RecoverFile recovers and opens a DB with missing or corrupted manifest files
+// for the given path. It will ignore any manifest files, valid or not.
+// The DB must already exist or it will return an error.
+// Also, RecoverFile will ignore ErrorIfMissing and ErrorIfExist options.
+//
+// RecoverFile uses the standard file-system backed storage implementation as
+// described in the leveldb/storage package. The storage is always opened
+// read-write. It is closed again if recovery fails; otherwise the DB takes
+// ownership of it and closes it in Close.
+//
+// The returned DB instance is safe for concurrent use.
+// The DB must be closed after use, by calling Close method.
+func RecoverFile(path string, o *opt.Options) (db *DB, err error) {
+	stor, err := storage.OpenFile(path, false)
+	if err != nil {
+		return
+	}
+	if db, err = Recover(stor, o); err != nil {
+		stor.Close()
+		return
+	}
+	db.closer = stor
+	return
+}
+
+// recoverTable rebuilds the session state from the table files found in
+// storage, without looking at any manifest.
+//
+// Every table file is scanned. Tables with at least one valid key are added
+// to level 0 of a fresh session record; tables that contain corrupted keys
+// or blocks are first rewritten with only their valid entries or, when
+// opt.StrictRecovery is set, dropped. The highest sequence number seen is
+// stored in the record, a new manifest is created and the record committed.
+func recoverTable(s *session, o *opt.Options) error {
+ o = dupOptions(o)
+ // Mask StrictReader, let StrictRecovery do its job.
+ o.Strict &= ^opt.StrictReader
+
+ // Get all tables and sort it by file number.
+ fds, err := s.stor.List(storage.TypeTable)
+ if err != nil {
+ return err
+ }
+ sortFds(fds)
+
+ var (
+ maxSeq uint64
+ recoveredKey, goodKey, corruptedKey, corruptedBlock, droppedTable int
+
+ // We will drop corrupted table.
+ strict = o.GetStrict(opt.StrictRecovery)
+ noSync = o.GetNoSync()
+
+ rec = &sessionRecord{}
+ bpool = util.NewBufferPool(o.GetBlockSize() + 5)
+ )
+ // buildTable writes all entries of iter that carry a valid internal key
+ // into a new temporary table file and returns its descriptor and size.
+ // On error the temporary file is removed and a zero descriptor returned.
+ buildTable := func(iter iterator.Iterator) (tmpFd storage.FileDesc, size int64, err error) {
+ tmpFd = s.newTemp()
+ writer, err := s.stor.Create(tmpFd)
+ if err != nil {
+ return
+ }
+ defer func() {
+ writer.Close()
+ if err != nil {
+ s.stor.Remove(tmpFd)
+ tmpFd = storage.FileDesc{}
+ }
+ }()
+
+ // Copy entries.
+ tw := table.NewWriter(writer, o)
+ for iter.Next() {
+ key := iter.Key()
+ if validInternalKey(key) {
+ err = tw.Append(key, iter.Value())
+ if err != nil {
+ return
+ }
+ }
+ }
+ err = iter.Error()
+ if err != nil {
+ return
+ }
+ err = tw.Close()
+ if err != nil {
+ return
+ }
+ if !noSync {
+ err = writer.Sync()
+ if err != nil {
+ return
+ }
+ }
+ size = int64(tw.BytesLen())
+ return
+ }
+ // recoverTable (the closure, shadowing the enclosing function's name)
+ // scans a single table file and updates rec and the counters above.
+ recoverTable := func(fd storage.FileDesc) error {
+ s.logf("table@recovery recovering @%d", fd.Num)
+ reader, err := s.stor.Open(fd)
+ if err != nil {
+ return err
+ }
+ // closed is set once the reader has been closed explicitly (before
+ // the rename below), so that the deferred close is skipped.
+ var closed bool
+ defer func() {
+ if !closed {
+ reader.Close()
+ }
+ }()
+
+ // Get file size.
+ size, err := reader.Seek(0, 2)
+ if err != nil {
+ return err
+ }
+
+ var (
+ tSeq uint64
+ tgoodKey, tcorruptedKey, tcorruptedBlock int
+ imin, imax []byte
+ )
+ tr, err := table.NewReader(reader, size, fd, nil, bpool, o)
+ if err != nil {
+ return err
+ }
+ iter := tr.NewIterator(nil, nil)
+ // Count corrupted blocks through the iterator's error callback, when
+ // the iterator supports one.
+ if itererr, ok := iter.(iterator.ErrorCallbackSetter); ok {
+ itererr.SetErrorCallback(func(err error) {
+ if errors.IsCorrupted(err) {
+ s.logf("table@recovery block corruption @%d %q", fd.Num, err)
+ tcorruptedBlock++
+ }
+ })
+ }
+
+ // Scan the table.
+ for iter.Next() {
+ key := iter.Key()
+ _, seq, _, kerr := parseInternalKey(key)
+ if kerr != nil {
+ tcorruptedKey++
+ continue
+ }
+ tgoodKey++
+ if seq > tSeq {
+ tSeq = seq
+ }
+ // imin keeps the first valid key, imax the last one; both are
+ // copies of the iterator's key.
+ if imin == nil {
+ imin = append([]byte{}, key...)
+ }
+ imax = append(imax[:0], key...)
+ }
+ if err := iter.Error(); err != nil {
+ iter.Release()
+ return err
+ }
+ iter.Release()
+
+ goodKey += tgoodKey
+ corruptedKey += tcorruptedKey
+ corruptedBlock += tcorruptedBlock
+
+ if strict && (tcorruptedKey > 0 || tcorruptedBlock > 0) {
+ droppedTable++
+ s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
+ return nil
+ }
+
+ if tgoodKey > 0 {
+ if tcorruptedKey > 0 || tcorruptedBlock > 0 {
+ // Rebuild the table.
+ s.logf("table@recovery rebuilding @%d", fd.Num)
+ iter := tr.NewIterator(nil, nil)
+ tmpFd, newSize, err := buildTable(iter)
+ iter.Release()
+ if err != nil {
+ return err
+ }
+ // Close the original before renaming the rebuilt file over it.
+ closed = true
+ reader.Close()
+ if err := s.stor.Rename(tmpFd, fd); err != nil {
+ return err
+ }
+ size = newSize
+ }
+ if tSeq > maxSeq {
+ maxSeq = tSeq
+ }
+ recoveredKey += tgoodKey
+ // Add table to level 0.
+ rec.addTable(0, fd.Num, size, imin, imax)
+ s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
+ } else {
+ droppedTable++
+ s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", fd.Num, tcorruptedKey, tcorruptedBlock, size)
+ }
+
+ return nil
+ }
+
+ // Recover all tables.
+ if len(fds) > 0 {
+ s.logf("table@recovery F·%d", len(fds))
+
+ // Mark file number as used.
+ s.markFileNum(fds[len(fds)-1].Num)
+
+ for _, fd := range fds {
+ if err := recoverTable(fd); err != nil {
+ return err
+ }
+ }
+
+ s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(fds), recoveredKey, goodKey, corruptedKey, maxSeq)
+ }
+
+ // Set sequence number.
+ rec.setSeqNum(maxSeq)
+
+ // Create new manifest.
+ if err := s.create(); err != nil {
+ return err
+ }
+
+ // Commit.
+ return s.commit(rec)
+}
+
+// recoverJournal replays the journal files that are still relevant to the
+// session (file number at or above the session's journal number, or equal to
+// its previous journal number) in file-number order.
+//
+// Each journal is replayed into a memdb that is flushed to a table whenever
+// it reaches the write buffer size. When moving on to the next journal, the
+// remaining memdb content is flushed, the session record is committed and
+// the previous journal file is removed. After the last journal a new
+// journal/memdb pair is created, the final record is committed and the last
+// replayed journal file is removed.
+//
+// db.seq is advanced to the end of every batch that was applied.
+func (db *DB) recoverJournal() error {
+ // Get all journals and sort it by file number.
+ rawFds, err := db.s.stor.List(storage.TypeJournal)
+ if err != nil {
+ return err
+ }
+ sortFds(rawFds)
+
+ // Journals that will be recovered.
+ var fds []storage.FileDesc
+ for _, fd := range rawFds {
+ if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum {
+ fds = append(fds, fd)
+ }
+ }
+
+ var (
+ ofd storage.FileDesc // Obsolete file.
+ rec = &sessionRecord{}
+ )
+
+ // Recover journals.
+ if len(fds) > 0 {
+ db.logf("journal@recovery F·%d", len(fds))
+
+ // Mark file number as used.
+ db.s.markFileNum(fds[len(fds)-1].Num)
+
+ var (
+ // Options.
+ strict = db.s.o.GetStrict(opt.StrictJournal)
+ checksum = db.s.o.GetStrict(opt.StrictJournalChecksum)
+ writeBuffer = db.s.o.GetWriteBuffer()
+
+ jr *journal.Reader
+ mdb = memdb.New(db.s.icmp, writeBuffer)
+ buf = &util.Buffer{}
+ batchSeq uint64
+ batchLen int
+ )
+
+ for _, fd := range fds {
+ db.logf("journal@recovery recovering @%d", fd.Num)
+
+ fr, err := db.s.stor.Open(fd)
+ if err != nil {
+ return err
+ }
+
+ // Create or reset journal reader instance.
+ if jr == nil {
+ jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum)
+ } else {
+ jr.Reset(fr, dropper{db.s, fd}, strict, checksum)
+ }
+
+ // Flush memdb and remove obsolete journal file.
+ // ofd is the journal replayed in the previous iteration, if any.
+ if !ofd.Zero() {
+ if mdb.Len() > 0 {
+ if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
+ fr.Close()
+ return err
+ }
+ }
+
+ rec.setJournalNum(fd.Num)
+ rec.setSeqNum(db.seq)
+ if err := db.s.commit(rec); err != nil {
+ fr.Close()
+ return err
+ }
+ rec.resetAddedTables()
+
+ db.s.stor.Remove(ofd)
+ ofd = storage.FileDesc{}
+ }
+
+ // Replay journal to memdb.
+ mdb.Reset()
+ for {
+ r, err := jr.Next()
+ if err != nil {
+ if err == io.EOF {
+ break
+ }
+
+ fr.Close()
+ return errors.SetFd(err, fd)
+ }
+
+ buf.Reset()
+ if _, err := buf.ReadFrom(r); err != nil {
+ if err == io.ErrUnexpectedEOF {
+ // This is error returned due to corruption, with strict == false.
+ continue
+ }
+
+ fr.Close()
+ return errors.SetFd(err, fd)
+ }
+ batchSeq, batchLen, err = decodeBatchToMem(buf.Bytes(), db.seq, mdb)
+ if err != nil {
+ if !strict && errors.IsCorrupted(err) {
+ db.s.logf("journal error: %v (skipped)", err)
+ // We won't apply sequence number as it might be corrupted.
+ continue
+ }
+
+ fr.Close()
+ return errors.SetFd(err, fd)
+ }
+
+ // Save sequence number.
+ db.seq = batchSeq + uint64(batchLen)
+
+ // Flush it if large enough.
+ if mdb.Size() >= writeBuffer {
+ if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
+ fr.Close()
+ return err
+ }
+
+ mdb.Reset()
+ }
+ }
+
+ fr.Close()
+ ofd = fd
+ }
+
+ // Flush the last memdb.
+ if mdb.Len() > 0 {
+ if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
+ return err
+ }
+ }
+ }
+
+ // Create a new journal.
+ if _, err := db.newMem(0); err != nil {
+ return err
+ }
+
+ // Commit.
+ rec.setJournalNum(db.journalFd.Num)
+ rec.setSeqNum(db.seq)
+ if err := db.s.commit(rec); err != nil {
+ // Close journal on error.
+ if db.journal != nil {
+ db.journal.Close()
+ db.journalWriter.Close()
+ }
+ return err
+ }
+
+ // Remove the last obsolete journal file.
+ if !ofd.Zero() {
+ db.s.stor.Remove(ofd)
+ }
+
+ return nil
+}
+
+// recoverJournalRO is the read-only counterpart of recoverJournal.
+//
+// It replays the same set of journal files into a single memdb, but never
+// flushes, commits or removes anything. The resulting memdb becomes db.mem
+// and db.seq is advanced to the end of every batch that was applied.
+func (db *DB) recoverJournalRO() error {
+ // Get all journals and sort it by file number.
+ rawFds, err := db.s.stor.List(storage.TypeJournal)
+ if err != nil {
+ return err
+ }
+ sortFds(rawFds)
+
+ // Journals that will be recovered.
+ var fds []storage.FileDesc
+ for _, fd := range rawFds {
+ if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum {
+ fds = append(fds, fd)
+ }
+ }
+
+ var (
+ // Options.
+ strict = db.s.o.GetStrict(opt.StrictJournal)
+ checksum = db.s.o.GetStrict(opt.StrictJournalChecksum)
+ writeBuffer = db.s.o.GetWriteBuffer()
+
+ mdb = memdb.New(db.s.icmp, writeBuffer)
+ )
+
+ // Recover journals.
+ if len(fds) > 0 {
+ db.logf("journal@recovery RO·Mode F·%d", len(fds))
+
+ var (
+ jr *journal.Reader
+ buf = &util.Buffer{}
+ batchSeq uint64
+ batchLen int
+ )
+
+ for _, fd := range fds {
+ db.logf("journal@recovery recovering @%d", fd.Num)
+
+ fr, err := db.s.stor.Open(fd)
+ if err != nil {
+ return err
+ }
+
+ // Create or reset journal reader instance.
+ if jr == nil {
+ jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum)
+ } else {
+ jr.Reset(fr, dropper{db.s, fd}, strict, checksum)
+ }
+
+ // Replay journal to memdb.
+ for {
+ r, err := jr.Next()
+ if err != nil {
+ if err == io.EOF {
+ break
+ }
+
+ fr.Close()
+ return errors.SetFd(err, fd)
+ }
+
+ buf.Reset()
+ if _, err := buf.ReadFrom(r); err != nil {
+ if err == io.ErrUnexpectedEOF {
+ // This is error returned due to corruption, with strict == false.
+ continue
+ }
+
+ fr.Close()
+ return errors.SetFd(err, fd)
+ }
+ batchSeq, batchLen, err = decodeBatchToMem(buf.Bytes(), db.seq, mdb)
+ if err != nil {
+ if !strict && errors.IsCorrupted(err) {
+ db.s.logf("journal error: %v (skipped)", err)
+ // We won't apply sequence number as it might be corrupted.
+ continue
+ }
+
+ fr.Close()
+ return errors.SetFd(err, fd)
+ }
+
+ // Save sequence number.
+ db.seq = batchSeq + uint64(batchLen)
+ }
+
+ fr.Close()
+ }
+ }
+
+ // Set memDB.
+ db.mem = &memDB{db: db, DB: mdb, ref: 1}
+
+ return nil
+}
+
+// memGet looks ikey up in mdb.
+//
+// ok reports whether the lookup is conclusive, i.e. whether the caller
+// should stop searching further:
+//   - the found entry has the same user key and is a deletion:
+//     (true, nil, ErrNotFound);
+//   - the found entry has the same user key and is a value:
+//     (true, value, nil);
+//   - Find failed with an error other than ErrNotFound: (true, nil, err).
+//
+// Otherwise ok is false and the named results are returned as Find left
+// them.
+func memGet(mdb *memdb.DB, ikey internalKey, icmp *iComparer) (ok bool, mv []byte, err error) {
+ mk, mv, err := mdb.Find(ikey)
+ if err == nil {
+ ukey, _, kt, kerr := parseInternalKey(mk)
+ if kerr != nil {
+ // Shouldn't have happened.
+ panic(kerr)
+ }
+ if icmp.uCompare(ukey, ikey.ukey()) == 0 {
+ if kt == keyTypeDel {
+ return true, nil, ErrNotFound
+ }
+ return true, mv, nil
+
+ }
+ } else if err != ErrNotFound {
+ return true, nil, err
+ }
+ return
+}
+
+// get looks up key as of sequence number seq.
+//
+// The lookup order is: the optional auxiliary memdb auxm, the two memdbs
+// returned by db.getMems, and finally the current version's tables
+// (together with the auxiliary tables auxt). A value found in a memdb is
+// returned as a copy. If the table lookup reports that a compaction should
+// be scheduled, a table compaction is triggered.
+func (db *DB) get(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) {
+ ikey := makeInternalKey(nil, key, seq, keyTypeSeek)
+
+ if auxm != nil {
+ if ok, mv, me := memGet(auxm, ikey, db.s.icmp); ok {
+ return append([]byte{}, mv...), me
+ }
+ }
+
+ em, fm := db.getMems()
+ for _, m := range [...]*memDB{em, fm} {
+ if m == nil {
+ continue
+ }
+ // Deferred on purpose: both memdbs stay referenced until get returns.
+ defer m.decref()
+
+ if ok, mv, me := memGet(m.DB, ikey, db.s.icmp); ok {
+ return append([]byte{}, mv...), me
+ }
+ }
+
+ v := db.s.version()
+ value, cSched, err := v.get(auxt, ikey, ro, false)
+ v.release()
+ if cSched {
+ // Trigger table compaction.
+ db.compTrigger(db.tcompCmdC)
+ }
+ return
+}
+
+// nilIfNotFound maps ErrNotFound to nil and returns every other error
+// (including nil) unchanged.
+func nilIfNotFound(err error) error {
+	if err != ErrNotFound {
+		return err
+	}
+	return nil
+}
+
+// has reports whether key exists as of sequence number seq.
+//
+// It follows the same lookup order as get: the optional auxiliary memdb
+// auxm, the two memdbs returned by db.getMems, and then the current
+// version's tables. ErrNotFound is never returned; it is reported as
+// ret == false with a nil error.
+func (db *DB) has(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err error) {
+ ikey := makeInternalKey(nil, key, seq, keyTypeSeek)
+
+ if auxm != nil {
+ if ok, _, me := memGet(auxm, ikey, db.s.icmp); ok {
+ return me == nil, nilIfNotFound(me)
+ }
+ }
+
+ em, fm := db.getMems()
+ for _, m := range [...]*memDB{em, fm} {
+ if m == nil {
+ continue
+ }
+ // Deferred on purpose: both memdbs stay referenced until has returns.
+ defer m.decref()
+
+ if ok, _, me := memGet(m.DB, ikey, db.s.icmp); ok {
+ return me == nil, nilIfNotFound(me)
+ }
+ }
+
+ v := db.s.version()
+ _, cSched, err := v.get(auxt, ikey, ro, true)
+ v.release()
+ if cSched {
+ // Trigger table compaction.
+ db.compTrigger(db.tcompCmdC)
+ }
+ if err == nil {
+ ret = true
+ } else if err == ErrNotFound {
+ err = nil
+ }
+ return
+}
+
+// Get gets the value for the given key. It returns ErrNotFound if the
+// DB does not contain the key.
+//
+// The lookup runs against a snapshot that is acquired for the duration of
+// the call.
+//
+// The returned slice is its own copy, it is safe to modify the contents
+// of the returned slice.
+// It is safe to modify the contents of the argument after Get returns.
+func (db *DB) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
+	if err = db.ok(); err != nil {
+		return
+	}
+
+	se := db.acquireSnapshot()
+	defer db.releaseSnapshot(se)
+	return db.get(nil, nil, key, se.seq, ro)
+}
+
+// Has returns true if the DB contains the given key.
+//
+// The lookup runs against a snapshot that is acquired for the duration of
+// the call.
+//
+// It is safe to modify the contents of the argument after Has returns.
+func (db *DB) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) {
+	if err = db.ok(); err != nil {
+		return
+	}
+
+	se := db.acquireSnapshot()
+	defer db.releaseSnapshot(se)
+	return db.has(nil, nil, key, se.seq, ro)
+}
+
+// NewIterator returns an iterator for the latest snapshot of the
+// underlying DB.
+// The returned iterator is not safe for concurrent use, but it is safe to use
+// multiple iterators concurrently, with each in a dedicated goroutine.
+// It is also safe to use an iterator concurrently with modifying its
+// underlying DB. The resultant key/value pairs are guaranteed to be
+// consistent.
+//
+// Slice allows slicing the iterator to only contain keys in the given
+// range. A nil Range.Start is treated as a key before all keys in the
+// DB. And a nil Range.Limit is treated as a key after all keys in
+// the DB.
+//
+// If the DB is not usable (db.ok returns an error), an empty iterator
+// carrying that error is returned.
+//
+// The iterator must be released after use, by calling Release method.
+//
+// Also read Iterator documentation of the leveldb/iterator package.
+func (db *DB) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+ if err := db.ok(); err != nil {
+ return iterator.NewEmptyIterator(err)
+ }
+
+ se := db.acquireSnapshot()
+ defer db.releaseSnapshot(se)
+ // Iterator holds 'version' lock, 'version' is immutable so snapshot
+ // can be released after iterator created.
+ return db.newIterator(nil, nil, se.seq, slice, ro)
+}
+
+// GetSnapshot returns the latest snapshot of the underlying DB. A snapshot
+// is a frozen view of the DB state at a particular point in time. The
+// content of a snapshot is guaranteed to be consistent.
+//
+// The snapshot must be released after use, by calling Release method.
+func (db *DB) GetSnapshot() (*Snapshot, error) {
+ if err := db.ok(); err != nil {
+ return nil, err
+ }
+
+ return db.newSnapshot(), nil
+}
+
+// GetProperty returns value of the given property name.
+//
+// Property names:
+// leveldb.num-files-at-level{n}
+// Returns the number of files at level 'n'.
+// leveldb.stats
+// Returns statistics of the underlying DB.
+// leveldb.sstables
+// Returns sstables list for each level.
+// leveldb.blockpool
+// Returns block pool stats.
+// leveldb.cachedblock
+// Returns size of cached block.
+// leveldb.openedtables
+// Returns number of opened tables.
+// leveldb.alivesnaps
+// Returns number of alive snapshots.
+// leveldb.aliveiters
+// Returns number of alive iterators.
+//
+// ErrNotFound is returned when name does not start with "leveldb.", when
+// the property is unknown, or when the level suffix of num-files-at-level
+// is not exactly one number.
+func (db *DB) GetProperty(name string) (value string, err error) {
+ err = db.ok()
+ if err != nil {
+ return
+ }
+
+ const prefix = "leveldb."
+ if !strings.HasPrefix(name, prefix) {
+ return "", ErrNotFound
+ }
+ p := name[len(prefix):]
+
+ v := db.s.version()
+ defer v.release()
+
+ numFilesPrefix := "num-files-at-level"
+ switch {
+ case strings.HasPrefix(p, numFilesPrefix):
+ var level uint
+ var rest string
+ // Scanning a trailing %s makes n == 2 when anything follows the
+ // number, so only a bare number is accepted.
+ n, _ := fmt.Sscanf(p[len(numFilesPrefix):], "%d%s", &level, &rest)
+ if n != 1 {
+ err = ErrNotFound
+ } else {
+ value = fmt.Sprint(v.tLen(int(level)))
+ }
+ case p == "stats":
+ value = "Compactions\n" +
+ " Level | Tables | Size(MB) | Time(sec) | Read(MB) | Write(MB)\n" +
+ "-------+------------+---------------+---------------+---------------+---------------\n"
+ for level, tables := range v.levels {
+ duration, read, write := db.compStats.getStat(level)
+ // Skip levels that hold no tables and have no recorded compaction time.
+ if len(tables) == 0 && duration == 0 {
+ continue
+ }
+ value += fmt.Sprintf(" %3d | %10d | %13.5f | %13.5f | %13.5f | %13.5f\n",
+ level, len(tables), float64(tables.size())/1048576.0, duration.Seconds(),
+ float64(read)/1048576.0, float64(write)/1048576.0)
+ }
+ case p == "sstables":
+ for level, tables := range v.levels {
+ value += fmt.Sprintf("--- level %d ---\n", level)
+ for _, t := range tables {
+ value += fmt.Sprintf("%d:%d[%q .. %q]\n", t.fd.Num, t.size, t.imin, t.imax)
+ }
+ }
+ case p == "blockpool":
+ value = fmt.Sprintf("%v", db.s.tops.bpool)
+ case p == "cachedblock":
+ if db.s.tops.bcache != nil {
+ value = fmt.Sprintf("%d", db.s.tops.bcache.Size())
+ } else {
+ value = "<nil>"
+ }
+ case p == "openedtables":
+ value = fmt.Sprintf("%d", db.s.tops.cache.Size())
+ case p == "alivesnaps":
+ value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveSnaps))
+ case p == "aliveiters":
+ value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveIters))
+ default:
+ err = ErrNotFound
+ }
+
+ return
+}
+
+// SizeOf calculates approximate sizes of the given key ranges.
+// The length of the returned sizes are equal with the length of the given
+// ranges. The returned sizes measure storage space usage, so if the user
+// data compresses by a factor of ten, the returned sizes will be one-tenth
+// the size of the corresponding user data size.
+// The results may not include the sizes of recently written data.
+func (db *DB) SizeOf(ranges []util.Range) (Sizes, error) {
+ if err := db.ok(); err != nil {
+ return nil, err
+ }
+
+ v := db.s.version()
+ defer v.release()
+
+ sizes := make(Sizes, 0, len(ranges))
+ for _, r := range ranges {
+ imin := makeInternalKey(nil, r.Start, keyMaxSeq, keyTypeSeek)
+ imax := makeInternalKey(nil, r.Limit, keyMaxSeq, keyTypeSeek)
+ start, err := v.offsetOf(imin)
+ if err != nil {
+ return nil, err
+ }
+ limit, err := v.offsetOf(imax)
+ if err != nil {
+ return nil, err
+ }
+ var size int64
+ if limit >= start {
+ size = limit - start
+ }
+ sizes = append(sizes, size)
+ }
+
+ return sizes, nil
+}
+
+// Close closes the DB. This will also release any outstanding snapshot,
+// abort any in-flight compaction and discard open transaction.
+//
+// It is not safe to close a DB until all outstanding iterators are released.
+// It is valid to call Close multiple times; a call for which db.setClosed
+// reports false returns ErrClosed (presumably every call after the first).
+// Other methods should not be called after the DB has been closed.
+//
+// The returned error is a pending compaction error, if there is one (other
+// than ErrReadOnly), otherwise the error from closing the storage that was
+// opened by OpenFile or RecoverFile.
+func (db *DB) Close() error {
+ if !db.setClosed() {
+ return ErrClosed
+ }
+
+ start := time.Now()
+ db.log("db@close closing")
+
+ // Clear the finalizer.
+ runtime.SetFinalizer(db, nil)
+
+ // Get compaction error.
+ // Non-blocking receive: err stays nil when none is pending.
+ var err error
+ select {
+ case err = <-db.compErrC:
+ if err == ErrReadOnly {
+ err = nil
+ }
+ default:
+ }
+
+ // Signal all goroutines.
+ close(db.closeC)
+
+ // Discard open transaction.
+ if db.tr != nil {
+ db.tr.Discard()
+ }
+
+ // Acquire writer lock.
+ db.writeLockC <- struct{}{}
+
+ // Wait for all goroutines to exit.
+ db.closeW.Wait()
+
+ // Closes journal.
+ if db.journal != nil {
+ db.journal.Close()
+ db.journalWriter.Close()
+ db.journal = nil
+ db.journalWriter = nil
+ }
+
+ if db.writeDelayN > 0 {
+ db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay)
+ }
+
+ // Close session.
+ db.s.close()
+ db.logf("db@close done T·%v", time.Since(start))
+ db.s.release()
+
+ if db.closer != nil {
+ // The closer's error is only reported when no earlier error is set.
+ if err1 := db.closer.Close(); err == nil {
+ err = err1
+ }
+ db.closer = nil
+ }
+
+ // Clear memdbs.
+ db.clearMems()
+
+ return err
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go
new file mode 100644
index 000000000..2d0ad0753
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go
@@ -0,0 +1,826 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "sync"
+ "time"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+ var (
+ errCompactionTransactExiting = errors.New("leveldb: compaction transact exiting") // panic value raised by compactionExitTransact and recognised by the recover handlers in this file
+ )
+
+ type cStat struct { // cStat holds accumulated compaction statistics; cStats keeps one per level.
+ duration time.Duration // total time, summed from cStatStaging.duration
+ read int64 // total of cStatStaging.read (presumably bytes read from input tables)
+ write int64 // total of cStatStaging.write (presumably bytes written to new tables)
+ }
+
+ // add folds the duration and the read/write counters staged in n into p.
+ func (p *cStat) add(n *cStatStaging) {
+ 	p.duration, p.read, p.write = p.duration+n.duration, p.read+n.read, p.write+n.write
+ }
+
+ // get returns the accumulated duration and the read and write counters.
+ func (p *cStat) get() (duration time.Duration, read, write int64) {
+ 	duration = p.duration
+ 	read, write = p.read, p.write
+ 	return duration, read, write
+ }
+
+ type cStatStaging struct { // cStatStaging collects statistics for one compaction before they are merged into a cStat.
+ start time.Time // time of the last startTimer call that actually started the timer
+ duration time.Duration // elapsed time summed over all completed start/stop intervals
+ on bool // true while the timer is running
+ read int64 // incremented with input table sizes by the compaction code
+ write int64 // incremented with output table sizes by the compaction code
+ }
+
+ // startTimer starts the stopwatch; it is a no-op when the timer is
+ // already running.
+ func (p *cStatStaging) startTimer() {
+ 	if p.on {
+ 		return
+ 	}
+ 	p.on = true
+ 	p.start = time.Now()
+ }
+
+ // stopTimer stops the stopwatch and adds the time elapsed since the
+ // matching startTimer to duration; it is a no-op when the timer is not
+ // running.
+ func (p *cStatStaging) stopTimer() {
+ 	if !p.on {
+ 		return
+ 	}
+ 	elapsed := time.Since(p.start)
+ 	p.on = false
+ 	p.duration += elapsed
+ }
+
+ type cStats struct { // cStats is a mutex-guarded table of per-level compaction statistics.
+ lk sync.Mutex // guards stats
+ stats []cStat // indexed by level; grown on demand by addStat
+ }
+
+ // addStat merges the staged statistics n into the entry for level,
+ // extending the per-level slice with zero entries when level is not yet
+ // covered.
+ func (p *cStats) addStat(level int, n *cStatStaging) {
+ 	p.lk.Lock()
+ 	if missing := level + 1 - len(p.stats); missing > 0 {
+ 		p.stats = append(p.stats, make([]cStat, missing)...)
+ 	}
+ 	entry := &p.stats[level]
+ 	entry.add(n)
+ 	p.lk.Unlock()
+ }
+
+ // getStat returns the accumulated statistics for level, or zero values
+ // when nothing has been recorded at or beyond that level.
+ func (p *cStats) getStat(level int) (duration time.Duration, read, write int64) {
+ 	p.lk.Lock()
+ 	defer p.lk.Unlock()
+ 	if level >= len(p.stats) {
+ 		return 0, 0, 0
+ 	}
+ 	return p.stats[level].get()
+ }
+
+ func (db *DB) compactionError() { // compactionError tracks the current compaction error as a three-state loop (none, transient, persistent) until closeC is closed.
+ var err error
+ noerr:
+ // No error: wait for a status update on compErrSetC.
+ for {
+ select {
+ case err = <-db.compErrSetC:
+ switch {
+ case err == nil:
+ case err == ErrReadOnly, errors.IsCorrupted(err): // read-only and corruption errors are treated as persistent
+ goto hasperr
+ default:
+ goto haserr
+ }
+ case <-db.closeC:
+ return
+ }
+ }
+ haserr:
+ // Transient error: offer it on compErrC until a new status arrives.
+ for {
+ select {
+ case db.compErrC <- err:
+ case err = <-db.compErrSetC:
+ switch {
+ case err == nil:
+ goto noerr
+ case err == ErrReadOnly, errors.IsCorrupted(err):
+ goto hasperr
+ default: // another transient error: stay in this state with the updated err
+ }
+ case <-db.closeC:
+ return
+ }
+ }
+ hasperr:
+ // Persistent error: this state is only left when the DB is closed.
+ for {
+ select {
+ case db.compErrC <- err:
+ case db.compPerErrC <- err:
+ case db.writeLockC <- struct{}{}:
+ // Hold write lock, so that write won't pass-through.
+ db.compWriteLocking = true
+ case <-db.closeC:
+ if db.compWriteLocking {
+ // We should release the lock or Close will hang.
+ <-db.writeLockC
+ }
+ return
+ }
+ }
+ }
+
+ type compactionTransactCounter int // progress counter handed to a transaction's run; compactionTransact compares it across attempts to decide whether to reset the backoff
+
+ func (cnt *compactionTransactCounter) incr() { // incr advances the counter by one.
+ *cnt++
+ }
+
+ type compactionTransactInterface interface { // compactionTransactInterface is the unit of work executed (and retried) by compactionTransact.
+ run(cnt *compactionTransactCounter) error // performs one attempt; cnt is passed in so the attempt can report progress
+ revert() error // called by compactionTransact when the transaction exits via errCompactionTransactExiting
+ }
+
+ // compactionTransact runs t.run until it succeeds, retrying failed attempts
+ // with an exponential backoff (1s, 2s, 4s, then capped at 8s) unless the
+ // DisableCompactionBackoff option is set. The backoff restarts from the
+ // minimum whenever an attempt got further (a larger counter) than any
+ // previous one.
+ //
+ // The transaction is abandoned by panicking with
+ // errCompactionTransactExiting (see compactionExitTransact) when the DB is
+ // closed, when a failed attempt coincides with a persistent compaction
+ // error, or when the attempt's error is a corruption error. In that case
+ // t.revert is called before the panic is propagated to the caller. Any
+ // other panic is propagated without calling revert.
+ func (db *DB) compactionTransact(name string, t compactionTransactInterface) {
+ 	defer func() {
+ 		if x := recover(); x != nil {
+ 			if x == errCompactionTransactExiting {
+ 				if err := t.revert(); err != nil {
+ 					db.logf("%s revert error %q", name, err)
+ 				}
+ 			}
+ 			panic(x)
+ 		}
+ 	}()
+
+ 	const (
+ 		backoffMin = 1 * time.Second
+ 		backoffMax = 8 * time.Second
+ 		// backoffMul is a plain factor. Declaring it as a Duration would
+ 		// multiply nanoseconds by nanoseconds and overshoot backoffMax on
+ 		// the very first step.
+ 		backoffMul = 2
+ 	)
+ 	var (
+ 		backoff  = backoffMin
+ 		backoffT = time.NewTimer(backoff)
+ 		lastCnt  = compactionTransactCounter(0)
+
+ 		disableBackoff = db.s.o.GetDisableCompactionBackoff()
+ 	)
+ 	// Release the timer on every exit path, including the exit panic.
+ 	defer backoffT.Stop()
+
+ 	for n := 0; ; n++ {
+ 		// Check whether the DB is closed.
+ 		if db.isClosed() {
+ 			db.logf("%s exiting", name)
+ 			db.compactionExitTransact()
+ 		} else if n > 0 {
+ 			db.logf("%s retrying N·%d", name, n)
+ 		}
+
+ 		// Execute.
+ 		cnt := compactionTransactCounter(0)
+ 		err := t.run(&cnt)
+ 		if err != nil {
+ 			db.logf("%s error I·%d %q", name, cnt, err)
+ 		}
+
+ 		// Set compaction error status.
+ 		select {
+ 		case db.compErrSetC <- err:
+ 		case perr := <-db.compPerErrC:
+ 			// A persistent error is already set; only give up if this
+ 			// attempt failed as well.
+ 			if err != nil {
+ 				db.logf("%s exiting (persistent error %q)", name, perr)
+ 				db.compactionExitTransact()
+ 			}
+ 		case <-db.closeC:
+ 			db.logf("%s exiting", name)
+ 			db.compactionExitTransact()
+ 		}
+ 		if err == nil {
+ 			return
+ 		}
+ 		if errors.IsCorrupted(err) {
+ 			db.logf("%s exiting (corruption detected)", name)
+ 			db.compactionExitTransact()
+ 		}
+
+ 		if !disableBackoff {
+ 			// Reset backoff duration if counter is advancing.
+ 			if cnt > lastCnt {
+ 				backoff = backoffMin
+ 				lastCnt = cnt
+ 			}
+
+ 			// Backoff. The timer may already have fired while the attempt
+ 			// was running; stop it and drain any stale tick so the wait
+ 			// below really lasts for the new duration.
+ 			if !backoffT.Stop() {
+ 				select {
+ 				case <-backoffT.C:
+ 				default:
+ 				}
+ 			}
+ 			backoffT.Reset(backoff)
+ 			if backoff < backoffMax {
+ 				backoff *= backoffMul
+ 				if backoff > backoffMax {
+ 					backoff = backoffMax
+ 				}
+ 			}
+ 			select {
+ 			case <-backoffT.C:
+ 			case <-db.closeC:
+ 				db.logf("%s exiting", name)
+ 				db.compactionExitTransact()
+ 			}
+ 		}
+ 	}
+ }
+
+ type compactionTransactFunc struct { // compactionTransactFunc adapts a pair of closures to compactionTransactInterface.
+ runFunc func(cnt *compactionTransactCounter) error // invoked by run
+ revertFunc func() error // invoked by revert; may be nil
+ }
+
+ // run invokes the wrapped run closure and returns its result.
+ func (t *compactionTransactFunc) run(cnt *compactionTransactCounter) error {
+ 	err := t.runFunc(cnt)
+ 	return err
+ }
+
+ // revert invokes the wrapped revert closure; a nil closure means there
+ // is nothing to undo and nil is returned.
+ func (t *compactionTransactFunc) revert() error {
+ 	if t.revertFunc == nil {
+ 		return nil
+ 	}
+ 	return t.revertFunc()
+ }
+
+ // compactionTransactFunc runs compactionTransact with a transaction built
+ // from the given run and revert closures. revert may be nil.
+ func (db *DB) compactionTransactFunc(name string, run func(cnt *compactionTransactCounter) error, revert func() error) {
+ 	tx := &compactionTransactFunc{
+ 		runFunc:    run,
+ 		revertFunc: revert,
+ 	}
+ 	db.compactionTransact(name, tx)
+ }
+
+ func (db *DB) compactionExitTransact() { // compactionExitTransact never returns: it unwinds the calling goroutine by panicking with the errCompactionTransactExiting sentinel.
+ panic(errCompactionTransactExiting)
+ }
+
+ // compactionCommit commits rec to the session inside a compaction
+ // transaction named name+"@commit". Commits are serialised by
+ // compCommitLk.
+ func (db *DB) compactionCommit(name string, rec *sessionRecord) {
+ 	db.compCommitLk.Lock()
+ 	// The unlock has to be deferred: the transaction below can leave this
+ 	// function through the exit panic.
+ 	defer db.compCommitLk.Unlock()
+
+ 	commit := func(cnt *compactionTransactCounter) error {
+ 		return db.s.commit(rec)
+ 	}
+ 	db.compactionTransactFunc(name+"@commit", commit, nil)
+ }
+
+ func (db *DB) memCompaction() { // memCompaction flushes the frozen memdb, if any, into table files and commits the result to the session.
+ mdb := db.getFrozenMem()
+ if mdb == nil {
+ return
+ }
+ defer mdb.decref()
+
+ db.logf("memdb@flush N·%d S·%s", mdb.Len(), shortenb(mdb.Size()))
+
+ // Don't compact empty memdb.
+ if mdb.Len() == 0 {
+ db.logf("memdb@flush skipping")
+ // drop frozen memdb
+ db.dropFrozenMem()
+ return
+ }
+
+ // Pause table compaction by handing it resumeC; skipped (resumeC set to nil) when a persistent compaction error is pending.
+ resumeC := make(chan struct{})
+ select {
+ case db.tcompPauseC <- (chan<- struct{})(resumeC):
+ case <-db.compPerErrC:
+ close(resumeC)
+ resumeC = nil
+ case <-db.closeC:
+ return
+ }
+
+ var (
+ rec = &sessionRecord{}
+ stats = &cStatStaging{}
+ flushLevel int
+ )
+
+ // Generate tables; if the transaction exits, the revert closure removes the table files recorded in rec.
+ db.compactionTransactFunc("memdb@flush", func(cnt *compactionTransactCounter) (err error) {
+ stats.startTimer()
+ flushLevel, err = db.s.flushMemdb(rec, mdb.DB, db.memdbMaxLevel)
+ stats.stopTimer()
+ return
+ }, func() error {
+ for _, r := range rec.addedTables {
+ db.logf("memdb@flush revert @%d", r.num)
+ if err := db.s.stor.Remove(storage.FileDesc{Type: storage.TypeTable, Num: r.num}); err != nil {
+ return err
+ }
+ }
+ return nil
+ })
+
+ rec.setJournalNum(db.journalFd.Num)
+ rec.setSeqNum(db.frozenSeq)
+
+ // Commit.
+ stats.startTimer()
+ db.compactionCommit("memdb", rec)
+ stats.stopTimer()
+
+ db.logf("memdb@flush committed F·%d T·%v", len(rec.addedTables), stats.duration)
+
+ for _, r := range rec.addedTables {
+ stats.write += r.size
+ }
+ db.compStats.addStat(flushLevel, stats)
+
+ // Drop frozen memdb.
+ db.dropFrozenMem()
+
+ // Resume table compaction: take the value the paused side sent on resumeC (see pauseCompaction), then close the channel.
+ if resumeC != nil {
+ select {
+ case <-resumeC:
+ close(resumeC)
+ case <-db.closeC:
+ return
+ }
+ }
+
+ // Trigger table compaction.
+ db.compTrigger(db.tcompCmdC)
+ }
+
+ type tableCompactionBuilder struct { // tableCompactionBuilder is the resumable transaction that merges a compaction's input tables into new tables.
+ db *DB // may be nil; when set, appendKV polls it for pause and close events
+ s *session
+ c *compaction
+ rec *sessionRecord // receives every table produced by flush
+ stat0, stat1 *cStatStaging // stat1 gets build time and bytes written; stat0 is not used by the methods of this type
+
+ snapHasLastUkey bool // snap* fields: state saved by run after a table rotation, restored when run is retried
+ snapLastUkey []byte
+ snapLastSeq uint64
+ snapIter int // iteration index at which a retried run resumes processing
+ snapKerrCnt int
+ snapDropCnt int
+
+ kerrCnt int // number of keys that failed to parse as internal keys
+ dropCnt int // number of entries dropped as obsolete
+
+ minSeq uint64 // sequence threshold used by run's drop rules
+ strict bool // when true, run fails on the first unparsable key
+ tableSize int // needFlush reports true once the current table has at least this many bytes
+
+ tw *tWriter // table currently being written, nil when none is open
+ }
+
+func (b *tableCompactionBuilder) appendKV(key, value []byte) error {
+ // Create new table if not already.
+ if b.tw == nil {
+ // Check for pause event.
+ if b.db != nil {
+ select {
+ case ch := <-b.db.tcompPauseC:
+ b.db.pauseCompaction(ch)
+ case <-b.db.closeC:
+ b.db.compactionExitTransact()
+ default:
+ }
+ }
+
+ // Create new table.
+ var err error
+ b.tw, err = b.s.tops.create()
+ if err != nil {
+ return err
+ }
+ }
+
+ // Write key/value into table.
+ return b.tw.append(key, value)
+}
+
+ // needFlush reports whether the table being written has reached the
+ // configured table size. It must only be called while a table is open.
+ func (b *tableCompactionBuilder) needFlush() bool {
+ 	written := b.tw.tw.BytesLen()
+ 	return written >= b.tableSize
+ }
+
+ // flush finishes the table being written, records it in the session
+ // record at the level below the source level, adds its size to the
+ // write statistics and clears the current writer.
+ func (b *tableCompactionBuilder) flush() error {
+ 	table, err := b.tw.finish()
+ 	if err != nil {
+ 		return err
+ 	}
+ 	b.rec.addTableFile(b.c.sourceLevel+1, table)
+ 	b.stat1.write += table.size
+ 	b.s.logf(
+ 		"table@build created L%d@%d N·%d S·%s %q:%q",
+ 		b.c.sourceLevel+1, table.fd.Num, b.tw.tw.EntriesLen(), shortenb(int(table.size)), table.imin, table.imax,
+ 	)
+ 	b.tw = nil
+ 	return nil
+ }
+
+ // cleanup drops the table that is still being written, if any.
+ func (b *tableCompactionBuilder) cleanup() {
+ 	if b.tw == nil {
+ 		return
+ 	}
+ 	b.tw.drop()
+ 	b.tw = nil
+ }
+
+ func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error { // run performs one attempt of the merge, resuming from the state saved at the last table rotation.
+ snapResumed := b.snapIter > 0
+ hasLastUkey := b.snapHasLastUkey // The key might have zero length, so this flag is necessary.
+ lastUkey := append([]byte{}, b.snapLastUkey...)
+ lastSeq := b.snapLastSeq
+ b.kerrCnt = b.snapKerrCnt
+ b.dropCnt = b.snapDropCnt
+ // Restore compaction state.
+ b.c.restore()
+
+ defer b.cleanup()
+
+ b.stat1.startTimer()
+ defer b.stat1.stopTimer()
+
+ iter := b.c.newIterator()
+ defer iter.Release()
+ for i := 0; iter.Next(); i++ {
+ // Incr transact counter.
+ cnt.incr()
+
+ // Skip entries already handled before the saved snapshot point.
+ if i < b.snapIter {
+ continue
+ }
+
+ resumed := false // true only for the first entry processed after a resume
+ if snapResumed {
+ resumed = true
+ snapResumed = false
+ }
+
+ ikey := iter.Key()
+ ukey, seq, kt, kerr := parseInternalKey(ikey)
+
+ if kerr == nil {
+ shouldStop := !resumed && b.c.shouldStopBefore(ikey)
+
+ if !hasLastUkey || b.s.icmp.uCompare(lastUkey, ukey) != 0 {
+ // First occurrence of this user key.
+
+ // Only rotate tables at a user-key boundary, so entries of one user key never span two tables.
+ if b.tw != nil && (shouldStop || b.needFlush()) {
+ if err := b.flush(); err != nil {
+ return err
+ }
+
+ // Create a snapshot of the state so that a retry resumes from here.
+ b.c.save()
+ b.snapHasLastUkey = hasLastUkey
+ b.snapLastUkey = append(b.snapLastUkey[:0], lastUkey...)
+ b.snapLastSeq = lastSeq
+ b.snapIter = i
+ b.snapKerrCnt = b.kerrCnt
+ b.snapDropCnt = b.dropCnt
+ }
+
+ hasLastUkey = true
+ lastUkey = append(lastUkey[:0], ukey...)
+ lastSeq = keyMaxSeq
+ }
+
+ switch {
+ case lastSeq <= b.minSeq:
+ // Dropped because a newer entry for the same user key exists.
+ fallthrough // (A)
+ case kt == keyTypeDel && seq <= b.minSeq && b.c.baseLevelForKey(lastUkey):
+ // For this user key:
+ // (1) there is no data in higher levels
+ // (2) data in lower levels will have larger seq numbers
+ // (3) data in layers that are being compacted here and have
+ // smaller seq numbers will be dropped in the next
+ // few iterations of this loop (by rule (A) above).
+ // Therefore this deletion marker is obsolete and can be dropped.
+ lastSeq = seq
+ b.dropCnt++
+ continue
+ default:
+ lastSeq = seq
+ }
+ } else {
+ if b.strict {
+ return kerr
+ }
+
+ // Don't drop corrupted keys: they are copied through and the last-key state is reset.
+ hasLastUkey = false
+ lastUkey = lastUkey[:0]
+ lastSeq = keyMaxSeq
+ b.kerrCnt++
+ }
+
+ if err := b.appendKV(ikey, iter.Value()); err != nil {
+ return err
+ }
+ }
+
+ if err := iter.Error(); err != nil {
+ return err
+ }
+
+ // Finish last table.
+ if b.tw != nil && !b.tw.empty() {
+ return b.flush()
+ }
+ return nil
+ }
+
+ // revert removes every table file recorded as added by this compaction
+ // from storage, stopping at and returning the first removal error.
+ func (b *tableCompactionBuilder) revert() error {
+ 	for _, added := range b.rec.addedTables {
+ 		b.s.logf("table@build revert @%d", added.num)
+ 		fd := storage.FileDesc{Type: storage.TypeTable, Num: added.num}
+ 		err := b.s.stor.Remove(fd)
+ 		if err != nil {
+ 			return err
+ 		}
+ 	}
+ 	return nil
+ }
+
+ func (db *DB) tableCompaction(c *compaction, noTrivial bool) { // tableCompaction executes compaction c and commits the result; noTrivial disables the move-only shortcut.
+ defer c.release()
+
+ rec := &sessionRecord{}
+ rec.addCompPtr(c.sourceLevel, c.imax)
+
+ if !noTrivial && c.trivial() { // trivial compaction: re-register the single source table one level down instead of rewriting it
+ t := c.levels[0][0]
+ db.logf("table@move L%d@%d -> L%d", c.sourceLevel, t.fd.Num, c.sourceLevel+1)
+ rec.delTable(c.sourceLevel, t.fd.Num)
+ rec.addTableFile(c.sourceLevel+1, t)
+ db.compactionCommit("table-move", rec)
+ return
+ }
+
+ var stats [2]cStatStaging // index i corresponds to c.levels[i]
+ for i, tables := range c.levels {
+ for _, t := range tables {
+ stats[i].read += t.size
+ // Insert deleted tables into record
+ rec.delTable(c.sourceLevel+i, t.fd.Num)
+ }
+ }
+ sourceSize := int(stats[0].read + stats[1].read)
+ minSeq := db.minSeq()
+ db.logf("table@compaction L%d·%d -> L%d·%d S·%s Q·%d", c.sourceLevel, len(c.levels[0]), c.sourceLevel+1, len(c.levels[1]), shortenb(sourceSize), minSeq)
+
+ b := &tableCompactionBuilder{ // stat0 is left unset here
+ db: db,
+ s: db.s,
+ c: c,
+ rec: rec,
+ stat1: &stats[1],
+ minSeq: minSeq,
+ strict: db.s.o.GetStrict(opt.StrictCompaction),
+ tableSize: db.s.o.GetCompactionTableSize(c.sourceLevel + 1),
+ }
+ db.compactionTransact("table@build", b)
+
+ // Commit.
+ stats[1].startTimer()
+ db.compactionCommit("table", rec)
+ stats[1].stopTimer()
+
+ resultSize := int(stats[1].write)
+ db.logf("table@compaction committed F%s S%s Ke·%d D·%d T·%v", sint(len(rec.addedTables)-len(rec.deletedTables)), sshortenb(resultSize-sourceSize), b.kerrCnt, b.dropCnt, stats[1].duration)
+
+ // Save compaction stats; both entries are accounted to the destination level.
+ for i := range stats {
+ db.compStats.addStat(c.sourceLevel+1, &stats[i])
+ }
+ }
+
+ func (db *DB) tableRangeCompaction(level int, umin, umax []byte) error { // tableRangeCompaction compacts the user-key range [umin, umax] at the given level, or at every level that needs it when level is negative; it always returns nil.
+ db.logf("table@compaction range L%d %q:%q", level, umin, umax)
+ if level >= 0 {
+ if c := db.s.getCompactionRange(level, umin, umax, true); c != nil {
+ db.tableCompaction(c, true)
+ }
+ } else {
+ // Retry until nothing to compact.
+ for {
+ compacted := false
+
+ // Scan for maximum level with overlapped tables (m stays 1 when no level from 1 up overlaps).
+ v := db.s.version()
+ m := 1
+ for i := m; i < len(v.levels); i++ {
+ tables := v.levels[i]
+ if tables.overlaps(db.s.icmp, umin, umax, false) {
+ m = i
+ }
+ }
+ v.release()
+
+ for level := 0; level < m; level++ { // compact every level above m
+ if c := db.s.getCompactionRange(level, umin, umax, false); c != nil {
+ db.tableCompaction(c, true)
+ compacted = true
+ }
+ }
+
+ if !compacted {
+ break
+ }
+ }
+ }
+
+ return nil
+ }
+
+ // tableAutoCompaction runs the compaction picked by the session, if there
+ // is one, allowing the trivial-move shortcut.
+ func (db *DB) tableAutoCompaction() {
+ 	c := db.s.pickCompaction()
+ 	if c == nil {
+ 		return
+ 	}
+ 	db.tableCompaction(c, false)
+ }
+
+ // tableNeedCompaction reports whether the current version says a table
+ // compaction is needed.
+ func (db *DB) tableNeedCompaction() bool {
+ 	current := db.s.version()
+ 	defer current.release()
+ 	needed := current.needCompaction()
+ 	return needed
+ }
+
+ func (db *DB) pauseCompaction(ch chan<- struct{}) { // pauseCompaction blocks until the requester receives from ch; if the DB is closed first it exits the transaction.
+ select {
+ case ch <- struct{}{}:
+ case <-db.closeC:
+ db.compactionExitTransact()
+ }
+ }
+
+ type cCmd interface { // cCmd is a command sent to the compaction goroutines.
+ ack(err error) // reports the command's outcome to whoever issued it
+ }
+
+ type cAuto struct { // cAuto requests an automatic compaction.
+ ackC chan<- error // optional; nil when the sender does not wait for the result
+ }
+
+ // ack delivers err to the waiting sender, if any. A panic raised by the
+ // send (the sender closes the channel when it stops waiting) is
+ // swallowed.
+ func (r cAuto) ack(err error) {
+ 	if r.ackC == nil {
+ 		return
+ 	}
+ 	defer func() {
+ 		_ = recover()
+ 	}()
+ 	r.ackC <- err
+ }
+
+ type cRange struct { // cRange requests a compaction of a key range.
+ level int // level to compact; a negative value is handled by tableRangeCompaction as "all levels"
+ min, max []byte // user-key bounds passed to tableRangeCompaction
+ ackC chan<- error // optional; nil when the sender does not wait for the result
+ }
+
+ // ack delivers err to the waiting sender, if any. A panic raised by the
+ // send (the sender closes the channel when it stops waiting) is
+ // swallowed.
+ func (r cRange) ack(err error) {
+ 	if r.ackC == nil {
+ 		return
+ 	}
+ 	defer func() {
+ 		_ = recover()
+ 	}()
+ 	r.ackC <- err
+ }
+
+ // compTrigger requests an auto compaction on compC without waiting for it; the request is dropped if it cannot be sent immediately.
+ func (db *DB) compTrigger(compC chan<- cCmd) {
+ select {
+ case compC <- cAuto{}:
+ default:
+ }
+ }
+
+ // compTriggerWait requests an auto compaction on compC and waits for its acknowledgement, a compaction error, or DB close (ErrClosed).
+ func (db *DB) compTriggerWait(compC chan<- cCmd) (err error) {
+ ch := make(chan error)
+ defer close(ch) // a late ack on the closed channel panics inside ack, which recovers from it
+ // Send cmd.
+ select {
+ case compC <- cAuto{ch}:
+ case err = <-db.compErrC:
+ return
+ case <-db.closeC:
+ return ErrClosed
+ }
+ // Wait cmd.
+ select {
+ case err = <-ch:
+ case err = <-db.compErrC:
+ case <-db.closeC:
+ return ErrClosed
+ }
+ return err
+ }
+
+ // compTriggerRange sends a range compaction request for [min, max] at
+ // level on compC and waits for its acknowledgement. It returns the
+ // acknowledged error, a pending compaction error, or ErrClosed when the
+ // DB is closed first.
+ func (db *DB) compTriggerRange(compC chan<- cCmd, level int, min, max []byte) (err error) {
+ 	ackC := make(chan error)
+ 	// A late ack on the closed channel panics inside ack, which recovers.
+ 	defer close(ackC)
+
+ 	// Submit the request.
+ 	request := cRange{level: level, min: min, max: max, ackC: ackC}
+ 	select {
+ 	case compC <- request:
+ 	case err = <-db.compErrC:
+ 		return err
+ 	case <-db.closeC:
+ 		return ErrClosed
+ 	}
+
+ 	// Await the outcome.
+ 	select {
+ 	case err = <-ackC:
+ 	case err = <-db.compErrC:
+ 	case <-db.closeC:
+ 		err = ErrClosed
+ 	}
+ 	return err
+ }
+
+ func (db *DB) mCompaction() { // mCompaction is the memdb compaction goroutine: it serves cAuto commands from mcompCmdC until closeC is closed.
+ var x cCmd
+
+ defer func() {
+ if x := recover(); x != nil { // this x shadows the command; only the transaction-exit panic is swallowed
+ if x != errCompactionTransactExiting {
+ panic(x)
+ }
+ }
+ if x != nil { // a command was still in flight: acknowledge it with ErrClosed
+ x.ack(ErrClosed)
+ }
+ db.closeW.Done()
+ }()
+
+ for {
+ select {
+ case x = <-db.mcompCmdC:
+ switch x.(type) {
+ case cAuto:
+ db.memCompaction()
+ x.ack(nil)
+ x = nil
+ default:
+ panic("leveldb: unknown command")
+ }
+ case <-db.closeC:
+ return
+ }
+ }
+ }
+
+ func (db *DB) tCompaction() { // tCompaction is the table compaction goroutine: it serves tcompCmdC and tcompPauseC and runs auto compactions until closeC is closed.
+ var x cCmd
+ var ackQ []cCmd // cAuto commands waiting to be acknowledged once no compaction is needed
+
+ defer func() {
+ if x := recover(); x != nil { // this x shadows the command; only the transaction-exit panic is swallowed
+ if x != errCompactionTransactExiting {
+ panic(x)
+ }
+ }
+ for i := range ackQ {
+ ackQ[i].ack(ErrClosed)
+ ackQ[i] = nil
+ }
+ if x != nil {
+ x.ack(ErrClosed)
+ }
+ db.closeW.Done()
+ }()
+
+ for {
+ if db.tableNeedCompaction() { // work is pending: poll for commands without blocking
+ select {
+ case x = <-db.tcompCmdC:
+ case ch := <-db.tcompPauseC:
+ db.pauseCompaction(ch)
+ continue
+ case <-db.closeC:
+ return
+ default:
+ }
+ } else { // idle: acknowledge queued commands, then block until something arrives
+ for i := range ackQ {
+ ackQ[i].ack(nil)
+ ackQ[i] = nil
+ }
+ ackQ = ackQ[:0]
+ select {
+ case x = <-db.tcompCmdC:
+ case ch := <-db.tcompPauseC:
+ db.pauseCompaction(ch)
+ continue
+ case <-db.closeC:
+ return
+ }
+ }
+ if x != nil {
+ switch cmd := x.(type) {
+ case cAuto:
+ ackQ = append(ackQ, x)
+ case cRange:
+ x.ack(db.tableRangeCompaction(cmd.level, cmd.min, cmd.max))
+ default:
+ panic("leveldb: unknown command")
+ }
+ x = nil
+ }
+ db.tableAutoCompaction()
+ }
+ }
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go
new file mode 100644
index 000000000..03c24cdab
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go
@@ -0,0 +1,360 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "errors"
+ "math/rand"
+ "runtime"
+ "sync"
+ "sync/atomic"
+
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+ var (
+ errInvalidInternalKey = errors.New("leveldb: Iterator: invalid internal key") // not referenced in the visible part of this file
+ )
+
+ type memdbReleaser struct { // memdbReleaser drops one reference on a memDB, at most once.
+ once sync.Once // makes Release idempotent
+ m *memDB // memdb whose reference is dropped
+ }
+
+ // Release drops the reference held on the memdb. Only the first call
+ // has an effect.
+ func (mr *memdbReleaser) Release() {
+ 	drop := func() {
+ 		mr.m.decref()
+ 	}
+ 	mr.once.Do(drop)
+ }
+
+ func (db *DB) newRawIterator(auxm *memDB, auxt tFiles, slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { // newRawIterator returns a merged iterator over internal keys from the auxiliary sources, the memdbs returned by getMems and the tables of the current version.
+ strict := opt.GetStrict(db.s.o.Options, ro, opt.StrictReader)
+ em, fm := db.getMems() // presumably the effective and the frozen memdb; fm may be nil
+ v := db.s.version()
+
+ tableIts := v.getIterators(slice, ro)
+ n := len(tableIts) + len(auxt) + 3 // room for up to three memdb iterators: auxm, em, fm
+ its := make([]iterator.Iterator, 0, n)
+
+ if auxm != nil {
+ ami := auxm.NewIterator(slice)
+ ami.SetReleaser(&memdbReleaser{m: auxm})
+ its = append(its, ami)
+ }
+ for _, t := range auxt {
+ its = append(its, v.s.tops.newIterator(t, slice, ro))
+ }
+
+ emi := em.NewIterator(slice)
+ emi.SetReleaser(&memdbReleaser{m: em})
+ its = append(its, emi)
+ if fm != nil {
+ fmi := fm.NewIterator(slice)
+ fmi.SetReleaser(&memdbReleaser{m: fm})
+ its = append(its, fmi)
+ }
+ its = append(its, tableIts...)
+ mi := iterator.NewMergedIterator(its, db.s.icmp, strict)
+ mi.SetReleaser(&versionReleaser{v: v}) // the version reference is handed over to the merged iterator's releaser
+ return mi
+ }
+
+ // newIterator returns a user-key iterator that sees entries with a
+ // sequence number up to seq. The optional user-key range slice is
+ // translated into internal-key bounds. The iterator is counted in
+ // db.aliveIters and gets a finalizer that releases it.
+ func (db *DB) newIterator(auxm *memDB, auxt tFiles, seq uint64, slice *util.Range, ro *opt.ReadOptions) *dbIter {
+ 	var bounds *util.Range
+ 	if slice != nil {
+ 		bounds = new(util.Range)
+ 		if start := slice.Start; start != nil {
+ 			bounds.Start = makeInternalKey(nil, start, keyMaxSeq, keyTypeSeek)
+ 		}
+ 		if limit := slice.Limit; limit != nil {
+ 			bounds.Limit = makeInternalKey(nil, limit, keyMaxSeq, keyTypeSeek)
+ 		}
+ 	}
+
+ 	raw := db.newRawIterator(auxm, auxt, bounds, ro)
+ 	it := new(dbIter)
+ 	it.db = db
+ 	it.icmp = db.s.icmp
+ 	it.iter = raw
+ 	it.seq = seq
+ 	it.strict = opt.GetStrict(db.s.o.Options, ro, opt.StrictReader)
+ 	it.key = []byte{}
+ 	it.value = []byte{}
+
+ 	atomic.AddInt32(&db.aliveIters, 1)
+ 	runtime.SetFinalizer(it, (*dbIter).Release)
+ 	return it
+ }
+
+ // iterSamplingRate returns a random value in [0, 2*rate), where rate is
+ // the configured iterator sampling rate.
+ func (db *DB) iterSamplingRate() int {
+ 	rate := db.s.o.GetIteratorSamplingRate()
+ 	return rand.Intn(2 * rate)
+ }
+
+ type dir int // dir is the position/direction state of a dbIter
+
+ const (
+ dirReleased dir = iota - 1 // -1: the iterator has been released
+ dirSOI // 0: positioned before the first entry
+ dirEOI // 1: positioned after the last entry
+ dirBackward // 2: on an entry, last moved backward
+ dirForward // 3: on an entry, last moved forward
+ )
+
+ // dbIter represents the state of an iterator over a database session.
+ type dbIter struct {
+ db *DB
+ icmp *iComparer
+ iter iterator.Iterator // underlying iterator over internal keys
+ seq uint64 // entries with a larger sequence number are ignored
+ strict bool // when true, an unparsable internal key becomes an iterator error
+
+ smaplingGap int // (sic) budget consumed by sampleSeek; a sample is taken whenever it drops below zero
+ dir dir
+ key []byte // user key of the current entry
+ value []byte // value of the current entry
+ err error
+ releaser util.Releaser
+ }
+
+ // sampleSeek charges the size of the current raw entry against the
+ // sampling gap and reports the current key to the DB as a seek sample
+ // each time the gap has to be refilled.
+ func (i *dbIter) sampleSeek() {
+ 	ikey := i.iter.Key()
+ 	consumed := len(ikey) + len(i.iter.Value())
+ 	i.smaplingGap -= consumed
+ 	for i.smaplingGap < 0 {
+ 		i.smaplingGap += i.db.iterSamplingRate()
+ 		i.db.sampleSeek(ikey)
+ 	}
+ }
+
+ // setErr records err as the iterator error and clears the current
+ // key and value.
+ func (i *dbIter) setErr(err error) {
+ 	i.err = err
+ 	i.key, i.value = nil, nil
+ }
+
+ // iterErr adopts the error of the underlying iterator, if it has one.
+ func (i *dbIter) iterErr() {
+ 	err := i.iter.Error()
+ 	if err == nil {
+ 		return
+ 	}
+ 	i.setErr(err)
+ }
+
+ // Valid reports whether the iterator is positioned on an entry and has
+ // no error.
+ func (i *dbIter) Valid() bool {
+ 	if i.err != nil {
+ 		return false
+ 	}
+ 	return i.dir > dirEOI
+ }
+
+ // First moves to the first visible entry and reports whether one
+ // exists. It returns false if the iterator already has an error, and
+ // sets ErrIterReleased if it has been released.
+ func (i *dbIter) First() bool {
+ 	switch {
+ 	case i.err != nil:
+ 		return false
+ 	case i.dir == dirReleased:
+ 		i.err = ErrIterReleased
+ 		return false
+ 	}
+
+ 	if !i.iter.First() {
+ 		i.dir = dirEOI
+ 		i.iterErr()
+ 		return false
+ 	}
+ 	i.dir = dirSOI
+ 	return i.next()
+ }
+
+ // Last moves to the last visible entry and reports whether one exists.
+ // It returns false if the iterator already has an error, and sets
+ // ErrIterReleased if it has been released.
+ func (i *dbIter) Last() bool {
+ 	switch {
+ 	case i.err != nil:
+ 		return false
+ 	case i.dir == dirReleased:
+ 		i.err = ErrIterReleased
+ 		return false
+ 	}
+
+ 	if !i.iter.Last() {
+ 		i.dir = dirSOI
+ 		i.iterErr()
+ 		return false
+ 	}
+ 	return i.prev()
+ }
+
+ // Seek moves to the first visible entry whose user key is at or after
+ // key and reports whether one exists. It returns false if the iterator
+ // already has an error, and sets ErrIterReleased if it has been
+ // released.
+ func (i *dbIter) Seek(key []byte) bool {
+ 	switch {
+ 	case i.err != nil:
+ 		return false
+ 	case i.dir == dirReleased:
+ 		i.err = ErrIterReleased
+ 		return false
+ 	}
+
+ 	target := makeInternalKey(nil, key, i.seq, keyTypeSeek)
+ 	if !i.iter.Seek(target) {
+ 		i.dir = dirEOI
+ 		i.iterErr()
+ 		return false
+ 	}
+ 	i.dir = dirSOI
+ 	return i.next()
+ }
+
+ func (i *dbIter) next() bool { // next scans forward from the current raw position to the next visible value entry; it reports false at the end of input or on error.
+ for {
+ if ukey, seq, kt, kerr := parseInternalKey(i.iter.Key()); kerr == nil {
+ i.sampleSeek()
+ if seq <= i.seq { // entries newer than the iterator's sequence number are skipped
+ switch kt {
+ case keyTypeDel:
+ // Skip deleted key; remembering it in i.key makes the uCompare check below skip older values of the same key.
+ i.key = append(i.key[:0], ukey...)
+ i.dir = dirForward
+ case keyTypeVal:
+ if i.dir == dirSOI || i.icmp.uCompare(ukey, i.key) > 0 {
+ i.key = append(i.key[:0], ukey...)
+ i.value = append(i.value[:0], i.iter.Value()...)
+ i.dir = dirForward
+ return true
+ }
+ }
+ }
+ } else if i.strict {
+ i.setErr(kerr)
+ break
+ }
+ if !i.iter.Next() {
+ i.dir = dirEOI
+ i.iterErr()
+ break
+ }
+ }
+ return false
+ }
+
+ // Next moves to the next visible entry and reports whether one exists.
+ // It returns false at the end of input or when the iterator has an
+ // error, and sets ErrIterReleased if the iterator has been released.
+ func (i *dbIter) Next() bool {
+ 	switch {
+ 	case i.dir == dirEOI, i.err != nil:
+ 		return false
+ 	case i.dir == dirReleased:
+ 		i.err = ErrIterReleased
+ 		return false
+ 	}
+
+ 	// After a backward move the underlying iterator is advanced a second
+ 	// time before the forward scan starts.
+ 	advanced := i.iter.Next()
+ 	if advanced && i.dir == dirBackward {
+ 		advanced = i.iter.Next()
+ 	}
+ 	if !advanced {
+ 		i.dir = dirEOI
+ 		i.iterErr()
+ 		return false
+ 	}
+ 	return i.next()
+ }
+
+ func (i *dbIter) prev() bool { // prev scans backward from the current raw position to the previous visible value entry; it reports false at the start of input or on error.
+ i.dir = dirBackward
+ del := true // true while no live value is held as a candidate in i.key/i.value
+ if i.iter.Valid() {
+ for {
+ if ukey, seq, kt, kerr := parseInternalKey(i.iter.Key()); kerr == nil {
+ i.sampleSeek()
+ if seq <= i.seq {
+ if !del && i.icmp.uCompare(ukey, i.key) < 0 { // reached a smaller user key: the held candidate is the answer
+ return true
+ }
+ del = (kt == keyTypeDel)
+ if !del {
+ i.key = append(i.key[:0], ukey...)
+ i.value = append(i.value[:0], i.iter.Value()...)
+ }
+ }
+ } else if i.strict {
+ i.setErr(kerr)
+ return false
+ }
+ if !i.iter.Prev() {
+ break
+ }
+ }
+ }
+ if del {
+ i.dir = dirSOI
+ i.iterErr()
+ return false
+ }
+ return true
+ }
+
+ func (i *dbIter) Prev() bool { // Prev moves to the previous visible entry and reports whether one exists; it sets ErrIterReleased if the iterator has been released.
+ if i.dir == dirSOI || i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ switch i.dir {
+ case dirEOI:
+ return i.Last()
+ case dirForward: // step the raw iterator back past every entry of the current user key first
+ for i.iter.Prev() {
+ if ukey, _, _, kerr := parseInternalKey(i.iter.Key()); kerr == nil {
+ i.sampleSeek()
+ if i.icmp.uCompare(ukey, i.key) < 0 {
+ goto cont
+ }
+ } else if i.strict {
+ i.setErr(kerr)
+ return false
+ }
+ }
+ i.dir = dirSOI
+ i.iterErr()
+ return false
+ }
+
+ cont:
+ return i.prev()
+ }
+
+ // Key returns the user key of the current entry, or nil when the
+ // iterator has an error or is not positioned on an entry.
+ func (i *dbIter) Key() []byte {
+ 	if i.err == nil && i.dir > dirEOI {
+ 		return i.key
+ 	}
+ 	return nil
+ }
+
+ // Value returns the value of the current entry, or nil when the
+ // iterator has an error or is not positioned on an entry.
+ func (i *dbIter) Value() []byte {
+ 	if i.err == nil && i.dir > dirEOI {
+ 		return i.value
+ 	}
+ 	return nil
+ }
+
+ // Release releases the iterator and everything it holds: the attached
+ // releaser, the underlying iterator and the DB's alive-iterator count.
+ // Calling it again is a no-op.
+ func (i *dbIter) Release() {
+ 	if i.dir == dirReleased {
+ 		return
+ 	}
+
+ 	// Clear the finalizer.
+ 	runtime.SetFinalizer(i, nil)
+
+ 	if r := i.releaser; r != nil {
+ 		r.Release()
+ 		i.releaser = nil
+ 	}
+
+ 	i.dir = dirReleased
+ 	i.key, i.value = nil, nil
+ 	i.iter.Release()
+ 	i.iter = nil
+ 	atomic.AddInt32(&i.db.aliveIters, -1)
+ 	i.db = nil
+ }
+
+ // SetReleaser attaches releaser, which Release will invoke. It panics
+ // with util.ErrReleased if the iterator has already been released and
+ // with util.ErrHasReleaser if a releaser is already attached and
+ // releaser is not nil. Passing nil detaches the current releaser.
+ func (i *dbIter) SetReleaser(releaser util.Releaser) {
+ 	switch {
+ 	case i.dir == dirReleased:
+ 		panic(util.ErrReleased)
+ 	case i.releaser != nil && releaser != nil:
+ 		panic(util.ErrHasReleaser)
+ 	}
+ 	i.releaser = releaser
+ }
+
+ // Error returns the error recorded by the iterator, if any.
+ func (i *dbIter) Error() error {
+ 	err := i.err
+ 	return err
+ }
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go
new file mode 100644
index 000000000..2c69d2e53
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go
@@ -0,0 +1,183 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "container/list"
+ "fmt"
+ "runtime"
+ "sync"
+ "sync/atomic"
+
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+ type snapshotElement struct { // snapshotElement is a reference-counted entry in db.snapsList, shared by all snapshots taken at the same sequence number.
+ seq uint64 // sequence number captured by the snapshot
+ ref int // number of snapshots sharing this element
+ e *list.Element // position in db.snapsList; nil after removal
+ }
+
+ // acquireSnapshot returns a snapshot element for the latest sequence
+ // number. The newest element in the list is reused (its reference count
+ // is incremented) when it carries the same sequence number; otherwise a
+ // new element is appended. It panics if the sequence number went
+ // backwards.
+ func (db *DB) acquireSnapshot() *snapshotElement {
+ 	db.snapsMu.Lock()
+ 	defer db.snapsMu.Unlock()
+
+ 	seq := db.getSeq()
+
+ 	if back := db.snapsList.Back(); back != nil {
+ 		newest := back.Value.(*snapshotElement)
+ 		switch {
+ 		case newest.seq == seq:
+ 			newest.ref++
+ 			return newest
+ 		case seq < newest.seq:
+ 			panic("leveldb: sequence number is not increasing")
+ 		}
+ 	}
+
+ 	elem := &snapshotElement{seq: seq, ref: 1}
+ 	elem.e = db.snapsList.PushBack(elem)
+ 	return elem
+ }
+
+ // releaseSnapshot drops one reference on se and removes it from the
+ // snapshot list when the last reference is gone. It panics if the
+ // reference count becomes negative.
+ func (db *DB) releaseSnapshot(se *snapshotElement) {
+ 	db.snapsMu.Lock()
+ 	defer db.snapsMu.Unlock()
+
+ 	se.ref--
+ 	switch {
+ 	case se.ref == 0:
+ 		db.snapsList.Remove(se.e)
+ 		se.e = nil
+ 	case se.ref < 0:
+ 		panic("leveldb: Snapshot: negative element reference")
+ 	}
+ }
+
+ // minSeq returns the sequence number of the oldest outstanding snapshot,
+ // or the latest sequence number when there are no snapshots.
+ func (db *DB) minSeq() uint64 {
+ 	db.snapsMu.Lock()
+ 	defer db.snapsMu.Unlock()
+
+ 	oldest := db.snapsList.Front()
+ 	if oldest == nil {
+ 		return db.getSeq()
+ 	}
+ 	return oldest.Value.(*snapshotElement).seq
+ }
+
+ // Snapshot is a DB snapshot: a view of the DB at the sequence number captured when it was created.
+ type Snapshot struct {
+ db *DB // set to nil by Release
+ elem *snapshotElement // set to nil by Release
+ mu sync.RWMutex // guards released, db and elem
+ released bool
+ }
+
+ // newSnapshot creates a Snapshot at the latest sequence number, counts
+ // it in db.aliveSnaps and installs a finalizer that releases it.
+ func (db *DB) newSnapshot() *Snapshot {
+ 	snap := new(Snapshot)
+ 	snap.db = db
+ 	snap.elem = db.acquireSnapshot()
+
+ 	atomic.AddInt32(&db.aliveSnaps, 1)
+ 	runtime.SetFinalizer(snap, (*Snapshot).Release)
+ 	return snap
+ }
+
+ // String returns a textual form of the snapshot that includes its
+ // sequence number. It must not be called after Release.
+ func (snap *Snapshot) String() string {
+ 	seq := snap.elem.seq
+ 	return fmt.Sprintf("leveldb.Snapshot{%d}", seq)
+ }
+
+ // Get gets the value for the given key. It returns ErrNotFound if
+ // the DB does not contain the key, ErrSnapshotReleased if the snapshot
+ // has been released, and the DB's own error if the DB is no longer
+ // usable.
+ //
+ // The caller should not modify the contents of the returned slice, but
+ // it is safe to modify the contents of the argument after Get returns.
+ func (snap *Snapshot) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
+ 	snap.mu.RLock()
+ 	defer snap.mu.RUnlock()
+ 	// The released check must come first and under the lock: Release sets
+ 	// snap.db to nil, so touching snap.db before this point would
+ 	// dereference nil (and race with Release).
+ 	if snap.released {
+ 		return nil, ErrSnapshotReleased
+ 	}
+ 	if err = snap.db.ok(); err != nil {
+ 		return nil, err
+ 	}
+ 	return snap.db.get(nil, nil, key, snap.elem.seq, ro)
+ }
+
+ // Has returns true if the DB contains the given key. It returns
+ // ErrSnapshotReleased if the snapshot has been released, and the DB's
+ // own error if the DB is no longer usable.
+ //
+ // It is safe to modify the contents of the argument after Has returns.
+ func (snap *Snapshot) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) {
+ 	snap.mu.RLock()
+ 	defer snap.mu.RUnlock()
+ 	// The released check must come first and under the lock: Release sets
+ 	// snap.db to nil, so touching snap.db before this point would
+ 	// dereference nil (and race with Release).
+ 	if snap.released {
+ 		return false, ErrSnapshotReleased
+ 	}
+ 	if err = snap.db.ok(); err != nil {
+ 		return false, err
+ 	}
+ 	return snap.db.has(nil, nil, key, snap.elem.seq, ro)
+ }
+
+ // NewIterator returns an iterator for the snapshot of the underlying DB.
+ // The returned iterator is not safe for concurrent use, but it is safe to use
+ // multiple iterators concurrently, with each in a dedicated goroutine.
+ // It is also safe to use an iterator concurrently with modifying its
+ // underlying DB. The resultant key/value pairs are guaranteed to be
+ // consistent.
+ //
+ // Slice allows slicing the iterator to only contain keys in the given
+ // range. A nil Range.Start is treated as a key before all keys in the
+ // DB. And a nil Range.Limit is treated as a key after all keys in
+ // the DB.
+ //
+ // If the snapshot has been released or the DB is no longer usable, an
+ // empty iterator carrying the corresponding error is returned.
+ //
+ // The iterator must be released after use, by calling Release method.
+ // Releasing the snapshot doesn't mean releasing the iterator too, the
+ // iterator would be still valid until released.
+ //
+ // Also read Iterator documentation of the leveldb/iterator package.
+ func (snap *Snapshot) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+ 	// Only snapshot fields are read here, so a read lock is enough; this
+ 	// matches Get and Has.
+ 	snap.mu.RLock()
+ 	defer snap.mu.RUnlock()
+ 	// The released check must come first and under the lock: Release sets
+ 	// snap.db to nil, so touching snap.db before this point would
+ 	// dereference nil (and race with Release).
+ 	if snap.released {
+ 		return iterator.NewEmptyIterator(ErrSnapshotReleased)
+ 	}
+ 	if err := snap.db.ok(); err != nil {
+ 		return iterator.NewEmptyIterator(err)
+ 	}
+ 	// Since iterator already hold version ref, it doesn't need to
+ 	// hold snapshot ref.
+ 	return snap.db.newIterator(nil, nil, snap.elem.seq, slice, ro)
+ }
+
+ // Release releases the snapshot. Iterators created from it are not
+ // released; they stay valid until they are released themselves or the
+ // underlying DB is closed. Calling Release more than once is a no-op.
+ //
+ // Other methods should not be called after the snapshot has been released.
+ func (snap *Snapshot) Release() {
+ 	snap.mu.Lock()
+ 	defer snap.mu.Unlock()
+
+ 	if snap.released {
+ 		return
+ 	}
+
+ 	// Clear the finalizer.
+ 	runtime.SetFinalizer(snap, nil)
+
+ 	snap.released = true
+ 	owner := snap.db
+ 	owner.releaseSnapshot(snap.elem)
+ 	atomic.AddInt32(&owner.aliveSnaps, -1)
+ 	snap.db, snap.elem = nil, nil
+ }
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_state.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_state.go
new file mode 100644
index 000000000..85b02d24b
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_state.go
@@ -0,0 +1,234 @@
+// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "sync/atomic"
+ "time"
+
+ "github.com/syndtr/goleveldb/leveldb/journal"
+ "github.com/syndtr/goleveldb/leveldb/memdb"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+// memDB wraps a memdb.DB together with the DB that owns it and a reference
+// count. The count is manipulated through getref, incref and decref.
+type memDB struct {
+ db *DB // owning DB; set to nil by decref once the count drops to zero
+ *memdb.DB
+ ref int32 // reference count, accessed via sync/atomic
+}
+
+// getref returns the current reference count, loaded atomically.
+func (m *memDB) getref() int32 {
+	current := atomic.LoadInt32(&m.ref)
+	return current
+}
+
+func (m *memDB) incref() {
+ atomic.AddInt32(&m.ref, 1)
+}
+
+// decref atomically drops one reference. When the last reference goes away
+// the memdb is reset and offered back to the owner's pool (only if it has
+// the standard write-buffer capacity), and the wrapper's pointers are
+// cleared. It panics if the count becomes negative.
+func (m *memDB) decref() {
+	switch remaining := atomic.AddInt32(&m.ref, -1); {
+	case remaining < 0:
+		panic("negative memdb ref")
+	case remaining == 0:
+		// Only put back memdb with std capacity.
+		if m.Capacity() == m.db.s.o.GetWriteBuffer() {
+			m.Reset()
+			m.db.mpoolPut(m.DB)
+		}
+		m.db, m.DB = nil, nil
+	}
+}
+
+// getSeq atomically loads the latest sequence number.
+func (db *DB) getSeq() uint64 {
+	latest := atomic.LoadUint64(&db.seq)
+	return latest
+}
+
+// addSeq atomically advances the sequence number by delta.
+func (db *DB) addSeq(delta uint64) {
+	seqPtr := &db.seq
+	atomic.AddUint64(seqPtr, delta)
+}
+
+// setSeq atomically overwrites the sequence number with seq.
+func (db *DB) setSeq(seq uint64) {
+	seqPtr := &db.seq
+	atomic.StoreUint64(seqPtr, seq)
+}
+
+// sampleSeek records a seek sample for ikey on the current version and
+// triggers a table compaction when the version asks for one.
+func (db *DB) sampleSeek(ikey internalKey) {
+	current := db.s.version()
+	wantsCompaction := current.sampleSeek(ikey)
+	if wantsCompaction {
+		// Trigger table compaction.
+		db.compTrigger(db.tcompCmdC)
+	}
+	current.release()
+}
+
+// mpoolPut offers mem back to the memdb pool. Nothing is done when the DB is
+// closed, and mem is simply dropped when the pool cannot accept it without
+// blocking.
+func (db *DB) mpoolPut(mem *memdb.DB) {
+	if db.isClosed() {
+		return
+	}
+	select {
+	case db.memPool <- mem:
+	default:
+		// The pool would block; leave mem to the garbage collector.
+	}
+}
+
+// mpoolGet returns a memDB able to hold at least n bytes. A pooled memdb is
+// reused when one is immediately available and large enough; otherwise a new
+// one is allocated with the larger of the write buffer size and n. The
+// returned wrapper starts with a zero reference count.
+func (db *DB) mpoolGet(n int) *memDB {
+	var pooled *memdb.DB
+	select {
+	case pooled = <-db.memPool:
+	default:
+	}
+	if pooled == nil || pooled.Capacity() < n {
+		capacity := maxInt(db.s.o.GetWriteBuffer(), n)
+		pooled = memdb.New(db.s.icmp, capacity)
+	}
+	wrapped := new(memDB)
+	wrapped.db = db
+	wrapped.DB = pooled
+	return wrapped
+}
+
+// mpoolDrain loops until db.closeC fires. Every 30 seconds it removes at most
+// one memdb from db.memPool without blocking. On close it stops the ticker,
+// waits up to one second to receive one more pooled memdb, closes db.memPool
+// and returns.
+func (db *DB) mpoolDrain() {
+ ticker := time.NewTicker(30 * time.Second)
+ for {
+ select {
+ case <-ticker.C:
+ // Non-blocking receive: discard one pooled memdb if there is one.
+ select {
+ case <-db.memPool:
+ default:
+ }
+ case <-db.closeC:
+ ticker.Stop()
+ // Make sure the pool is drained.
+ select {
+ case <-db.memPool:
+ case <-time.After(time.Second):
+ }
+ close(db.memPool)
+ return
+ }
+ }
+}
+
+// newMem creates a new journal file and a new effective memdb of at least n
+// bytes, and freezes the old memdb. It needs external synchronization and is
+// only called synchronously by the writer.
+//
+// The returned memdb carries two references: one held by db.mem and one for
+// the caller. If the journal file cannot be created, the allocated file
+// number is handed back and the error is returned. It panics if a frozen
+// memdb is still present.
+func (db *DB) newMem(n int) (mem *memDB, err error) {
+ fd := storage.FileDesc{Type: storage.TypeJournal, Num: db.s.allocFileNum()}
+ w, err := db.s.stor.Create(fd)
+ if err != nil {
+ db.s.reuseFileNum(fd.Num)
+ return
+ }
+
+ db.memMu.Lock()
+ defer db.memMu.Unlock()
+
+ if db.frozenMem != nil {
+ panic("still has frozen mem")
+ }
+
+ if db.journal == nil {
+ db.journal = journal.NewWriter(w)
+ } else {
+ // Point the journal at the new file, close the previous writer and
+ // remember the previous journal file as the frozen one.
+ db.journal.Reset(w)
+ db.journalWriter.Close()
+ db.frozenJournalFd = db.journalFd
+ }
+ db.journalWriter = w
+ db.journalFd = fd
+ db.frozenMem = db.mem
+ mem = db.mpoolGet(n)
+ mem.incref() // for self
+ mem.incref() // for caller
+ db.mem = mem
+ // The seq only incremented by the writer. And whoever called newMem
+ // should hold write lock, so no need additional synchronization here.
+ db.frozenSeq = db.seq
+ return
+}
+
+// getMems returns the effective memdb (e) and the frozen memdb (f), taking
+// a reference on each one that is non-nil. It panics if the effective memdb
+// is nil while the DB is not closed.
+func (db *DB) getMems() (e, f *memDB) {
+	db.memMu.RLock()
+	defer db.memMu.RUnlock()
+
+	e, f = db.mem, db.frozenMem
+	switch {
+	case e != nil:
+		e.incref()
+	case !db.isClosed():
+		panic("nil effective mem")
+	}
+	if f != nil {
+		f.incref()
+	}
+	return e, f
+}
+
+// getEffectiveMem returns the effective memdb with a reference taken for the
+// caller, or nil when the DB is closed and the memdb has been cleared. It
+// panics if the effective memdb is nil while the DB is not closed.
+func (db *DB) getEffectiveMem() *memDB {
+	db.memMu.RLock()
+	defer db.memMu.RUnlock()
+
+	effective := db.mem
+	switch {
+	case effective != nil:
+		effective.incref()
+	case !db.isClosed():
+		panic("nil effective mem")
+	}
+	return effective
+}
+
+// hasFrozenMem reports whether a frozen memdb currently exists.
+func (db *DB) hasFrozenMem() bool {
+	db.memMu.RLock()
+	defer db.memMu.RUnlock()
+	if db.frozenMem == nil {
+		return false
+	}
+	return true
+}
+
+// getFrozenMem returns the frozen memdb with a reference taken for the
+// caller, or nil when there is no frozen memdb.
+func (db *DB) getFrozenMem() *memDB {
+	db.memMu.RLock()
+	defer db.memMu.RUnlock()
+
+	frozen := db.frozenMem
+	if frozen == nil {
+		return frozen
+	}
+	frozen.incref()
+	return frozen
+}
+
+// dropFrozenMem removes the frozen journal file (logging the outcome),
+// resets the frozen journal descriptor, and releases and clears the frozen
+// memdb. The frozen memdb is assumed to be non-nil.
+func (db *DB) dropFrozenMem() {
+	db.memMu.Lock()
+	err := db.s.stor.Remove(db.frozenJournalFd)
+	switch {
+	case err == nil:
+		db.logf("journal@remove removed @%d", db.frozenJournalFd.Num)
+	default:
+		db.logf("journal@remove removing @%d %q", db.frozenJournalFd.Num, err)
+	}
+	db.frozenJournalFd = storage.FileDesc{}
+	frozen := db.frozenMem
+	frozen.decref()
+	db.frozenMem = nil
+	db.memMu.Unlock()
+}
+
+// clearMems drops the DB's pointers to both the effective and the frozen
+// memdb; used by DB.Close().
+func (db *DB) clearMems() {
+	db.memMu.Lock()
+	db.mem, db.frozenMem = nil, nil
+	db.memMu.Unlock()
+}
+
+// setClosed flips the closed flag from 0 to 1 and reports whether this call
+// performed the transition (false means the DB was already closed).
+func (db *DB) setClosed() bool {
+	swapped := atomic.CompareAndSwapUint32(&db.closed, 0, 1)
+	return swapped
+}
+
+// isClosed reports whether the closed flag has been set.
+func (db *DB) isClosed() bool {
+	flag := atomic.LoadUint32(&db.closed)
+	return flag != 0
+}
+
+// ok returns ErrClosed when the DB has been closed and nil otherwise.
+func (db *DB) ok() error {
+	if !db.isClosed() {
+		return nil
+	}
+	return ErrClosed
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go
new file mode 100644
index 000000000..b8f7e7d21
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go
@@ -0,0 +1,325 @@
+// Copyright (c) 2016, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "errors"
+ "sync"
+ "time"
+
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// errTransactionDone is returned by Transaction methods that are called
+// after the transaction has been committed or discarded.
+var errTransactionDone = errors.New("leveldb: transaction already closed")
+
+// Transaction is the transaction handle.
+type Transaction struct {
+ db *DB // DB the transaction was opened on
+ lk sync.RWMutex // read methods take the read lock, mutating methods the write lock
+ seq uint64 // starts at db.seq and is incremented for every record put
+ mem *memDB // buffers records until they are flushed into a table
+ tables tFiles // tables created by flush so far
+ ikScratch []byte // reusable buffer for building internal keys in put
+ rec sessionRecord // collects the created tables; committed by Commit
+ stats cStatStaging // flush/commit statistics, added to db.compStats on commit
+ closed bool // set by setDone once committed or discarded
+}
+
+// Get gets the value for the given key. It returns ErrNotFound if the
+// DB does not contain the key.
+//
+// The returned slice is its own copy, it is safe to modify the contents
+// of the returned slice.
+// It is safe to modify the contents of the argument after Get returns.
+func (tr *Transaction) Get(key []byte, ro *opt.ReadOptions) ([]byte, error) {
+	tr.lk.RLock()
+	defer tr.lk.RUnlock()
+	if !tr.closed {
+		return tr.db.get(tr.mem.DB, tr.tables, key, tr.seq, ro)
+	}
+	return nil, errTransactionDone
+}
+
+// Has returns true if the DB contains the given key.
+//
+// It is safe to modify the contents of the argument after Has returns.
+func (tr *Transaction) Has(key []byte, ro *opt.ReadOptions) (bool, error) {
+	tr.lk.RLock()
+	defer tr.lk.RUnlock()
+	if !tr.closed {
+		return tr.db.has(tr.mem.DB, tr.tables, key, tr.seq, ro)
+	}
+	return false, errTransactionDone
+}
+
+// NewIterator returns an iterator for the latest snapshot of the transaction.
+// The returned iterator is not safe for concurrent use, but it is safe to use
+// multiple iterators concurrently, with each in a dedicated goroutine.
+// It is also safe to use an iterator concurrently with writes to the
+// transaction. The resultant key/value pairs are guaranteed to be consistent.
+//
+// Slice allows slicing the iterator to only contain keys in the given
+// range. A nil Range.Start is treated as a key before all keys in the
+// DB. And a nil Range.Limit is treated as a key after all keys in
+// the DB.
+//
+// The iterator must be released after use, by calling the Release method.
+//
+// Also read Iterator documentation of the leveldb/iterator package.
+func (tr *Transaction) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+	tr.lk.RLock()
+	defer tr.lk.RUnlock()
+	if !tr.closed {
+		// Take a reference on the memdb before handing it to the iterator.
+		tr.mem.incref()
+		return tr.db.newIterator(tr.mem, tr.tables, tr.seq, slice, ro)
+	}
+	return iterator.NewEmptyIterator(errTransactionDone)
+}
+
+// flush writes the contents of the transaction memdb, if any, into a new
+// table, records that table in tr.tables and tr.rec, and leaves the
+// transaction with an empty memdb. It returns the table creation error, in
+// which case the memdb is left untouched.
+func (tr *Transaction) flush() error {
+ // Flush memdb.
+ if tr.mem.Len() != 0 {
+ tr.stats.startTimer()
+ iter := tr.mem.NewIterator(nil)
+ t, n, err := tr.db.s.tops.createFrom(iter)
+ iter.Release()
+ tr.stats.stopTimer()
+ if err != nil {
+ return err
+ }
+ // With a single reference the transaction is the only holder, so the
+ // memdb can be reset in place. Otherwise something else (presumably an
+ // open iterator) still holds it: drop our reference and take a fresh one.
+ if tr.mem.getref() == 1 {
+ tr.mem.Reset()
+ } else {
+ tr.mem.decref()
+ tr.mem = tr.db.mpoolGet(0)
+ tr.mem.incref()
+ }
+ tr.tables = append(tr.tables, t)
+ tr.rec.addTableFile(0, t)
+ tr.stats.write += t.size
+ tr.db.logf("transaction@flush created L0@%d N·%d S·%s %q:%q", t.fd.Num, n, shortenb(int(t.size)), t.imin, t.imax)
+ }
+ return nil
+}
+
+// put appends one record of type kt to the transaction memdb under the next
+// sequence number, flushing the memdb to a table first when it lacks room.
+// tr.seq advances only when the record was stored.
+func (tr *Transaction) put(kt keyType, key, value []byte) error {
+	tr.ikScratch = makeInternalKey(tr.ikScratch, key, tr.seq+1, kt)
+	needed := len(tr.ikScratch) + len(value)
+	if tr.mem.Free() < needed {
+		if flushErr := tr.flush(); flushErr != nil {
+			return flushErr
+		}
+	}
+	putErr := tr.mem.Put(tr.ikScratch, value)
+	if putErr != nil {
+		return putErr
+	}
+	tr.seq++
+	return nil
+}
+
+// Put sets the value for the given key. It overwrites any previous value
+// for that key; a DB is not a multi-map.
+// Please note that the transaction is not compacted until committed, so if
+// you write the same key 10 times, all 10 entries are kept in the transaction.
+//
+// It is safe to modify the contents of the arguments after Put returns.
+func (tr *Transaction) Put(key, value []byte, wo *opt.WriteOptions) error {
+	tr.lk.Lock()
+	defer tr.lk.Unlock()
+	if !tr.closed {
+		return tr.put(keyTypeVal, key, value)
+	}
+	return errTransactionDone
+}
+
+// Delete deletes the value for the given key.
+// Please note that the transaction is not compacted until committed, so if
+// you write the same key 10 times, all 10 entries are kept in the transaction.
+//
+// It is safe to modify the contents of the arguments after Delete returns.
+func (tr *Transaction) Delete(key []byte, wo *opt.WriteOptions) error {
+	tr.lk.Lock()
+	defer tr.lk.Unlock()
+	if !tr.closed {
+		return tr.put(keyTypeDel, key, nil)
+	}
+	return errTransactionDone
+}
+
+// Write applies the given batch to the transaction. The batch records are
+// applied sequentially. A nil or empty batch is a no-op.
+// Please note that the transaction is not compacted until committed, so if
+// you write the same key 10 times, all 10 entries are kept in the transaction.
+//
+// It is safe to modify the contents of the arguments after Write returns.
+func (tr *Transaction) Write(b *Batch, wo *opt.WriteOptions) error {
+	if b == nil {
+		return nil
+	}
+	if b.Len() == 0 {
+		return nil
+	}
+
+	tr.lk.Lock()
+	defer tr.lk.Unlock()
+	if tr.closed {
+		return errTransactionDone
+	}
+	applyRecord := func(i int, kt keyType, k, v []byte) error {
+		return tr.put(kt, k, v)
+	}
+	return b.replayInternal(applyRecord)
+}
+
+// setDone marks the transaction closed, detaches it from the DB, drops the
+// transaction's memdb reference and finally releases the write lock that
+// OpenTransaction acquired. Callers in this file hold tr.lk for writing.
+func (tr *Transaction) setDone() {
+ tr.closed = true
+ tr.db.tr = nil
+ tr.mem.decref()
+ // Receiving from writeLockC releases the write lock.
+ <-tr.db.writeLockC
+}
+
+// Commit commits the transaction. If error is not nil, then the transaction is
+// not committed, it can then either be retried or discarded.
+//
+// Commit flushes any buffered records into a table, then commits the session
+// record (retrying up to three times, one second apart, unless the DB is
+// being closed). On success the DB sequence number is advanced, a table
+// compaction is triggered and the transaction is marked done, which releases
+// the write lock.
+//
+// Other methods should not be called after transaction has been committed.
+func (tr *Transaction) Commit() error {
+	if err := tr.db.ok(); err != nil {
+		return err
+	}
+
+	tr.lk.Lock()
+	defer tr.lk.Unlock()
+	if tr.closed {
+		return errTransactionDone
+	}
+	if err := tr.flush(); err != nil {
+		// Return error, lets user decide either to retry or discard
+		// transaction.
+		return err
+	}
+	if len(tr.tables) != 0 {
+		// Committing transaction.
+		tr.rec.setSeqNum(tr.seq)
+		tr.db.compCommitLk.Lock()
+		tr.stats.startTimer()
+		var cerr error
+		for retry := 0; retry < 3; retry++ {
+			cerr = tr.db.s.commit(&tr.rec)
+			if cerr != nil {
+				tr.db.logf("transaction@commit error R·%d %q", retry, cerr)
+				select {
+				case <-time.After(time.Second):
+				case <-tr.db.closeC:
+					tr.db.logf("transaction@commit exiting")
+					tr.db.compCommitLk.Unlock()
+					return cerr
+				}
+			} else {
+				// Success. Set db.seq.
+				tr.db.setSeq(tr.seq)
+				break
+			}
+		}
+		tr.stats.stopTimer()
+		if cerr != nil {
+			// Release compCommitLk before bailing out; every other exit
+			// path does, and leaving it locked would block a retried
+			// Commit and anything else that takes this lock.
+			tr.db.compCommitLk.Unlock()
+			// Return error, lets user decide either to retry or discard
+			// transaction.
+			return cerr
+		}
+
+		// Update compaction stats. This is safe as long as we hold compCommitLk.
+		tr.db.compStats.addStat(0, &tr.stats)
+
+		// Trigger table auto-compaction.
+		tr.db.compTrigger(tr.db.tcompCmdC)
+		tr.db.compCommitLk.Unlock()
+
+		// Additionally, wait compaction when certain threshold reached.
+		// Ignore error, returns error only if transaction can't be committed.
+		tr.db.waitCompaction()
+	}
+	// Only mark as done if transaction committed successfully.
+	tr.setDone()
+	return nil
+}
+
+// discard removes every table file the transaction has created. The file
+// number of each successfully removed table is handed back for reuse;
+// removal errors are ignored.
+func (tr *Transaction) discard() {
+	// Discard transaction.
+	for i := range tr.tables {
+		table := tr.tables[i]
+		tr.db.logf("transaction@discard @%d", table.fd.Num)
+		if tr.db.s.stor.Remove(table.fd) != nil {
+			continue
+		}
+		tr.db.s.reuseFileNum(table.fd.Num)
+	}
+}
+
+// Discard discards the transaction. Discarding a transaction that is
+// already committed or discarded is a no-op.
+//
+// Other methods should not be called after transaction has been discarded.
+func (tr *Transaction) Discard() {
+	tr.lk.Lock()
+	if tr.closed {
+		tr.lk.Unlock()
+		return
+	}
+	tr.discard()
+	tr.setDone()
+	tr.lk.Unlock()
+}
+
+// waitCompaction triggers a table compaction and waits for it when the
+// number of level-0 tables has reached the write pause trigger; otherwise it
+// returns nil immediately.
+func (db *DB) waitCompaction() error {
+	level0 := db.s.tLen(0)
+	if level0 < db.s.o.GetWriteL0PauseTrigger() {
+		return nil
+	}
+	return db.compTriggerWait(db.tcompCmdC)
+}
+
+// OpenTransaction opens an atomic DB transaction. Only one transaction can be
+// opened at a time. Subsequent call to Write and OpenTransaction will be blocked
+// until in-flight transaction is committed or discarded.
+// The returned transaction handle is safe for concurrent use.
+//
+// Transaction is expensive and can overwhelm compaction, especially if
+// transaction size is small. Use with caution.
+//
+// The transaction must be closed once done, either by committing or discarding
+// the transaction.
+// Closing the DB will discard open transaction.
+//
+// OpenTransaction takes the write lock and keeps it until the transaction is
+// committed or discarded. If opening fails after the lock was taken, the lock
+// is released before the error is returned.
+func (db *DB) OpenTransaction() (*Transaction, error) {
+	if err := db.ok(); err != nil {
+		return nil, err
+	}
+
+	// The write happen synchronously.
+	select {
+	case db.writeLockC <- struct{}{}:
+	case err := <-db.compPerErrC:
+		return nil, err
+	case <-db.closeC:
+		return nil, ErrClosed
+	}
+
+	if db.tr != nil {
+		panic("leveldb: has open transaction")
+	}
+
+	// Flush current memdb.
+	if db.mem != nil && db.mem.Len() != 0 {
+		if _, err := db.rotateMem(0, true); err != nil {
+			// No transaction is handed out, so nobody else could ever
+			// release the write lock: do it here.
+			<-db.writeLockC
+			return nil, err
+		}
+	}
+
+	// Wait compaction when certain threshold reached.
+	if err := db.waitCompaction(); err != nil {
+		// Same as above: release the write lock on failure.
+		<-db.writeLockC
+		return nil, err
+	}
+
+	tr := &Transaction{
+		db:  db,
+		seq: db.seq,
+		mem: db.mpoolGet(0),
+	}
+	tr.mem.incref()
+	db.tr = tr
+	return tr, nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_util.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_util.go
new file mode 100644
index 000000000..7ecd960d2
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_util.go
@@ -0,0 +1,102 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// Reader is the interface that wraps the basic Get and NewIterator methods.
+// This interface is implemented by both DB and Snapshot.
+type Reader interface {
+ Get(key []byte, ro *opt.ReadOptions) (value []byte, err error)
+ NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator
+}
+
+// Sizes is a list of sizes.
+type Sizes []int64
+
+// Sum returns sum of the sizes.
+func (sizes Sizes) Sum() int64 {
+ var sum int64
+ for _, size := range sizes {
+ sum += size
+ }
+ return sum
+}
+
+// log forwards its arguments to the session's log method.
+func (db *DB) log(v ...interface{}) {
+	db.s.log(v...)
+}
+// logf forwards the format string and its arguments to the session's logf
+// method.
+func (db *DB) logf(format string, v ...interface{}) {
+	db.s.logf(format, v...)
+}
+
+// checkAndCleanFiles compares the files in storage with the current version
+// and removes the ones that are no longer needed:
+//
+//   - manifests numbered below the current manifest,
+//   - journals numbered below the frozen journal (or, when there is no
+//     frozen journal, below the current journal),
+//   - tables that are not referenced by the current version.
+//
+// If any table referenced by the current version is missing from storage,
+// nothing is removed and an ErrCorrupted wrapping ErrMissingFiles is
+// returned. Listing and removal errors are returned as-is.
+func (db *DB) checkAndCleanFiles() error {
+	v := db.s.version()
+	defer v.release()
+
+	// Table number -> whether the file was seen in storage.
+	tmap := make(map[int64]bool)
+	for _, tables := range v.levels {
+		for _, t := range tables {
+			tmap[t.fd.Num] = false
+		}
+	}
+
+	fds, err := db.s.stor.List(storage.TypeAll)
+	if err != nil {
+		return err
+	}
+
+	var nt int
+	var rem []storage.FileDesc
+	for _, fd := range fds {
+		keep := true
+		switch fd.Type {
+		case storage.TypeManifest:
+			keep = fd.Num >= db.s.manifestFd.Num
+		case storage.TypeJournal:
+			if !db.frozenJournalFd.Zero() {
+				keep = fd.Num >= db.frozenJournalFd.Num
+			} else {
+				keep = fd.Num >= db.journalFd.Num
+			}
+		case storage.TypeTable:
+			_, keep = tmap[fd.Num]
+			if keep {
+				tmap[fd.Num] = true
+				nt++
+			}
+		}
+
+		if !keep {
+			rem = append(rem, fd)
+		}
+	}
+
+	// Fewer tables seen than the version references: report the missing ones.
+	if nt != len(tmap) {
+		var mfds []storage.FileDesc
+		for num, present := range tmap {
+			if !present {
+				// Keyed fields: FileDesc comes from another package.
+				mfds = append(mfds, storage.FileDesc{Type: storage.TypeTable, Num: num})
+				db.logf("db@janitor table missing @%d", num)
+			}
+		}
+		return errors.NewErrCorrupted(storage.FileDesc{}, &errors.ErrMissingFiles{Fds: mfds})
+	}
+
+	db.logf("db@janitor F·%d G·%d", len(fds), len(rem))
+	for _, fd := range rem {
+		db.logf("db@janitor removing %s-%d", fd.Type, fd.Num)
+		if err := db.s.stor.Remove(fd); err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_write.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_write.go
new file mode 100644
index 000000000..cc428b695
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_write.go
@@ -0,0 +1,443 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "time"
+
+ "github.com/syndtr/goleveldb/leveldb/memdb"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// writeJournal appends the batches, headed by seq, as one journal record and
+// flushes the journal. When sync is true the journal writer is synced as
+// well. The first error encountered is returned.
+func (db *DB) writeJournal(batches []*Batch, seq uint64, sync bool) error {
+	record, err := db.journal.Next()
+	if err != nil {
+		return err
+	}
+	if err = writeBatchesWithHeader(record, batches, seq); err != nil {
+		return err
+	}
+	if err = db.journal.Flush(); err != nil {
+		return err
+	}
+	if !sync {
+		return nil
+	}
+	return db.journalWriter.Sync()
+}
+
+// rotateMem waits for any pending memdb compaction, installs a new memdb of
+// at least n bytes together with a new journal, and schedules compaction of
+// the memdb that was just frozen. With wait set it blocks until that
+// compaction has run and returns its error; otherwise the compaction is only
+// triggered.
+func (db *DB) rotateMem(n int, wait bool) (mem *memDB, err error) {
+	// Wait for pending memdb compaction.
+	if err = db.compTriggerWait(db.mcompCmdC); err != nil {
+		return nil, err
+	}
+
+	// Create new memdb and journal.
+	if mem, err = db.newMem(n); err != nil {
+		return mem, err
+	}
+
+	// Schedule memdb compaction.
+	if !wait {
+		db.compTrigger(db.mcompCmdC)
+		return mem, nil
+	}
+	return mem, db.compTriggerWait(db.mcompCmdC)
+}
+
+// flush makes sure the effective memdb can take n more bytes, throttling the
+// writer when the number of level-0 tables has reached the configured
+// slowdown/pause triggers. It returns the memdb to write into (with a
+// reference held for the caller), the free space the caller may use, and an
+// error if the DB is closed or a compaction wait / memdb rotation failed.
+// Time spent being delayed is accumulated in db.writeDelay and logged once
+// writes stop being delayed.
+func (db *DB) flush(n int) (mdb *memDB, mdbFree int, err error) {
+ delayed := false
+ slowdownTrigger := db.s.o.GetWriteL0SlowdownTrigger()
+ pauseTrigger := db.s.o.GetWriteL0PauseTrigger()
+ // flush performs one attempt and reports whether another one is needed.
+ flush := func() (retry bool) {
+ mdb = db.getEffectiveMem()
+ if mdb == nil {
+ err = ErrClosed
+ return false
+ }
+ // On retry the reference taken above is dropped; the next attempt
+ // takes a fresh one.
+ defer func() {
+ if retry {
+ mdb.decref()
+ mdb = nil
+ }
+ }()
+ tLen := db.s.tLen(0)
+ mdbFree = mdb.Free()
+ switch {
+ case tLen >= slowdownTrigger && !delayed:
+ // Slow the writer down once, by one millisecond, then retry.
+ delayed = true
+ time.Sleep(time.Millisecond)
+ case mdbFree >= n:
+ return false
+ case tLen >= pauseTrigger:
+ // Too many level-0 tables: wait for a table compaction, then retry.
+ delayed = true
+ err = db.compTriggerWait(db.tcompCmdC)
+ if err != nil {
+ return false
+ }
+ default:
+ // Allow memdb to grow if it has no entry.
+ if mdb.Len() == 0 {
+ mdbFree = n
+ } else {
+ mdb.decref()
+ mdb, err = db.rotateMem(n, false)
+ if err == nil {
+ mdbFree = mdb.Free()
+ } else {
+ mdbFree = 0
+ }
+ }
+ return false
+ }
+ return true
+ }
+ start := time.Now()
+ for flush() {
+ }
+ if delayed {
+ db.writeDelay += time.Since(start)
+ db.writeDelayN++
+ } else if db.writeDelayN > 0 {
+ db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay)
+ db.writeDelay = 0
+ db.writeDelayN = 0
+ }
+ return
+}
+
+// writeMerge is the request a writer sends on db.writeMergeC to ask the
+// current write-lock holder to merge its write. Either batch is set (sent by
+// Write) or keyType/key/value describe a single record (sent by putRec).
+type writeMerge struct {
+ sync bool // whether the sender asked for a synced write
+ batch *Batch // non-nil for a batch write
+ keyType keyType
+ key, value []byte
+}
+
+// unlockWrite finishes a locked write: it sends err to each of the merged
+// writers, then either hands the write lock over to the writer whose merge
+// overflowed, or releases the lock.
+func (db *DB) unlockWrite(overflow bool, merged int, err error) {
+	for ; merged > 0; merged-- {
+		db.writeAckC <- err
+	}
+	if !overflow {
+		// Release lock.
+		<-db.writeLockC
+		return
+	}
+	// Pass lock to the next write (that failed to merge).
+	db.writeMergedC <- false
+}
+
+// writeLocked performs a write while holding the write lock and always gives
+// the lock up (via unlockWrite) before returning.
+//
+// batch is the batch to write. ourBatch, if non-nil, should be equal to
+// batch; it marks a batch owned by this call that merged single-record writes
+// may be appended to. When merge is true, pending writes offered on
+// db.writeMergeC are folded into this write as long as they fit the merge
+// limit. sync requests a journal sync and is also turned on if any merged
+// write asked for it.
+func (db *DB) writeLocked(batch, ourBatch *Batch, merge, sync bool) error {
+ // Try to flush memdb. This method would also trying to throttle writes
+ // if it is too fast and compaction cannot catch-up.
+ mdb, mdbFree, err := db.flush(batch.internalLen)
+ if err != nil {
+ db.unlockWrite(false, 0, err)
+ return err
+ }
+ defer mdb.decref()
+
+ var (
+ overflow bool
+ merged int
+ batches = []*Batch{batch}
+ )
+
+ if merge {
+ // Merge limit.
+ var mergeLimit int
+ if batch.internalLen > 128<<10 {
+ mergeLimit = (1 << 20) - batch.internalLen
+ } else {
+ mergeLimit = 128 << 10
+ }
+ // Never merge more than what is left in the memdb after our own batch.
+ mergeCap := mdbFree - batch.internalLen
+ if mergeLimit > mergeCap {
+ mergeLimit = mergeCap
+ }
+
+ merge:
+ for mergeLimit > 0 {
+ select {
+ case incoming := <-db.writeMergeC:
+ if incoming.batch != nil {
+ // Merge batch.
+ if incoming.batch.internalLen > mergeLimit {
+ overflow = true
+ break merge
+ }
+ batches = append(batches, incoming.batch)
+ mergeLimit -= incoming.batch.internalLen
+ } else {
+ // Merge put.
+ internalLen := len(incoming.key) + len(incoming.value) + 8
+ if internalLen > mergeLimit {
+ overflow = true
+ break merge
+ }
+ if ourBatch == nil {
+ ourBatch = db.batchPool.Get().(*Batch)
+ ourBatch.Reset()
+ batches = append(batches, ourBatch)
+ }
+ // We can use same batch since concurrent write doesn't
+ // guarantee write order.
+ ourBatch.appendRec(incoming.keyType, incoming.key, incoming.value)
+ mergeLimit -= internalLen
+ }
+ sync = sync || incoming.sync
+ merged++
+ // Tell the sender its write was merged; it will wait for the ack.
+ db.writeMergedC <- true
+
+ default:
+ break merge
+ }
+ }
+ }
+
+ // Seq number.
+ seq := db.seq + 1
+
+ // Write journal.
+ if err := db.writeJournal(batches, seq, sync); err != nil {
+ db.unlockWrite(overflow, merged, err)
+ return err
+ }
+
+ // Put batches.
+ for _, batch := range batches {
+ if err := batch.putMem(seq, mdb.DB); err != nil {
+ panic(err)
+ }
+ seq += uint64(batch.Len())
+ }
+
+ // Incr seq number.
+ db.addSeq(uint64(batchesLen(batches)))
+
+ // Rotate memdb if it's reach the threshold.
+ // NOTE(review): the rotateMem error is ignored here — presumably
+ // deliberate since the write itself already succeeded; confirm.
+ if batch.internalLen >= mdbFree {
+ db.rotateMem(0, false)
+ }
+
+ db.unlockWrite(overflow, merged, nil)
+ return nil
+}
+
+// Write applies the given batch to the DB. The batch records will be applied
+// sequentially. Write might be used concurrently, when used concurrently and
+// batch is small enough, write will try to merge the batches. Set NoWriteMerge
+// option to true to disable write merge.
+//
+// A batch larger than the write buffer is written through an internal
+// transaction (unless DisableLargeBatchTransaction is set); that transaction
+// is discarded if writing or committing it fails.
+//
+// It is safe to modify the contents of the arguments after Write returns but
+// not before. Write will not modify content of the batch.
+func (db *DB) Write(batch *Batch, wo *opt.WriteOptions) error {
+	if err := db.ok(); err != nil || batch == nil || batch.Len() == 0 {
+		return err
+	}
+
+	// If the batch size is larger than write buffer, it may justified to write
+	// using transaction instead. Using transaction the batch will be written
+	// into tables directly, skipping the journaling.
+	if batch.internalLen > db.s.o.GetWriteBuffer() && !db.s.o.GetDisableLargeBatchTransaction() {
+		tr, err := db.OpenTransaction()
+		if err != nil {
+			return err
+		}
+		if err := tr.Write(batch, wo); err != nil {
+			tr.Discard()
+			return err
+		}
+		// A failed Commit leaves the transaction open and the write lock
+		// held. The caller never sees this transaction, so discard it here
+		// rather than leaving later writers blocked.
+		if err := tr.Commit(); err != nil {
+			tr.Discard()
+			return err
+		}
+		return nil
+	}
+
+	merge := !wo.GetNoWriteMerge() && !db.s.o.GetNoWriteMerge()
+	sync := wo.GetSync() && !db.s.o.GetNoSync()
+
+	// Acquire write lock.
+	if merge {
+		select {
+		case db.writeMergeC <- writeMerge{sync: sync, batch: batch}:
+			if <-db.writeMergedC {
+				// Write is merged.
+				return <-db.writeAckC
+			}
+			// Write is not merged, the write lock is handed to us. Continue.
+		case db.writeLockC <- struct{}{}:
+			// Write lock acquired.
+		case err := <-db.compPerErrC:
+			// Compaction error.
+			return err
+		case <-db.closeC:
+			// Closed
+			return ErrClosed
+		}
+	} else {
+		select {
+		case db.writeLockC <- struct{}{}:
+			// Write lock acquired.
+		case err := <-db.compPerErrC:
+			// Compaction error.
+			return err
+		case <-db.closeC:
+			// Closed
+			return ErrClosed
+		}
+	}
+
+	return db.writeLocked(batch, nil, merge, sync)
+}
+
+// putRec writes a single record of type kt. With write merge enabled it
+// first offers the record to the current write-lock holder; if that writer
+// merges it, putRec only waits for the acknowledgement. Otherwise it takes
+// (or is handed) the write lock, wraps the record in a pooled batch and
+// writes it via writeLocked. It returns the persistent compaction error or
+// ErrClosed if one of those is signalled while waiting for the lock.
+func (db *DB) putRec(kt keyType, key, value []byte, wo *opt.WriteOptions) error {
+ if err := db.ok(); err != nil {
+ return err
+ }
+
+ merge := !wo.GetNoWriteMerge() && !db.s.o.GetNoWriteMerge()
+ sync := wo.GetSync() && !db.s.o.GetNoSync()
+
+ // Acquire write lock.
+ if merge {
+ select {
+ case db.writeMergeC <- writeMerge{sync: sync, keyType: kt, key: key, value: value}:
+ if <-db.writeMergedC {
+ // Write is merged.
+ return <-db.writeAckC
+ }
+ // Write is not merged, the write lock is handed to us. Continue.
+ case db.writeLockC <- struct{}{}:
+ // Write lock acquired.
+ case err := <-db.compPerErrC:
+ // Compaction error.
+ return err
+ case <-db.closeC:
+ // Closed
+ return ErrClosed
+ }
+ } else {
+ select {
+ case db.writeLockC <- struct{}{}:
+ // Write lock acquired.
+ case err := <-db.compPerErrC:
+ // Compaction error.
+ return err
+ case <-db.closeC:
+ // Closed
+ return ErrClosed
+ }
+ }
+
+ // The same batch is passed as both batch and ourBatch so that merged
+ // single-record writes can be appended to it.
+ batch := db.batchPool.Get().(*Batch)
+ batch.Reset()
+ batch.appendRec(kt, key, value)
+ return db.writeLocked(batch, batch, merge, sync)
+}
+
+// Put sets the value for the given key, overwriting any previous value for
+// that key; a DB is not a multi-map. Write merge also applies to Put, see
+// Write.
+//
+// It is safe to modify the contents of the arguments after Put returns but not
+// before.
+func (db *DB) Put(key, value []byte, wo *opt.WriteOptions) error {
+	err := db.putRec(keyTypeVal, key, value, wo)
+	return err
+}
+
+// Delete deletes the value for the given key. Delete does not return an
+// error if the key doesn't exist. Write merge also applies to Delete, see
+// Write.
+//
+// It is safe to modify the contents of the arguments after Delete returns but
+// not before.
+func (db *DB) Delete(key []byte, wo *opt.WriteOptions) error {
+	err := db.putRec(keyTypeDel, key, nil, wo)
+	return err
+}
+
+// isMemOverlaps reports whether the user-key range [min, max] overlaps the
+// keys stored in mem. A nil min or max leaves that side unbounded. For a
+// non-nil bound an empty mem never overlaps.
+func isMemOverlaps(icmp *iComparer, mem *memdb.DB, min, max []byte) bool {
+	it := mem.NewIterator(nil)
+	defer it.Release()
+
+	// max must not sort before the smallest key in mem.
+	if max != nil {
+		if !it.First() || icmp.uCompare(max, internalKey(it.Key()).ukey()) < 0 {
+			return false
+		}
+	}
+	// min must not sort after the largest key in mem.
+	if min != nil {
+		if !it.Last() || icmp.uCompare(min, internalKey(it.Key()).ukey()) > 0 {
+			return false
+		}
+	}
+	return true
+}
+
+// CompactRange compacts the underlying DB for the given key range.
+// In particular, deleted and overwritten versions are discarded,
+// and the data is rearranged to reduce the cost of operations
+// needed to access the data. This operation should typically only
+// be invoked by users who understand the underlying implementation.
+//
+// A nil Range.Start is treated as a key before all keys in the DB.
+// And a nil Range.Limit is treated as a key after all keys in the DB.
+// Therefore if both are nil then it will compact the entire DB.
+//
+// The write lock is held only while the memdb is checked and, if it overlaps
+// the range, rotated; it is released on every return path before the table
+// compaction is requested.
+func (db *DB) CompactRange(r util.Range) error {
+	if err := db.ok(); err != nil {
+		return err
+	}
+
+	// Lock writer.
+	select {
+	case db.writeLockC <- struct{}{}:
+	case err := <-db.compPerErrC:
+		return err
+	case <-db.closeC:
+		return ErrClosed
+	}
+
+	// Check for overlaps in memdb.
+	mdb := db.getEffectiveMem()
+	if mdb == nil {
+		// Release the writer lock here as well, like every other exit
+		// path taken after the lock was acquired.
+		<-db.writeLockC
+		return ErrClosed
+	}
+	defer mdb.decref()
+	if isMemOverlaps(db.s.icmp, mdb.DB, r.Start, r.Limit) {
+		// Memdb compaction.
+		if _, err := db.rotateMem(0, false); err != nil {
+			<-db.writeLockC
+			return err
+		}
+		<-db.writeLockC
+		if err := db.compTriggerWait(db.mcompCmdC); err != nil {
+			return err
+		}
+	} else {
+		<-db.writeLockC
+	}
+
+	// Table compaction.
+	return db.compTriggerRange(db.tcompCmdC, -1, r.Start, r.Limit)
+}
+
+// SetReadOnly makes DB read-only. It will stay read-only until reopened.
+//
+// It takes the write lock, records that in db.compWriteLocking, and sends
+// ErrReadOnly on db.compErrSetC. The write lock is not released by this
+// function. It returns the persistent compaction error or ErrClosed if one
+// of those is signalled first.
+func (db *DB) SetReadOnly() error {
+ if err := db.ok(); err != nil {
+ return err
+ }
+
+ // Lock writer.
+ select {
+ case db.writeLockC <- struct{}{}:
+ db.compWriteLocking = true
+ case err := <-db.compPerErrC:
+ return err
+ case <-db.closeC:
+ return ErrClosed
+ }
+
+ // Set compaction read-only.
+ select {
+ case db.compErrSetC <- ErrReadOnly:
+ case perr := <-db.compPerErrC:
+ return perr
+ case <-db.closeC:
+ return ErrClosed
+ }
+
+ return nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/doc.go b/vendor/github.com/syndtr/goleveldb/leveldb/doc.go
new file mode 100644
index 000000000..53f13bb24
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/doc.go
@@ -0,0 +1,90 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package leveldb provides implementation of LevelDB key/value database.
+//
+// Create or open a database:
+//
+// db, err := leveldb.OpenFile("path/to/db", nil)
+// ...
+// defer db.Close()
+// ...
+//
+// Read or modify the database content:
+//
+// // Remember that the contents of the returned slice should not be modified.
+// data, err := db.Get([]byte("key"), nil)
+// ...
+// err = db.Put([]byte("key"), []byte("value"), nil)
+// ...
+// err = db.Delete([]byte("key"), nil)
+// ...
+//
+// Iterate over database content:
+//
+// iter := db.NewIterator(nil, nil)
+// for iter.Next() {
+// // Remember that the contents of the returned slice should not be modified, and
+// // only valid until the next call to Next.
+// key := iter.Key()
+// value := iter.Value()
+// ...
+// }
+// iter.Release()
+// err = iter.Error()
+// ...
+//
+// Iterate over subset of database content with a particular prefix:
+//
+// iter := db.NewIterator(util.BytesPrefix([]byte("foo-")), nil)
+// for iter.Next() {
+// // Use key/value.
+// ...
+// }
+// iter.Release()
+// err = iter.Error()
+// ...
+//
+// Seek-then-Iterate:
+//
+// iter := db.NewIterator(nil, nil)
+// for ok := iter.Seek(key); ok; ok = iter.Next() {
+// // Use key/value.
+// ...
+// }
+// iter.Release()
+// err = iter.Error()
+// ...
+//
+// Iterate over subset of database content:
+//
+// iter := db.NewIterator(&util.Range{Start: []byte("foo"), Limit: []byte("xoo")}, nil)
+// for iter.Next() {
+// // Use key/value.
+// ...
+// }
+// iter.Release()
+// err = iter.Error()
+// ...
+//
+// Batch writes:
+//
+// batch := new(leveldb.Batch)
+// batch.Put([]byte("foo"), []byte("value"))
+// batch.Put([]byte("bar"), []byte("another value"))
+// batch.Delete([]byte("baz"))
+// err = db.Write(batch, nil)
+// ...
+//
+// Use bloom filter:
+//
+// o := &opt.Options{
+// Filter: filter.NewBloomFilter(10),
+// }
+// db, err := leveldb.OpenFile("path/to/db", o)
+// ...
+// defer db.Close()
+// ...
+package leveldb
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/errors.go b/vendor/github.com/syndtr/goleveldb/leveldb/errors.go
new file mode 100644
index 000000000..de2649812
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/errors.go
@@ -0,0 +1,20 @@
+// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/errors"
+)
+
+// Common errors.
+var (
+ ErrNotFound = errors.ErrNotFound
+ // ErrReadOnly is the error SetReadOnly installs as the compaction error.
+ ErrReadOnly = errors.New("leveldb: read-only mode")
+ // ErrSnapshotReleased is reported when a released snapshot is used.
+ ErrSnapshotReleased = errors.New("leveldb: snapshot released")
+ ErrIterReleased = errors.New("leveldb: iterator released")
+ // ErrClosed is returned by operations on a closed DB.
+ ErrClosed = errors.New("leveldb: closed")
+)
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go b/vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go
new file mode 100644
index 000000000..8d6146b6f
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go
@@ -0,0 +1,78 @@
+// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package errors provides common error types used throughout leveldb.
+package errors
+
+import (
+ "errors"
+ "fmt"
+
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// Common errors.
+var (
+ // ErrNotFound is the sentinel for a missing entry.
+ ErrNotFound = New("leveldb: not found")
+ // ErrReleased and ErrHasReleaser are the same values as the ones
+ // declared in the util package, re-exported here.
+ ErrReleased = util.ErrReleased
+ ErrHasReleaser = util.ErrHasReleaser
+)
+
+// New returns an error that formats as the given text. It is a thin wrapper
+// around the standard library's errors.New.
+func New(text string) error {
+ return errors.New(text)
+}
+
+// ErrCorrupted is the type that wraps errors that indicate corruption in
+// the database.
+type ErrCorrupted struct {
+ // Fd is the descriptor of the affected file. When it is non-zero it is
+ // appended to the message produced by Error.
+ Fd storage.FileDesc
+ // Err is the underlying error being wrapped.
+ Err error
+}
+
+// Error implements the error interface. The message is that of the wrapped
+// error; when a file descriptor is set, it is appended as " [file=...]".
+func (e *ErrCorrupted) Error() string {
+	if e.Fd.Zero() {
+		// No file information attached: report the wrapped error verbatim.
+		return e.Err.Error()
+	}
+	return fmt.Sprintf("%v [file=%v]", e.Err, e.Fd)
+}
+
+// NewErrCorrupted wraps err in a new *ErrCorrupted associated with the file
+// descriptor fd.
+func NewErrCorrupted(fd storage.FileDesc, err error) error {
+	return &ErrCorrupted{
+		Fd:  fd,
+		Err: err,
+	}
+}
+
+// IsCorrupted reports whether err indicates a corruption, i.e. whether its
+// dynamic type is *ErrCorrupted or *storage.ErrCorrupted. Only the error
+// itself is inspected; any other error, including nil, yields false.
+func IsCorrupted(err error) bool {
+	switch err.(type) {
+	case *ErrCorrupted, *storage.ErrCorrupted:
+		return true
+	default:
+		return false
+	}
+}
+
+// ErrMissingFiles is the type that indicates a corruption due to missing
+// files. ErrMissingFiles is always wrapped with ErrCorrupted.
+type ErrMissingFiles struct {
+ // Fds lists the descriptors of the missing files.
+ Fds []storage.FileDesc
+}
+
+// Error implements the error interface. The message is constant and does not
+// enumerate Fds.
+func (e *ErrMissingFiles) Error() string { return "file missing" }
+
+// SetFd sets 'file info' of the given error with the given file.
+// Currently only ErrCorrupted is supported, otherwise will do nothing.
+func SetFd(err error, fd storage.FileDesc) error {
+ switch x := err.(type) {
+ case *ErrCorrupted:
+ x.Fd = fd
+ return x
+ }
+ return err
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/filter.go b/vendor/github.com/syndtr/goleveldb/leveldb/filter.go
new file mode 100644
index 000000000..e961e420d
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/filter.go
@@ -0,0 +1,31 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/filter"
+)
+
+// iFilter wraps a filter.Filter so that keys are converted with
+// internalKey(key).ukey() before they reach the wrapped filter.
+// NOTE(review): internalKey is defined elsewhere; presumably ukey() yields
+// the user-key portion of an internal key -- confirm.
+type iFilter struct {
+ filter.Filter
+}
+
+// Contains forwards to the wrapped filter using the ukey() form of key.
+func (f iFilter) Contains(filter, key []byte) bool {
+ return f.Filter.Contains(filter, internalKey(key).ukey())
+}
+
+// NewGenerator returns the wrapped filter's generator wrapped in an
+// iFilterGenerator, so that added keys get the same conversion.
+func (f iFilter) NewGenerator() filter.FilterGenerator {
+ return iFilterGenerator{f.Filter.NewGenerator()}
+}
+
+// iFilterGenerator wraps a filter.FilterGenerator and converts every added
+// key with internalKey(key).ukey().
+type iFilterGenerator struct {
+ filter.FilterGenerator
+}
+
+// Add forwards the ukey() form of key to the wrapped generator.
+func (g iFilterGenerator) Add(key []byte) {
+ g.FilterGenerator.Add(internalKey(key).ukey())
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go b/vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go
new file mode 100644
index 000000000..bab0e9970
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go
@@ -0,0 +1,116 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package filter
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// bloomHash returns the 32-bit hash of key used by the bloom filter. It calls
+// util.Hash with the fixed seed 0xbc9f1d34.
+func bloomHash(key []byte) uint32 {
+ return util.Hash(key, 0xbc9f1d34)
+}
+
+// bloomFilter is a bloom filter policy; its integer value is the number of
+// bits allotted per key.
+type bloomFilter int
+
+// Name returns the name of this filter policy.
+//
+// The bloom filter serializes its parameters and is backward compatible
+// with respect to them. Therefore, its parameters are not added to its
+// name.
+func (bloomFilter) Name() string {
+ return "leveldb.BuiltinBloomFilter"
+}
+
+// Contains reports whether key may be a member of the set encoded in filter.
+// A false result means the key is definitely absent; true may be a false
+// positive.
+//
+// The filter layout is the bit array followed by one trailing byte holding
+// the probe count k. A filter with fewer than one bit-array byte never
+// matches.
+func (f bloomFilter) Contains(filter, key []byte) bool {
+ nBytes := len(filter) - 1
+ if nBytes < 1 {
+ return false
+ }
+ nBits := uint32(nBytes * 8)
+
+ // Use the encoded k so that we can read filters generated by
+ // bloom filters created using different parameters.
+ k := filter[nBytes]
+ if k > 30 {
+ // Reserved for potentially new encodings for short bloom filters.
+ // Consider it a match.
+ return true
+ }
+
+ // Probe k bit positions derived from a single hash: start at kh and add
+ // delta after every probe.
+ kh := bloomHash(key)
+ delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits
+ for j := uint8(0); j < k; j++ {
+ bitpos := kh % nBits
+ if (uint32(filter[bitpos/8]) & (1 << (bitpos % 8))) == 0 {
+ return false
+ }
+ kh += delta
+ }
+ return true
+}
+
+// NewGenerator returns a generator that builds bloom filters using f bits per
+// key. The number of probes per key is f*0.69 rounded down and clamped to the
+// range [1, 30].
+func (f bloomFilter) NewGenerator() FilterGenerator {
+	// Round down to reduce probing cost a little bit.
+	//
+	// The clamp is applied in int space and only then narrowed to uint8.
+	// Narrowing first would wrap large values modulo 256 (e.g. 400 bits per
+	// key gives 276, which wraps to 20 probes instead of the 30 cap) and
+	// would turn negative values into large ones.
+	k := int(f) * 69 / 100 // 0.69 =~ ln(2)
+	if k < 1 {
+		k = 1
+	} else if k > 30 {
+		k = 30
+	}
+	return &bloomFilterGenerator{
+		n: int(f),
+		k: uint8(k),
+	}
+}
+
+// bloomFilterGenerator accumulates key hashes and turns them into a bloom
+// filter when Generate is called.
+type bloomFilterGenerator struct {
+ // n is the number of bits per key.
+ n int
+ // k is the number of probes per key; it is stored in the filter's
+ // trailing byte.
+ k uint8
+
+ // keyHashes holds the hash of every key added since the last Generate.
+ keyHashes []uint32
+}
+
+// Add records the hash of key. Only the hash is retained, not the key itself.
+func (g *bloomFilterGenerator) Add(key []byte) {
+ // Use double-hashing to generate a sequence of hash values.
+ // See analysis in [Kirsch,Mitzenmacher 2006].
+ g.keyHashes = append(g.keyHashes, bloomHash(key))
+}
+
+// Generate writes a bloom filter for all keys added so far into b and then
+// resets the generator, keeping the hash slice's capacity for reuse.
+//
+// The output is the bit array followed by one byte holding the probe count k.
+func (g *bloomFilterGenerator) Generate(b Buffer) {
+ // Compute bloom filter size (in both bits and bytes)
+ nBits := uint32(len(g.keyHashes) * g.n)
+ // For small n, we can see a very high false positive rate. Fix it
+ // by enforcing a minimum bloom filter length.
+ if nBits < 64 {
+ nBits = 64
+ }
+ // Round up to a whole number of bytes.
+ nBytes := (nBits + 7) / 8
+ nBits = nBytes * 8
+
+ dest := b.Alloc(int(nBytes) + 1)
+ // The trailing byte records k so readers can probe with the same count.
+ dest[nBytes] = g.k
+ for _, kh := range g.keyHashes {
+ delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits
+ for j := uint8(0); j < g.k; j++ {
+ bitpos := kh % nBits
+ dest[bitpos/8] |= (1 << (bitpos % 8))
+ kh += delta
+ }
+ }
+
+ g.keyHashes = g.keyHashes[:0]
+}
+
+// NewBloomFilter creates a new initialized bloom filter for given
+// bitsPerKey.
+//
+// Since the probe count derived from bitsPerKey is persisted individually
+// in each serialized bloom filter, bloom filters are backwards compatible
+// with respect to changing bitsPerKey. This means that no big performance
+// penalty will be experienced when changing the parameter. See documentation
+// for opt.Options.Filter for more information.
+func NewBloomFilter(bitsPerKey int) Filter {
+ return bloomFilter(bitsPerKey)
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/filter/filter.go b/vendor/github.com/syndtr/goleveldb/leveldb/filter/filter.go
new file mode 100644
index 000000000..7a925c5a8
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/filter/filter.go
@@ -0,0 +1,60 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package filter provides interface and implementation of probabilistic
+// data structure.
+//
+// The filter is responsible for creating a small filter from a set of keys.
+// These filters will then be used to test whether a key is a member of the set.
+// In many cases, a filter can cut down the number of disk seeks from a
+// handful to a single disk seek per DB.Get call.
+package filter
+
+// Buffer is the interface that wraps basic Alloc, Write and WriteByte methods.
+type Buffer interface {
+ // Alloc allocates a slice of n bytes from the buffer and advances the
+ // write offset accordingly.
+ Alloc(n int) []byte
+
+ // Write appends the contents of p to the buffer.
+ Write(p []byte) (n int, err error)
+
+ // WriteByte appends the byte c to the buffer.
+ WriteByte(c byte) error
+}
+
+// Filter is the interface implemented by filter policies.
+type Filter interface {
+ // Name returns the name of this policy.
+ //
+ // Note that if the filter encoding changes in an incompatible way,
+ // the name returned by this method must be changed. Otherwise, old
+ // incompatible filters may be passed to methods of this type.
+ Name() string
+
+ // NewGenerator creates a new filter generator.
+ NewGenerator() FilterGenerator
+
+ // Contains returns true if the filter contains the given key.
+ //
+ // The filter argument is a filter produced by the filter generator.
+ Contains(filter, key []byte) bool
+}
+
+// FilterGenerator is the filter generator.
+type FilterGenerator interface {
+ // Add adds a key to the filter generator.
+ //
+ // The key may become invalid after the call to this method ends;
+ // therefore the key must be copied if the implementation needs to keep
+ // it for later use. The key should not be modified directly, doing so
+ // may cause undefined results.
+ Add(key []byte)
+
+ // Generate generates filters based on keys passed so far. After a call
+ // to Generate the filter generator may be reset, depending on the
+ // implementation.
+ Generate(b Buffer)
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go
new file mode 100644
index 000000000..a23ab05f7
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go
@@ -0,0 +1,184 @@
+// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package iterator
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// BasicArray is the interface that wraps basic Len and Search method.
+type BasicArray interface {
+ // Len returns length of the array.
+ Len() int
+
+ // Search finds the smallest index that points to a key that is greater
+ // than or equal to the given key.
+ Search(key []byte) int
+}
+
+// Array is the interface that wraps BasicArray and basic Index method.
+type Array interface {
+ BasicArray
+
+ // Index returns the key/value pair at index i.
+ Index(i int) (key, value []byte)
+}
+
+// ArrayIndexer is the interface that wraps BasicArray and basic Get method.
+type ArrayIndexer interface {
+ BasicArray
+
+ // Get returns a new data iterator for index i.
+ Get(i int) Iterator
+}
+
+// basicArrayIterator implements cursor movement over a BasicArray. It keeps
+// only a position; key/value access is added by the types that embed it.
+type basicArrayIterator struct {
+	util.BasicReleaser
+	array BasicArray
+	pos   int // -1 means "before the first element"
+	err   error
+}
+
+// Valid reports whether the cursor points at an element and the iterator has
+// not been released.
+func (i *basicArrayIterator) Valid() bool {
+	inRange := i.pos >= 0 && i.pos < i.array.Len()
+	return inRange && !i.Released()
+}
+
+// First moves to the first element. It returns false if the array is empty
+// or the iterator has been released.
+func (i *basicArrayIterator) First() bool {
+	if i.Released() {
+		i.err = ErrIterReleased
+		return false
+	}
+
+	if i.array.Len() != 0 {
+		i.pos = 0
+		return true
+	}
+	i.pos = -1
+	return false
+}
+
+// Last moves to the last element. It returns false if the array is empty or
+// the iterator has been released.
+func (i *basicArrayIterator) Last() bool {
+	if i.Released() {
+		i.err = ErrIterReleased
+		return false
+	}
+
+	switch n := i.array.Len(); n {
+	case 0:
+		i.pos = 0
+		return false
+	default:
+		i.pos = n - 1
+		return true
+	}
+}
+
+// Seek moves to the index returned by the array's Search for key. It returns
+// false if that index is past the end, the array is empty, or the iterator
+// has been released.
+func (i *basicArrayIterator) Seek(key []byte) bool {
+	if i.Released() {
+		i.err = ErrIterReleased
+		return false
+	}
+
+	n := i.array.Len()
+	if n == 0 {
+		i.pos = 0
+		return false
+	}
+	i.pos = i.array.Search(key)
+	return i.pos < n
+}
+
+// Next advances by one element. When the end is passed the cursor is parked
+// at Len() and false is returned.
+func (i *basicArrayIterator) Next() bool {
+	if i.Released() {
+		i.err = ErrIterReleased
+		return false
+	}
+
+	i.pos++
+	n := i.array.Len()
+	if i.pos < n {
+		return true
+	}
+	i.pos = n
+	return false
+}
+
+// Prev steps back by one element. When the start is passed the cursor is
+// parked at -1 and false is returned.
+func (i *basicArrayIterator) Prev() bool {
+	if i.Released() {
+		i.err = ErrIterReleased
+		return false
+	}
+
+	i.pos--
+	if i.pos >= 0 {
+		return true
+	}
+	i.pos = -1
+	return false
+}
+
+// Error returns the recorded error, if any.
+func (i *basicArrayIterator) Error() error { return i.err }
+
+// arrayIterator is an Iterator over an Array. Cursor movement comes from the
+// embedded basicArrayIterator; this type adds lazily loaded key/value access.
+type arrayIterator struct {
+ basicArrayIterator
+ // array shadows basicArrayIterator.array with the richer Array type so
+ // that Index can be called.
+ array Array
+ // pos shadows basicArrayIterator.pos. It records the position for which
+ // key and value were last loaded, and is compared against the embedded
+ // cursor to detect movement.
+ pos int
+ key, value []byte
+}
+
+// updateKV refreshes the cached key/value if the embedded cursor has moved
+// since the last load. At an invalid position both are set to nil.
+func (i *arrayIterator) updateKV() {
+ if i.pos == i.basicArrayIterator.pos {
+ return
+ }
+ i.pos = i.basicArrayIterator.pos
+ if i.Valid() {
+ i.key, i.value = i.array.Index(i.pos)
+ } else {
+ i.key = nil
+ i.value = nil
+ }
+}
+
+// Key returns the key at the current position, or nil if the position is not
+// valid.
+func (i *arrayIterator) Key() []byte {
+ i.updateKV()
+ return i.key
+}
+
+// Value returns the value at the current position, or nil if the position is
+// not valid.
+func (i *arrayIterator) Value() []byte {
+ i.updateKV()
+ return i.value
+}
+
+// arrayIteratorIndexer is an IteratorIndexer over an ArrayIndexer. Cursor
+// movement comes from the embedded basicArrayIterator.
+type arrayIteratorIndexer struct {
+	basicArrayIterator
+	array ArrayIndexer
+}
+
+// Get returns the data iterator for the current position, or nil when the
+// position is not valid.
+func (i *arrayIteratorIndexer) Get() Iterator {
+	if !i.Valid() {
+		return nil
+	}
+	return i.array.Get(i.basicArrayIterator.pos)
+}
+
+// NewArrayIterator returns an iterator from the given array. The iterator
+// starts positioned before the first element (position -1).
+func NewArrayIterator(array Array) Iterator {
+ return &arrayIterator{
+ basicArrayIterator: basicArrayIterator{array: array, pos: -1},
+ array: array,
+ pos: -1,
+ }
+}
+
+// NewArrayIndexer returns an index iterator from the given array. The
+// iterator starts positioned before the first element (position -1).
+func NewArrayIndexer(array ArrayIndexer) IteratorIndexer {
+ return &arrayIteratorIndexer{
+ basicArrayIterator: basicArrayIterator{array: array, pos: -1},
+ array: array,
+ }
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go
new file mode 100644
index 000000000..939adbb93
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go
@@ -0,0 +1,242 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package iterator
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// IteratorIndexer is the interface that wraps CommonIterator and basic Get
+// method. IteratorIndexer provides the index for an indexed iterator.
+type IteratorIndexer interface {
+ CommonIterator
+
+ // Get returns a new data iterator for the current position, or nil if
+ // done.
+ Get() Iterator
+}
+
+// indexedIterator iterates over the key/value pairs of the data iterators
+// produced, one per index position, by an IteratorIndexer.
+type indexedIterator struct {
+ util.BasicReleaser
+ // index yields one data iterator per index position.
+ index IteratorIndexer
+ // strict makes corruption errors from data iterators fatal (see dataErr).
+ strict bool
+
+ // data is the data iterator for the current index position, or nil.
+ data Iterator
+ // err is the first fatal error seen; once set, all seek methods return
+ // false.
+ err error
+ // errf, if non-nil, is called with every error observed.
+ errf func(err error)
+ // NOTE(review): closed is not referenced by any method of this type
+ // visible here.
+ closed bool
+}
+
+// setData releases the current data iterator, if any, and replaces it with
+// the one the index yields for its current position.
+func (i *indexedIterator) setData() {
+	if old := i.data; old != nil {
+		old.Release()
+	}
+	i.data = i.index.Get()
+}
+
+// clearData releases the current data iterator, if any, and leaves the
+// indexed iterator without one.
+func (i *indexedIterator) clearData() {
+	if old := i.data; old != nil {
+		old.Release()
+	}
+	i.data = nil
+}
+
+// indexErr checks the index iterator for an error. If there is one, it is
+// reported to the error callback (when set) and recorded as fatal.
+func (i *indexedIterator) indexErr() {
+ if err := i.index.Error(); err != nil {
+ if i.errf != nil {
+ i.errf(err)
+ }
+ i.err = err
+ }
+}
+
+// dataErr checks the current data iterator for an error and reports it to the
+// error callback (when set). It returns true, and records the error as fatal,
+// unless the error is a corruption error and strict mode is off; in that case
+// the error is only reported and iteration may continue.
+//
+// The caller must ensure i.data is non-nil.
+func (i *indexedIterator) dataErr() bool {
+ if err := i.data.Error(); err != nil {
+ if i.errf != nil {
+ i.errf(err)
+ }
+ if i.strict || !errors.IsCorrupted(err) {
+ i.err = err
+ return true
+ }
+ }
+ return false
+}
+
+// Valid reports whether there is a current data iterator and it is valid.
+func (i *indexedIterator) Valid() bool {
+ return i.data != nil && i.data.Valid()
+}
+
+// First moves to the first key/value pair and reports whether one exists.
+// It returns false if a fatal error was already recorded or the iterator has
+// been released.
+func (i *indexedIterator) First() bool {
+ if i.err != nil {
+ return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if !i.index.First() {
+ i.indexErr()
+ i.clearData()
+ return false
+ }
+ i.setData()
+ // Next advances the newly loaded data iterator and moves on to later
+ // index entries while a data iterator turns out to be exhausted.
+ // NOTE(review): this presumes a fresh data iterator starts before its
+ // first pair -- confirm.
+ return i.Next()
+}
+
+// Last moves to the last key/value pair and reports whether one exists.
+// It returns false if a fatal error was already recorded or the iterator has
+// been released.
+func (i *indexedIterator) Last() bool {
+ if i.err != nil {
+ return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if !i.index.Last() {
+ i.indexErr()
+ i.clearData()
+ return false
+ }
+ i.setData()
+ if !i.data.Last() {
+ if i.dataErr() {
+ return false
+ }
+ // The last data iterator has no pairs (or had a tolerated corruption
+ // error): drop it and walk backwards through earlier index entries.
+ i.clearData()
+ return i.Prev()
+ }
+ return true
+}
+
+// Seek moves to the first key/value pair whose key is greater than or equal
+// to key and reports whether one exists. The index is sought first, then the
+// data iterator it yields.
+func (i *indexedIterator) Seek(key []byte) bool {
+ if i.err != nil {
+ return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if !i.index.Seek(key) {
+ i.indexErr()
+ i.clearData()
+ return false
+ }
+ i.setData()
+ if !i.data.Seek(key) {
+ if i.dataErr() {
+ return false
+ }
+ // Nothing at or after key in this data iterator: continue with the
+ // following index entries.
+ i.clearData()
+ return i.Next()
+ }
+ return true
+}
+
+// Next moves to the next key/value pair. It returns false when there is no
+// such pair, when a fatal error has been recorded, or when the iterator has
+// been released.
+func (i *indexedIterator) Next() bool {
+ if i.err != nil {
+ return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ switch {
+ case i.data != nil && !i.data.Next():
+ // Current data iterator is exhausted (or failed).
+ if i.dataErr() {
+ return false
+ }
+ i.clearData()
+ // Continue with the next index entry.
+ fallthrough
+ case i.data == nil:
+ if !i.index.Next() {
+ i.indexErr()
+ return false
+ }
+ i.setData()
+ // Recurse to step into the new data iterator, skipping over any that
+ // yield nothing.
+ return i.Next()
+ }
+ return true
+}
+
+// Prev moves to the previous key/value pair. It returns false when there is
+// no such pair, when a fatal error has been recorded, or when the iterator
+// has been released.
+func (i *indexedIterator) Prev() bool {
+ if i.err != nil {
+ return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ switch {
+ case i.data != nil && !i.data.Prev():
+ // Current data iterator has nothing before its position (or failed).
+ if i.dataErr() {
+ return false
+ }
+ i.clearData()
+ // Continue with the previous index entry.
+ fallthrough
+ case i.data == nil:
+ if !i.index.Prev() {
+ i.indexErr()
+ return false
+ }
+ i.setData()
+ // Enter the previous data iterator at its last pair; if it has none,
+ // keep walking backwards.
+ if !i.data.Last() {
+ if i.dataErr() {
+ return false
+ }
+ i.clearData()
+ return i.Prev()
+ }
+ }
+ return true
+}
+
+// Key returns the key reported by the current data iterator, or nil when
+// there is no current data iterator.
+func (i *indexedIterator) Key() []byte {
+	if d := i.data; d != nil {
+		return d.Key()
+	}
+	return nil
+}
+
+// Value returns the value reported by the current data iterator, or nil when
+// there is no current data iterator.
+func (i *indexedIterator) Value() []byte {
+	if d := i.data; d != nil {
+		return d.Value()
+	}
+	return nil
+}
+
+// Release releases the current data iterator (if any), then the index
+// iterator, and finally the embedded BasicReleaser.
+func (i *indexedIterator) Release() {
+ i.clearData()
+ i.index.Release()
+ i.BasicReleaser.Release()
+}
+
+// Error returns the recorded fatal error if there is one; otherwise it
+// returns whatever error the index iterator currently reports (nil if none).
+func (i *indexedIterator) Error() error {
+	if i.err != nil {
+		return i.err
+	}
+	return i.index.Error()
+}
+
+// SetErrorCallback sets the function called with every error observed on the
+// index or data iterators. Passing nil clears the callback.
+func (i *indexedIterator) SetErrorCallback(f func(err error)) {
+ i.errf = f
+}
+
+// NewIndexedIterator returns an 'indexed iterator'. An index is an iterator
+// that returns another iterator, a 'data iterator'. A 'data iterator' is the
+// iterator that contains actual key/value pairs.
+//
+// If strict is true then any 'corruption errors' (i.e errors.IsCorrupted(err) == true)
+// won't be ignored and will halt the 'indexed iterator', otherwise the iterator will
+// continue to the next 'data iterator'. Corruption on the 'index iterator' will not be
+// ignored and will halt the iterator.
+func NewIndexedIterator(index IteratorIndexer, strict bool) Iterator {
+ return &indexedIterator{index: index, strict: strict}
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/iter.go
new file mode 100644
index 000000000..3b5553274
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/iter.go
@@ -0,0 +1,132 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package iterator provides interface and implementation to traverse over
+// contents of a database.
+package iterator
+
+import (
+ "errors"
+
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+var (
+ // ErrIterReleased is the error recorded by the iterators in this package
+ // when a seek method is called after the iterator has been released.
+ ErrIterReleased = errors.New("leveldb/iterator: iterator released")
+)
+
+// IteratorSeeker is the interface that wraps the 'seeks method'.
+type IteratorSeeker interface {
+ // First moves the iterator to the first key/value pair. If the iterator
+ // only contains one key/value pair then First and Last would move
+ // to the same key/value pair.
+ // It returns whether such a pair exists.
+ First() bool
+
+ // Last moves the iterator to the last key/value pair. If the iterator
+ // only contains one key/value pair then First and Last would move
+ // to the same key/value pair.
+ // It returns whether such a pair exists.
+ Last() bool
+
+ // Seek moves the iterator to the first key/value pair whose key is greater
+ // than or equal to the given key.
+ // It returns whether such a pair exists.
+ //
+ // It is safe to modify the contents of the argument after Seek returns.
+ Seek(key []byte) bool
+
+ // Next moves the iterator to the next key/value pair.
+ // It returns false if the iterator is exhausted.
+ Next() bool
+
+ // Prev moves the iterator to the previous key/value pair.
+ // It returns false if the iterator is exhausted.
+ Prev() bool
+}
+
+// CommonIterator is the interface that wraps common iterator methods.
+type CommonIterator interface {
+ IteratorSeeker
+
+ // util.Releaser is the interface that wraps the basic Release method.
+ // When called, Release will release any resources associated with the
+ // iterator.
+ util.Releaser
+
+ // util.ReleaseSetter is the interface that wraps the basic SetReleaser
+ // method.
+ util.ReleaseSetter
+
+ // Valid reports whether the iterator is positioned at a key/value pair.
+ // TODO: Remove this when ready.
+ Valid() bool
+
+ // Error returns any accumulated error. Exhausting all the key/value pairs
+ // is not considered to be an error.
+ Error() error
+}
+
+// Iterator iterates over a DB's key/value pairs in key order.
+//
+// When an error is encountered, any 'seeks method' will return false and will
+// yield no key/value pairs. The error can be queried by calling the Error
+// method. Calling Release is still necessary.
+//
+// An iterator must be released after use, but it is not necessary to read
+// an iterator until exhaustion.
+// Also, an iterator is not necessarily safe for concurrent use, but it is
+// safe to use multiple iterators concurrently, with each in a dedicated
+// goroutine.
+type Iterator interface {
+ CommonIterator
+
+ // Key returns the key of the current key/value pair, or nil if done.
+ // The caller should not modify the contents of the returned slice, and
+ // its contents may change on the next call to any 'seeks method'.
+ Key() []byte
+
+ // Value returns the value of the current key/value pair, or nil if done.
+ // The caller should not modify the contents of the returned slice, and
+ // its contents may change on the next call to any 'seeks method'.
+ Value() []byte
+}
+
+// ErrorCallbackSetter is the interface that wraps the basic SetErrorCallback
+// method.
+//
+// ErrorCallbackSetter is implemented by the indexed and merged iterators.
+type ErrorCallbackSetter interface {
+ // SetErrorCallback sets the error callback of the corresponding
+ // iterator. Use nil to clear the callback.
+ SetErrorCallback(f func(err error))
+}
+
+// emptyIterator is an Iterator that never yields a key/value pair. It only
+// carries an optional error.
+type emptyIterator struct {
+ util.BasicReleaser
+ err error
+}
+
+// rErr records ErrIterReleased if the iterator has been released and no
+// error is stored yet; an existing error is never overwritten.
+func (i *emptyIterator) rErr() {
+ if i.err == nil && i.Released() {
+ i.err = ErrIterReleased
+ }
+}
+
+// Every seek method reports false; apart from Valid they first call rErr so
+// that use after release is recorded. Key and Value always return nil.
+func (*emptyIterator) Valid() bool { return false }
+func (i *emptyIterator) First() bool { i.rErr(); return false }
+func (i *emptyIterator) Last() bool { i.rErr(); return false }
+func (i *emptyIterator) Seek(key []byte) bool { i.rErr(); return false }
+func (i *emptyIterator) Next() bool { i.rErr(); return false }
+func (i *emptyIterator) Prev() bool { i.rErr(); return false }
+func (*emptyIterator) Key() []byte { return nil }
+func (*emptyIterator) Value() []byte { return nil }
+func (i *emptyIterator) Error() error { return i.err }
+
+// NewEmptyIterator creates an empty iterator. The err parameter can be
+// nil, but if not nil the given err will be returned by the Error method.
+func NewEmptyIterator(err error) Iterator {
+ return &emptyIterator{err: err}
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go
new file mode 100644
index 000000000..1a7e29df8
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go
@@ -0,0 +1,304 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package iterator
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/comparer"
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// dir is the state of a mergedIterator: released, parked at either end, or
+// positioned on a pair while moving in a given direction.
+type dir int
+
+// The numeric order matters: Valid, Key and Value treat every state greater
+// than dirEOI as "positioned on a pair".
+const (
+ // dirReleased (-1): the iterator has been released.
+ dirReleased dir = iota - 1
+ // dirSOI (0): parked before the first pair; also the zero value of a
+ // new iterator.
+ dirSOI
+ // dirEOI (1): parked after the last pair.
+ dirEOI
+ // dirBackward (2): positioned on a pair, last moved backward.
+ dirBackward
+ // dirForward (3): positioned on a pair, last moved forward.
+ dirForward
+)
+
+// mergedIterator merges several input iterators into a single ordered
+// stream, using cmp to order keys.
+type mergedIterator struct {
+ cmp comparer.Comparer
+ iters []Iterator
+ // strict makes corruption errors from inputs fatal (see iterErr).
+ strict bool
+
+ // keys[x] caches the current key of iters[x], or nil when that input
+ // has no current pair.
+ keys [][]byte
+ // index is the position in iters/keys of the input that supplies the
+ // current pair.
+ index int
+ dir dir
+ // err is the first fatal error seen; once set, all seek methods return
+ // false.
+ err error
+ // errf, if non-nil, is called with every error observed on an input.
+ errf func(err error)
+ releaser util.Releaser
+}
+
+// assertKey returns key unchanged, panicking if it is nil. A nil entry in
+// mergedIterator.keys means "no current pair", so a positioned input must
+// never report a nil key.
+func assertKey(key []byte) []byte {
+	if key != nil {
+		return key
+	}
+	panic("leveldb/iterator: nil key")
+}
+
+// iterErr checks iter for an error and reports it to the error callback
+// (when set). It returns true, and records the error as fatal, unless the
+// error is a corruption error and strict mode is off; in that case the error
+// is only reported and iteration may continue.
+func (i *mergedIterator) iterErr(iter Iterator) bool {
+ if err := iter.Error(); err != nil {
+ if i.errf != nil {
+ i.errf(err)
+ }
+ if i.strict || !errors.IsCorrupted(err) {
+ i.err = err
+ return true
+ }
+ }
+ return false
+}
+
+// Valid reports whether the iterator is positioned on a pair (dirBackward or
+// dirForward) and no fatal error has been recorded.
+func (i *mergedIterator) Valid() bool {
+ return i.err == nil && i.dir > dirEOI
+}
+
+// First moves to the smallest key across all inputs and reports whether one
+// exists. Every input is moved to its first pair and its key cached; next
+// then selects the minimum.
+func (i *mergedIterator) First() bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ for x, iter := range i.iters {
+ switch {
+ case iter.First():
+ i.keys[x] = assertKey(iter.Key())
+ case i.iterErr(iter):
+ return false
+ default:
+ // Input is empty (or had a tolerated error): no candidate key.
+ i.keys[x] = nil
+ }
+ }
+ i.dir = dirSOI
+ return i.next()
+}
+
+// Last moves to the largest key across all inputs and reports whether one
+// exists. Every input is moved to its last pair and its key cached; prev
+// then selects the maximum.
+func (i *mergedIterator) Last() bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ for x, iter := range i.iters {
+ switch {
+ case iter.Last():
+ i.keys[x] = assertKey(iter.Key())
+ case i.iterErr(iter):
+ return false
+ default:
+ // Input is empty (or had a tolerated error): no candidate key.
+ i.keys[x] = nil
+ }
+ }
+ i.dir = dirEOI
+ return i.prev()
+}
+
+// Seek moves to the smallest key that is greater than or equal to key across
+// all inputs and reports whether one exists. Every input is sought to key
+// and its resulting key cached; next then selects the minimum.
+func (i *mergedIterator) Seek(key []byte) bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ for x, iter := range i.iters {
+ switch {
+ case iter.Seek(key):
+ i.keys[x] = assertKey(iter.Key())
+ case i.iterErr(iter):
+ return false
+ default:
+ // Nothing at or after key in this input: no candidate key.
+ i.keys[x] = nil
+ }
+ }
+ i.dir = dirSOI
+ return i.next()
+}
+
+// next selects the smallest cached key, stores its input's position in
+// i.index and sets dir to dirForward. If no input has a cached key, dir
+// becomes dirEOI and false is returned.
+func (i *mergedIterator) next() bool {
+ var key []byte
+ if i.dir == dirForward {
+ // Start from the current input's key so that it is kept unless
+ // another input holds a strictly smaller one.
+ key = i.keys[i.index]
+ }
+ for x, tkey := range i.keys {
+ if tkey != nil && (key == nil || i.cmp.Compare(tkey, key) < 0) {
+ key = tkey
+ i.index = x
+ }
+ }
+ if key == nil {
+ i.dir = dirEOI
+ return false
+ }
+ i.dir = dirForward
+ return true
+}
+
+// Next moves to the next key/value pair in merged order. It returns false
+// when the iterator is already past the end, when a fatal error has been
+// recorded, or when the iterator has been released.
+func (i *mergedIterator) Next() bool {
+ if i.dir == dirEOI || i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ switch i.dir {
+ case dirSOI:
+ // Parked before the first pair: Next is equivalent to First.
+ return i.First()
+ case dirBackward:
+ // Direction change: re-seek every input to the current key (copied,
+ // since Seek overwrites the cached keys), then step past it.
+ key := append([]byte{}, i.keys[i.index]...)
+ if !i.Seek(key) {
+ return false
+ }
+ return i.Next()
+ }
+
+ // Already moving forward: advance only the input that supplied the
+ // current pair, then pick the new minimum.
+ x := i.index
+ iter := i.iters[x]
+ switch {
+ case iter.Next():
+ i.keys[x] = assertKey(iter.Key())
+ case i.iterErr(iter):
+ return false
+ default:
+ i.keys[x] = nil
+ }
+ return i.next()
+}
+
+// prev selects the largest cached key, stores its input's position in
+// i.index and sets dir to dirBackward. If no input has a cached key, dir
+// becomes dirSOI and false is returned.
+func (i *mergedIterator) prev() bool {
+ var key []byte
+ if i.dir == dirBackward {
+ // Start from the current input's key so that it is kept unless
+ // another input holds a strictly larger one.
+ key = i.keys[i.index]
+ }
+ for x, tkey := range i.keys {
+ if tkey != nil && (key == nil || i.cmp.Compare(tkey, key) > 0) {
+ key = tkey
+ i.index = x
+ }
+ }
+ if key == nil {
+ i.dir = dirSOI
+ return false
+ }
+ i.dir = dirBackward
+ return true
+}
+
+// Prev moves to the previous key/value pair in merged order. It returns false
+// when the iterator is already before the start, when a fatal error has been
+// recorded, or when the iterator has been released.
+func (i *mergedIterator) Prev() bool {
+ if i.dir == dirSOI || i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ switch i.dir {
+ case dirEOI:
+ // Parked after the last pair: Prev is equivalent to Last.
+ return i.Last()
+ case dirForward:
+ // Direction change: move every other input to its last pair that
+ // sorts before the current key. The key is copied because the cached
+ // slices may be invalidated by the seeks.
+ key := append([]byte{}, i.keys[i.index]...)
+ for x, iter := range i.iters {
+ if x == i.index {
+ continue
+ }
+ seek := iter.Seek(key)
+ switch {
+ // Found a pair at or after key: step back once. Found nothing:
+ // every pair sorts before key, so take the last one.
+ case seek && iter.Prev(), !seek && iter.Last():
+ i.keys[x] = assertKey(iter.Key())
+ case i.iterErr(iter):
+ return false
+ default:
+ i.keys[x] = nil
+ }
+ }
+ }
+
+ // Step the input that supplied the current pair back by one, then pick
+ // the new maximum.
+ x := i.index
+ iter := i.iters[x]
+ switch {
+ case iter.Prev():
+ i.keys[x] = assertKey(iter.Key())
+ case i.iterErr(iter):
+ return false
+ default:
+ i.keys[x] = nil
+ }
+ return i.prev()
+}
+
+// Key returns the cached key of the current pair, or nil when the iterator
+// is not positioned on a pair or a fatal error has been recorded.
+func (i *mergedIterator) Key() []byte {
+	if i.err == nil && i.dir > dirEOI {
+		return i.keys[i.index]
+	}
+	return nil
+}
+
+// Value returns the value of the current pair as reported by the input that
+// supplies it, or nil when the iterator is not positioned on a pair or a
+// fatal error has been recorded.
+func (i *mergedIterator) Value() []byte {
+	if i.err == nil && i.dir > dirEOI {
+		return i.iters[i.index].Value()
+	}
+	return nil
+}
+
+// Release releases every input iterator, drops the cached state and then
+// invokes the registered releaser, if any. Calls after the first are no-ops.
+func (i *mergedIterator) Release() {
+	if i.dir == dirReleased {
+		return
+	}
+	i.dir = dirReleased
+
+	for _, input := range i.iters {
+		input.Release()
+	}
+	i.iters, i.keys = nil, nil
+
+	if i.releaser == nil {
+		return
+	}
+	i.releaser.Release()
+	i.releaser = nil
+}
+
+// SetReleaser registers releaser to be invoked by Release. Passing nil clears
+// a previously registered releaser.
+//
+// It panics with util.ErrReleased if the iterator has already been released,
+// and with util.ErrHasReleaser if a releaser is already set and releaser is
+// non-nil.
+func (i *mergedIterator) SetReleaser(releaser util.Releaser) {
+ if i.dir == dirReleased {
+ panic(util.ErrReleased)
+ }
+ if i.releaser != nil && releaser != nil {
+ panic(util.ErrHasReleaser)
+ }
+ i.releaser = releaser
+}
+
+// Error returns the recorded fatal error, if any.
+func (i *mergedIterator) Error() error {
+ return i.err
+}
+
+// SetErrorCallback sets the function called with every error observed on an
+// input iterator. Passing nil clears the callback.
+func (i *mergedIterator) SetErrorCallback(f func(err error)) {
+ i.errf = f
+}
+
+// NewMergedIterator returns an iterator that merges its input. Walking the
+// resultant iterator will return all key/value pairs of all input iterators
+// in strictly increasing key order, as defined by cmp.
+// The input's key ranges may overlap, but there are assumed to be no duplicate
+// keys: if iters[i] contains a key k then iters[j] will not contain that key k.
+// None of the iters may be nil.
+//
+// If strict is true then any 'corruption errors' (i.e errors.IsCorrupted(err) == true)
+// won't be ignored and will halt the 'merged iterator', otherwise the iterator will
+// continue to the next 'input iterator'.
+func NewMergedIterator(iters []Iterator, cmp comparer.Comparer, strict bool) Iterator {
+ return &mergedIterator{
+ iters: iters,
+ cmp: cmp,
+ strict: strict,
+ keys: make([][]byte, len(iters)),
+ }
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go b/vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go
new file mode 100644
index 000000000..d094c3d0f
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go
@@ -0,0 +1,524 @@
+// Copyright 2011 The LevelDB-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Taken from: https://code.google.com/p/leveldb-go/source/browse/leveldb/record/record.go?r=1d5ccbe03246da926391ee12d1c6caae054ff4b0
+// License, authors and contributors information can be found at the URLs below, respectively:
+// https://code.google.com/p/leveldb-go/source/browse/LICENSE
+// https://code.google.com/p/leveldb-go/source/browse/AUTHORS
+// https://code.google.com/p/leveldb-go/source/browse/CONTRIBUTORS
+
+// Package journal reads and writes sequences of journals. Each journal is a stream
+// of bytes that completes before the next journal starts.
+//
+// When reading, call Next to obtain an io.Reader for the next journal. Next will
+// return io.EOF when there are no more journals. It is valid to call Next
+// without reading the current journal to exhaustion.
+//
+// When writing, call Next to obtain an io.Writer for the next journal. Calling
+// Next finishes the current journal. Call Close to finish the final journal.
+//
+// Optionally, call Flush to finish the current journal and flush the underlying
+// writer without starting a new journal. To start a new journal after flushing,
+// call Next.
+//
+// Neither Readers nor Writers are safe to use concurrently.
+//
+// Example code:
+// func read(r io.Reader) ([]string, error) {
+// var ss []string
+// journals := journal.NewReader(r, nil, true, true)
+// for {
+// j, err := journals.Next()
+// if err == io.EOF {
+// break
+// }
+// if err != nil {
+// return nil, err
+// }
+// s, err := ioutil.ReadAll(j)
+// if err != nil {
+// return nil, err
+// }
+// ss = append(ss, string(s))
+// }
+// return ss, nil
+// }
+//
+// func write(w io.Writer, ss []string) error {
+// journals := journal.NewWriter(w)
+// for _, s := range ss {
+// j, err := journals.Next()
+// if err != nil {
+// return err
+// }
+// if _, err := j.Write([]byte(s)); err != nil {
+// return err
+// }
+// }
+// return journals.Close()
+// }
+//
+// The wire format is that the stream is divided into 32KiB blocks, and each
+// block contains a number of tightly packed chunks. Chunks cannot cross block
+// boundaries. The last block may be shorter than 32 KiB. Any unused bytes in a
+// block must be zero.
+//
+// A journal maps to one or more chunks. Each chunk has a 7 byte header (a 4
+// byte checksum, a 2 byte little-endian uint16 length, and a 1 byte chunk type)
+// followed by a payload. The checksum is over the chunk type and the payload.
+//
+// There are four chunk types: whether the chunk is the full journal, or the
+// first, middle or last chunk of a multi-chunk journal. A multi-chunk journal
+// has one first chunk, zero or more middle chunks, and one last chunk.
+//
+// The wire format allows for limited recovery in the face of data corruption:
+// on a format error (such as a checksum mismatch), the reader moves to the
+// next block and looks for the next full or first chunk.
+package journal
+
+import (
+ "encoding/binary"
+ "fmt"
+ "io"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// These constants are part of the wire format and should not be changed.
+const (
+ fullChunkType = 1
+ firstChunkType = 2
+ middleChunkType = 3
+ lastChunkType = 4
+)
+
+const (
+ blockSize = 32 * 1024 // size of one journal block in bytes (32 KiB)
+ headerSize = 7 // chunk header: 4-byte checksum, 2-byte length, 1-byte chunk type
+)
+
+// flusher is the optional interface probed on the underlying io.Writer;
+// Writer.Flush calls Flush on it when the writer implements it.
+type flusher interface {
+ Flush() error
+}
+
+// ErrCorrupted is the error type generated for a corrupted block or chunk.
+// Size is the number of bytes reported as dropped and Reason describes the
+// corruption.
+type ErrCorrupted struct {
+ Size int
+ Reason string
+}
+
+// Error implements the error interface.
+func (e *ErrCorrupted) Error() string {
+ return fmt.Sprintf("leveldb/journal: block/chunk corrupted: %s (%d bytes)", e.Reason, e.Size)
+}
+
+// Dropper is the interface that wraps the simple Drop method. The Drop
+// method is called when the journal reader drops a block or chunk; err
+// describes what was dropped and why.
+type Dropper interface {
+ Drop(err error)
+}
+
+// Reader reads journals from an underlying io.Reader.
+type Reader struct {
+ // r is the underlying reader.
+ r io.Reader
+ // dropper, if non-nil, is notified of every dropped block or chunk.
+ dropper Dropper
+ // strict makes corruption (other than a skipped orphan chunk) a
+ // permanent error that is stored in err.
+ strict bool
+ // checksum enables verification of each chunk's checksum.
+ checksum bool
+ // seq is the sequence number of the current journal.
+ seq int
+ // buf[i:j] is the unread portion of the current chunk's payload.
+ // The low bound, i, excludes the chunk header.
+ i, j int
+ // n is the number of bytes of buf that are valid. Once reading has started,
+ // only the final block can have n < blockSize.
+ n int
+ // last is whether the current chunk is the last chunk of the journal.
+ last bool
+ // err is any accumulated error.
+ err error
+ // buf is the buffer.
+ buf [blockSize]byte
+}
+
+// NewReader returns a new journal reader on top of r. The dropper may be
+// nil. If strict is true, a corrupted or invalid chunk halts the journal
+// reader entirely. If checksum is true, every chunk's checksum is verified.
+func NewReader(r io.Reader, dropper Dropper, strict, checksum bool) *Reader {
+	jr := new(Reader)
+	jr.r, jr.dropper = r, dropper
+	jr.strict, jr.checksum = strict, checksum
+	// Start in the "previous journal finished" state.
+	jr.last = true
+	return jr
+}
+
+// errSkip is the internal sentinel returned by corrupt when the damaged data
+// was dropped and reading may continue; Reader.Next retries on it and
+// singleReader translates it to io.ErrUnexpectedEOF.
+var errSkip = errors.New("leveldb/journal: skipped")
+
+// corrupt reports n dropped bytes and the reason to the dropper, if one is
+// set. In strict mode, unless skip is true, it records a corruption error in
+// r.err and returns it; in every other case it returns errSkip.
+func (r *Reader) corrupt(n int, reason string, skip bool) error {
+	if d := r.dropper; d != nil {
+		d.Drop(&ErrCorrupted{n, reason})
+	}
+	if skip || !r.strict {
+		return errSkip
+	}
+	r.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrCorrupted{n, reason})
+	return r.err
+}
+
+// nextChunk sets r.buf[r.i:r.j] to hold the next chunk's payload, reading the
+// next block into the buffer if necessary.
+//
+// first reports whether the caller expects the start of a journal (a full or
+// first chunk). The result is nil on success, errSkip when damaged data was
+// dropped and the caller may retry, io.EOF when no more data is available at
+// a journal boundary, or another error (a strict-mode corruption error or a
+// read error from the underlying reader).
+func (r *Reader) nextChunk(first bool) error {
+ for {
+ // A complete header is available in the current block: parse it.
+ if r.j+headerSize <= r.n {
+ checksum := binary.LittleEndian.Uint32(r.buf[r.j+0 : r.j+4])
+ length := binary.LittleEndian.Uint16(r.buf[r.j+4 : r.j+6])
+ chunkType := r.buf[r.j+6]
+ unprocBlock := r.n - r.j // bytes left in the block, reported if it is dropped
+ if checksum == 0 && length == 0 && chunkType == 0 {
+ // Drop entire block.
+ r.i = r.n
+ r.j = r.n
+ return r.corrupt(unprocBlock, "zero header", false)
+ }
+ if chunkType < fullChunkType || chunkType > lastChunkType {
+ // Drop entire block.
+ r.i = r.n
+ r.j = r.n
+ return r.corrupt(unprocBlock, fmt.Sprintf("invalid chunk type %#x", chunkType), false)
+ }
+ r.i = r.j + headerSize
+ r.j = r.j + headerSize + int(length)
+ if r.j > r.n {
+ // Drop entire block.
+ r.i = r.n
+ r.j = r.n
+ return r.corrupt(unprocBlock, "chunk length overflows block", false)
+ } else if r.checksum && checksum != util.NewCRC(r.buf[r.i-1:r.j]).Value() { // r.i-1 is the chunk type byte: the CRC covers type + payload
+ // Drop entire block.
+ r.i = r.n
+ r.j = r.n
+ return r.corrupt(unprocBlock, "checksum mismatch", false)
+ }
+ if first && chunkType != fullChunkType && chunkType != firstChunkType {
+ chunkLength := (r.j - r.i) + headerSize
+ r.i = r.j // consume the payload so the next call starts at the following chunk
+ // Report the error, but skip it.
+ return r.corrupt(chunkLength, "orphan chunk", true)
+ }
+ r.last = chunkType == fullChunkType || chunkType == lastChunkType
+ return nil
+ }
+
+ // The last block.
+ // A short, non-empty block was already read, so no more data follows.
+ if r.n < blockSize && r.n > 0 {
+ if !first {
+ return r.corrupt(0, "missing chunk part", false)
+ }
+ r.err = io.EOF
+ return r.err
+ }
+
+ // Read block.
+ n, err := io.ReadFull(r.r, r.buf[:])
+ // EOF and a short final block are handled below; any other read error is
+ // returned as is, without being stored in r.err.
+ if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
+ return err
+ }
+ if n == 0 {
+ if !first {
+ return r.corrupt(0, "missing chunk part", false)
+ }
+ r.err = io.EOF
+ return r.err
+ }
+ r.i, r.j, r.n = 0, 0, n
+ }
+}
+
+// Next returns a reader for the next journal. It returns io.EOF if there are
+// no more journals. The reader returned becomes stale after the next Next
+// call, and should no longer be used. If strict is false, the returned reader
+// yields io.ErrUnexpectedEOF when it encounters a corrupted journal.
+func (r *Reader) Next() (io.Reader, error) {
+	r.seq++
+	if r.err != nil {
+		return nil, r.err
+	}
+	// Discard whatever is left unread of the current chunk.
+	r.i = r.j
+	for {
+		err := r.nextChunk(true)
+		if err == nil {
+			return &singleReader{r, r.seq, nil}, nil
+		}
+		if err != errSkip {
+			return nil, err
+		}
+		// errSkip: the damaged data was dropped, try the next chunk.
+	}
+}
+
+// Reset reinitializes the journal reader so that it can be reused with a new
+// underlying reader and settings. It returns the last accumulated error.
+func (r *Reader) Reset(reader io.Reader, dropper Dropper, strict, checksum bool) error {
+	prev := r.err
+	// Bumping seq makes every previously returned journal reader stale.
+	r.seq++
+	r.r, r.dropper = reader, dropper
+	r.strict, r.checksum = strict, checksum
+	r.i, r.j, r.n = 0, 0, 0
+	r.last = true
+	r.err = nil
+	return prev
+}
+
+// singleReader is the io.Reader handed out by Reader.Next for one journal.
+type singleReader struct {
+ r *Reader // parent journal reader that owns the buffer
+ seq int // value of r.seq when this reader was created; a mismatch means stale
+ err error // sticky error hit while reading this journal
+}
+
+// Read copies up to len(p) bytes of the current journal into p, loading
+// further chunks as needed. It returns io.EOF once the last chunk of the
+// journal is consumed, and io.ErrUnexpectedEOF when a following chunk had to
+// be skipped. A reader that is no longer current fails with a "stale reader"
+// error.
+func (x *singleReader) Read(p []byte) (int, error) {
+	jr := x.r
+	switch {
+	case jr.seq != x.seq:
+		return 0, errors.New("leveldb/journal: stale reader")
+	case x.err != nil:
+		return 0, x.err
+	case jr.err != nil:
+		return 0, jr.err
+	}
+	// Current chunk exhausted: advance until a chunk with payload is found.
+	for jr.i == jr.j {
+		if jr.last {
+			return 0, io.EOF
+		}
+		if x.err = jr.nextChunk(false); x.err != nil {
+			if x.err == errSkip {
+				x.err = io.ErrUnexpectedEOF
+			}
+			return 0, x.err
+		}
+	}
+	copied := copy(p, jr.buf[jr.i:jr.j])
+	jr.i += copied
+	return copied, nil
+}
+
+// ReadByte returns the next byte of the current journal, loading further
+// chunks as needed. Error behaviour matches Read: io.EOF at the end of the
+// journal, io.ErrUnexpectedEOF when a following chunk had to be skipped, and
+// a "stale reader" error when the reader is no longer current.
+func (x *singleReader) ReadByte() (byte, error) {
+	jr := x.r
+	switch {
+	case jr.seq != x.seq:
+		return 0, errors.New("leveldb/journal: stale reader")
+	case x.err != nil:
+		return 0, x.err
+	case jr.err != nil:
+		return 0, jr.err
+	}
+	// Current chunk exhausted: advance until a chunk with payload is found.
+	for jr.i == jr.j {
+		if jr.last {
+			return 0, io.EOF
+		}
+		if x.err = jr.nextChunk(false); x.err != nil {
+			if x.err == errSkip {
+				x.err = io.ErrUnexpectedEOF
+			}
+			return 0, x.err
+		}
+	}
+	b := jr.buf[jr.i]
+	jr.i++
+	return b, nil
+}
+
+// Writer writes journals to an underlying io.Writer.
+type Writer struct {
+ // w is the underlying writer.
+ w io.Writer
+ // seq is the sequence number of the current journal.
+ seq int
+ // f is w as a flusher.
+ f flusher
+ // buf[i:j] is the bytes that will become the current chunk.
+ // The low bound, i, includes the chunk header.
+ i, j int
+ // buf[:written] has already been written to w.
+ // written is zero unless Flush has been called.
+ written int
+ // first is whether the current chunk is the first chunk of the journal.
+ first bool
+ // pending is whether a chunk is buffered but not yet written.
+ pending bool
+ // err is any accumulated error.
+ err error
+ // buf is the buffer.
+ buf [blockSize]byte
+}
+
+// NewWriter returns a new journal Writer on top of w. If w also provides a
+// Flush() error method, it is remembered so that Writer.Flush can call it.
+func NewWriter(w io.Writer) *Writer {
+	jw := &Writer{w: w}
+	jw.f, _ = w.(flusher)
+	return jw
+}
+
+// fillHeader fills in the header for the pending chunk.
+func (w *Writer) fillHeader(last bool) {
+ if w.i+headerSize > w.j || w.j > blockSize {
+ panic("leveldb/journal: bad writer state")
+ }
+ if last {
+ if w.first {
+ w.buf[w.i+6] = fullChunkType
+ } else {
+ w.buf[w.i+6] = lastChunkType
+ }
+ } else {
+ if w.first {
+ w.buf[w.i+6] = firstChunkType
+ } else {
+ w.buf[w.i+6] = middleChunkType
+ }
+ }
+ binary.LittleEndian.PutUint32(w.buf[w.i+0:w.i+4], util.NewCRC(w.buf[w.i+6:w.j]).Value())
+ binary.LittleEndian.PutUint16(w.buf[w.i+4:w.i+6], uint16(w.j-w.i-headerSize))
+}
+
+// writeBlock writes the not-yet-written part of the buffered block to the
+// underlying writer, recording any error in w.err, and then positions the
+// writer at the start of a fresh block with room reserved for the next
+// chunk's header.
+func (w *Writer) writeBlock() {
+	unwritten := w.buf[w.written:]
+	_, w.err = w.w.Write(unwritten)
+	w.i, w.j, w.written = 0, headerSize, 0
+}
+
+// writePending finishes the current journal (filling in the header of a
+// pending chunk as the last one) and writes the buffered bytes that have not
+// been written yet to the underlying writer. It does nothing if the writer
+// already holds an error.
+func (w *Writer) writePending() {
+	if w.err != nil {
+		return
+	}
+	if w.pending {
+		w.fillHeader(true)
+		w.pending = false
+	}
+	from, to := w.written, w.j
+	_, w.err = w.w.Write(w.buf[from:to])
+	w.written = to
+}
+
+// Close finishes the current journal and closes the writer. After a
+// successful Close the writer holds a "closed Writer" error, so later
+// operations fail with it.
+func (w *Writer) Close() error {
+	w.seq++
+	if w.writePending(); w.err != nil {
+		return w.err
+	}
+	w.err = errors.New("leveldb/journal: closed Writer")
+	return nil
+}
+
+// Flush finishes the current journal, writes to the underlying writer, and
+// flushes it if that writer implements interface{ Flush() error }. An error
+// from either step is stored in the writer and returned.
+func (w *Writer) Flush() error {
+	w.seq++
+	w.writePending()
+	switch {
+	case w.err != nil:
+		return w.err
+	case w.f == nil:
+		return nil
+	}
+	w.err = w.f.Flush()
+	return w.err
+}
+
+// Reset reinitializes the journal writer so that it can be reused with a new
+// underlying writer. If the writer holds no error, pending data is first
+// written to the old underlying writer and any error from that is returned.
+func (w *Writer) Reset(writer io.Writer) (err error) {
+	w.seq++
+	if w.err == nil {
+		w.writePending()
+		err = w.err
+	}
+	w.w = writer
+	w.f, _ = writer.(flusher)
+	w.i, w.j, w.written = 0, 0, 0
+	w.first, w.pending = false, false
+	w.err = nil
+	return err
+}
+
+// Next returns a writer for the next journal. The writer returned becomes stale
+// after the next Close, Flush or Next call, and should no longer be used.
+// It returns the writer's accumulated error, if any, instead of a writer.
+func (w *Writer) Next() (io.Writer, error) {
+ w.seq++ // invalidates every previously returned journal writer
+ if w.err != nil {
+ return nil, w.err
+ }
+ if w.pending {
+ w.fillHeader(true) // finish the previous journal's final chunk
+ }
+ // The new chunk starts where the previous one ended; reserve its header.
+ w.i = w.j
+ w.j = w.j + headerSize
+ // Check if there is room in the block for the header.
+ if w.j > blockSize {
+ // Fill in the rest of the block with zeroes.
+ for k := w.i; k < blockSize; k++ {
+ w.buf[k] = 0
+ }
+ w.writeBlock() // also resets w.i/w.j to the start of a fresh block
+ if w.err != nil {
+ return nil, w.err
+ }
+ }
+ w.first = true
+ w.pending = true
+ return singleWriter{w, w.seq}, nil
+}
+
+// singleWriter is the io.Writer handed out by Writer.Next for one journal.
+type singleWriter struct {
+ w *Writer // parent journal writer that owns the buffer
+ seq int // value of w.seq when this writer was created; a mismatch means stale
+}
+
+// Write appends p to the current journal, emitting a full block to the
+// underlying writer each time the buffer fills up. It returns len(p) on
+// success. On a block write failure it returns 0 and the error; a writer that
+// is no longer current fails with a "stale writer" error.
+func (x singleWriter) Write(p []byte) (int, error) {
+	jw := x.w
+	if jw.seq != x.seq {
+		return 0, errors.New("leveldb/journal: stale writer")
+	}
+	if jw.err != nil {
+		return 0, jw.err
+	}
+	total := len(p)
+	for rest := p; len(rest) > 0; {
+		// Write a block, if it is full.
+		if jw.j == blockSize {
+			jw.fillHeader(false)
+			jw.writeBlock()
+			if jw.err != nil {
+				return 0, jw.err
+			}
+			// Any further chunk of this journal is a middle or last chunk.
+			jw.first = false
+		}
+		// Copy as many bytes as fit into the remainder of the block.
+		n := copy(jw.buf[jw.j:], rest)
+		jw.j += n
+		rest = rest[n:]
+	}
+	return total, nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/key.go b/vendor/github.com/syndtr/goleveldb/leveldb/key.go
new file mode 100644
index 000000000..ad8f51ec8
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/key.go
@@ -0,0 +1,143 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "encoding/binary"
+ "fmt"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+// ErrInternalKeyCorrupted records internal key corruption: Ikey is the
+// offending key and Reason describes what is wrong with it.
+type ErrInternalKeyCorrupted struct {
+ Ikey []byte
+ Reason string
+}
+
+// Error implements the error interface.
+func (e *ErrInternalKeyCorrupted) Error() string {
+ return fmt.Sprintf("leveldb: internal key %q corrupted: %s", e.Ikey, e.Reason)
+}
+
+// newErrInternalKeyCorrupted wraps an ErrInternalKeyCorrupted for ikey in a
+// corruption error with an empty file descriptor. The key is copied, so the
+// caller may reuse ikey afterwards.
+func newErrInternalKeyCorrupted(ikey []byte, reason string) error {
+	keyCopy := append([]byte{}, ikey...)
+	cause := &ErrInternalKeyCorrupted{keyCopy, reason}
+	return errors.NewErrCorrupted(storage.FileDesc{}, cause)
+}
+
+// keyType is the value type stored in the low byte of an internal key's
+// trailing number.
+type keyType uint
+
+// String returns "d" for keyTypeDel, "v" for keyTypeVal and an
+// "<invalid:0x..>" marker for any other value.
+func (kt keyType) String() string {
+	if kt == keyTypeDel {
+		return "d"
+	}
+	if kt == keyTypeVal {
+		return "v"
+	}
+	return fmt.Sprintf("<invalid:%#x>", uint(kt))
+}
+
+// Value types encoded as the last component of internal keys.
+// Don't modify; these values are saved to disk.
+const (
+ keyTypeDel = keyType(0)
+ keyTypeVal = keyType(1)
+)
+
+// keyTypeSeek defines the keyType that should be passed when constructing an
+// internal key for seeking to a particular sequence number (since we
+// sort sequence numbers in decreasing order and the value type is
+// embedded as the low 8 bits in the sequence number in internal keys,
+// we need to use the highest-numbered ValueType, not the lowest).
+const keyTypeSeek = keyTypeVal
+
+const (
+ // Maximum value possible for a sequence number; the low 8 bits are
+ // used by the value type, so both can be packed together in a single
+ // 64-bit integer.
+ keyMaxSeq = (uint64(1) << 56) - 1
+ // Maximum value possible for packed sequence number and type.
+ keyMaxNum = (keyMaxSeq << 8) | uint64(keyTypeSeek)
+)
+
+// Maximum number encoded in bytes.
+var keyMaxNumBytes = make([]byte, 8)
+
+func init() {
+ binary.LittleEndian.PutUint64(keyMaxNumBytes, keyMaxNum)
+}
+
+type internalKey []byte
+
+// makeInternalKey builds an internal key: ukey followed by 8 little-endian
+// bytes holding (seq<<8)|kt. The buffer comes from ensureBuffer(dst, ...)
+// (presumably reusing dst when it is large enough; the helper is defined
+// elsewhere). It panics if seq exceeds keyMaxSeq or kt exceeds keyTypeVal.
+func makeInternalKey(dst, ukey []byte, seq uint64, kt keyType) internalKey {
+	if seq > keyMaxSeq {
+		panic("leveldb: invalid sequence number")
+	}
+	if kt > keyTypeVal {
+		panic("leveldb: invalid type")
+	}
+
+	n := len(ukey)
+	dst = ensureBuffer(dst, n+8)
+	copy(dst, ukey)
+	packed := (seq << 8) | uint64(kt)
+	binary.LittleEndian.PutUint64(dst[n:], packed)
+	return internalKey(dst)
+}
+
+// parseInternalKey splits ik into its user key, sequence number and key type.
+// It returns a corruption error if ik is shorter than 8 bytes or carries a
+// type greater than keyTypeVal. The returned ukey aliases ik.
+func parseInternalKey(ik []byte) (ukey []byte, seq uint64, kt keyType, err error) {
+	if len(ik) < 8 {
+		return nil, 0, 0, newErrInternalKeyCorrupted(ik, "invalid length")
+	}
+	split := len(ik) - 8
+	num := binary.LittleEndian.Uint64(ik[split:])
+	typ := keyType(num & 0xff)
+	if typ > keyTypeVal {
+		return nil, 0, 0, newErrInternalKeyCorrupted(ik, "invalid type")
+	}
+	return ik[:split], num >> 8, typ, nil
+}
+
+// validInternalKey reports whether ik parses as an internal key without error.
+func validInternalKey(ik []byte) bool {
+ _, _, _, err := parseInternalKey(ik)
+ return err == nil
+}
+
+// assert panics if the internal key is nil or shorter than the 8 bytes
+// needed for the trailing sequence/type number.
+func (ik internalKey) assert() {
+	switch {
+	case ik == nil:
+		panic("leveldb: nil internalKey")
+	case len(ik) < 8:
+		panic(fmt.Sprintf("leveldb: internal key %q, len=%d: invalid length", []byte(ik), len(ik)))
+	}
+}
+
+// ukey returns the user key part: everything but the trailing 8 bytes. The
+// result aliases ik. It panics (via assert) on a nil or too-short key.
+func (ik internalKey) ukey() []byte {
+ ik.assert()
+ return ik[:len(ik)-8]
+}
+
+// num returns the trailing 8 bytes decoded as a little-endian uint64, i.e.
+// the packed sequence number and key type. It panics (via assert) on a nil
+// or too-short key.
+func (ik internalKey) num() uint64 {
+ ik.assert()
+ return binary.LittleEndian.Uint64(ik[len(ik)-8:])
+}
+
+// parseNum splits the trailing number of the internal key into its sequence
+// number (upper 56 bits) and key type (low 8 bits). It panics if the key is
+// nil or shorter than 8 bytes (via num), or if the type is greater than
+// keyTypeVal.
+func (ik internalKey) parseNum() (seq uint64, kt keyType) {
+	num := ik.num()
+	seq, kt = uint64(num>>8), keyType(num&0xff)
+	if kt > keyTypeVal {
+		// Format the numeric value: keyType implements fmt.Stringer, so %#x
+		// applied to kt itself would hex-encode the String() text instead of
+		// printing the type number.
+		panic(fmt.Sprintf("leveldb: internal key %q, len=%d: invalid type %#x", []byte(ik), len(ik), uint(kt)))
+	}
+	return
+}
+
+// String renders the key as "<ukey>,<type><seq>" with the user key passed
+// through shorten. A nil key prints as "<nil>" and a key that fails to parse
+// prints as "<invalid:0x..>".
+func (ik internalKey) String() string {
+	if ik == nil {
+		return "<nil>"
+	}
+
+	ukey, seq, kt, err := parseInternalKey(ik)
+	if err != nil {
+		return fmt.Sprintf("<invalid:%#x>", []byte(ik))
+	}
+	return fmt.Sprintf("%s,%s%d", shorten(string(ukey)), kt, seq)
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go b/vendor/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go
new file mode 100644
index 000000000..18a19ed42
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go
@@ -0,0 +1,475 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package memdb provides in-memory key/value database implementation.
+package memdb
+
+import (
+ "math/rand"
+ "sync"
+
+ "github.com/syndtr/goleveldb/leveldb/comparer"
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// Common errors.
+var (
+ ErrNotFound = errors.ErrNotFound
+ ErrIterReleased = errors.New("leveldb/memdb: iterator released")
+)
+
+const tMaxHeight = 12
+
+// dbIter is the iterator over a DB returned by DB.NewIterator.
+type dbIter struct {
+ util.BasicReleaser
+ p *DB // database being iterated; set to nil on Release
+ slice *util.Range // optional key range restriction; nil means unbounded
+ node int // index of the current node in p.nodeData; 0 means not on an entry
+ forward bool // true after First/Seek/Next, false after Last/Prev
+ key, value []byte // current entry, as slices of p.kvData; nil when not on an entry
+ err error // set to ErrIterReleased when used after Release
+}
+
+// fill loads key and value for the current node and reports whether the
+// iterator is positioned on an entry. With checkLimit set, a key at or past
+// slice.Limit invalidates the position; with checkStart set, a key before
+// slice.Start does. On an invalid position node is 0 and key/value are nil.
+func (i *dbIter) fill(checkStart, checkLimit bool) bool {
+	if i.node == 0 {
+		i.key, i.value = nil, nil
+		return false
+	}
+	n := i.p.nodeData[i.node]
+	m := n + i.p.nodeData[i.node+nKey]
+	i.key = i.p.kvData[n:m]
+	if s := i.slice; s != nil {
+		outside := (checkLimit && s.Limit != nil && i.p.cmp.Compare(i.key, s.Limit) >= 0) ||
+			(checkStart && s.Start != nil && i.p.cmp.Compare(i.key, s.Start) < 0)
+		if outside {
+			i.node = 0
+			i.key, i.value = nil, nil
+			return false
+		}
+	}
+	i.value = i.p.kvData[m : m+i.p.nodeData[i.node+nVal]]
+	return true
+}
+
+// Valid reports whether the iterator is positioned on an entry.
+func (i *dbIter) Valid() bool {
+ return i.node != 0
+}
+
+// First moves to the first entry, or to the first entry at or after
+// slice.Start when a start bound is set. It reports whether such an entry
+// exists within the range. On a released iterator it records
+// ErrIterReleased and returns false.
+func (i *dbIter) First() bool {
+	if i.Released() {
+		i.err = ErrIterReleased
+		return false
+	}
+
+	i.forward = true
+	i.p.mu.RLock()
+	defer i.p.mu.RUnlock()
+	if s := i.slice; s != nil && s.Start != nil {
+		i.node, _ = i.p.findGE(s.Start, false)
+	} else {
+		// No start bound: follow the head node's level-0 link.
+		i.node = i.p.nodeData[nNext]
+	}
+	return i.fill(false, true)
+}
+
+// Last moves to the last entry, or to the last entry before slice.Limit when
+// a limit bound is set. It reports whether such an entry exists within the
+// range. On a released iterator it records ErrIterReleased and returns false.
+func (i *dbIter) Last() bool {
+	if i.Released() {
+		i.err = ErrIterReleased
+		return false
+	}
+
+	i.forward = false
+	i.p.mu.RLock()
+	defer i.p.mu.RUnlock()
+	if s := i.slice; s != nil && s.Limit != nil {
+		i.node = i.p.findLT(s.Limit)
+	} else {
+		i.node = i.p.findLast()
+	}
+	return i.fill(true, false)
+}
+
+// Seek moves to the first entry whose key is greater than or equal to key;
+// a key before slice.Start is clamped to slice.Start. It reports whether
+// such an entry exists within the range. On a released iterator it records
+// ErrIterReleased and returns false.
+func (i *dbIter) Seek(key []byte) bool {
+	if i.Released() {
+		i.err = ErrIterReleased
+		return false
+	}
+
+	i.forward = true
+	i.p.mu.RLock()
+	defer i.p.mu.RUnlock()
+	target := key
+	if s := i.slice; s != nil && s.Start != nil && i.p.cmp.Compare(target, s.Start) < 0 {
+		target = s.Start
+	}
+	i.node, _ = i.p.findGE(target, false)
+	return i.fill(false, true)
+}
+
+// Next advances to the following entry and reports whether one exists within
+// the range. When the iterator is not on an entry, it restarts from First if
+// the last move was backward and otherwise stays exhausted. On a released
+// iterator it records ErrIterReleased and returns false.
+func (i *dbIter) Next() bool {
+	if i.Released() {
+		i.err = ErrIterReleased
+		return false
+	}
+
+	if i.node == 0 {
+		return !i.forward && i.First()
+	}
+	i.forward = true
+	i.p.mu.RLock()
+	defer i.p.mu.RUnlock()
+	i.node = i.p.nodeData[i.node+nNext]
+	return i.fill(false, true)
+}
+
+// Prev steps back to the preceding entry and reports whether one exists
+// within the range. When the iterator is not on an entry, it restarts from
+// Last if the last move was forward and otherwise stays exhausted. On a
+// released iterator it records ErrIterReleased and returns false.
+func (i *dbIter) Prev() bool {
+	if i.Released() {
+		i.err = ErrIterReleased
+		return false
+	}
+
+	if i.node == 0 {
+		return i.forward && i.Last()
+	}
+	i.forward = false
+	i.p.mu.RLock()
+	defer i.p.mu.RUnlock()
+	// Nodes carry no back links, so search for the last key below the current one.
+	i.node = i.p.findLT(i.key)
+	return i.fill(true, false)
+}
+
+// Key returns the key of the current entry, or nil if the iterator is not
+// positioned on an entry. The slice aliases the DB's buffer.
+func (i *dbIter) Key() []byte {
+ return i.key
+}
+
+// Value returns the value of the current entry, or nil if the iterator is
+// not positioned on an entry. The slice aliases the DB's buffer.
+func (i *dbIter) Value() []byte {
+ return i.value
+}
+
+// Error returns the error recorded on the iterator, if any.
+func (i *dbIter) Error() error { return i.err }
+
+// Release detaches the iterator from its DB, clears the current position and
+// releases the embedded BasicReleaser. Calls after the first are no-ops.
+func (i *dbIter) Release() {
+	if i.Released() {
+		return
+	}
+	i.p = nil
+	i.node = 0
+	i.key, i.value = nil, nil
+	i.BasicReleaser.Release()
+}
+
+// Offsets of the fields of one node record inside DB.nodeData, relative to
+// the node's base index.
+const (
+ nKV = iota // offset of the key/value pair in kvData
+ nKey // key length
+ nVal // value length
+ nHeight // node height (number of next links)
+ nNext // first next link; the link for level h is at nNext+h
+)
+
+// DB is an in-memory key/value database.
+type DB struct {
+ cmp comparer.BasicComparer // key ordering
+ rnd *rand.Rand // source of random node heights
+
+ mu sync.RWMutex
+ kvData []byte // append-only buffer holding all keys and values
+ // Node data:
+ // [0] : KV offset
+ // [1] : Key length
+ // [2] : Value length
+ // [3] : Height
+ // [4..4+height) : Next nodes, one per level
+ // The record at index 0 is the head node.
+ nodeData []int
+ prevNode [tMaxHeight]int // scratch: per-level predecessors recorded by findGE when prev is true
+ maxHeight int // highest level currently in use
+ n int // number of entries
+ kvSize int // sum of key and value lengths of live entries
+}
+
+// randHeight draws a random node height in [1, tMaxHeight]: starting at 1,
+// each additional level is taken with probability 1/branching.
+func (p *DB) randHeight() (h int) {
+	const branching = 4
+	for h = 1; h < tMaxHeight; h++ {
+		if p.rnd.Int()%branching != 0 {
+			break
+		}
+	}
+	return h
+}
+
+// findGE returns the index of the first node whose key is greater than or
+// equal to key (0 if there is none) and whether that node's key equals key.
+// When prev is true it also records, for every level from maxHeight-1 down
+// to 0, the last node before that position in p.prevNode.
+//
+// Must hold RW-lock if prev == true, as it use shared prevNode slice.
+func (p *DB) findGE(key []byte, prev bool) (int, bool) {
+ node := 0 // start at the head node
+ h := p.maxHeight - 1 // and at the topmost level in use
+ for {
+ next := p.nodeData[node+nNext+h]
+ cmp := 1 // the end of a list compares as greater than key
+ if next != 0 {
+ o := p.nodeData[next]
+ cmp = p.cmp.Compare(p.kvData[o:o+p.nodeData[next+nKey]], key)
+ }
+ if cmp < 0 {
+ // Keep searching in this list
+ node = next
+ } else {
+ if prev {
+ p.prevNode[h] = node
+ } else if cmp == 0 {
+ // Exact match and no predecessors wanted: stop early.
+ return next, true
+ }
+ if h == 0 {
+ return next, cmp == 0
+ }
+ h-- // descend one level
+ }
+ }
+}
+
+// findLT returns the index of the last node whose key is less than key, or 0
+// (the head node) if there is none.
+func (p *DB) findLT(key []byte) int {
+	node, h := 0, p.maxHeight-1
+	for {
+		next := p.nodeData[node+nNext+h]
+		o := p.nodeData[next]
+		if next != 0 && p.cmp.Compare(p.kvData[o:o+p.nodeData[next+nKey]], key) < 0 {
+			// Still below key: keep moving along this level.
+			node = next
+			continue
+		}
+		if h == 0 {
+			return node
+		}
+		h--
+	}
+}
+
+// findLast returns the index of the last node in the list, or 0 (the head
+// node) if the DB is empty.
+func (p *DB) findLast() int {
+	node, h := 0, p.maxHeight-1
+	for {
+		if next := p.nodeData[node+nNext+h]; next != 0 {
+			node = next
+			continue
+		}
+		if h == 0 {
+			return node
+		}
+		h--
+	}
+}
+
+// Put sets the value for the given key. It overwrites any previous value
+// for that key; a DB is not a multi-map.
+//
+// It is safe to modify the contents of the arguments after Put returns.
+// Put always returns nil.
+func (p *DB) Put(key []byte, value []byte) error {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+
+ // Existing key: append a fresh key/value pair and repoint the node at it.
+ // The old bytes stay in kvData, which is append-only.
+ if node, exact := p.findGE(key, true); exact {
+ kvOffset := len(p.kvData)
+ p.kvData = append(p.kvData, key...)
+ p.kvData = append(p.kvData, value...)
+ p.nodeData[node] = kvOffset
+ m := p.nodeData[node+nVal] // previous value length
+ p.nodeData[node+nVal] = len(value)
+ p.kvSize += len(value) - m
+ return nil
+ }
+
+ h := p.randHeight()
+ if h > p.maxHeight {
+ // Levels not searched by findGE have the head node as predecessor.
+ for i := p.maxHeight; i < h; i++ {
+ p.prevNode[i] = 0
+ }
+ p.maxHeight = h
+ }
+
+ kvOffset := len(p.kvData)
+ p.kvData = append(p.kvData, key...)
+ p.kvData = append(p.kvData, value...)
+ // Node
+ node := len(p.nodeData)
+ p.nodeData = append(p.nodeData, kvOffset, len(key), len(value), h)
+ // Link the new node in after its predecessor on each of its h levels.
+ for i, n := range p.prevNode[:h] {
+ m := n + nNext + i
+ p.nodeData = append(p.nodeData, p.nodeData[m])
+ p.nodeData[m] = node
+ }
+
+ p.kvSize += len(key) + len(value)
+ p.n++
+ return nil
+}
+
+// Delete deletes the value for the given key. It returns ErrNotFound if
+// the DB does not contain the key.
+//
+// The node is only unlinked; its bytes remain in the append-only buffers.
+//
+// It is safe to modify the contents of the arguments after Delete returns.
+func (p *DB) Delete(key []byte) error {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	node, exact := p.findGE(key, true)
+	if !exact {
+		return ErrNotFound
+	}
+
+	// On each level the node takes part in, make its predecessor (recorded by
+	// findGE in prevNode) point at the node's successor.
+	h := p.nodeData[node+nHeight]
+	for i, n := range p.prevNode[:h] {
+		m := n + nNext + i
+		p.nodeData[m] = p.nodeData[p.nodeData[m]+nNext+i]
+	}
+
+	p.kvSize -= p.nodeData[node+nKey] + p.nodeData[node+nVal]
+	p.n--
+	return nil
+}
+
+// Contains returns true if the given key is in the DB.
+//
+// It is safe to modify the contents of the arguments after Contains returns.
+func (p *DB) Contains(key []byte) bool {
+ p.mu.RLock()
+ _, exact := p.findGE(key, false)
+ p.mu.RUnlock()
+ return exact
+}
+
+// Get gets the value for the given key. It returns ErrNotFound if the
+// DB does not contain the key.
+//
+// The caller should not modify the contents of the returned slice, but
+// it is safe to modify the contents of the argument after Get returns.
+func (p *DB) Get(key []byte) (value []byte, err error) {
+ p.mu.RLock()
+ if node, exact := p.findGE(key, false); exact {
+ o := p.nodeData[node] + p.nodeData[node+nKey] // the value starts right after the key
+ value = p.kvData[o : o+p.nodeData[node+nVal]]
+ } else {
+ err = ErrNotFound
+ }
+ p.mu.RUnlock()
+ return
+}
+
+// Find finds key/value pair whose key is greater than or equal to the
+// given key. It returns ErrNotFound if the table doesn't contain
+// such pair.
+//
+// The caller should not modify the contents of the returned slice, but
+// it is safe to modify the contents of the argument after Find returns.
+func (p *DB) Find(key []byte) (rkey, value []byte, err error) {
+ p.mu.RLock()
+ if node, _ := p.findGE(key, false); node != 0 {
+ n := p.nodeData[node]
+ m := n + p.nodeData[node+nKey]
+ rkey = p.kvData[n:m]
+ value = p.kvData[m : m+p.nodeData[node+nVal]]
+ } else {
+ err = ErrNotFound
+ }
+ p.mu.RUnlock()
+ return
+}
+
+// NewIterator returns an iterator of the DB.
+// The returned iterator is not safe for concurrent use, but it is safe to use
+// multiple iterators concurrently, with each in a dedicated goroutine.
+// It is also safe to use an iterator concurrently with modifying its
+// underlying DB. However, the resultant key/value pairs are not guaranteed
+// to be a consistent snapshot of the DB at a particular point in time.
+//
+// Slice allows slicing the iterator to only contains keys in the given
+// range. A nil Range.Start is treated as a key before all keys in the
+// DB. And a nil Range.Limit is treated as a key after all keys in
+// the DB.
+//
+// The iterator must be released after use, by calling Release method.
+//
+// Also read Iterator documentation of the leveldb/iterator package.
+func (p *DB) NewIterator(slice *util.Range) iterator.Iterator {
+ return &dbIter{p: p, slice: slice}
+}
+
+// Capacity returns keys/values buffer capacity.
+func (p *DB) Capacity() int {
+ p.mu.RLock()
+ defer p.mu.RUnlock()
+ return cap(p.kvData)
+}
+
+// Size returns sum of keys and values length. Note that deleted
+// key/value will not be accounted for, but it will still consume
+// the buffer, since the buffer is append only.
+func (p *DB) Size() int {
+ p.mu.RLock()
+ defer p.mu.RUnlock()
+ return p.kvSize
+}
+
+// Free returns keys/values free buffer before need to grow.
+func (p *DB) Free() int {
+ p.mu.RLock()
+ defer p.mu.RUnlock()
+ return cap(p.kvData) - len(p.kvData)
+}
+
+// Len returns the number of entries in the DB.
+func (p *DB) Len() int {
+ p.mu.RLock()
+ defer p.mu.RUnlock()
+ return p.n
+}
+
+// Reset returns the DB to its initial empty state while keeping the
+// allocated buffers for reuse. The random source is re-seeded as in New.
+func (p *DB) Reset() {
+	p.mu.Lock()
+	p.rnd = rand.New(rand.NewSource(0xdeadbeef))
+	p.maxHeight, p.n, p.kvSize = 1, 0, 0
+	p.kvData = p.kvData[:0]
+	// Keep only the head node record and clear it.
+	p.nodeData = p.nodeData[:nNext+tMaxHeight]
+	p.nodeData[nKV], p.nodeData[nKey], p.nodeData[nVal] = 0, 0, 0
+	p.nodeData[nHeight] = tMaxHeight
+	for lvl := range p.prevNode {
+		p.nodeData[nNext+lvl] = 0
+		p.prevNode[lvl] = 0
+	}
+	p.mu.Unlock()
+}
+
+// New creates a new initialized in-memory key/value DB. The capacity
+// is the initial key/value buffer capacity. The capacity is advisory,
+// not enforced.
+//
+// This DB is append-only, deleting an entry would remove entry node but not
+// reclaim KV buffer.
+//
+// The returned DB instance is safe for concurrent use.
+func New(cmp comparer.BasicComparer, capacity int) *DB {
+	p := &DB{
+		cmp:       cmp,
+		rnd:       rand.New(rand.NewSource(0xdeadbeef)),
+		maxHeight: 1,
+		kvData:    make([]byte, 0, capacity),
+		// Head node record: the fixed fields plus one next link per level,
+		// sized with nNext exactly as Reset does.
+		nodeData: make([]int, nNext+tMaxHeight),
+	}
+	p.nodeData[nHeight] = tMaxHeight
+	return p
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go b/vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go
new file mode 100644
index 000000000..44e7d9adc
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go
@@ -0,0 +1,684 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package opt provides sets of options used by LevelDB.
+package opt
+
+import (
+ "math"
+
+ "github.com/syndtr/goleveldb/leveldb/cache"
+ "github.com/syndtr/goleveldb/leveldb/comparer"
+ "github.com/syndtr/goleveldb/leveldb/filter"
+)
+
+// Byte-size units, handy for writing sizes such as 4 * KiB.
+const (
+	KiB = 1 << 10
+	MiB = 1 << 20
+	GiB = 1 << 30
+)
+
+// Default values used by the Options getters in this package whenever the
+// corresponding option is left unset. They are declared as variables, not
+// constants.
+var (
+ DefaultBlockCacher = LRUCacher
+ DefaultBlockCacheCapacity = 8 * MiB
+ DefaultBlockRestartInterval = 16
+ DefaultBlockSize = 4 * KiB
+ DefaultCompactionExpandLimitFactor = 25
+ DefaultCompactionGPOverlapsFactor = 10
+ DefaultCompactionL0Trigger = 4
+ DefaultCompactionSourceLimitFactor = 1
+ DefaultCompactionTableSize = 2 * MiB
+ DefaultCompactionTableSizeMultiplier = 1.0
+ DefaultCompactionTotalSize = 10 * MiB
+ DefaultCompactionTotalSizeMultiplier = 10.0
+ DefaultCompressionType = SnappyCompression
+ DefaultIteratorSamplingRate = 1 * MiB
+ DefaultOpenFilesCacher = LRUCacher
+ DefaultOpenFilesCacheCapacity = 500
+ DefaultWriteBuffer = 4 * MiB
+ DefaultWriteL0PauseTrigger = 12
+ DefaultWriteL0SlowdownTrigger = 8
+)
+
+// Cacher is a caching algorithm.
+type Cacher interface {
+ // New returns a cache.Cacher for the given capacity. The CacherFunc
+ // implementation in this package may return nil.
+ New(capacity int) cache.Cacher
+}
+
+// CacherFunc adapts a plain constructor function to the Cacher interface.
+type CacherFunc struct {
+	NewFunc func(capacity int) cache.Cacher
+}
+
+// New calls NewFunc with the given capacity and returns its result. It
+// returns nil when NewFunc is not set.
+func (f *CacherFunc) New(capacity int) cache.Cacher {
+	if f.NewFunc == nil {
+		return nil
+	}
+	return f.NewFunc(capacity)
+}
+
+// noCacher ignores the requested capacity and always returns nil.
+func noCacher(int) cache.Cacher { return nil }
+
+var (
+	// LRUCacher is the LRU-cache algorithm.
+	LRUCacher = &CacherFunc{NewFunc: cache.NewLRU}
+
+	// NoCacher is the value that disables the caching algorithm. It has
+	// no constructor function, so its New method returns nil.
+	NoCacher = &CacherFunc{}
+)
+
+// Compression is the 'sorted table' block compression algorithm to use.
+type Compression uint
+
+// Known compression values. nCompression is the count of the values above
+// it and is not itself a valid compression.
+const (
+	DefaultCompression Compression = iota
+	NoCompression
+	SnappyCompression
+	nCompression
+)
+
+// String returns the lower-case name of the compression, or "invalid"
+// for a value outside the known set.
+func (c Compression) String() string {
+	names := [nCompression]string{
+		DefaultCompression: "default",
+		NoCompression:      "none",
+		SnappyCompression:  "snappy",
+	}
+	if c < nCompression {
+		return names[c]
+	}
+	return "invalid"
+}
+
+// Strict is the DB 'strict level'. It is a set of bit flags; see the
+// Strict* constants below.
+type Strict uint
+
+const (
+ // If present then a corrupted or invalid chunk or block in the manifest
+ // journal will cause an error instead of being dropped.
+ // This prevents a database with a corrupted manifest from being opened.
+ StrictManifest Strict = 1 << iota
+
+ // If present then the journal chunk checksum will be verified.
+ StrictJournalChecksum
+
+ // If present then a corrupted or invalid chunk or block in the journal
+ // will cause an error instead of being dropped.
+ // This prevents a database with a corrupted journal from being opened.
+ StrictJournal
+
+ // If present then the 'sorted table' block checksum will be verified.
+ // This has an effect on both 'read operation' and compaction.
+ StrictBlockChecksum
+
+ // If present then a corrupted 'sorted table' will fail compaction.
+ // The database will enter read-only mode.
+ StrictCompaction
+
+ // If present then a corrupted 'sorted table' will halt 'read operation'.
+ StrictReader
+
+ // If present then leveldb.Recover will drop corrupted 'sorted table'.
+ StrictRecovery
+
+ // This is only applicable to ReadOptions; if present then the
+ // ReadOptions 'strict level' will override the global one.
+ StrictOverride
+
+ // StrictAll enables all strict flags. It does not include StrictOverride.
+ StrictAll = StrictManifest | StrictJournalChecksum | StrictJournal | StrictBlockChecksum | StrictCompaction | StrictReader | StrictRecovery
+
+ // DefaultStrict is the default set of strict flags. Specifying any
+ // strict flag overrides the default strict flags as a whole (i.e. the
+ // two are not OR'ed).
+ DefaultStrict = StrictJournalChecksum | StrictBlockChecksum | StrictCompaction | StrictReader
+
+ // NoStrict disables all strict flags and overrides the default strict
+ // flags. It is the complement of StrictAll, hence non-zero, so
+ // Options.GetStrict does not fall back to DefaultStrict for it.
+ NoStrict = ^StrictAll
+)
+
+// Options holds the optional parameters for the DB at large.
+type Options struct {
+ // AltFilters defines one or more 'alternative filters'.
+ // 'alternative filters' will be used during reads if a filter block
+ // does not match with the 'effective filter'.
+ //
+ // The default value is nil
+ AltFilters []filter.Filter
+
+ // BlockCacher provides cache algorithm for LevelDB 'sorted table' block caching.
+ // Specify NoCacher to disable caching algorithm.
+ //
+ // The default value is LRUCacher.
+ BlockCacher Cacher
+
+ // BlockCacheCapacity defines the capacity of the 'sorted table' block caching.
+ // Use -1 (or any negative value) for zero, this has same effect as
+ // specifying NoCacher to BlockCacher.
+ //
+ // The default value is 8MiB.
+ BlockCacheCapacity int
+
+ // BlockRestartInterval is the number of keys between restart points for
+ // delta encoding of keys.
+ //
+ // The default value is 16.
+ BlockRestartInterval int
+
+ // BlockSize is the minimum uncompressed size in bytes of each 'sorted table'
+ // block.
+ //
+ // The default value is 4KiB.
+ BlockSize int
+
+ // CompactionExpandLimitFactor limits compaction size after expanded.
+ // This will be multiplied by table size limit at compaction target level.
+ //
+ // The default value is 25.
+ CompactionExpandLimitFactor int
+
+ // CompactionGPOverlapsFactor limits overlaps in grandparent (Level + 2) that a
+ // single 'sorted table' generates.
+ // This will be multiplied by table size limit at grandparent level.
+ //
+ // The default value is 10.
+ CompactionGPOverlapsFactor int
+
+ // CompactionL0Trigger defines number of 'sorted table' at level-0 that will
+ // trigger compaction.
+ //
+ // The default value is 4.
+ CompactionL0Trigger int
+
+ // CompactionSourceLimitFactor limits compaction source size. This doesn't apply to
+ // level-0.
+ // This will be multiplied by table size limit at compaction target level.
+ //
+ // The default value is 1.
+ CompactionSourceLimitFactor int
+
+ // CompactionTableSize limits size of 'sorted table' that compaction generates.
+ // The limits for each level will be calculated as:
+ // CompactionTableSize * (CompactionTableSizeMultiplier ^ Level)
+ // The multiplier for each level can also be fine-tuned using
+ // CompactionTableSizeMultiplierPerLevel.
+ //
+ // The default value is 2MiB.
+ CompactionTableSize int
+
+ // CompactionTableSizeMultiplier defines multiplier for CompactionTableSize.
+ //
+ // The default value is 1.
+ CompactionTableSizeMultiplier float64
+
+ // CompactionTableSizeMultiplierPerLevel defines per-level multiplier for
+ // CompactionTableSize.
+ // Use zero to skip a level.
+ //
+ // The default value is nil.
+ CompactionTableSizeMultiplierPerLevel []float64
+
+ // CompactionTotalSize limits total size of 'sorted table' for each level.
+ // The limits for each level will be calculated as:
+ // CompactionTotalSize * (CompactionTotalSizeMultiplier ^ Level)
+ // The multiplier for each level can also be fine-tuned using
+ // CompactionTotalSizeMultiplierPerLevel.
+ //
+ // The default value is 10MiB.
+ CompactionTotalSize int
+
+ // CompactionTotalSizeMultiplier defines multiplier for CompactionTotalSize.
+ //
+ // The default value is 10.
+ CompactionTotalSizeMultiplier float64
+
+ // CompactionTotalSizeMultiplierPerLevel defines per-level multiplier for
+ // CompactionTotalSize.
+ // Use zero to skip a level.
+ //
+ // The default value is nil.
+ CompactionTotalSizeMultiplierPerLevel []float64
+
+ // Comparer defines a total ordering over the space of []byte keys: a 'less
+ // than' relationship. The same comparison algorithm must be used for reads
+ // and writes over the lifetime of the DB.
+ //
+ // The default value uses the same ordering as bytes.Compare.
+ Comparer comparer.Comparer
+
+ // Compression defines the 'sorted table' block compression to use.
+ //
+ // The default value (DefaultCompression) uses snappy compression.
+ Compression Compression
+
+ // DisableBufferPool allows disabling the use of util.BufferPool
+ // functionality.
+ //
+ // The default value is false.
+ DisableBufferPool bool
+
+ // DisableBlockCache allows disabling the use of cache.Cache functionality
+ // on 'sorted table' block.
+ //
+ // The default value is false.
+ DisableBlockCache bool
+
+ // DisableCompactionBackoff allows disabling compaction retry backoff.
+ //
+ // The default value is false.
+ DisableCompactionBackoff bool
+
+ // DisableLargeBatchTransaction allows disabling switch-to-transaction mode
+ // on large batch write. If not disabled, batch writes larger than
+ // WriteBuffer will use a transaction.
+ //
+ // The default is false.
+ DisableLargeBatchTransaction bool
+
+ // ErrorIfExist defines whether an error should be returned if the DB
+ // already exists.
+ //
+ // The default value is false.
+ ErrorIfExist bool
+
+ // ErrorIfMissing defines whether an error should be returned if the DB is
+ // missing. If false then the database will be created if missing, otherwise
+ // an error will be returned.
+ //
+ // The default value is false.
+ ErrorIfMissing bool
+
+ // Filter defines an 'effective filter' to use. An 'effective filter'
+ // if defined will be used to generate per-table filter block.
+ // The filter name will be stored on disk.
+ // During reads LevelDB will try to find matching filter from
+ // 'effective filter' and 'alternative filters'.
+ //
+ // Filter can be changed after a DB has been created. It is recommended
+ // to put old filter to the 'alternative filters' to mitigate lack of
+ // filter during transition period.
+ //
+ // A filter is used to reduce disk reads when looking for a specific key.
+ //
+ // The default value is nil.
+ Filter filter.Filter
+
+ // IteratorSamplingRate defines approximate gap (in bytes) between read
+ // sampling of an iterator. The samples will be used to determine when
+ // compaction should be triggered.
+ //
+ // The default is 1MiB.
+ IteratorSamplingRate int
+
+ // NoSync allows completely disabling fsync.
+ //
+ // The default is false.
+ NoSync bool
+
+ // NoWriteMerge allows disabling write merge.
+ //
+ // The default is false.
+ NoWriteMerge bool
+
+ // OpenFilesCacher provides cache algorithm for open files caching.
+ // Specify NoCacher to disable caching algorithm.
+ //
+ // The default value is LRUCacher.
+ OpenFilesCacher Cacher
+
+ // OpenFilesCacheCapacity defines the capacity of the open files caching.
+ // Use -1 (or any negative value) for zero, this has same effect as
+ // specifying NoCacher to OpenFilesCacher.
+ //
+ // The default value is 500.
+ OpenFilesCacheCapacity int
+
+ // ReadOnly, if true, opens the DB in read-only mode.
+ //
+ // The default value is false.
+ ReadOnly bool
+
+ // Strict defines the DB strict level.
+ //
+ // A zero value selects DefaultStrict.
+ Strict Strict
+
+ // WriteBuffer defines maximum size of a 'memdb' before flushed to
+ // 'sorted table'. 'memdb' is an in-memory DB backed by an on-disk
+ // unsorted journal.
+ //
+ // LevelDB may hold up to two 'memdb' at the same time.
+ //
+ // The default value is 4MiB.
+ WriteBuffer int
+
+ // WriteL0PauseTrigger defines number of 'sorted table' at level-0 that will
+ // pause write.
+ //
+ // The default value is 12.
+ WriteL0PauseTrigger int
+
+ // WriteL0SlowdownTrigger defines number of 'sorted table' at level-0 that
+ // will trigger write slowdown.
+ //
+ // The default value is 8.
+ WriteL0SlowdownTrigger int
+}
+
+// GetAltFilters returns the configured alternative filters, or nil when o
+// is nil.
+func (o *Options) GetAltFilters() []filter.Filter {
+	if o != nil {
+		return o.AltFilters
+	}
+	return nil
+}
+
+// GetBlockCacher returns the block cacher to use: DefaultBlockCacher when
+// none is configured, nil when NoCacher is configured, and the configured
+// cacher otherwise.
+func (o *Options) GetBlockCacher() Cacher {
+	switch {
+	case o == nil || o.BlockCacher == nil:
+		return DefaultBlockCacher
+	case o.BlockCacher == NoCacher:
+		return nil
+	default:
+		return o.BlockCacher
+	}
+}
+
+// GetBlockCacheCapacity returns the block cache capacity:
+// DefaultBlockCacheCapacity when unset (zero), 0 when a negative value is
+// configured, and the configured value otherwise.
+func (o *Options) GetBlockCacheCapacity() int {
+	switch {
+	case o == nil || o.BlockCacheCapacity == 0:
+		return DefaultBlockCacheCapacity
+	case o.BlockCacheCapacity < 0:
+		return 0
+	default:
+		return o.BlockCacheCapacity
+	}
+}
+
+// GetBlockRestartInterval returns the configured block restart interval,
+// or DefaultBlockRestartInterval when it is unset or not positive.
+func (o *Options) GetBlockRestartInterval() int {
+	if o != nil && o.BlockRestartInterval > 0 {
+		return o.BlockRestartInterval
+	}
+	return DefaultBlockRestartInterval
+}
+
+// GetBlockSize returns the configured block size, or DefaultBlockSize
+// when it is unset or not positive.
+func (o *Options) GetBlockSize() int {
+	if o != nil && o.BlockSize > 0 {
+		return o.BlockSize
+	}
+	return DefaultBlockSize
+}
+
+// GetCompactionExpandLimit returns the table size limit of level+1
+// multiplied by CompactionExpandLimitFactor, falling back to
+// DefaultCompactionExpandLimitFactor when the factor is unset or not
+// positive.
+func (o *Options) GetCompactionExpandLimit(level int) int {
+	tableSize := o.GetCompactionTableSize(level + 1)
+	if o == nil || o.CompactionExpandLimitFactor <= 0 {
+		return tableSize * DefaultCompactionExpandLimitFactor
+	}
+	return tableSize * o.CompactionExpandLimitFactor
+}
+
+// GetCompactionGPOverlaps returns the table size limit of level+2
+// multiplied by CompactionGPOverlapsFactor, falling back to
+// DefaultCompactionGPOverlapsFactor when the factor is unset or not
+// positive.
+func (o *Options) GetCompactionGPOverlaps(level int) int {
+	tableSize := o.GetCompactionTableSize(level + 2)
+	if o == nil || o.CompactionGPOverlapsFactor <= 0 {
+		return tableSize * DefaultCompactionGPOverlapsFactor
+	}
+	return tableSize * o.CompactionGPOverlapsFactor
+}
+
+// GetCompactionL0Trigger returns the configured level-0 compaction
+// trigger, or DefaultCompactionL0Trigger when it is unset (zero). A
+// negative configured value is returned unchanged.
+func (o *Options) GetCompactionL0Trigger() int {
+	if o != nil && o.CompactionL0Trigger != 0 {
+		return o.CompactionL0Trigger
+	}
+	return DefaultCompactionL0Trigger
+}
+
+// GetCompactionSourceLimit returns the table size limit of level+1
+// multiplied by CompactionSourceLimitFactor, falling back to
+// DefaultCompactionSourceLimitFactor when the factor is unset or not
+// positive.
+func (o *Options) GetCompactionSourceLimit(level int) int {
+	tableSize := o.GetCompactionTableSize(level + 1)
+	if o == nil || o.CompactionSourceLimitFactor <= 0 {
+		return tableSize * DefaultCompactionSourceLimitFactor
+	}
+	return tableSize * o.CompactionSourceLimitFactor
+}
+
+// GetCompactionTableSize returns the size limit of a 'sorted table'
+// generated by compaction at the given level, computed as base * mult.
+//
+// base is CompactionTableSize, or DefaultCompactionTableSize when that is
+// unset or not positive. mult is the positive entry for this level in
+// CompactionTableSizeMultiplierPerLevel if there is one; otherwise
+// CompactionTableSizeMultiplier raised to the power of level if it is
+// positive; otherwise DefaultCompactionTableSizeMultiplier raised to the
+// power of level.
+//
+// A product too large for an int is clamped to the largest int. Go leaves
+// the result of an out-of-range float-to-integer conversion
+// implementation-dependent, so the unclamped conversion could yield a
+// negative or otherwise meaningless limit.
+func (o *Options) GetCompactionTableSize(level int) int {
+	const maxInt = int(^uint(0) >> 1)
+
+	var (
+		base = DefaultCompactionTableSize
+		mult float64
+	)
+	if o != nil {
+		if o.CompactionTableSize > 0 {
+			base = o.CompactionTableSize
+		}
+		if level < len(o.CompactionTableSizeMultiplierPerLevel) && o.CompactionTableSizeMultiplierPerLevel[level] > 0 {
+			mult = o.CompactionTableSizeMultiplierPerLevel[level]
+		} else if o.CompactionTableSizeMultiplier > 0 {
+			mult = math.Pow(o.CompactionTableSizeMultiplier, float64(level))
+		}
+	}
+	if mult == 0 {
+		mult = math.Pow(DefaultCompactionTableSizeMultiplier, float64(level))
+	}
+
+	size := float64(base) * mult
+	if size >= float64(maxInt) {
+		return maxInt
+	}
+	return int(size)
+}
+
+// GetCompactionTotalSize returns the limit on the total size of 'sorted
+// table' at the given level, computed as base * mult.
+//
+// base is CompactionTotalSize, or DefaultCompactionTotalSize when that is
+// unset or not positive. mult is the positive entry for this level in
+// CompactionTotalSizeMultiplierPerLevel if there is one; otherwise
+// CompactionTotalSizeMultiplier raised to the power of level if it is
+// positive; otherwise DefaultCompactionTotalSizeMultiplier raised to the
+// power of level.
+//
+// A product too large for an int64 is clamped to math.MaxInt64. Go leaves
+// the result of an out-of-range float-to-integer conversion
+// implementation-dependent, and with the default values the product
+// already exceeds the int64 range at level 12.
+func (o *Options) GetCompactionTotalSize(level int) int64 {
+	var (
+		base = DefaultCompactionTotalSize
+		mult float64
+	)
+	if o != nil {
+		if o.CompactionTotalSize > 0 {
+			base = o.CompactionTotalSize
+		}
+		if level < len(o.CompactionTotalSizeMultiplierPerLevel) && o.CompactionTotalSizeMultiplierPerLevel[level] > 0 {
+			mult = o.CompactionTotalSizeMultiplierPerLevel[level]
+		} else if o.CompactionTotalSizeMultiplier > 0 {
+			mult = math.Pow(o.CompactionTotalSizeMultiplier, float64(level))
+		}
+	}
+	if mult == 0 {
+		mult = math.Pow(DefaultCompactionTotalSizeMultiplier, float64(level))
+	}
+
+	size := float64(base) * mult
+	if size >= math.MaxInt64 {
+		return math.MaxInt64
+	}
+	return int64(size)
+}
+
+// GetComparer returns the configured comparer, or
+// comparer.DefaultComparer when none is configured.
+func (o *Options) GetComparer() comparer.Comparer {
+	if o != nil && o.Comparer != nil {
+		return o.Comparer
+	}
+	return comparer.DefaultComparer
+}
+
+// GetCompression returns the configured compression when it is one of
+// the explicit values (NoCompression or SnappyCompression). For
+// DefaultCompression or an out-of-range value it returns
+// DefaultCompressionType.
+func (o *Options) GetCompression() Compression {
+	if o != nil && o.Compression > DefaultCompression && o.Compression < nCompression {
+		return o.Compression
+	}
+	return DefaultCompressionType
+}
+
+// GetDisableBufferPool returns the DisableBufferPool setting; it is false
+// when o is nil.
+func (o *Options) GetDisableBufferPool() bool {
+	return o != nil && o.DisableBufferPool
+}
+
+// GetDisableBlockCache returns the DisableBlockCache setting; it is false
+// when o is nil.
+func (o *Options) GetDisableBlockCache() bool {
+	return o != nil && o.DisableBlockCache
+}
+
+// GetDisableCompactionBackoff returns the DisableCompactionBackoff
+// setting; it is false when o is nil.
+func (o *Options) GetDisableCompactionBackoff() bool {
+	return o != nil && o.DisableCompactionBackoff
+}
+
+// GetDisableLargeBatchTransaction returns the
+// DisableLargeBatchTransaction setting; it is false when o is nil.
+func (o *Options) GetDisableLargeBatchTransaction() bool {
+	return o != nil && o.DisableLargeBatchTransaction
+}
+
+// GetErrorIfExist returns the ErrorIfExist setting; it is false when o is
+// nil.
+func (o *Options) GetErrorIfExist() bool {
+	return o != nil && o.ErrorIfExist
+}
+
+// GetErrorIfMissing returns the ErrorIfMissing setting; it is false when
+// o is nil.
+func (o *Options) GetErrorIfMissing() bool {
+	return o != nil && o.ErrorIfMissing
+}
+
+// GetFilter returns the configured 'effective filter', or nil when o is
+// nil.
+func (o *Options) GetFilter() filter.Filter {
+	if o != nil {
+		return o.Filter
+	}
+	return nil
+}
+
+// GetIteratorSamplingRate returns the configured iterator sampling rate,
+// or DefaultIteratorSamplingRate when it is unset or not positive.
+func (o *Options) GetIteratorSamplingRate() int {
+	if o != nil && o.IteratorSamplingRate > 0 {
+		return o.IteratorSamplingRate
+	}
+	return DefaultIteratorSamplingRate
+}
+
+// GetNoSync returns the NoSync setting; it is false when o is nil.
+func (o *Options) GetNoSync() bool {
+	return o != nil && o.NoSync
+}
+
+// GetNoWriteMerge returns the NoWriteMerge setting; it is false when o is
+// nil.
+func (o *Options) GetNoWriteMerge() bool {
+	return o != nil && o.NoWriteMerge
+}
+
+// GetOpenFilesCacher returns the open-files cacher to use:
+// DefaultOpenFilesCacher when none is configured, nil when NoCacher is
+// configured, and the configured cacher otherwise.
+func (o *Options) GetOpenFilesCacher() Cacher {
+	switch {
+	case o == nil || o.OpenFilesCacher == nil:
+		return DefaultOpenFilesCacher
+	case o.OpenFilesCacher == NoCacher:
+		return nil
+	default:
+		return o.OpenFilesCacher
+	}
+}
+
+// GetOpenFilesCacheCapacity returns the open-files cache capacity:
+// DefaultOpenFilesCacheCapacity when unset (zero), 0 when a negative value
+// is configured, and the configured value otherwise.
+func (o *Options) GetOpenFilesCacheCapacity() int {
+	switch {
+	case o == nil || o.OpenFilesCacheCapacity == 0:
+		return DefaultOpenFilesCacheCapacity
+	case o.OpenFilesCacheCapacity < 0:
+		return 0
+	default:
+		return o.OpenFilesCacheCapacity
+	}
+}
+
+// GetReadOnly returns the ReadOnly setting; it is false when o is nil.
+func (o *Options) GetReadOnly() bool {
+	return o != nil && o.ReadOnly
+}
+
+// GetStrict reports whether any of the given strict flags is set in the
+// effective strict level. The effective level is o.Strict, or
+// DefaultStrict when o is nil or o.Strict is zero.
+func (o *Options) GetStrict(strict Strict) bool {
+	effective := DefaultStrict
+	if o != nil && o.Strict != 0 {
+		effective = o.Strict
+	}
+	return effective&strict != 0
+}
+
+// GetWriteBuffer returns the configured write buffer size, or
+// DefaultWriteBuffer when it is unset or not positive.
+func (o *Options) GetWriteBuffer() int {
+	if o != nil && o.WriteBuffer > 0 {
+		return o.WriteBuffer
+	}
+	return DefaultWriteBuffer
+}
+
+// GetWriteL0PauseTrigger returns the configured level-0 write pause
+// trigger, or DefaultWriteL0PauseTrigger when it is unset (zero).
+func (o *Options) GetWriteL0PauseTrigger() int {
+	if o != nil && o.WriteL0PauseTrigger != 0 {
+		return o.WriteL0PauseTrigger
+	}
+	return DefaultWriteL0PauseTrigger
+}
+
+// GetWriteL0SlowdownTrigger returns the configured level-0 write slowdown
+// trigger, or DefaultWriteL0SlowdownTrigger when it is unset (zero).
+func (o *Options) GetWriteL0SlowdownTrigger() int {
+	if o != nil && o.WriteL0SlowdownTrigger != 0 {
+		return o.WriteL0SlowdownTrigger
+	}
+	return DefaultWriteL0SlowdownTrigger
+}
+
+// ReadOptions holds the optional parameters for a 'read operation'. The
+// 'read operation' includes Get, Find and NewIterator.
+type ReadOptions struct {
+	// DontFillCache defines whether block reads for this 'read operation'
+	// should be cached. If false then the block will be cached. This does
+	// not affect blocks that are already cached.
+	//
+	// The default value is false.
+	DontFillCache bool
+
+	// Strict will be OR'ed with the global DB 'strict level' unless
+	// StrictOverride is present. Currently only StrictReader has an
+	// effect here.
+	Strict Strict
+}
+
+// GetDontFillCache returns the DontFillCache setting; it is false when ro
+// is nil.
+func (ro *ReadOptions) GetDontFillCache() bool {
+	return ro != nil && ro.DontFillCache
+}
+
+// GetStrict reports whether any of the given strict flags is set in
+// ro.Strict; it is false when ro is nil.
+func (ro *ReadOptions) GetStrict(strict Strict) bool {
+	return ro != nil && ro.Strict&strict != 0
+}
+
+// WriteOptions holds the optional parameters for a 'write operation'. The
+// 'write operation' includes Write, Put and Delete.
+type WriteOptions struct {
+	// NoWriteMerge allows disabling write merge.
+	//
+	// The default is false.
+	NoWriteMerge bool
+
+	// Sync is whether to sync underlying writes from the OS buffer cache
+	// through to actual disk, if applicable. Setting Sync can result in
+	// slower writes.
+	//
+	// If false, and the machine crashes, then some recent writes may be lost.
+	// Note that if it is just the process that crashes (and the machine does
+	// not) then no writes will be lost.
+	//
+	// In other words, Sync being false has the same semantics as a write
+	// system call. Sync being true means write followed by fsync.
+	//
+	// The default value is false.
+	Sync bool
+}
+
+// GetNoWriteMerge returns the NoWriteMerge setting; it is false when wo
+// is nil.
+func (wo *WriteOptions) GetNoWriteMerge() bool {
+	return wo != nil && wo.NoWriteMerge
+}
+
+// GetSync returns the Sync setting; it is false when wo is nil.
+func (wo *WriteOptions) GetSync() bool {
+	return wo != nil && wo.Sync
+}
+
+// GetStrict reports whether the given strict flag applies to a read made
+// with ro against a DB configured with o. When ro carries StrictOverride,
+// only ro's flags are consulted; otherwise the flag applies if either ro
+// or o has it.
+func GetStrict(o *Options, ro *ReadOptions, strict Strict) bool {
+	fromRead := ro.GetStrict(strict)
+	if ro.GetStrict(StrictOverride) {
+		return fromRead
+	}
+	return fromRead || o.GetStrict(strict)
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/options.go b/vendor/github.com/syndtr/goleveldb/leveldb/options.go
new file mode 100644
index 000000000..b072b1ac4
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/options.go
@@ -0,0 +1,107 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/filter"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+)
+
+// dupOptions returns a new Options value holding a shallow copy of o (or
+// the zero Options when o is nil). A zero Strict in the copy is replaced
+// by opt.DefaultStrict.
+func dupOptions(o *opt.Options) *opt.Options {
+	var dup opt.Options
+	if o != nil {
+		dup = *o
+	}
+	if dup.Strict == 0 {
+		dup.Strict = opt.DefaultStrict
+	}
+	return &dup
+}
+
+// setOptions installs a private copy of o on the session. In the copy,
+// the comparer is wrapped in an iComparer (also stored in s.icmp), and
+// the filter and every alternative filter are wrapped in an iFilter. The
+// copy is then stored in s.o together with its cached per-level values.
+func (s *session) setOptions(o *opt.Options) {
+	no := dupOptions(o)
+
+	// Alternative filters.
+	if alts := o.GetAltFilters(); len(alts) > 0 {
+		wrapped := make([]filter.Filter, len(alts))
+		for i := range alts {
+			wrapped[i] = &iFilter{alts[i]}
+		}
+		no.AltFilters = wrapped
+	}
+
+	// Comparer.
+	s.icmp = &iComparer{o.GetComparer()}
+	no.Comparer = s.icmp
+
+	// Filter.
+	if f := o.GetFilter(); f != nil {
+		no.Filter = &iFilter{f}
+	}
+
+	s.o = &cachedOptions{Options: no}
+	s.o.cache()
+}
+
+// optCachedLevel is the number of levels, starting at level 0, for which
+// cachedOptions precomputes the per-level compaction values.
+const optCachedLevel = 7
+
+// cachedOptions wraps *opt.Options and keeps precomputed per-level
+// compaction values for levels below optCachedLevel. cache must be called
+// before any of the per-level getters is used.
+type cachedOptions struct {
+	*opt.Options
+
+	compactionExpandLimit []int
+	compactionGPOverlaps  []int
+	compactionSourceLimit []int
+	compactionTableSize   []int
+	compactionTotalSize   []int64
+}
+
+// cache computes the per-level values for levels 0 through
+// optCachedLevel-1 from the embedded Options and stores them.
+func (co *cachedOptions) cache() {
+	var (
+		expandLimit = make([]int, optCachedLevel)
+		gpOverlaps  = make([]int, optCachedLevel)
+		sourceLimit = make([]int, optCachedLevel)
+		tableSize   = make([]int, optCachedLevel)
+		totalSize   = make([]int64, optCachedLevel)
+	)
+	for level := range tableSize {
+		expandLimit[level] = co.Options.GetCompactionExpandLimit(level)
+		gpOverlaps[level] = co.Options.GetCompactionGPOverlaps(level)
+		sourceLimit[level] = co.Options.GetCompactionSourceLimit(level)
+		tableSize[level] = co.Options.GetCompactionTableSize(level)
+		totalSize[level] = co.Options.GetCompactionTotalSize(level)
+	}
+
+	co.compactionExpandLimit = expandLimit
+	co.compactionGPOverlaps = gpOverlaps
+	co.compactionSourceLimit = sourceLimit
+	co.compactionTableSize = tableSize
+	co.compactionTotalSize = totalSize
+}
+
+// GetCompactionExpandLimit returns the cached value for levels below
+// optCachedLevel and computes it from the embedded Options otherwise.
+func (co *cachedOptions) GetCompactionExpandLimit(level int) int {
+	if level >= optCachedLevel {
+		return co.Options.GetCompactionExpandLimit(level)
+	}
+	return co.compactionExpandLimit[level]
+}
+
+// GetCompactionGPOverlaps returns the cached value for levels below
+// optCachedLevel and computes it from the embedded Options otherwise.
+func (co *cachedOptions) GetCompactionGPOverlaps(level int) int {
+	if level >= optCachedLevel {
+		return co.Options.GetCompactionGPOverlaps(level)
+	}
+	return co.compactionGPOverlaps[level]
+}
+
+// GetCompactionSourceLimit returns the cached value for levels below
+// optCachedLevel and computes it from the embedded Options otherwise.
+func (co *cachedOptions) GetCompactionSourceLimit(level int) int {
+	if level >= optCachedLevel {
+		return co.Options.GetCompactionSourceLimit(level)
+	}
+	return co.compactionSourceLimit[level]
+}
+
+// GetCompactionTableSize returns the cached value for levels below
+// optCachedLevel and computes it from the embedded Options otherwise.
+func (co *cachedOptions) GetCompactionTableSize(level int) int {
+	if level >= optCachedLevel {
+		return co.Options.GetCompactionTableSize(level)
+	}
+	return co.compactionTableSize[level]
+}
+
+// GetCompactionTotalSize returns the cached value for levels below
+// optCachedLevel and computes it from the embedded Options otherwise.
+func (co *cachedOptions) GetCompactionTotalSize(level int) int64 {
+	if level >= optCachedLevel {
+		return co.Options.GetCompactionTotalSize(level)
+	}
+	return co.compactionTotalSize[level]
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session.go b/vendor/github.com/syndtr/goleveldb/leveldb/session.go
new file mode 100644
index 000000000..f3e747701
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/session.go
@@ -0,0 +1,208 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "sync"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/journal"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+// ErrManifestCorrupted records manifest corruption: the manifest field
+// concerned and the reason it is considered corrupted. This error will be
+// wrapped with errors.ErrCorrupted.
+type ErrManifestCorrupted struct {
+	Field  string
+	Reason string
+}
+
+// Error implements the error interface; the message names the field and
+// the reason.
+func (e *ErrManifestCorrupted) Error() string {
+	const format = "leveldb: manifest corrupted (field '%s'): %s"
+	return fmt.Sprintf(format, e.Field, e.Reason)
+}
+
+// newErrManifestCorrupted builds an ErrManifestCorrupted for the given
+// field and reason and wraps it with errors.NewErrCorrupted for fd.
+func newErrManifestCorrupted(fd storage.FileDesc, field, reason string) error {
+	cause := &ErrManifestCorrupted{Field: field, Reason: reason}
+	return errors.NewErrCorrupted(fd, cause)
+}
+
+// session represents a persistent database session.
+type session struct {
+ // Need 64-bit alignment.
+ stNextFileNum int64 // current unused file number
+ stJournalNum int64 // current journal file number; need external synchronization
+ stPrevJournalNum int64 // prev journal file number; no longer used; for compatibility with older version of leveldb
+ stTempFileNum int64
+ stSeqNum uint64 // last mem compacted seq; need external synchronization
+
+ stor storage.Storage // underlying storage, supplied to newSession
+ storLock storage.Locker // lock obtained from stor.Lock in newSession; released by release
+ o *cachedOptions // options installed by setOptions
+ icmp *iComparer // comparer wrapper installed by setOptions
+ tops *tOps // created by newTableOps in newSession
+
+ // Manifest state. manifest and manifestWriter are closed and set to nil
+ // by close; manifestFd is set by recover.
+ manifest *journal.Writer
+ manifestWriter storage.Writer
+ manifestFd storage.FileDesc
+
+ stCompPtrs []internalKey // compaction pointers; need external synchronization
+ stVersion *version // current version
+ vmu sync.Mutex // NOTE(review): presumably guards stVersion; confirm against setVersion/version
+}
+
+// newSession creates a new initialized session instance on top of stor.
+// It returns os.ErrInvalid when stor is nil, and the error from
+// stor.Lock when the storage cannot be locked.
+func newSession(stor storage.Storage, o *opt.Options) (s *session, err error) {
+	if stor == nil {
+		return nil, os.ErrInvalid
+	}
+
+	lock, err := stor.Lock()
+	if err != nil {
+		return nil, err
+	}
+
+	s = &session{stor: stor, storLock: lock}
+	s.setOptions(o)
+	s.tops = newTableOps(s)
+	s.setVersion(newVersion(s))
+	s.log("log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed")
+	return s, nil
+}
+
+// close closes the session: the table ops, then the manifest journal
+// writer and the manifest file writer if they are open. Errors returned
+// by the Close calls are not reported. Finally a version marked as
+// closing is installed.
+func (s *session) close() {
+	s.tops.close()
+
+	if jw := s.manifest; jw != nil {
+		jw.Close()
+		s.manifest = nil
+	}
+	if fw := s.manifestWriter; fw != nil {
+		fw.Close()
+		s.manifestWriter = nil
+	}
+
+	s.setVersion(&version{s: s, closing: true})
+}
+
+// release releases the session lock, i.e. the storage lock obtained in
+// newSession.
+func (s *session) release() {
+ s.storLock.Unlock()
+}
+
+// create creates a new database session by writing a new manifest with
+// no session record and no version; need external synchronization.
+func (s *session) create() error {
+ // create manifest
+ return s.newManifest(nil, nil)
+}
+
+// recover recovers a database session from the manifest; need external
+// synchronization.
+//
+// It opens the file returned by s.stor.GetMeta, reads it as a journal and
+// applies every decodable session record to a version staging area. On
+// success the staged version, next file number and last record are
+// installed on the session. An error is returned when the manifest cannot
+// be found, opened or read, or when it lacks required fields or names a
+// different comparer.
+func (s *session) recover() (err error) {
+ // Runs after err has received its final value.
+ defer func() {
+ if os.IsNotExist(err) {
+ // Don't return os.ErrNotExist if the underlying storage contains
+ // other files that belong to LevelDB. So the DB won't get trashed.
+ if fds, _ := s.stor.List(storage.TypeAll); len(fds) > 0 {
+ err = &errors.ErrCorrupted{Fd: storage.FileDesc{Type: storage.TypeManifest}, Err: &errors.ErrMissingFiles{}}
+ }
+ }
+ }()
+
+ // fd is used as the manifest file descriptor from here on.
+ fd, err := s.stor.GetMeta()
+ if err != nil {
+ return
+ }
+
+ reader, err := s.stor.Open(fd)
+ if err != nil {
+ return
+ }
+ defer reader.Close()
+
+ var (
+ // Options.
+ strict = s.o.GetStrict(opt.StrictManifest)
+
+ jr = journal.NewReader(reader, dropper{s, fd}, strict, true)
+ rec = &sessionRecord{}
+ staging = s.stVersion.newStaging()
+ )
+ // Read journal records until EOF. rec is reused for every record; only
+ // its compaction pointers and added/deleted tables are reset between
+ // iterations.
+ for {
+ var r io.Reader
+ r, err = jr.Next()
+ if err != nil {
+ if err == io.EOF {
+ err = nil
+ break
+ }
+ return errors.SetFd(err, fd)
+ }
+
+ err = rec.decode(r)
+ if err == nil {
+ // save compact pointers
+ for _, r := range rec.compPtrs {
+ s.setCompPtr(r.level, internalKey(r.ikey))
+ }
+ // commit record to version staging
+ staging.commit(rec)
+ } else {
+ err = errors.SetFd(err, fd)
+ // A decode error is fatal in strict mode or when it is not a
+ // corruption error; otherwise the record is logged and skipped.
+ if strict || !errors.IsCorrupted(err) {
+ return
+ }
+ // NOTE(review): err already went through errors.SetFd above, so
+ // the second SetFd call here looks redundant.
+ s.logf("manifest error: %v (skipped)", errors.SetFd(err, fd))
+ }
+ rec.resetCompPtrs()
+ rec.resetAddedTables()
+ rec.resetDeletedTables()
+ }
+
+ // Validate the fields left in rec once the whole manifest has been read.
+ switch {
+ case !rec.has(recComparer):
+ return newErrManifestCorrupted(fd, "comparer", "missing")
+ case rec.comparer != s.icmp.uName():
+ return newErrManifestCorrupted(fd, "comparer", fmt.Sprintf("mismatch: want '%s', got '%s'", s.icmp.uName(), rec.comparer))
+ case !rec.has(recNextFileNum):
+ return newErrManifestCorrupted(fd, "next-file-num", "missing")
+ case !rec.has(recJournalNum):
+ return newErrManifestCorrupted(fd, "journal-file-num", "missing")
+ case !rec.has(recSeqNum):
+ return newErrManifestCorrupted(fd, "seq-num", "missing")
+ }
+
+ // Install the recovered state on the session.
+ s.manifestFd = fd
+ s.setVersion(staging.finish())
+ s.setNextFileNum(rec.nextFileNum)
+ s.recordCommited(rec)
+ return nil
+}
+
+// commit persists the session record r to the manifest and, when that
+// succeeds, makes the version spawned from r the current version; need
+// external synchronization.
+func (s *session) commit(r *sessionRecord) (err error) {
+	v := s.version()
+	defer v.release()
+
+	// Spawn the new version from the current one.
+	nv := v.spawn(r)
+
+	if s.manifest != nil {
+		err = s.flushManifest(r)
+	} else {
+		// The manifest journal writer does not exist yet; create one.
+		err = s.newManifest(r, nv)
+	}
+	if err != nil {
+		return err
+	}
+
+	// Apply the new version only once the manifest has been written.
+	s.setVersion(nv)
+	return nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go b/vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go
new file mode 100644
index 000000000..089cd00b2
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go
@@ -0,0 +1,302 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "sync/atomic"
+
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/memdb"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+)
+
+// pickMemdbLevel forwards to version.pickMemdbLevel on the current version,
+// holding a reference to that version for the duration of the call.
+func (s *session) pickMemdbLevel(umin, umax []byte, maxLevel int) int {
+	cur := s.version()
+	defer cur.release()
+
+	level := cur.pickMemdbLevel(umin, umax, maxLevel)
+	return level
+}
+
+// flushMemdb writes the contents of mdb into a new sorted table, registers
+// that table in rec at the level chosen by pickMemdbLevel and returns the
+// chosen level. On failure to create the table it returns 0 and the error.
+func (s *session) flushMemdb(rec *sessionRecord, mdb *memdb.DB, maxLevel int) (int, error) {
+	it := mdb.NewIterator(nil)
+	defer it.Release()
+
+	table, count, err := s.tops.createFrom(it)
+	if err != nil {
+		return 0, err
+	}
+
+	// Note on level choice: picking a level other than zero can cause a
+	// compaction issue with large bulk inserts and deletes on a strictly
+	// incrementing key-space. Small deletion markers get trapped at a lower
+	// level while key/value entries keep growing at a higher level; because
+	// the key-space never overlaps the higher level, the maximum possible
+	// level is always picked while overlapping deletion markers are pushed
+	// into the lower level.
+	// See: https://github.com/syndtr/goleveldb/issues/127.
+	umin, umax := table.imin.ukey(), table.imax.ukey()
+	level := s.pickMemdbLevel(umin, umax, maxLevel)
+	rec.addTableFile(level, table)
+
+	s.logf("memdb@flush created L%d@%d N·%d S·%s %q:%q", level, table.fd.Num, count, shortenb(int(table.size)), table.imin, table.imax)
+	return level, nil
+}
+
+// pickCompaction chooses the next compaction from the current version, or
+// returns nil when there is nothing to do. Callers must provide external
+// synchronization.
+//
+// When the version's compaction score is at least 1, the source level is
+// v.cLevel and the input is the first table whose largest key sorts after
+// that level's compaction pointer (or the level's first table if none does).
+// Otherwise the table recorded in v.cSeek, if any, is used. The version
+// reference is handed over to the returned compaction; it is released here
+// only on the nil path.
+func (s *session) pickCompaction() *compaction {
+	v := s.version()
+
+	var (
+		level  int
+		inputs tFiles
+	)
+	if v.cScore >= 1 {
+		level = v.cLevel
+		cptr := s.getCompPtr(level)
+		candidates := v.levels[level]
+		for i := range candidates {
+			if cptr == nil || s.icmp.Compare(candidates[i].imax, cptr) > 0 {
+				inputs = append(inputs, candidates[i])
+				break
+			}
+		}
+		if len(inputs) == 0 {
+			// Every table is at or before the pointer; start over.
+			inputs = append(inputs, candidates[0])
+		}
+	} else {
+		p := atomic.LoadPointer(&v.cSeek)
+		if p == nil {
+			v.release()
+			return nil
+		}
+		seek := (*tSet)(p)
+		level = seek.level
+		inputs = append(inputs, seek.table)
+	}
+
+	return newCompaction(s, v, level, inputs)
+}
+
+// Create compaction from given level and range; need external synchronization.
+func (s *session) getCompactionRange(sourceLevel int, umin, umax []byte, noLimit bool) *compaction {
+ v := s.version()
+
+ if sourceLevel >= len(v.levels) {
+ v.release()
+ return nil
+ }
+
+ t0 := v.levels[sourceLevel].getOverlaps(nil, s.icmp, umin, umax, sourceLevel == 0)
+ if len(t0) == 0 {
+ v.release()
+ return nil
+ }
+
+ // Avoid compacting too much in one shot in case the range is large.
+ // But we cannot do this for level-0 since level-0 files can overlap
+ // and we must not pick one file and drop another older file if the
+ // two files overlap.
+ if !noLimit && sourceLevel > 0 {
+ limit := int64(v.s.o.GetCompactionSourceLimit(sourceLevel))
+ total := int64(0)
+ for i, t := range t0 {
+ total += t.size
+ if total >= limit {
+ s.logf("table@compaction limiting F·%d -> F·%d", len(t0), i+1)
+ t0 = t0[:i+1]
+ break
+ }
+ }
+ }
+
+ return newCompaction(s, v, sourceLevel, t0)
+}
+
+// newCompaction creates the compaction state for compacting t0 out of
+// sourceLevel in version v, expands its inputs and snapshots the initial
+// iteration state. The compaction takes over the caller's reference on v.
+func newCompaction(s *session, v *version, sourceLevel int, t0 tFiles) *compaction {
+	c := new(compaction)
+	c.s, c.v = s, v
+	c.sourceLevel = sourceLevel
+	c.levels[0] = t0
+	c.maxGPOverlaps = int64(s.o.GetCompactionGPOverlaps(sourceLevel))
+	c.tPtrs = make([]int, len(v.levels))
+
+	c.expand()
+	c.save()
+	return c
+}
+
+// compaction represents the state of a single compaction.
+type compaction struct {
+ // s is the owning session; v is the version the compaction was built
+ // from and is released (once) by release().
+ s *session
+ v *version
+
+ // sourceLevel is the level being compacted. levels holds the input
+ // tables: index 0 for sourceLevel, index 1 for sourceLevel+1.
+ // maxGPOverlaps is the byte limit compared against grandparent overlap
+ // in trivial() and shouldStopBefore().
+ sourceLevel int
+ levels [2]tFiles
+ maxGPOverlaps int64
+
+ // gp holds the sourceLevel+2 tables overlapping the compaction range as
+ // computed by expand(). gpi, seenKey and gpOverlappedBytes are the
+ // running state of shouldStopBefore(); tPtrs holds the per-level cursors
+ // of baseLevelForKey(). imin/imax are the key bounds set by expand().
+ gp tFiles
+ gpi int
+ seenKey bool
+ gpOverlappedBytes int64
+ imin, imax internalKey
+ tPtrs []int
+ released bool
+
+ // snap* fields are the copies taken by save() and put back by restore().
+ snapGPI int
+ snapSeenKey bool
+ snapGPOverlappedBytes int64
+ snapTPtrs []int
+}
+
+// save snapshots the iteration state (grandparent index, seen-key flag,
+// overlapped byte count and per-level table cursors) so that restore can
+// rewind to this point. The snapshot slice reuses its backing array.
+func (c *compaction) save() {
+	c.snapGPI, c.snapSeenKey, c.snapGPOverlappedBytes = c.gpi, c.seenKey, c.gpOverlappedBytes
+	c.snapTPtrs = append(c.snapTPtrs[:0], c.tPtrs...)
+}
+
+// restore rewinds the iteration state to the values captured by the most
+// recent save.
+func (c *compaction) restore() {
+	c.gpi, c.seenKey, c.gpOverlappedBytes = c.snapGPI, c.snapSeenKey, c.snapGPOverlappedBytes
+	c.tPtrs = append(c.tPtrs[:0], c.snapTPtrs...)
+}
+
+// release drops the compaction's reference on its version. Calls after the
+// first are no-ops.
+func (c *compaction) release() {
+	if c.released {
+		return
+	}
+	c.released = true
+	c.v.release()
+}
+
+// Expand compacted tables; need external synchronization.
+//
+// expand widens the inputs in c.levels[0] to every sourceLevel table that
+// overlaps their key range, collects the overlapping sourceLevel+1 tables
+// into c.levels[1], optionally grows the sourceLevel inputs further when
+// that does not add any sourceLevel+1 table and stays under the configured
+// expand limit, and finally records the overlapping sourceLevel+2 tables in
+// c.gp and the input key bounds in c.imin/c.imax.
+func (c *compaction) expand() {
+ limit := int64(c.s.o.GetCompactionExpandLimit(c.sourceLevel))
+ vt0 := c.v.levels[c.sourceLevel]
+ // vt1 stays empty when sourceLevel is the deepest level of the version.
+ vt1 := tFiles{}
+ if level := c.sourceLevel + 1; level < len(c.v.levels) {
+ vt1 = c.v.levels[level]
+ }
+
+ t0, t1 := c.levels[0], c.levels[1]
+ imin, imax := t0.getRange(c.s.icmp)
+ // We expand t0 here just incase ukey hop across tables.
+ t0 = vt0.getOverlaps(t0, c.s.icmp, imin.ukey(), imax.ukey(), c.sourceLevel == 0)
+ // The range only needs recomputing when the table count changed.
+ if len(t0) != len(c.levels[0]) {
+ imin, imax = t0.getRange(c.s.icmp)
+ }
+ t1 = vt1.getOverlaps(t1, c.s.icmp, imin.ukey(), imax.ukey(), false)
+ // Get entire range covered by compaction.
+ amin, amax := append(t0, t1...).getRange(c.s.icmp)
+
+ // See if we can grow the number of inputs in "sourceLevel" without
+ // changing the number of "sourceLevel+1" files we pick up.
+ if len(t1) > 0 {
+ exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), c.sourceLevel == 0)
+ if len(exp0) > len(t0) && t1.size()+exp0.size() < limit {
+ xmin, xmax := exp0.getRange(c.s.icmp)
+ exp1 := vt1.getOverlaps(nil, c.s.icmp, xmin.ukey(), xmax.ukey(), false)
+ // Accept the wider input set only if it pulls in no extra
+ // sourceLevel+1 table.
+ if len(exp1) == len(t1) {
+ c.s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)",
+ c.sourceLevel, c.sourceLevel+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())),
+ len(exp0), shortenb(int(exp0.size())), len(exp1), shortenb(int(exp1.size())))
+ imin, imax = xmin, xmax
+ t0, t1 = exp0, exp1
+ amin, amax = append(t0, t1...).getRange(c.s.icmp)
+ }
+ }
+ }
+
+ // Compute the set of grandparent files that overlap this compaction
+ // (parent == sourceLevel+1; grandparent == sourceLevel+2)
+ if level := c.sourceLevel + 2; level < len(c.v.levels) {
+ c.gp = c.v.levels[level].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false)
+ }
+
+ c.levels[0], c.levels[1] = t0, t1
+ c.imin, c.imax = imin, imax
+}
+
+// trivial reports whether the compaction has exactly one sourceLevel input,
+// no sourceLevel+1 input, and a total grandparent overlap size within
+// maxGPOverlaps.
+func (c *compaction) trivial() bool {
+	if len(c.levels[0]) != 1 || len(c.levels[1]) != 0 {
+		return false
+	}
+	return c.gp.size() <= c.maxGPOverlaps
+}
+
+// baseLevelForKey reports whether no table at sourceLevel+2 or deeper has a
+// user-key range that contains ukey.
+//
+// The per-level cursors in c.tPtrs only ever advance, so this presumably
+// expects to be called with non-decreasing keys between save/restore points
+// — TODO confirm against the caller.
+func (c *compaction) baseLevelForKey(ukey []byte) bool {
+ for level := c.sourceLevel + 2; level < len(c.v.levels); level++ {
+ tables := c.v.levels[level]
+ for c.tPtrs[level] < len(tables) {
+ t := tables[c.tPtrs[level]]
+ if c.s.icmp.uCompare(ukey, t.imax.ukey()) <= 0 {
+ // We've advanced far enough.
+ if c.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 {
+ // Key falls in this file's range, so definitely not base level.
+ return false
+ }
+ break
+ }
+ // ukey is past this table; skip it for subsequent calls too.
+ c.tPtrs[level]++
+ }
+ }
+ return true
+}
+
+// shouldStopBefore reports whether the current output should be finished
+// before ikey is added. It advances past every grandparent table whose
+// largest key sorts before ikey, accumulating their sizes (once a first key
+// has been seen), and answers true — resetting the accumulator — when the
+// accumulated size exceeds maxGPOverlaps.
+func (c *compaction) shouldStopBefore(ikey internalKey) bool {
+	for c.gpi < len(c.gp) {
+		f := c.gp[c.gpi]
+		if c.s.icmp.Compare(ikey, f.imax) <= 0 {
+			break
+		}
+		if c.seenKey {
+			c.gpOverlappedBytes += f.size
+		}
+		c.gpi++
+	}
+	c.seenKey = true
+
+	stop := c.gpOverlappedBytes > c.maxGPOverlaps
+	if stop {
+		// Too much overlap for the current output; start a new one.
+		c.gpOverlappedBytes = 0
+	}
+	return stop
+}
+
+// Creates an iterator.
+//
+// newIterator returns a merged iterator over all input tables of the
+// compaction. Tables that live at level 0 each get their own iterator;
+// the tables of any other level are chained behind one indexed iterator.
+// Reads bypass cache filling, and strict reading is enabled when the
+// StrictCompaction option is set.
+func (c *compaction) newIterator() iterator.Iterator {
+ // Creates iterator slice.
+ icap := len(c.levels)
+ if c.sourceLevel == 0 {
+ // Special case for level-0.
+ icap = len(c.levels[0]) + 1
+ }
+ its := make([]iterator.Iterator, 0, icap)
+
+ // Options.
+ ro := &opt.ReadOptions{
+ DontFillCache: true,
+ Strict: opt.StrictOverride,
+ }
+ strict := c.s.o.GetStrict(opt.StrictCompaction)
+ if strict {
+ ro.Strict |= opt.StrictReader
+ }
+
+ for i, tables := range c.levels {
+ if len(tables) == 0 {
+ continue
+ }
+
+ // Level-0 is not sorted and may overlaps each other.
+ if c.sourceLevel+i == 0 {
+ for _, t := range tables {
+ its = append(its, c.s.tops.newIterator(t, nil, ro))
+ }
+ } else {
+ it := iterator.NewIndexedIterator(tables.newIndexIterator(c.s.tops, c.s.icmp, nil, ro), strict)
+ its = append(its, it)
+ }
+ }
+
+ return iterator.NewMergedIterator(its, c.s.icmp, strict)
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session_record.go b/vendor/github.com/syndtr/goleveldb/leveldb/session_record.go
new file mode 100644
index 000000000..854e1aa6f
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/session_record.go
@@ -0,0 +1,323 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "bufio"
+ "encoding/binary"
+ "io"
+ "strings"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+// byteReader is a reader that can also be read one byte at a time; decode
+// wraps readers that lack ReadByte in a bufio.Reader to obtain one.
+type byteReader interface {
+ io.Reader
+ io.ByteReader
+}
+
+// These numbers are written to disk and should not be changed.
+//
+// Each value is the field tag that precedes a field in an encoded session
+// record, and also the bit position used in sessionRecord.hasRec.
+const (
+ recComparer = 1
+ recJournalNum = 2
+ recNextFileNum = 3
+ recSeqNum = 4
+ recCompPtr = 5
+ recDelTable = 6
+ recAddTable = 7
+ // 8 was used for large value refs
+ recPrevJournalNum = 9
+)
+
+// cpRecord is a compaction-pointer entry: an internal key for one level.
+// It is built with an unkeyed literal, so the field order must not change.
+type cpRecord struct {
+ level int
+ ikey internalKey
+}
+
+// atRecord is an added-table entry: the table's level, file number, size
+// and smallest/largest internal keys. It is built with an unkeyed literal,
+// so the field order must not change.
+type atRecord struct {
+ level int
+ num int64
+ size int64
+ imin internalKey
+ imax internalKey
+}
+
+// dtRecord is a deleted-table entry: the table's level and file number.
+// It is built with an unkeyed literal, so the field order must not change.
+type dtRecord struct {
+ level int
+ num int64
+}
+
+// sessionRecord is one manifest record: a set of optional scalar fields
+// plus lists of compaction pointers and added/deleted tables.
+type sessionRecord struct {
+ // hasRec is a bit set indexed by the rec* constants; a set bit means
+ // the corresponding field or list has been populated.
+ hasRec int
+ comparer string
+ journalNum int64
+ prevJournalNum int64
+ nextFileNum int64
+ seqNum uint64
+ compPtrs []cpRecord
+ addedTables []atRecord
+ deletedTables []dtRecord
+
+ // scratch is the varint encoding buffer; err is the sticky error of the
+ // encode/decode call in progress.
+ scratch [binary.MaxVarintLen64]byte
+ err error
+}
+
+// has reports whether the field identified by rec (one of the rec*
+// constants) is flagged as present in the record.
+func (p *sessionRecord) has(rec int) bool {
+	mask := 1 << uint(rec)
+	return p.hasRec&mask != 0
+}
+
+// setComparer stores the comparer name and flags recComparer as present.
+func (p *sessionRecord) setComparer(name string) {
+ p.hasRec |= 1 << recComparer
+ p.comparer = name
+}
+
+// setJournalNum stores the journal number and flags recJournalNum as present.
+func (p *sessionRecord) setJournalNum(num int64) {
+ p.hasRec |= 1 << recJournalNum
+ p.journalNum = num
+}
+
+// setPrevJournalNum stores the previous journal number and flags
+// recPrevJournalNum as present.
+func (p *sessionRecord) setPrevJournalNum(num int64) {
+ p.hasRec |= 1 << recPrevJournalNum
+ p.prevJournalNum = num
+}
+
+// setNextFileNum stores the next file number and flags recNextFileNum as
+// present.
+func (p *sessionRecord) setNextFileNum(num int64) {
+ p.hasRec |= 1 << recNextFileNum
+ p.nextFileNum = num
+}
+
+// setSeqNum stores the sequence number and flags recSeqNum as present.
+func (p *sessionRecord) setSeqNum(num uint64) {
+ p.hasRec |= 1 << recSeqNum
+ p.seqNum = num
+}
+
+// addCompPtr appends a compaction pointer for level and flags recCompPtr as
+// present. ikey is stored as given, without copying.
+func (p *sessionRecord) addCompPtr(level int, ikey internalKey) {
+ p.hasRec |= 1 << recCompPtr
+ p.compPtrs = append(p.compPtrs, cpRecord{level, ikey})
+}
+
+// resetCompPtrs clears the recCompPtr flag and truncates the compaction
+// pointer list to length zero, keeping its backing array for reuse.
+func (p *sessionRecord) resetCompPtrs() {
+ p.hasRec &= ^(1 << recCompPtr)
+ p.compPtrs = p.compPtrs[:0]
+}
+
+// addTable appends an added-table entry and flags recAddTable as present.
+// imin and imax are stored as given, without copying.
+func (p *sessionRecord) addTable(level int, num, size int64, imin, imax internalKey) {
+ p.hasRec |= 1 << recAddTable
+ p.addedTables = append(p.addedTables, atRecord{level, num, size, imin, imax})
+}
+
+// addTableFile is addTable with the number, size and key bounds taken from t.
+func (p *sessionRecord) addTableFile(level int, t *tFile) {
+ p.addTable(level, t.fd.Num, t.size, t.imin, t.imax)
+}
+
+// resetAddedTables clears the recAddTable flag and truncates the added-table
+// list to length zero, keeping its backing array for reuse.
+func (p *sessionRecord) resetAddedTables() {
+ p.hasRec &= ^(1 << recAddTable)
+ p.addedTables = p.addedTables[:0]
+}
+
+// delTable appends a deleted-table entry and flags recDelTable as present.
+func (p *sessionRecord) delTable(level int, num int64) {
+ p.hasRec |= 1 << recDelTable
+ p.deletedTables = append(p.deletedTables, dtRecord{level, num})
+}
+
+// resetDeletedTables clears the recDelTable flag and truncates the
+// deleted-table list to length zero, keeping its backing array for reuse.
+func (p *sessionRecord) resetDeletedTables() {
+ p.hasRec &= ^(1 << recDelTable)
+ p.deletedTables = p.deletedTables[:0]
+}
+
+// putUvarint writes x to w as an unsigned varint. It does nothing when a
+// previous write already failed; a write error is kept in p.err.
+func (p *sessionRecord) putUvarint(w io.Writer, x uint64) {
+ if p.err != nil {
+ return
+ }
+ n := binary.PutUvarint(p.scratch[:], x)
+ _, p.err = w.Write(p.scratch[:n])
+}
+
+// putVarint writes the non-negative value x to w using the unsigned varint
+// encoding. It panics if x is negative.
+func (p *sessionRecord) putVarint(w io.Writer, x int64) {
+ if x < 0 {
+ panic("invalid negative value")
+ }
+ p.putUvarint(w, uint64(x))
+}
+
+// putBytes writes x to w as an unsigned-varint length followed by the raw
+// bytes. It is a no-op once p.err is set; any write error ends up in p.err.
+func (p *sessionRecord) putBytes(w io.Writer, x []byte) {
+	if p.err != nil {
+		return
+	}
+	if p.putUvarint(w, uint64(len(x))); p.err == nil {
+		_, p.err = w.Write(x)
+	}
+}
+
+// encode writes the record to w as a sequence of tagged fields: each field
+// is preceded by its rec* tag as an unsigned varint. Scalar fields are
+// written only when flagged as present; list entries are written one tagged
+// field per entry. It returns the first write error, if any.
+//
+// Note that prevJournalNum is not written by this encoder, even though
+// decode accepts a recPrevJournalNum field.
+func (p *sessionRecord) encode(w io.Writer) error {
+ p.err = nil
+ if p.has(recComparer) {
+ p.putUvarint(w, recComparer)
+ p.putBytes(w, []byte(p.comparer))
+ }
+ if p.has(recJournalNum) {
+ p.putUvarint(w, recJournalNum)
+ p.putVarint(w, p.journalNum)
+ }
+ if p.has(recNextFileNum) {
+ p.putUvarint(w, recNextFileNum)
+ p.putVarint(w, p.nextFileNum)
+ }
+ if p.has(recSeqNum) {
+ p.putUvarint(w, recSeqNum)
+ p.putUvarint(w, p.seqNum)
+ }
+ for _, r := range p.compPtrs {
+ p.putUvarint(w, recCompPtr)
+ p.putUvarint(w, uint64(r.level))
+ p.putBytes(w, r.ikey)
+ }
+ for _, r := range p.deletedTables {
+ p.putUvarint(w, recDelTable)
+ p.putUvarint(w, uint64(r.level))
+ p.putVarint(w, r.num)
+ }
+ for _, r := range p.addedTables {
+ p.putUvarint(w, recAddTable)
+ p.putUvarint(w, uint64(r.level))
+ p.putVarint(w, r.num)
+ p.putVarint(w, r.size)
+ p.putBytes(w, r.imin)
+ p.putBytes(w, r.imax)
+ }
+ return p.err
+}
+
+// readUvarintMayEOF reads one unsigned varint from r and returns it, or 0
+// after recording an error in p.err. It does nothing if p.err is already
+// set.
+//
+// A truncated value — or a clean io.EOF when mayEOF is false — is reported
+// as a corrupted-manifest "short read" for field. Errors whose text starts
+// with "binary:" are likewise wrapped as corruption; anything else
+// (including io.EOF when mayEOF is true) is stored unchanged.
+func (p *sessionRecord) readUvarintMayEOF(field string, r io.ByteReader, mayEOF bool) uint64 {
+	if p.err != nil {
+		return 0
+	}
+	value, err := binary.ReadUvarint(r)
+	if err == nil {
+		return value
+	}
+	switch {
+	case err == io.ErrUnexpectedEOF, !mayEOF && err == io.EOF:
+		p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, "short read"})
+	case strings.HasPrefix(err.Error(), "binary:"):
+		p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, err.Error()})
+	default:
+		p.err = err
+	}
+	return 0
+}
+
+// readUvarint reads an unsigned varint for field; end of input counts as a
+// short read (see readUvarintMayEOF with mayEOF == false).
+func (p *sessionRecord) readUvarint(field string, r io.ByteReader) uint64 {
+ return p.readUvarintMayEOF(field, r, false)
+}
+
+// readVarint reads an unsigned varint for field and returns it as int64.
+// A value that does not fit (i.e. turns negative after conversion) is
+// recorded in p.err as a corrupted-manifest error.
+func (p *sessionRecord) readVarint(field string, r io.ByteReader) int64 {
+ x := int64(p.readUvarintMayEOF(field, r, false))
+ if x < 0 {
+ p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, "invalid negative value"})
+ }
+ return x
+}
+
+// readBytes reads a length-prefixed byte string for field: an unsigned
+// varint length followed by that many bytes. It returns nil when p.err is
+// already set or when reading fails; a truncated payload is recorded as a
+// corrupted-manifest "short read", other errors are stored unchanged.
+func (p *sessionRecord) readBytes(field string, r byteReader) []byte {
+	if p.err != nil {
+		return nil
+	}
+	size := p.readUvarint(field, r)
+	if p.err != nil {
+		return nil
+	}
+
+	buf := make([]byte, size)
+	_, err := io.ReadFull(r, buf)
+	switch err {
+	case nil:
+		p.err = nil
+		return buf
+	case io.ErrUnexpectedEOF:
+		p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, "short read"})
+	default:
+		p.err = err
+	}
+	return nil
+}
+
+// readLevel reads a level number for field as an unsigned varint and
+// returns it as int, or 0 when p.err is set before or by the read.
+func (p *sessionRecord) readLevel(field string, r io.ByteReader) int {
+	if p.err != nil {
+		return 0
+	}
+	if lv := p.readUvarint(field, r); p.err == nil {
+		return int(lv)
+	}
+	return 0
+}
+
+// decode reads tagged fields from r until end of input and merges them into
+// the record: scalar fields overwrite, list fields append. A clean EOF at a
+// field boundary ends decoding successfully; any other failure is returned
+// (truncation and varint errors as corrupted-manifest errors).
+//
+// A field whose reads fail part-way is not applied. A tag that matches none
+// of the cases below is skipped without consuming a payload.
+func (p *sessionRecord) decode(r io.Reader) error {
+ // The varint helpers need ReadByte; wrap plain readers.
+ br, ok := r.(byteReader)
+ if !ok {
+ br = bufio.NewReader(r)
+ }
+ p.err = nil
+ for p.err == nil {
+ rec := p.readUvarintMayEOF("field-header", br, true)
+ if p.err != nil {
+ if p.err == io.EOF {
+ return nil
+ }
+ return p.err
+ }
+ switch rec {
+ case recComparer:
+ x := p.readBytes("comparer", br)
+ if p.err == nil {
+ p.setComparer(string(x))
+ }
+ case recJournalNum:
+ x := p.readVarint("journal-num", br)
+ if p.err == nil {
+ p.setJournalNum(x)
+ }
+ case recPrevJournalNum:
+ x := p.readVarint("prev-journal-num", br)
+ if p.err == nil {
+ p.setPrevJournalNum(x)
+ }
+ case recNextFileNum:
+ x := p.readVarint("next-file-num", br)
+ if p.err == nil {
+ p.setNextFileNum(x)
+ }
+ case recSeqNum:
+ x := p.readUvarint("seq-num", br)
+ if p.err == nil {
+ p.setSeqNum(x)
+ }
+ case recCompPtr:
+ level := p.readLevel("comp-ptr.level", br)
+ ikey := p.readBytes("comp-ptr.ikey", br)
+ if p.err == nil {
+ p.addCompPtr(level, internalKey(ikey))
+ }
+ case recAddTable:
+ level := p.readLevel("add-table.level", br)
+ num := p.readVarint("add-table.num", br)
+ size := p.readVarint("add-table.size", br)
+ imin := p.readBytes("add-table.imin", br)
+ imax := p.readBytes("add-table.imax", br)
+ if p.err == nil {
+ p.addTable(level, num, size, imin, imax)
+ }
+ case recDelTable:
+ level := p.readLevel("del-table.level", br)
+ num := p.readVarint("del-table.num", br)
+ if p.err == nil {
+ p.delTable(level, num)
+ }
+ }
+ }
+
+ return p.err
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session_util.go b/vendor/github.com/syndtr/goleveldb/leveldb/session_util.go
new file mode 100644
index 000000000..34ad61798
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/session_util.go
@@ -0,0 +1,258 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "fmt"
+ "sync/atomic"
+
+ "github.com/syndtr/goleveldb/leveldb/journal"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+// Logging.
+
+// dropper logs dropped journal data for the file fd through session s.
+type dropper struct {
+ s *session
+ fd storage.FileDesc
+}
+
+func (d dropper) Drop(err error) {
+ if e, ok := err.(*journal.ErrCorrupted); ok {
+ d.s.logf("journal@drop %s-%d S·%s %q", d.fd.Type, d.fd.Num, shortenb(e.Size), e.Reason)
+ } else {
+ d.s.logf("journal@drop %s-%d %q", d.fd.Type, d.fd.Num, err)
+ }
+}
+
+// log writes v, formatted with fmt.Sprint, to the storage log.
+func (s *session) log(v ...interface{}) { s.stor.Log(fmt.Sprint(v...)) }
+// logf writes v, formatted with fmt.Sprintf and format, to the storage log.
+func (s *session) logf(format string, v ...interface{}) { s.stor.Log(fmt.Sprintf(format, v...)) }
+
+// File utils.
+
+// newTemp returns a descriptor for a new temporary file, numbered from the
+// session's atomically incremented temp-file counter.
+func (s *session) newTemp() storage.FileDesc {
+	n := atomic.AddInt64(&s.stTempFileNum, 1)
+	return storage.FileDesc{Type: storage.TypeTemp, Num: n - 1}
+}
+
+// Session state.
+
+// Get current version. This will incr version ref, must call
+// version.release (exactly once) after use.
+func (s *session) version() *version {
+ // The reference count is bumped while holding vmu.
+ s.vmu.Lock()
+ defer s.vmu.Unlock()
+ s.stVersion.ref++
+ return s.stVersion
+}
+
+// tLen returns the current version's tLen for level, read under vmu and
+// without taking a reference on the version.
+func (s *session) tLen(level int) int {
+ s.vmu.Lock()
+ defer s.vmu.Unlock()
+ return s.stVersion.tLen(level)
+}
+
+// setVersion installs v as the session's current version. v starts with one
+// reference held by the session; when a previous version exists, v gets a
+// second reference held by that previous version, is linked as its
+// successor, and the previous version's session reference is dropped.
+func (s *session) setVersion(v *version) {
+	s.vmu.Lock()
+	defer s.vmu.Unlock()
+
+	v.ref = 1 // Held by the session.
+	if prev := s.stVersion; prev != nil {
+		v.ref++ // Held by the previous version.
+		prev.next = v
+		prev.releaseNB()
+	}
+	s.stVersion = v
+}
+
+// Get current unused file number.
+// The counter is read atomically.
+func (s *session) nextFileNum() int64 {
+ return atomic.LoadInt64(&s.stNextFileNum)
+}
+
+// Set current unused file number to num.
+// The counter is stored atomically and unconditionally.
+func (s *session) setNextFileNum(num int64) {
+ atomic.StoreInt64(&s.stNextFileNum, num)
+}
+
+// Mark file number as used.
+//
+// markFileNum raises the next-file-number counter to at least num+1; it
+// never lowers it. The update is a compare-and-swap loop, and the counter
+// is read with an atomic load so the loop does not race with the other
+// atomic accessors of stNextFileNum.
+func (s *session) markFileNum(num int64) {
+	nextFileNum := num + 1
+	for {
+		old, x := atomic.LoadInt64(&s.stNextFileNum), nextFileNum
+		if old > x {
+			// Counter is already past num; keep it unchanged.
+			x = old
+		}
+		if atomic.CompareAndSwapInt64(&s.stNextFileNum, old, x) {
+			break
+		}
+	}
+}
+
+// Allocate a file number.
+// Atomically increments the counter and returns its previous value.
+func (s *session) allocFileNum() int64 {
+ return atomic.AddInt64(&s.stNextFileNum, 1) - 1
+}
+
+// Reuse given file number.
+//
+// reuseFileNum hands num back to the allocator, but only when num was the
+// most recently allocated number (counter == num+1); otherwise the counter
+// is left as it is. The counter is read with an atomic load so the
+// compare-and-swap loop does not race with the other atomic accessors of
+// stNextFileNum.
+func (s *session) reuseFileNum(num int64) {
+	for {
+		old, x := atomic.LoadInt64(&s.stNextFileNum), num
+		if old != x+1 {
+			// Something else was allocated since; nothing to roll back.
+			x = old
+		}
+		if atomic.CompareAndSwapInt64(&s.stNextFileNum, old, x) {
+			break
+		}
+	}
+}
+
+// setCompPtr stores a private copy of ik as the compaction pointer for
+// level, growing the pointer table when level is beyond its current length.
+// Callers must provide external synchronization.
+func (s *session) setCompPtr(level int, ik internalKey) {
+	if need := level + 1; need > len(s.stCompPtrs) {
+		grown := make([]internalKey, need)
+		copy(grown, s.stCompPtrs)
+		s.stCompPtrs = grown
+	}
+	s.stCompPtrs[level] = append(internalKey{}, ik...)
+}
+
+// getCompPtr returns the compaction pointer stored for level, or nil when
+// level lies beyond the pointer table. Callers must provide external
+// synchronization.
+func (s *session) getCompPtr(level int) internalKey {
+	if level < len(s.stCompPtrs) {
+		return s.stCompPtrs[level]
+	}
+	return nil
+}
+
+// Manifest related utils.
+
+// fillRecord copies session state into r. The next file number is always
+// set. For a snapshot record it additionally fills the journal number and
+// sequence number (unless r already carries them), every non-nil compaction
+// pointer, and the comparer name. Callers must provide external
+// synchronization.
+func (s *session) fillRecord(r *sessionRecord, snapshot bool) {
+	r.setNextFileNum(s.nextFileNum())
+	if !snapshot {
+		return
+	}
+
+	if !r.has(recJournalNum) {
+		r.setJournalNum(s.stJournalNum)
+	}
+	if !r.has(recSeqNum) {
+		r.setSeqNum(s.stSeqNum)
+	}
+	for level := range s.stCompPtrs {
+		if ik := s.stCompPtrs[level]; ik != nil {
+			r.addCompPtr(level, ik)
+		}
+	}
+	r.setComparer(s.icmp.uName())
+}
+
+// Mark if record has been committed, this will update session state;
+// need external synchronization.
+//
+// Only fields flagged as present in rec are copied into the session; every
+// compaction pointer in rec is applied through setCompPtr.
+func (s *session) recordCommited(rec *sessionRecord) {
+ if rec.has(recJournalNum) {
+ s.stJournalNum = rec.journalNum
+ }
+
+ if rec.has(recPrevJournalNum) {
+ s.stPrevJournalNum = rec.prevJournalNum
+ }
+
+ if rec.has(recSeqNum) {
+ s.stSeqNum = rec.seqNum
+ }
+
+ for _, r := range rec.compPtrs {
+ s.setCompPtr(r.level, internalKey(r.ikey))
+ }
+}
+
+// Create a new manifest file; need external synchronization.
+//
+// newManifest allocates a manifest file number, writes one snapshot record
+// (rec filled with session state and the tables of v) into a fresh journal,
+// and then points the storage meta (CURRENT) at the new file. A nil rec is
+// replaced by an empty record; a nil v by the current version.
+//
+// Unless the NoSync option is set, the new manifest is synced before the
+// meta is switched, matching flushManifest, so CURRENT does not end up
+// naming a manifest whose contents have not reached stable storage.
+//
+// On success the session adopts the new manifest and the previous manifest
+// (if any) is closed and removed. On failure the new file is closed and
+// removed and its file number is handed back.
+func (s *session) newManifest(rec *sessionRecord, v *version) (err error) {
+	fd := storage.FileDesc{Type: storage.TypeManifest, Num: s.allocFileNum()}
+	writer, err := s.stor.Create(fd)
+	if err != nil {
+		return
+	}
+	jw := journal.NewWriter(writer)
+
+	if v == nil {
+		v = s.version()
+		defer v.release()
+	}
+	if rec == nil {
+		rec = &sessionRecord{}
+	}
+	s.fillRecord(rec, true)
+	v.fillRecord(rec)
+
+	// Commit or roll back depending on the final value of err.
+	defer func() {
+		if err == nil {
+			s.recordCommited(rec)
+			if s.manifest != nil {
+				s.manifest.Close()
+			}
+			if s.manifestWriter != nil {
+				s.manifestWriter.Close()
+			}
+			if !s.manifestFd.Zero() {
+				s.stor.Remove(s.manifestFd)
+			}
+			s.manifestFd = fd
+			s.manifestWriter = writer
+			s.manifest = jw
+		} else {
+			writer.Close()
+			s.stor.Remove(fd)
+			s.reuseFileNum(fd.Num)
+		}
+	}()
+
+	w, err := jw.Next()
+	if err != nil {
+		return
+	}
+	err = rec.encode(w)
+	if err != nil {
+		return
+	}
+	err = jw.Flush()
+	if err != nil {
+		return
+	}
+	if !s.o.GetNoSync() {
+		// Make the manifest durable before CURRENT refers to it.
+		err = writer.Sync()
+		if err != nil {
+			return
+		}
+	}
+	err = s.stor.SetMeta(fd)
+	return
+}
+
+// flushManifest appends rec (after filling in the next file number) to the
+// open manifest journal, flushes it and, unless the NoSync option is set,
+// syncs the manifest file. Session state is updated from rec only after all
+// of that succeeded.
+func (s *session) flushManifest(rec *sessionRecord) (err error) {
+	s.fillRecord(rec, false)
+
+	w, err := s.manifest.Next()
+	if err != nil {
+		return err
+	}
+	if err = rec.encode(w); err != nil {
+		return err
+	}
+	if err = s.manifest.Flush(); err != nil {
+		return err
+	}
+	if !s.o.GetNoSync() {
+		if err = s.manifestWriter.Sync(); err != nil {
+			return err
+		}
+	}
+
+	s.recordCommited(rec)
+	return nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go
new file mode 100644
index 000000000..e53434cab
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go
@@ -0,0 +1,583 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package storage
+
+import (
+ "errors"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "runtime"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+)
+
+// Errors private to the file-system storage implementation.
+var (
+ errFileOpen = errors.New("leveldb/storage: file still open")
+ // errReadOnly is returned by mutating operations on read-only storage.
+ errReadOnly = errors.New("leveldb/storage: storage is read-only")
+)
+
+// fileLock is the platform-specific lock on the storage directory's LOCK
+// file, as returned by newFileLock.
+type fileLock interface {
+ release() error
+}
+
+// fileStorageLock is the handle returned by fileStorage.Lock. fs is nil for
+// the handle given out by read-only storage.
+type fileStorageLock struct {
+ fs *fileStorage
+}
+
+// Unlock gives up the storage lock. It only has an effect when this handle
+// is the one currently registered on its storage; handles without a storage
+// (as issued in read-only mode) are ignored.
+func (lock *fileStorageLock) Unlock() {
+	fs := lock.fs
+	if fs == nil {
+		return
+	}
+	fs.mu.Lock()
+	defer fs.mu.Unlock()
+	if fs.slock == lock {
+		fs.slock = nil
+	}
+}
+
+// logSizeThreshold is the LOG size above which doLog rotates it to LOG.old.
+const logSizeThreshold = 1024 * 1024 // 1 MiB
+
+// fileStorage is a file-system backed storage.
+type fileStorage struct {
+ path string
+ readOnly bool
+
+ // mu guards the fields below.
+ mu sync.Mutex
+ // flock is the lock on the LOCK file; slock is the handle handed out by
+ // Lock, nil when unlocked.
+ flock fileLock
+ slock *fileStorageLock
+ // logw is the open LOG file (nil in read-only mode or after a failed
+ // reopen), logSize its tracked size and buf the reusable line buffer.
+ logw *os.File
+ logSize int64
+ buf []byte
+ // Opened file counter; if open < 0 means closed.
+ open int
+ // day is the day of month last written as a date header by printDay.
+ day int
+}
+
+// OpenFile returns a new filesystem-backed storage implementation with the given
+// path. This also acquires a file lock, so any subsequent attempt to open the
+// same path will fail.
+//
+// If path does not exist it is created, unless readOnly is set, in which
+// case the stat error is returned. In read-only mode no LOG file is opened.
+//
+// The storage must be closed after use, by calling Close method.
+func OpenFile(path string, readOnly bool) (Storage, error) {
+ if fi, err := os.Stat(path); err == nil {
+ if !fi.IsDir() {
+ return nil, fmt.Errorf("leveldb/storage: open %s: not a directory", path)
+ }
+ } else if os.IsNotExist(err) && !readOnly {
+ if err := os.MkdirAll(path, 0755); err != nil {
+ return nil, err
+ }
+ } else {
+ return nil, err
+ }
+
+ flock, err := newFileLock(filepath.Join(path, "LOCK"), readOnly)
+ if err != nil {
+ return nil, err
+ }
+
+ // Release the file lock again if anything below fails; the closure
+ // observes the err variable declared above.
+ defer func() {
+ if err != nil {
+ flock.release()
+ }
+ }()
+
+ var (
+ logw *os.File
+ logSize int64
+ )
+ if !readOnly {
+ logw, err = os.OpenFile(filepath.Join(path, "LOG"), os.O_WRONLY|os.O_CREATE, 0644)
+ if err != nil {
+ return nil, err
+ }
+ // Position at the end so new entries are appended; the offset is
+ // also the current log size.
+ logSize, err = logw.Seek(0, os.SEEK_END)
+ if err != nil {
+ logw.Close()
+ return nil, err
+ }
+ }
+
+ fs := &fileStorage{
+ path: path,
+ readOnly: readOnly,
+ flock: flock,
+ logw: logw,
+ logSize: logSize,
+ }
+ // Close the storage when it is garbage collected without being closed.
+ runtime.SetFinalizer(fs, (*fileStorage).Close)
+ return fs, nil
+}
+
+// Lock acquires the storage lock and returns its handle. It fails with
+// ErrClosed on closed storage and with ErrLocked while another handle is
+// outstanding. Read-only storage always succeeds and returns a detached
+// handle whose Unlock does nothing.
+func (fs *fileStorage) Lock() (Locker, error) {
+	fs.mu.Lock()
+	defer fs.mu.Unlock()
+
+	switch {
+	case fs.open < 0:
+		return nil, ErrClosed
+	case fs.readOnly:
+		return &fileStorageLock{}, nil
+	case fs.slock != nil:
+		return nil, ErrLocked
+	}
+	fs.slock = &fileStorageLock{fs: fs}
+	return fs.slock, nil
+}
+
+// itoa appends the decimal form of i to buf, left-padded with zeros to at
+// least wid digits, and returns the extended slice. i is converted to uint
+// before formatting.
+func itoa(buf []byte, i int, wid int) []byte {
+	val := uint(i)
+	if val == 0 && wid <= 1 {
+		return append(buf, '0')
+	}
+
+	// Digits are produced least-significant first, filling the scratch
+	// array from its end.
+	var digits [32]byte
+	pos := len(digits)
+	for val > 0 || wid > 0 {
+		pos--
+		digits[pos] = '0' + byte(val%10)
+		val /= 10
+		wid--
+	}
+	return append(buf, digits[pos:]...)
+}
+
+// printDay writes a date separator line to the log when the day of month of
+// t differs from the one last printed, and remembers the new day.
+func (fs *fileStorage) printDay(t time.Time) {
+	day := t.Day()
+	if fs.day == day {
+		return
+	}
+	fs.day = day
+	header := "=============== " + t.Format("Jan 2, 2006 (MST)") + " ===============\n"
+	fs.logw.Write([]byte(header))
+}
+
+// doLog appends one timestamped line to the LOG file. The caller must hold
+// fs.mu.
+//
+// When the tracked log size exceeds logSizeThreshold the current LOG is
+// closed and renamed to LOG.old, and a new LOG is opened. If the LOG cannot
+// be opened the entry is dropped and opening is retried on the next call.
+//
+// The tracked size is advanced by the bytes of every entry written, so that
+// rotation triggers as the log grows rather than only when the file was
+// already oversized at open time. Date header lines written by printDay are
+// not counted.
+func (fs *fileStorage) doLog(t time.Time, str string) {
+	if fs.logSize > logSizeThreshold {
+		// Rotate log file.
+		fs.logw.Close()
+		fs.logw = nil
+		fs.logSize = 0
+		rename(filepath.Join(fs.path, "LOG"), filepath.Join(fs.path, "LOG.old"))
+	}
+	if fs.logw == nil {
+		var err error
+		fs.logw, err = os.OpenFile(filepath.Join(fs.path, "LOG"), os.O_WRONLY|os.O_CREATE, 0644)
+		if err != nil {
+			return
+		}
+		// Force printDay on new log file.
+		fs.day = 0
+	}
+	fs.printDay(t)
+	hour, min, sec := t.Clock()
+	usec := t.Nanosecond() / 1e3
+	// time
+	fs.buf = itoa(fs.buf[:0], hour, 2)
+	fs.buf = append(fs.buf, ':')
+	fs.buf = itoa(fs.buf, min, 2)
+	fs.buf = append(fs.buf, ':')
+	fs.buf = itoa(fs.buf, sec, 2)
+	fs.buf = append(fs.buf, '.')
+	fs.buf = itoa(fs.buf, usec, 6)
+	fs.buf = append(fs.buf, ' ')
+	// write
+	fs.buf = append(fs.buf, []byte(str)...)
+	fs.buf = append(fs.buf, '\n')
+	// Logging is best-effort: a write error is ignored, but whatever was
+	// written still counts towards the rotation threshold.
+	n, _ := fs.logw.Write(fs.buf)
+	fs.logSize += int64(n)
+}
+
+// Log appends str to the storage LOG file, stamped with the time of the
+// call. It does nothing on read-only or closed storage. Safe for concurrent
+// use: the write happens under fs.mu.
+func (fs *fileStorage) Log(str string) {
+	if fs.readOnly {
+		return
+	}
+	// Take the timestamp before waiting for the lock.
+	now := time.Now()
+	fs.mu.Lock()
+	defer fs.mu.Unlock()
+	if fs.open >= 0 {
+		fs.doLog(now, str)
+	}
+}
+
+// log is the non-locking variant of Log: it takes no lock itself and does
+// not check whether the storage is closed. Nothing is written in read-only
+// mode.
+func (fs *fileStorage) log(str string) {
+ if !fs.readOnly {
+ fs.doLog(time.Now(), str)
+ }
+}
+
+// SetMeta records fd as the current meta file: the generated file name of
+// fd is written to a pending file "CURRENT.<num>", which is then renamed
+// over "CURRENT".
+//
+// It returns ErrInvalidFile for an invalid descriptor, errReadOnly on
+// read-only storage and ErrClosed on closed storage. Any other failure is
+// also written to the storage log.
+//
+// The pending file is synced before the rename so that CURRENT is not
+// replaced by a file whose contents have not reached stable storage. A
+// failure to close the pending file is logged but not returned.
+func (fs *fileStorage) SetMeta(fd FileDesc) (err error) {
+	if !FileDescOk(fd) {
+		return ErrInvalidFile
+	}
+	if fs.readOnly {
+		return errReadOnly
+	}
+
+	fs.mu.Lock()
+	defer fs.mu.Unlock()
+	if fs.open < 0 {
+		return ErrClosed
+	}
+	defer func() {
+		if err != nil {
+			fs.log(fmt.Sprintf("CURRENT: %v", err))
+		}
+	}()
+	path := fmt.Sprintf("%s.%d", filepath.Join(fs.path, "CURRENT"), fd.Num)
+	w, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+	if err != nil {
+		return
+	}
+	_, err = fmt.Fprintln(w, fsGenName(fd))
+	if err == nil {
+		// Flush the contents to disk before the file becomes CURRENT.
+		err = w.Sync()
+	}
+	// Close the file first.
+	if cerr := w.Close(); cerr != nil {
+		fs.log(fmt.Sprintf("close CURRENT.%d: %v", fd.Num, cerr))
+	}
+	if err != nil {
+		return
+	}
+	return rename(path, filepath.Join(fs.path, "CURRENT"))
+}
+
+// GetMeta returns the descriptor named by the newest valid CURRENT file in
+// the storage directory.
+//
+// Both "CURRENT" and pending "CURRENT.<num>" files are examined; among the
+// valid ones the descriptor with the highest file number wins. If no valid
+// file is found, the error is an *ErrCorrupted when a corrupted or
+// incomplete meta file was seen and os.ErrNotExist otherwise. Unless the
+// storage is read-only, a winning pending file is renamed to CURRENT and
+// obsolete, inconsistent or corrupted pending files are removed; failures of
+// that cleanup are only logged.
+func (fs *fileStorage) GetMeta() (fd FileDesc, err error) {
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return FileDesc{}, ErrClosed
+ }
+ dir, err := os.Open(fs.path)
+ if err != nil {
+ return
+ }
+ names, err := dir.Readdirnames(0)
+ // Close the dir first before checking for Readdirnames error.
+ if ce := dir.Close(); ce != nil {
+ fs.log(fmt.Sprintf("close dir: %v", ce))
+ }
+ if err != nil {
+ return
+ }
+ // Find latest CURRENT file.
+ // rem collects pending files to delete afterwards; pend tells whether
+ // the current winner is a pending file; cerr remembers a corruption
+ // error to report if nothing valid turns up.
+ var rem []string
+ var pend bool
+ var cerr error
+ for _, name := range names {
+ if strings.HasPrefix(name, "CURRENT") {
+ // Anything longer than "CURRENT" must be a pending "CURRENT.<num>".
+ pend1 := len(name) > 7
+ var pendNum int64
+ // Make sure it is valid name for a CURRENT file, otherwise skip it.
+ if pend1 {
+ if name[7] != '.' || len(name) < 9 {
+ fs.log(fmt.Sprintf("skipping %s: invalid file name", name))
+ continue
+ }
+ var e1 error
+ if pendNum, e1 = strconv.ParseInt(name[8:], 10, 0); e1 != nil {
+ fs.log(fmt.Sprintf("skipping %s: invalid file num: %v", name, e1))
+ continue
+ }
+ }
+ path := filepath.Join(fs.path, name)
+ r, e1 := os.OpenFile(path, os.O_RDONLY, 0)
+ if e1 != nil {
+ return FileDesc{}, e1
+ }
+ b, e1 := ioutil.ReadAll(r)
+ if e1 != nil {
+ r.Close()
+ return FileDesc{}, e1
+ }
+ var fd1 FileDesc
+ // Valid content is a parseable file name terminated by a newline.
+ if len(b) < 1 || b[len(b)-1] != '\n' || !fsParseNamePtr(string(b[:len(b)-1]), &fd1) {
+ fs.log(fmt.Sprintf("skipping %s: corrupted or incomplete", name))
+ if pend1 {
+ rem = append(rem, name)
+ }
+ // A corrupted effective CURRENT always replaces the remembered
+ // error; a corrupted pending file only sets it if none is set.
+ if !pend1 || cerr == nil {
+ metaFd, _ := fsParseName(name)
+ cerr = &ErrCorrupted{
+ Fd: metaFd,
+ Err: errors.New("leveldb/storage: corrupted or incomplete meta file"),
+ }
+ }
+ } else if pend1 && pendNum != fd1.Num {
+ fs.log(fmt.Sprintf("skipping %s: inconsistent pending-file num: %d vs %d", name, pendNum, fd1.Num))
+ rem = append(rem, name)
+ } else if fd1.Num < fd.Num {
+ fs.log(fmt.Sprintf("skipping %s: obsolete", name))
+ if pend1 {
+ rem = append(rem, name)
+ }
+ } else {
+ fd = fd1
+ pend = pend1
+ }
+ if err := r.Close(); err != nil {
+ fs.log(fmt.Sprintf("close %s: %v", name, err))
+ }
+ }
+ }
+ // Don't remove any files if there is no valid CURRENT file.
+ if fd.Zero() {
+ if cerr != nil {
+ err = cerr
+ } else {
+ err = os.ErrNotExist
+ }
+ return
+ }
+ if !fs.readOnly {
+ // Rename pending CURRENT file to an effective CURRENT.
+ if pend {
+ path := fmt.Sprintf("%s.%d", filepath.Join(fs.path, "CURRENT"), fd.Num)
+ if err := rename(path, filepath.Join(fs.path, "CURRENT")); err != nil {
+ fs.log(fmt.Sprintf("CURRENT.%d -> CURRENT: %v", fd.Num, err))
+ }
+ }
+ // Remove obsolete or incomplete pending CURRENT files.
+ for _, name := range rem {
+ path := filepath.Join(fs.path, name)
+ if err := os.Remove(path); err != nil {
+ fs.log(fmt.Sprintf("remove %s: %v", name, err))
+ }
+ }
+ }
+ return
+}
+
+// List returns the descriptors of all files in the storage directory whose
+// names parse as storage files and whose type matches the ft bit mask. It
+// returns ErrClosed on closed storage, or the error from opening or reading
+// the directory.
+func (fs *fileStorage) List(ft FileType) (fds []FileDesc, err error) {
+	fs.mu.Lock()
+	defer fs.mu.Unlock()
+	if fs.open < 0 {
+		return nil, ErrClosed
+	}
+
+	dir, err := os.Open(fs.path)
+	if err != nil {
+		return nil, err
+	}
+	names, err := dir.Readdirnames(0)
+	// Close the directory before looking at the Readdirnames error.
+	if cerr := dir.Close(); cerr != nil {
+		fs.log(fmt.Sprintf("close dir: %v", cerr))
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	for _, name := range names {
+		fd, ok := fsParseName(name)
+		if !ok || fd.Type&ft == 0 {
+			continue
+		}
+		fds = append(fds, fd)
+	}
+	return fds, nil
+}
+
+// Open opens the file described by fd for reading. For file types that have
+// a legacy name, the legacy name is tried when the regular name does not
+// exist. Each successful open increments the open-file counter.
+//
+// It returns ErrInvalidFile for an invalid descriptor and ErrClosed on
+// closed storage.
+func (fs *fileStorage) Open(fd FileDesc) (Reader, error) {
+	if !FileDescOk(fd) {
+		return nil, ErrInvalidFile
+	}
+
+	fs.mu.Lock()
+	defer fs.mu.Unlock()
+	if fs.open < 0 {
+		return nil, ErrClosed
+	}
+
+	of, err := os.OpenFile(filepath.Join(fs.path, fsGenName(fd)), os.O_RDONLY, 0)
+	if err != nil && fsHasOldName(fd) && os.IsNotExist(err) {
+		// Fall back to the legacy file name.
+		of, err = os.OpenFile(filepath.Join(fs.path, fsGenOldName(fd)), os.O_RDONLY, 0)
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	fs.open++
+	return &fileWrap{File: of, fs: fs, fd: fd}, nil
+}
+
+// Create creates (or truncates) the file described by fd and opens it for
+// writing. Each successful call increments the open-file counter.
+//
+// It returns ErrInvalidFile for an invalid descriptor, errReadOnly on
+// read-only storage and ErrClosed on closed storage.
+func (fs *fileStorage) Create(fd FileDesc) (Writer, error) {
+	switch {
+	case !FileDescOk(fd):
+		return nil, ErrInvalidFile
+	case fs.readOnly:
+		return nil, errReadOnly
+	}
+
+	fs.mu.Lock()
+	defer fs.mu.Unlock()
+	if fs.open < 0 {
+		return nil, ErrClosed
+	}
+
+	name := filepath.Join(fs.path, fsGenName(fd))
+	of, err := os.OpenFile(name, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+	if err != nil {
+		return nil, err
+	}
+	fs.open++
+	return &fileWrap{File: of, fs: fs, fd: fd}, nil
+}
+
+// Remove deletes the file described by fd. For file types that have a
+// legacy name, the legacy name is tried when the regular name does not
+// exist; the result of that second attempt is returned unless it also
+// reports "not exist", in which case the original error is returned.
+//
+// It returns ErrInvalidFile for an invalid descriptor, errReadOnly on
+// read-only storage and ErrClosed on closed storage. Failures are written
+// to the storage log; for the legacy-name attempt the logged error is the
+// one from that attempt, and nothing is logged when it succeeds.
+func (fs *fileStorage) Remove(fd FileDesc) error {
+	if !FileDescOk(fd) {
+		return ErrInvalidFile
+	}
+	if fs.readOnly {
+		return errReadOnly
+	}
+
+	fs.mu.Lock()
+	defer fs.mu.Unlock()
+	if fs.open < 0 {
+		return ErrClosed
+	}
+	err := os.Remove(filepath.Join(fs.path, fsGenName(fd)))
+	if err != nil {
+		if fsHasOldName(fd) && os.IsNotExist(err) {
+			if e1 := os.Remove(filepath.Join(fs.path, fsGenOldName(fd))); !os.IsNotExist(e1) {
+				if e1 != nil {
+					fs.log(fmt.Sprintf("remove %s: %v (old name)", fd, e1))
+				}
+				err = e1
+			}
+		} else {
+			fs.log(fmt.Sprintf("remove %s: %v", fd, err))
+		}
+	}
+	return err
+}
+
+// Rename renames the file described by oldfd to the name of newfd. Renaming
+// a descriptor to itself is a no-op.
+//
+// It returns ErrInvalidFile if either descriptor is invalid, errReadOnly on
+// read-only storage and ErrClosed on closed storage.
+func (fs *fileStorage) Rename(oldfd, newfd FileDesc) error {
+	switch {
+	case !FileDescOk(oldfd), !FileDescOk(newfd):
+		return ErrInvalidFile
+	case oldfd == newfd:
+		return nil
+	case fs.readOnly:
+		return errReadOnly
+	}
+
+	fs.mu.Lock()
+	defer fs.mu.Unlock()
+	if fs.open < 0 {
+		return ErrClosed
+	}
+	from := filepath.Join(fs.path, fsGenName(oldfd))
+	to := filepath.Join(fs.path, fsGenName(newfd))
+	return rename(from, to)
+}
+
+// Close marks the storage as closed, closes the LOG file and releases the
+// file lock, returning the result of that release. A second call returns
+// ErrClosed. Files still open at this point are only reported in the log.
+func (fs *fileStorage) Close() error {
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return ErrClosed
+ }
+ // Clear the finalizer.
+ runtime.SetFinalizer(fs, nil)
+
+ if fs.open > 0 {
+ fs.log(fmt.Sprintf("close: warning, %d files still open", fs.open))
+ }
+ // A negative counter is the "closed" marker checked by every method.
+ fs.open = -1
+ if fs.logw != nil {
+ fs.logw.Close()
+ }
+ return fs.flock.release()
+}
+
+// fileWrap is the file handle returned by Open and Create. It ties an
+// *os.File to its storage and descriptor, and tracks whether it has been
+// closed.
+type fileWrap struct {
+ *os.File
+ fs *fileStorage
+ fd FileDesc
+ closed bool
+}
+
+// Sync flushes the file to stable storage. For manifest files the storage
+// directory is synced as well, and a failure to do so is logged and
+// returned.
+//
+// Sync does not hold the storage mutex, so the failure is reported through
+// the locking Log method rather than the lock-free log helper; this keeps
+// the log writer's shared state from being touched concurrently.
+func (fw *fileWrap) Sync() error {
+	if err := fw.File.Sync(); err != nil {
+		return err
+	}
+	if fw.fd.Type == TypeManifest {
+		// Also sync parent directory if file type is manifest.
+		// See: https://code.google.com/p/leveldb/issues/detail?id=190.
+		if err := syncDir(fw.fs.path); err != nil {
+			fw.fs.Log(fmt.Sprintf("syncDir: %v", err))
+			return err
+		}
+	}
+	return nil
+}
+
+// Close closes the underlying file and decrements the storage's open-file
+// counter. A second call returns ErrClosed. A close error is logged and
+// returned.
+func (fw *fileWrap) Close() error {
+ fw.fs.mu.Lock()
+ defer fw.fs.mu.Unlock()
+ if fw.closed {
+ return ErrClosed
+ }
+ // The handle counts as closed even if the OS-level close fails below.
+ fw.closed = true
+ fw.fs.open--
+ err := fw.File.Close()
+ if err != nil {
+ fw.fs.log(fmt.Sprintf("close %s: %v", fw.fd, err))
+ }
+ return err
+}
+
+func fsGenName(fd FileDesc) string {
+ switch fd.Type {
+ case TypeManifest:
+ return fmt.Sprintf("MANIFEST-%06d", fd.Num)
+ case TypeJournal:
+ return fmt.Sprintf("%06d.log", fd.Num)
+ case TypeTable:
+ return fmt.Sprintf("%06d.ldb", fd.Num)
+ case TypeTemp:
+ return fmt.Sprintf("%06d.tmp", fd.Num)
+ default:
+ panic("invalid file type")
+ }
+}
+
+// fsHasOldName reports whether files of fd's type also have a legacy name
+// (see fsGenOldName); this is the case for table files only.
+func fsHasOldName(fd FileDesc) bool {
+ return fd.Type == TypeTable
+}
+
+// fsGenOldName returns the legacy on-disk name for fd: "NNNNNN.sst" for
+// table files. Other types have no legacy name and get their regular one.
+func fsGenOldName(fd FileDesc) string {
+	if fd.Type == TypeTable {
+		return fmt.Sprintf("%06d.sst", fd.Num)
+	}
+	return fsGenName(fd)
+}
+
+// fsParseName parses an on-disk file name into a descriptor. It recognizes
+// "<num>.log", "<num>.ldb", "<num>.sst" and "<num>.tmp" as journal, table,
+// table and temporary files, and "MANIFEST-<num>" as a manifest. ok is false
+// for anything else; in that case fd may still carry a partially parsed
+// number and must not be used.
+func fsParseName(name string) (fd FileDesc, ok bool) {
+ var tail string
+ _, err := fmt.Sscanf(name, "%d.%s", &fd.Num, &tail)
+ if err == nil {
+ switch tail {
+ case "log":
+ fd.Type = TypeJournal
+ case "ldb", "sst":
+ fd.Type = TypeTable
+ case "tmp":
+ fd.Type = TypeTemp
+ default:
+ return
+ }
+ return fd, true
+ }
+ // Exactly one scanned item means the number was read and nothing
+ // followed it.
+ n, _ := fmt.Sscanf(name, "MANIFEST-%d%s", &fd.Num, &tail)
+ if n == 1 {
+ fd.Type = TypeManifest
+ return fd, true
+ }
+ return
+}
+
+// fsParseNamePtr parses name like fsParseName and, when fd is non-nil,
+// stores the parsed descriptor through it whether or not parsing succeeded.
+// It reports whether name is a valid storage file name.
+func fsParseNamePtr(name string, fd *FileDesc) bool {
+	parsed, ok := fsParseName(name)
+	if fd == nil {
+		return ok
+	}
+	*fd = parsed
+	return ok
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go
new file mode 100644
index 000000000..5545aeef2
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go
@@ -0,0 +1,34 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// +build nacl
+
+package storage
+
+import (
+ "os"
+ "syscall"
+)
+
+ // newFileLock is the NaCl stub: it never creates a lock and always returns
+ // syscall.ENOTSUP.
+ func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
+ return nil, syscall.ENOTSUP
+ }
+
+ // setFileLock is the NaCl stub: it always returns syscall.ENOTSUP.
+ func setFileLock(f *os.File, readOnly, lock bool) error {
+ return syscall.ENOTSUP
+ }
+
+ // rename is the NaCl stub: it performs no rename and always returns
+ // syscall.ENOTSUP.
+ func rename(oldpath, newpath string) error {
+ return syscall.ENOTSUP
+ }
+
+ // isErrInvalid is the NaCl stub: no error is ever classified as an
+ // "invalid argument" error on this platform.
+ func isErrInvalid(err error) bool {
+ return false
+ }
+
+ // syncDir is the NaCl stub: it does not sync anything and always returns
+ // syscall.ENOTSUP.
+ func syncDir(name string) error {
+ return syscall.ENOTSUP
+ }
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go
new file mode 100644
index 000000000..bab62bfce
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go
@@ -0,0 +1,65 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package storage
+
+import (
+ "os"
+ "path/filepath"
+)
+
+ // plan9FileLock is the lock handle returned by newFileLock on Plan 9. It
+ // only holds the open lock file; release closes it.
+ type plan9FileLock struct {
+ f *os.File
+ }
+
+ // release gives the lock up by closing the underlying file and returns the
+ // error from Close, if any.
+ func (fl *plan9FileLock) release() error {
+ return fl.f.Close()
+ }
+
+func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
+ var (
+ flag int
+ perm os.FileMode
+ )
+ if readOnly {
+ flag = os.O_RDONLY
+ } else {
+ flag = os.O_RDWR
+ perm = os.ModeExclusive
+ }
+ f, err := os.OpenFile(path, flag, perm)
+ if os.IsNotExist(err) {
+ f, err = os.OpenFile(path, flag|os.O_CREATE, perm|0644)
+ }
+ if err != nil {
+ return
+ }
+ fl = &plan9FileLock{f: f}
+ return
+}
+
+ // rename moves oldpath to newpath, replacing newpath if it already exists.
+ // An existing destination is removed first; a failure to remove it aborts
+ // the rename.
+ func rename(oldpath, newpath string) error {
+ if _, err := os.Stat(newpath); err == nil {
+ if err := os.Remove(newpath); err != nil {
+ return err
+ }
+ }
+
+ // Only the base name of newpath is handed to os.Rename.
+ // NOTE(review): presumably this relies on Plan 9 renaming within the
+ // directory of oldpath — confirm against the os.Rename implementation of
+ // the targeted Go release.
+ _, fname := filepath.Split(newpath)
+ return os.Rename(oldpath, fname)
+ }
+
+ // syncDir opens the directory name and calls Sync on it, returning the
+ // first error from Open or Sync. The directory handle is always closed.
+ func syncDir(name string) error {
+ 	dir, err := os.Open(name)
+ 	if err != nil {
+ 		return err
+ 	}
+ 	defer dir.Close()
+ 	return dir.Sync()
+ }
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go
new file mode 100644
index 000000000..79901ee4a
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go
@@ -0,0 +1,81 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// +build solaris
+
+package storage
+
+import (
+ "os"
+ "syscall"
+)
+
+ // unixFileLock is the lock handle returned by newFileLock on Solaris. It
+ // holds the open lock file on which the fcntl lock was taken.
+ type unixFileLock struct {
+ f *os.File
+ }
+
+ // release drops the lock and closes the lock file. The file is closed even
+ // when unlocking fails, so the descriptor is not leaked. The unlock error
+ // takes precedence in the return value; otherwise the error from Close (if
+ // any) is returned.
+ func (fl *unixFileLock) release() error {
+ 	unlockErr := setFileLock(fl.f, false, false)
+ 	closeErr := fl.f.Close()
+ 	if unlockErr != nil {
+ 		return unlockErr
+ 	}
+ 	return closeErr
+ }
+
+func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
+ var flag int
+ if readOnly {
+ flag = os.O_RDONLY
+ } else {
+ flag = os.O_RDWR
+ }
+ f, err := os.OpenFile(path, flag, 0)
+ if os.IsNotExist(err) {
+ f, err = os.OpenFile(path, flag|os.O_CREATE, 0644)
+ }
+ if err != nil {
+ return
+ }
+ err = setFileLock(f, readOnly, true)
+ if err != nil {
+ f.Close()
+ return
+ }
+ fl = &unixFileLock{f: f}
+ return
+}
+
+ // setFileLock applies or removes an fcntl record lock on f. With lock set
+ // it requests F_RDLCK when readOnly is true and F_WRLCK otherwise; with
+ // lock unset it requests F_UNLCK. The request is issued with F_SETLK and
+ // the error from FcntlFlock is returned unchanged.
+ func setFileLock(f *os.File, readOnly, lock bool) error {
+ // NOTE(review): Whence 1 is presumably SEEK_CUR, so with Start 0 and Len 0
+ // the range would start at the current file offset — confirm whether
+ // SEEK_SET (0) was intended.
+ flock := syscall.Flock_t{
+ Type: syscall.F_UNLCK,
+ Start: 0,
+ Len: 0,
+ Whence: 1,
+ }
+ if lock {
+ if readOnly {
+ flock.Type = syscall.F_RDLCK
+ } else {
+ flock.Type = syscall.F_WRLCK
+ }
+ }
+ return syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &flock)
+ }
+
+ // rename moves oldpath to newpath by delegating directly to os.Rename.
+ func rename(oldpath, newpath string) error {
+ return os.Rename(oldpath, newpath)
+ }
+
+ // syncDir opens the directory name and calls Sync on it, returning the
+ // first error from Open or Sync. The directory handle is always closed.
+ func syncDir(name string) error {
+ 	dir, err := os.Open(name)
+ 	if err != nil {
+ 		return err
+ 	}
+ 	defer dir.Close()
+ 	return dir.Sync()
+ }
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go
new file mode 100644
index 000000000..7e2991537
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go
@@ -0,0 +1,86 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux netbsd openbsd
+
+package storage
+
+import (
+ "os"
+ "syscall"
+)
+
+ // unixFileLock is the lock handle returned by newFileLock on the Unix
+ // platforms named in this file's build tag. It holds the open lock file on
+ // which the flock lock was taken.
+ type unixFileLock struct {
+ f *os.File
+ }
+
+ // release drops the lock and closes the lock file. The file is closed even
+ // when unlocking fails, so the descriptor is not leaked. The unlock error
+ // takes precedence in the return value; otherwise the error from Close (if
+ // any) is returned.
+ func (fl *unixFileLock) release() error {
+ 	unlockErr := setFileLock(fl.f, false, false)
+ 	closeErr := fl.f.Close()
+ 	if unlockErr != nil {
+ 		return unlockErr
+ 	}
+ 	return closeErr
+ }
+
+func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
+ var flag int
+ if readOnly {
+ flag = os.O_RDONLY
+ } else {
+ flag = os.O_RDWR
+ }
+ f, err := os.OpenFile(path, flag, 0)
+ if os.IsNotExist(err) {
+ f, err = os.OpenFile(path, flag|os.O_CREATE, 0644)
+ }
+ if err != nil {
+ return
+ }
+ err = setFileLock(f, readOnly, true)
+ if err != nil {
+ f.Close()
+ return
+ }
+ fl = &unixFileLock{f: f}
+ return
+}
+
+func setFileLock(f *os.File, readOnly, lock bool) error {
+ how := syscall.LOCK_UN
+ if lock {
+ if readOnly {
+ how = syscall.LOCK_SH
+ } else {
+ how = syscall.LOCK_EX
+ }
+ }
+ return syscall.Flock(int(f.Fd()), how|syscall.LOCK_NB)
+}
+
+ // rename moves oldpath to newpath by delegating directly to os.Rename.
+ func rename(oldpath, newpath string) error {
+ return os.Rename(oldpath, newpath)
+ }
+
+func isErrInvalid(err error) bool {
+ if err == os.ErrInvalid {
+ return true
+ }
+ if syserr, ok := err.(*os.SyscallError); ok && syserr.Err == syscall.EINVAL {
+ return true
+ }
+ return false
+}
+
+ // syncDir opens the directory name and calls Sync on it. A Sync failure
+ // that isErrInvalid classifies as "invalid argument" is ignored; any other
+ // error from Open or Sync is returned. The directory handle is always
+ // closed.
+ func syncDir(name string) error {
+ 	dir, err := os.Open(name)
+ 	if err != nil {
+ 		return err
+ 	}
+ 	defer dir.Close()
+
+ 	err = dir.Sync()
+ 	if err == nil || isErrInvalid(err) {
+ 		return nil
+ 	}
+ 	return err
+ }
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go
new file mode 100644
index 000000000..899335fd7
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go
@@ -0,0 +1,78 @@
+// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package storage
+
+import (
+ "syscall"
+ "unsafe"
+)
+
+var (
+ modkernel32 = syscall.NewLazyDLL("kernel32.dll")
+
+ procMoveFileExW = modkernel32.NewProc("MoveFileExW")
+)
+
+const (
+ _MOVEFILE_REPLACE_EXISTING = 1
+)
+
+ // windowsFileLock is the lock handle returned by newFileLock on Windows.
+ // It holds the handle obtained from CreateFile.
+ type windowsFileLock struct {
+ fd syscall.Handle
+ }
+
+ // release gives the lock up by closing the file handle and returns the
+ // error from syscall.Close, if any.
+ func (fl *windowsFileLock) release() error {
+ return syscall.Close(fl.fd)
+ }
+
+ // newFileLock opens the lock file at path with CreateFile and wraps the
+ // resulting handle. Read-only locks request GENERIC_READ with
+ // FILE_SHARE_READ; writable locks request read/write access and leave the
+ // share mode at zero. If the file does not exist the open is retried with
+ // OPEN_ALWAYS so that it gets created.
+ func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
+ pathp, err := syscall.UTF16PtrFromString(path)
+ if err != nil {
+ return
+ }
+ var access, shareMode uint32
+ if readOnly {
+ access = syscall.GENERIC_READ
+ shareMode = syscall.FILE_SHARE_READ
+ } else {
+ access = syscall.GENERIC_READ | syscall.GENERIC_WRITE
+ }
+ fd, err := syscall.CreateFile(pathp, access, shareMode, nil, syscall.OPEN_EXISTING, syscall.FILE_ATTRIBUTE_NORMAL, 0)
+ if err == syscall.ERROR_FILE_NOT_FOUND {
+ fd, err = syscall.CreateFile(pathp, access, shareMode, nil, syscall.OPEN_ALWAYS, syscall.FILE_ATTRIBUTE_NORMAL, 0)
+ }
+ if err != nil {
+ return
+ }
+ fl = &windowsFileLock{fd: fd}
+ return
+ }
+
+ // moveFileEx calls kernel32's MoveFileExW with the given UTF-16 paths and
+ // flags. A zero result is treated as failure: the errno reported by the
+ // call is returned, or syscall.EINVAL when no errno was set.
+ func moveFileEx(from *uint16, to *uint16, flags uint32) error {
+ r1, _, e1 := syscall.Syscall(procMoveFileExW.Addr(), 3, uintptr(unsafe.Pointer(from)), uintptr(unsafe.Pointer(to)), uintptr(flags))
+ if r1 == 0 {
+ if e1 != 0 {
+ return error(e1)
+ }
+ return syscall.EINVAL
+ }
+ return nil
+ }
+
+ // rename moves oldpath to newpath through moveFileEx, passing
+ // _MOVEFILE_REPLACE_EXISTING. Either path failing UTF-16 conversion aborts
+ // the operation with the conversion error.
+ func rename(oldpath, newpath string) error {
+ 	src, err := syscall.UTF16PtrFromString(oldpath)
+ 	if err != nil {
+ 		return err
+ 	}
+ 	dst, err := syscall.UTF16PtrFromString(newpath)
+ 	if err == nil {
+ 		err = moveFileEx(src, dst, _MOVEFILE_REPLACE_EXISTING)
+ 	}
+ 	return err
+ }
+
+ // syncDir is a no-op on Windows and always returns nil.
+ func syncDir(name string) error { return nil }
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go
new file mode 100644
index 000000000..9b0421f03
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go
@@ -0,0 +1,218 @@
+// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package storage
+
+import (
+ "bytes"
+ "os"
+ "sync"
+)
+
+ // typeShift is the number of low bits packFile reserves for the FileType
+ // when folding a FileDesc into a single uint64 map key. FileType is a bit
+ // set whose largest member (TypeTemp) is 1<<3, so four bits are required.
+ // With a shift of 3 the TypeTemp bit overlapped the lowest bit of the file
+ // number, making distinct descriptors collide and unpackFile return
+ // corrupted types.
+ const typeShift = 4
+
+ // memStorageLock is the lock handle handed out by memStorage.Lock. It
+ // keeps a reference to the storage it locks.
+ type memStorageLock struct {
+ ms *memStorage
+ }
+
+ // Unlock releases the storage lock if this handle is the one currently
+ // holding it. Calling it on a stale handle does nothing.
+ func (lock *memStorageLock) Unlock() {
+ 	owner := lock.ms
+ 	owner.mu.Lock()
+ 	if owner.slock == lock {
+ 		owner.slock = nil
+ 	}
+ 	owner.mu.Unlock()
+ }
+
+ // memStorage is a memory-backed storage. mu guards all other fields.
+ type memStorage struct {
+ mu sync.Mutex
+ // slock is the lock handle currently held, or nil when unlocked.
+ slock *memStorageLock
+ // files maps packFile(fd) keys to the file contents.
+ files map[uint64]*memFile
+ // meta is the descriptor last stored by SetMeta; zero when unset.
+ meta FileDesc
+ }
+
+ // NewMemStorage returns a new, empty Storage that keeps all files in
+ // memory.
+ func NewMemStorage() Storage {
+ 	ms := new(memStorage)
+ 	ms.files = make(map[uint64]*memFile)
+ 	return ms
+ }
+
+ // Lock takes the storage lock and returns its handle. It fails with
+ // ErrLocked while a previously returned handle has not been unlocked.
+ func (ms *memStorage) Lock() (Locker, error) {
+ 	ms.mu.Lock()
+ 	defer ms.mu.Unlock()
+
+ 	if ms.slock == nil {
+ 		ms.slock = &memStorageLock{ms: ms}
+ 		return ms.slock, nil
+ 	}
+ 	return nil, ErrLocked
+ }
+
+ // Log discards str; the memory storage keeps no log.
+ func (*memStorage) Log(str string) {}
+
+ // SetMeta records fd as the storage's meta descriptor. It returns
+ // ErrInvalidFile when fd does not pass FileDescOk.
+ func (ms *memStorage) SetMeta(fd FileDesc) error {
+ 	if !FileDescOk(fd) {
+ 		return ErrInvalidFile
+ 	}
+
+ 	ms.mu.Lock()
+ 	defer ms.mu.Unlock()
+ 	ms.meta = fd
+ 	return nil
+ }
+
+ // GetMeta returns the descriptor last stored with SetMeta, or
+ // os.ErrNotExist when none has been stored.
+ func (ms *memStorage) GetMeta() (FileDesc, error) {
+ 	ms.mu.Lock()
+ 	meta := ms.meta
+ 	ms.mu.Unlock()
+
+ 	if meta.Zero() {
+ 		return FileDesc{}, os.ErrNotExist
+ 	}
+ 	return meta, nil
+ }
+
+ // List returns the descriptors of all stored files whose type has at
+ // least one bit in common with ft. The order is unspecified (map
+ // iteration) and the error is always nil.
+ func (ms *memStorage) List(ft FileType) ([]FileDesc, error) {
+ 	ms.mu.Lock()
+ 	defer ms.mu.Unlock()
+
+ 	var matched []FileDesc
+ 	for key := range ms.files {
+ 		if fd := unpackFile(key); fd.Type&ft != 0 {
+ 			matched = append(matched, fd)
+ 		}
+ 	}
+ 	return matched, nil
+ }
+
+ // Open returns a reader over the current contents of the file fd and marks
+ // the file open. It returns ErrInvalidFile for a descriptor that fails
+ // FileDescOk, os.ErrNotExist for an unknown file and errFileOpen when the
+ // file is already open.
+ func (ms *memStorage) Open(fd FileDesc) (Reader, error) {
+ 	if !FileDescOk(fd) {
+ 		return nil, ErrInvalidFile
+ 	}
+
+ 	ms.mu.Lock()
+ 	defer ms.mu.Unlock()
+
+ 	file, found := ms.files[packFile(fd)]
+ 	if !found {
+ 		return nil, os.ErrNotExist
+ 	}
+ 	if file.open {
+ 		return nil, errFileOpen
+ 	}
+ 	file.open = true
+ 	return &memReader{Reader: bytes.NewReader(file.Bytes()), ms: ms, m: file}, nil
+ }
+
+ // Create returns a writer for the file fd and marks the file open. An
+ // existing file is truncated; a missing one is added to the storage. It
+ // returns ErrInvalidFile for a descriptor that fails FileDescOk and
+ // errFileOpen when the file is already open.
+ func (ms *memStorage) Create(fd FileDesc) (Writer, error) {
+ 	if !FileDescOk(fd) {
+ 		return nil, ErrInvalidFile
+ 	}
+
+ 	key := packFile(fd)
+ 	ms.mu.Lock()
+ 	defer ms.mu.Unlock()
+
+ 	file, found := ms.files[key]
+ 	switch {
+ 	case !found:
+ 		file = &memFile{}
+ 		ms.files[key] = file
+ 	case file.open:
+ 		return nil, errFileOpen
+ 	default:
+ 		file.Reset()
+ 	}
+ 	file.open = true
+ 	return &memWriter{memFile: file, ms: ms}, nil
+ }
+
+ // Remove deletes the file fd from the storage. It returns ErrInvalidFile
+ // for a descriptor that fails FileDescOk and os.ErrNotExist when no such
+ // file is stored.
+ func (ms *memStorage) Remove(fd FileDesc) error {
+ 	if !FileDescOk(fd) {
+ 		return ErrInvalidFile
+ 	}
+
+ 	key := packFile(fd)
+ 	ms.mu.Lock()
+ 	defer ms.mu.Unlock()
+
+ 	if _, found := ms.files[key]; !found {
+ 		return os.ErrNotExist
+ 	}
+ 	delete(ms.files, key)
+ 	return nil
+ }
+
+ // Rename moves the file stored under oldfd to newfd, replacing any file
+ // already stored under newfd. Renaming a descriptor to itself is a no-op.
+ //
+ // It returns ErrInvalidFile when either descriptor fails FileDescOk,
+ // os.ErrNotExist when oldfd is not stored, and errFileOpen when the source
+ // or an existing destination is currently open.
+ func (ms *memStorage) Rename(oldfd, newfd FileDesc) error {
+ 	// Both descriptors must be valid. The check used to be inverted, which
+ 	// rejected every valid rename and let invalid descriptors through.
+ 	if !FileDescOk(oldfd) || !FileDescOk(newfd) {
+ 		return ErrInvalidFile
+ 	}
+ 	if oldfd == newfd {
+ 		return nil
+ 	}
+
+ 	oldx := packFile(oldfd)
+ 	newx := packFile(newfd)
+ 	ms.mu.Lock()
+ 	defer ms.mu.Unlock()
+ 	oldm, exist := ms.files[oldx]
+ 	if !exist {
+ 		return os.ErrNotExist
+ 	}
+ 	newm, exist := ms.files[newx]
+ 	if (exist && newm.open) || oldm.open {
+ 		return errFileOpen
+ 	}
+ 	delete(ms.files, oldx)
+ 	ms.files[newx] = oldm
+ 	return nil
+ }
+
+ // Close does nothing and always returns nil.
+ func (*memStorage) Close() error { return nil }
+
+ // memFile is one in-memory file: its contents live in the embedded
+ // bytes.Buffer, and open records whether a reader or writer currently
+ // holds it.
+ type memFile struct {
+ bytes.Buffer
+ open bool
+ }
+
+ // memReader is the Reader returned by memStorage.Open. It reads through
+ // the embedded bytes.Reader and keeps references to the storage and file
+ // it was opened from so that Close can clear the file's open flag.
+ type memReader struct {
+ *bytes.Reader
+ ms *memStorage
+ m *memFile
+ closed bool
+ }
+
+ // Close marks the reader closed and clears the backing file's open flag so
+ // the file can be opened, re-created or renamed again. A second call
+ // returns ErrClosed.
+ func (mr *memReader) Close() error {
+ 	mr.ms.mu.Lock()
+ 	defer mr.ms.mu.Unlock()
+ 	if mr.closed {
+ 		return ErrClosed
+ 	}
+ 	// Record the close. Without this the guard above could never fire, and
+ 	// a stale handle closed twice would clear the open flag set by a newer
+ 	// reader or writer of the same file.
+ 	mr.closed = true
+ 	mr.m.open = false
+ 	return nil
+ }
+
+ // memWriter is the Writer returned by memStorage.Create. Writes go to the
+ // embedded memFile; ms is the owning storage, whose mutex Close takes.
+ type memWriter struct {
+ *memFile
+ ms *memStorage
+ closed bool
+ }
+
+ // Sync does nothing and always returns nil.
+ func (*memWriter) Sync() error { return nil }
+
+ // Close marks the writer closed and clears the backing file's open flag so
+ // the file can be opened, re-created or renamed again. A second call
+ // returns ErrClosed.
+ func (mw *memWriter) Close() error {
+ 	mw.ms.mu.Lock()
+ 	defer mw.ms.mu.Unlock()
+ 	if mw.closed {
+ 		return ErrClosed
+ 	}
+ 	// Record the close. Without this the guard above could never fire, and
+ 	// a stale handle closed twice would clear the open flag set by a newer
+ 	// reader or writer of the same file.
+ 	mw.closed = true
+ 	mw.memFile.open = false
+ 	return nil
+ }
+
+ // packFile folds fd into a single uint64 map key: the file number shifted
+ // left by typeShift, OR'ed with the file type in the low bits.
+ func packFile(fd FileDesc) uint64 {
+ return uint64(fd.Num)<<typeShift | uint64(fd.Type)
+ }
+
+ // unpackFile rebuilds a FileDesc from a key produced by packFile: the type
+ // is the low bits of x masked with TypeAll, and the number is x shifted
+ // right by typeShift.
+ func unpackFile(x uint64) FileDesc {
+ return FileDesc{FileType(x) & TypeAll, int64(x >> typeShift)}
+ }
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go
new file mode 100644
index 000000000..c16bce6b6
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go
@@ -0,0 +1,179 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package storage provides storage abstraction for LevelDB.
+package storage
+
+import (
+ "errors"
+ "fmt"
+ "io"
+)
+
+ // FileType represents a file type. The values are single bits, so several
+ // types can be OR'ed together into a mask.
+ type FileType int
+
+ // File types.
+ const (
+ TypeManifest FileType = 1 << iota
+ TypeJournal
+ TypeTable
+ TypeTemp
+
+ // TypeAll is the mask of all file types.
+ TypeAll = TypeManifest | TypeJournal | TypeTable | TypeTemp
+ )
+
+func (t FileType) String() string {
+ switch t {
+ case TypeManifest:
+ return "manifest"
+ case TypeJournal:
+ return "journal"
+ case TypeTable:
+ return "table"
+ case TypeTemp:
+ return "temp"
+ }
+ return fmt.Sprintf("<unknown:%d>", t)
+}
+
+ // Common errors returned by storage implementations.
+ var (
+ // ErrInvalidFile is returned when a file descriptor argument is invalid.
+ ErrInvalidFile = errors.New("leveldb/storage: invalid file for argument")
+ // ErrLocked is returned by Lock when the storage is already locked.
+ ErrLocked = errors.New("leveldb/storage: already locked")
+ // ErrClosed is returned when operating on something already closed.
+ ErrClosed = errors.New("leveldb/storage: closed")
+ )
+
+// ErrCorrupted is the type that wraps errors that indicate corruption of
+// a file. Package storage has its own type instead of using
+// errors.ErrCorrupted to prevent circular import.
+type ErrCorrupted struct {
+ Fd FileDesc
+ Err error
+}
+
+ // Error returns the wrapped error's message, followed by " [file=<fd>]"
+ // when a file descriptor is attached.
+ func (e *ErrCorrupted) Error() string {
+ 	if e.Fd.Zero() {
+ 		return e.Err.Error()
+ 	}
+ 	return fmt.Sprintf("%v [file=%v]", e.Err, e.Fd)
+ }
+
+// Syncer is the interface that wraps basic Sync method.
+type Syncer interface {
+ // Sync commits the current contents of the file to stable storage.
+ Sync() error
+}
+
+// Reader is the interface that groups the basic Read, Seek, ReadAt and Close
+// methods.
+type Reader interface {
+ io.ReadSeeker
+ io.ReaderAt
+ io.Closer
+}
+
+// Writer is the interface that groups the basic Write, Sync and Close
+// methods.
+type Writer interface {
+ io.WriteCloser
+ Syncer
+}
+
+// Locker is the interface that wraps Unlock method.
+type Locker interface {
+ Unlock()
+}
+
+// FileDesc is a 'file descriptor'.
+type FileDesc struct {
+ Type FileType
+ Num int64
+}
+
+func (fd FileDesc) String() string {
+ switch fd.Type {
+ case TypeManifest:
+ return fmt.Sprintf("MANIFEST-%06d", fd.Num)
+ case TypeJournal:
+ return fmt.Sprintf("%06d.log", fd.Num)
+ case TypeTable:
+ return fmt.Sprintf("%06d.ldb", fd.Num)
+ case TypeTemp:
+ return fmt.Sprintf("%06d.tmp", fd.Num)
+ default:
+ return fmt.Sprintf("%#x-%d", fd.Type, fd.Num)
+ }
+}
+
+// Zero returns true if fd == (FileDesc{}).
+func (fd FileDesc) Zero() bool {
+ return fd == (FileDesc{})
+}
+
+ // FileDescOk reports whether fd is a valid 'file descriptor': its type is
+ // exactly one of the four known file types and its number is not negative.
+ func FileDescOk(fd FileDesc) bool {
+ 	switch fd.Type {
+ 	case TypeManifest, TypeJournal, TypeTable, TypeTemp:
+ 		return fd.Num >= 0
+ 	}
+ 	return false
+ }
+
+// Storage is the storage. A storage instance must be safe for concurrent use.
+type Storage interface {
+ // Lock locks the storage. Any subsequent attempt to call Lock will fail
+ // until the last lock released.
+ // Caller should call Unlock method after use.
+ Lock() (Locker, error)
+
+ // Log logs a string. This is used for logging.
+ // An implementation may write to a file, stdout or simply do nothing.
+ Log(str string)
+
+ // SetMeta store 'file descriptor' that can later be acquired using GetMeta
+ // method. The 'file descriptor' should point to a valid file.
+ // SetMeta should be implemented in such way that changes should happen
+ // atomically.
+ SetMeta(fd FileDesc) error
+
+ // GetMeta returns 'file descriptor' stored in meta. The 'file descriptor'
+ // can be updated using SetMeta method.
+ // Returns os.ErrNotExist if meta doesn't store any 'file descriptor', or
+ // 'file descriptor' point to nonexistent file.
+ GetMeta() (FileDesc, error)
+
+ // List returns file descriptors that match the given file types.
+ // The file types may be OR'ed together.
+ List(ft FileType) ([]FileDesc, error)
+
+ // Open opens file with the given 'file descriptor' read-only.
+ // Returns os.ErrNotExist error if the file does not exist.
+ // Returns ErrClosed if the underlying storage is closed.
+ Open(fd FileDesc) (Reader, error)
+
+ // Create creates file with the given 'file descriptor', truncate if already
+ // exist and opens write-only.
+ // Returns ErrClosed if the underlying storage is closed.
+ Create(fd FileDesc) (Writer, error)
+
+ // Remove removes file with the given 'file descriptor'.
+ // Returns ErrClosed if the underlying storage is closed.
+ Remove(fd FileDesc) error
+
+ // Rename renames file from oldfd to newfd.
+ // Returns ErrClosed if the underlying storage is closed.
+ Rename(oldfd, newfd FileDesc) error
+
+ // Close closes the storage.
+ // It is valid to call Close multiple times. Other methods should not be
+ // called after the storage has been closed.
+ Close() error
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table.go b/vendor/github.com/syndtr/goleveldb/leveldb/table.go
new file mode 100644
index 000000000..81d18a531
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/table.go
@@ -0,0 +1,529 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "fmt"
+ "sort"
+ "sync/atomic"
+
+ "github.com/syndtr/goleveldb/leveldb/cache"
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/table"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// tFile holds basic information about a table.
+type tFile struct {
+ fd storage.FileDesc
+ seekLeft int32
+ size int64
+ imin, imax internalKey
+}
+
+ // after reports whether ukey sorts strictly after the largest user key of
+ // this table. A nil ukey is never after.
+ func (t *tFile) after(icmp *iComparer, ukey []byte) bool {
+ 	if ukey == nil {
+ 		return false
+ 	}
+ 	return icmp.uCompare(ukey, t.imax.ukey()) > 0
+ }
+
+ // before reports whether ukey sorts strictly before the smallest user key
+ // of this table. A nil ukey is never before.
+ func (t *tFile) before(icmp *iComparer, ukey []byte) bool {
+ 	if ukey == nil {
+ 		return false
+ 	}
+ 	return icmp.uCompare(ukey, t.imin.ukey()) < 0
+ }
+
+ // overlaps reports whether the user-key range [umin, umax] intersects this
+ // table's key range, i.e. umin is not past the table's end and umax is not
+ // ahead of its start.
+ func (t *tFile) overlaps(icmp *iComparer, umin, umax []byte) bool {
+ 	disjoint := t.after(icmp, umin) || t.before(icmp, umax)
+ 	return !disjoint
+ }
+
+ // Consumes one seek and returns the number of seeks left. The counter is
+ // decremented atomically.
+ func (t *tFile) consumeSeek() int32 {
+ return atomic.AddInt32(&t.seekLeft, -1)
+ }
+
+// Creates new tFile.
+func newTableFile(fd storage.FileDesc, size int64, imin, imax internalKey) *tFile {
+ f := &tFile{
+ fd: fd,
+ size: size,
+ imin: imin,
+ imax: imax,
+ }
+
+ // We arrange to automatically compact this file after
+ // a certain number of seeks. Let's assume:
+ // (1) One seek costs 10ms
+ // (2) Writing or reading 1MB costs 10ms (100MB/s)
+ // (3) A compaction of 1MB does 25MB of IO:
+ // 1MB read from this level
+ // 10-12MB read from next level (boundaries may be misaligned)
+ // 10-12MB written to next level
+ // This implies that 25 seeks cost the same as the compaction
+ // of 1MB of data. I.e., one seek costs approximately the
+ // same as the compaction of 40KB of data. We are a little
+ // conservative and allow approximately one seek for every 16KB
+ // of data before triggering a compaction.
+ f.seekLeft = int32(size / 16384)
+ if f.seekLeft < 100 {
+ f.seekLeft = 100
+ }
+
+ return f
+}
+
+ // tableFileFromRecord builds a tFile from an added-table record, using the
+ // record's number as a TypeTable file descriptor.
+ func tableFileFromRecord(r atRecord) *tFile {
+ 	fd := storage.FileDesc{Type: storage.TypeTable, Num: r.num}
+ 	return newTableFile(fd, r.size, r.imin, r.imax)
+ }
+
+ // tFiles holds multiple tFile. Len and Swap provide two of the three
+ // sort.Interface methods; the sort helper types in this file supply Less.
+ type tFiles []*tFile
+
+ func (tf tFiles) Len() int { return len(tf) }
+ func (tf tFiles) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] }
+
+ // nums renders the file numbers of all tables as "[ n1, n2, ... ]" (an
+ // empty list renders as "[  ]").
+ func (tf tFiles) nums() string {
+ 	out := []byte("[ ")
+ 	for i, f := range tf {
+ 		if i > 0 {
+ 			out = append(out, ", "...)
+ 		}
+ 		out = append(out, fmt.Sprint(f.fd.Num)...)
+ 	}
+ 	out = append(out, " ]"...)
+ 	return string(out)
+ }
+
+ // lessByKey reports whether table i orders before table j when sorting by
+ // smallest key in ascending order. Tables with equal smallest keys are
+ // ordered by ascending file number.
+ func (tf tFiles) lessByKey(icmp *iComparer, i, j int) bool {
+ 	left, right := tf[i], tf[j]
+ 	if c := icmp.Compare(left.imin, right.imin); c != 0 {
+ 		return c < 0
+ 	}
+ 	return left.fd.Num < right.fd.Num
+ }
+
+// Returns true if i file number is greater than j.
+// This used for sort by file number in descending order.
+func (tf tFiles) lessByNum(i, j int) bool {
+ return tf[i].fd.Num > tf[j].fd.Num
+}
+
+// Sorts tables by key in ascending order.
+func (tf tFiles) sortByKey(icmp *iComparer) {
+ sort.Sort(&tFilesSortByKey{tFiles: tf, icmp: icmp})
+}
+
+// Sorts tables by file number in descending order.
+func (tf tFiles) sortByNum() {
+ sort.Sort(&tFilesSortByNum{tFiles: tf})
+}
+
+ // size returns the combined size of all tables.
+ func (tf tFiles) size() (sum int64) {
+ 	for i := range tf {
+ 		sum += tf[i].size
+ 	}
+ 	return sum
+ }
+
+ // Searches for the smallest index of a table whose smallest key is greater
+ // than or equal to the given key. Returns len(tf) if there is none. Uses
+ // binary search over the tables' smallest keys.
+ func (tf tFiles) searchMin(icmp *iComparer, ikey internalKey) int {
+ return sort.Search(len(tf), func(i int) bool {
+ return icmp.Compare(tf[i].imin, ikey) >= 0
+ })
+ }
+
+ // Searches for the smallest index of a table whose largest key is greater
+ // than or equal to the given key. Returns len(tf) if there is none. Uses
+ // binary search over the tables' largest keys.
+ func (tf tFiles) searchMax(icmp *iComparer, ikey internalKey) int {
+ return sort.Search(len(tf), func(i int) bool {
+ return icmp.Compare(tf[i].imax, ikey) >= 0
+ })
+ }
+
+ // overlaps reports whether the user-key range [umin, umax] intersects the
+ // key range of at least one table. With unsorted set, every table is
+ // checked in turn; otherwise a binary search locates the only candidate.
+ func (tf tFiles) overlaps(icmp *iComparer, umin, umax []byte, unsorted bool) bool {
+ 	if !unsorted {
+ 		idx := 0
+ 		if len(umin) > 0 {
+ 			// Find the earliest possible internal key for min.
+ 			idx = tf.searchMax(icmp, makeInternalKey(nil, umin, keyMaxSeq, keyTypeSeek))
+ 		}
+ 		if idx >= len(tf) {
+ 			// Beginning of range is after all files, so no overlap.
+ 			return false
+ 		}
+ 		return !tf[idx].before(icmp, umax)
+ 	}
+
+ 	// Check against all files.
+ 	for _, t := range tf {
+ 		if t.overlaps(icmp, umin, umax) {
+ 			return true
+ 		}
+ 	}
+ 	return false
+ }
+
+ // Returns the tables whose key range overlaps the given key range.
+ // The range is expanded when an overlapping table extends beyond it.
+ // Expanding umin always restarts the scan; expanding umax restarts it
+ // only if overlapped is true.
+ // The dst content will be overwritten.
+ func (tf tFiles) getOverlaps(dst tFiles, icmp *iComparer, umin, umax []byte, overlapped bool) tFiles {
+ dst = dst[:0]
+ for i := 0; i < len(tf); {
+ t := tf[i]
+ if t.overlaps(icmp, umin, umax) {
+ if umin != nil && icmp.uCompare(t.imin.ukey(), umin) < 0 {
+ // Table starts before umin: widen the range and start over.
+ umin = t.imin.ukey()
+ dst = dst[:0]
+ i = 0
+ continue
+ } else if umax != nil && icmp.uCompare(t.imax.ukey(), umax) > 0 {
+ umax = t.imax.ukey()
+ // Restart search if it is overlapped.
+ if overlapped {
+ dst = dst[:0]
+ i = 0
+ continue
+ }
+ }
+
+ dst = append(dst, t)
+ }
+ i++
+ }
+
+ return dst
+ }
+
+ // getRange returns the smallest and largest internal keys found across all
+ // tables. Both results are the zero value when tf is empty.
+ func (tf tFiles) getRange(icmp *iComparer) (imin, imax internalKey) {
+ 	if len(tf) == 0 {
+ 		return
+ 	}
+
+ 	imin, imax = tf[0].imin, tf[0].imax
+ 	for _, t := range tf[1:] {
+ 		if icmp.Compare(t.imin, imin) < 0 {
+ 			imin = t.imin
+ 		}
+ 		if icmp.Compare(t.imax, imax) > 0 {
+ 			imax = t.imax
+ 		}
+ 	}
+ 	return imin, imax
+ }
+
+// Creates iterator index from tables.
+func (tf tFiles) newIndexIterator(tops *tOps, icmp *iComparer, slice *util.Range, ro *opt.ReadOptions) iterator.IteratorIndexer {
+ if slice != nil {
+ var start, limit int
+ if slice.Start != nil {
+ start = tf.searchMax(icmp, internalKey(slice.Start))
+ }
+ if slice.Limit != nil {
+ limit = tf.searchMin(icmp, internalKey(slice.Limit))
+ } else {
+ limit = tf.Len()
+ }
+ tf = tf[start:limit]
+ }
+ return iterator.NewArrayIndexer(&tFilesArrayIndexer{
+ tFiles: tf,
+ tops: tops,
+ icmp: icmp,
+ slice: slice,
+ ro: ro,
+ })
+}
+
+ // tFilesArrayIndexer is the tables iterator index: it exposes a list of
+ // tables through Search and Get, together with the table ops, comparer,
+ // key range and read options used to build each per-table iterator.
+ type tFilesArrayIndexer struct {
+ tFiles
+ tops *tOps
+ icmp *iComparer
+ slice *util.Range
+ ro *opt.ReadOptions
+ }
+
+ // Search returns the index of the first table whose largest key is greater
+ // than or equal to key (interpreted as an internal key), via searchMax.
+ func (a *tFilesArrayIndexer) Search(key []byte) int {
+ return a.searchMax(a.icmp, internalKey(key))
+ }
+
+func (a *tFilesArrayIndexer) Get(i int) iterator.Iterator {
+ if i == 0 || i == a.Len()-1 {
+ return a.tops.newIterator(a.tFiles[i], a.slice, a.ro)
+ }
+ return a.tops.newIterator(a.tFiles[i], nil, a.ro)
+}
+
+// Helper type for sortByKey.
+type tFilesSortByKey struct {
+ tFiles
+ icmp *iComparer
+}
+
+func (x *tFilesSortByKey) Less(i, j int) bool {
+ return x.lessByKey(x.icmp, i, j)
+}
+
+// Helper type for sortByNum.
+type tFilesSortByNum struct {
+ tFiles
+}
+
+func (x *tFilesSortByNum) Less(i, j int) bool {
+ return x.lessByNum(i, j)
+}
+
+ // tOps groups the table operations of a session: creating, opening,
+ // searching, iterating and removing table files.
+ type tOps struct {
+ s *session
+ // noSync, when set, makes tWriter.finish skip syncing the table file.
+ noSync bool
+ // cache holds opened table readers, keyed by file number.
+ cache *cache.Cache
+ // bcache is handed to table readers, namespaced by file number; nil when
+ // the block cache is disabled.
+ bcache *cache.Cache
+ // bpool is the buffer pool handed to table readers; nil when disabled.
+ bpool *util.BufferPool
+ }
+
+ // create allocates a new file number, creates the matching table file in
+ // the session's storage and returns a writer for it.
+ func (t *tOps) create() (*tWriter, error) {
+ 	fd := storage.FileDesc{Type: storage.TypeTable, Num: t.s.allocFileNum()}
+ 	fw, err := t.s.stor.Create(fd)
+ 	if err != nil {
+ 		return nil, err
+ 	}
+
+ 	writer := &tWriter{
+ 		t:  t,
+ 		fd: fd,
+ 		w:  fw,
+ 		tw: table.NewWriter(fw, t.s.o.Options),
+ 	}
+ 	return writer, nil
+ }
+
+ // Builds a table from the src iterator. It returns the finished table file
+ // and the number of entries written. On any failure the partially written
+ // table is dropped and a non-nil error is returned.
+ func (t *tOps) createFrom(src iterator.Iterator) (f *tFile, n int, err error) {
+ w, err := t.create()
+ if err != nil {
+ return
+ }
+
+ // Clean up on every error path below; err is the named result, so the
+ // deferred function sees the value being returned.
+ defer func() {
+ if err != nil {
+ w.drop()
+ }
+ }()
+
+ for src.Next() {
+ err = w.append(src.Key(), src.Value())
+ if err != nil {
+ return
+ }
+ }
+ // Next returning false may also mean the iterator failed.
+ err = src.Error()
+ if err != nil {
+ return
+ }
+
+ // Read the entry count before finish closes the writer.
+ n = w.tw.EntriesLen()
+ f, err = w.finish()
+ return
+ }
+
+ // Opens table. It returns a cache handle, which should
+ // be released after use. The handle's value is a *table.Reader, looked up
+ // in the table cache by file number and created on a miss.
+ func (t *tOps) open(f *tFile) (ch *cache.Handle, err error) {
+ ch = t.cache.Get(0, uint64(f.fd.Num), func() (size int, value cache.Value) {
+ // The setter has no error result, so failures are reported through the
+ // enclosing function's named err and a nil value.
+ var r storage.Reader
+ r, err = t.s.stor.Open(f.fd)
+ if err != nil {
+ return 0, nil
+ }
+
+ var bcache *cache.NamespaceGetter
+ if t.bcache != nil {
+ bcache = &cache.NamespaceGetter{Cache: t.bcache, NS: uint64(f.fd.Num)}
+ }
+
+ var tr *table.Reader
+ tr, err = table.NewReader(r, f.size, f.fd, bcache, t.bpool, t.s.o.Options)
+ if err != nil {
+ r.Close()
+ return 0, nil
+ }
+ return 1, tr
+
+ })
+ // No handle and no error from the setter is reported as ErrClosed.
+ if ch == nil && err == nil {
+ err = ErrClosed
+ }
+ return
+ }
+
+ // find opens table f and returns the first key/value pair whose key is
+ // greater than or equal to key. The cache handle is released before
+ // returning.
+ func (t *tOps) find(f *tFile, key []byte, ro *opt.ReadOptions) (rkey, rvalue []byte, err error) {
+ 	handle, err := t.open(f)
+ 	if err != nil {
+ 		return nil, nil, err
+ 	}
+ 	defer handle.Release()
+
+ 	reader := handle.Value().(*table.Reader)
+ 	return reader.Find(key, true, ro)
+ }
+
+ // findKey opens table f and returns the first key that is greater than or
+ // equal to key. The cache handle is released before returning.
+ func (t *tOps) findKey(f *tFile, key []byte, ro *opt.ReadOptions) (rkey []byte, err error) {
+ 	handle, err := t.open(f)
+ 	if err != nil {
+ 		return nil, err
+ 	}
+ 	defer handle.Release()
+
+ 	reader := handle.Value().(*table.Reader)
+ 	return reader.FindKey(key, true, ro)
+ }
+
+ // offsetOf opens table f and returns the approximate offset of key within
+ // it. The cache handle is released before returning.
+ func (t *tOps) offsetOf(f *tFile, key []byte) (offset int64, err error) {
+ 	handle, err := t.open(f)
+ 	if err != nil {
+ 		return 0, err
+ 	}
+ 	defer handle.Release()
+
+ 	reader := handle.Value().(*table.Reader)
+ 	return reader.OffsetOf(key)
+ }
+
+ // newIterator opens table f and returns an iterator over it, restricted to
+ // slice. The cache handle is attached to the iterator as its releaser
+ // instead of being released here. If the table cannot be opened, an empty
+ // iterator carrying the error is returned.
+ func (t *tOps) newIterator(f *tFile, slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+ 	handle, err := t.open(f)
+ 	if err != nil {
+ 		return iterator.NewEmptyIterator(err)
+ 	}
+
+ 	reader := handle.Value().(*table.Reader)
+ 	it := reader.NewIterator(slice, ro)
+ 	it.SetReleaser(handle)
+ 	return it
+ }
+
+ // Removes table from persistent storage. The removal is done in a callback
+ // handed to the table cache's Delete.
+ // NOTE(review): presumably the cache runs the callback once no one uses
+ // the table any more — confirm against cache.Cache.Delete.
+ func (t *tOps) remove(f *tFile) {
+ t.cache.Delete(0, uint64(f.fd.Num), func() {
+ // A removal failure is only logged; there is no caller to report it to.
+ if err := t.s.stor.Remove(f.fd); err != nil {
+ t.s.logf("table@remove removing @%d %q", f.fd.Num, err)
+ } else {
+ t.s.logf("table@remove removed @%d", f.fd.Num)
+ }
+ // Evict the removed table's namespace from the block cache.
+ if t.bcache != nil {
+ t.bcache.EvictNS(uint64(f.fd.Num))
+ }
+ })
+ }
+
+ // Closes the table ops instance. It will close all tables,
+ // regardless of whether they are still in use.
+ func (t *tOps) close() {
+ // NOTE(review): bpool is nil when the buffer pool is disabled (see
+ // newTableOps); this relies on BufferPool.Close accepting a nil receiver —
+ // confirm.
+ t.bpool.Close()
+ t.cache.Close()
+ if t.bcache != nil {
+ t.bcache.CloseWeak()
+ }
+ }
+
+ // newTableOps creates an initialized table ops instance for session s.
+ // The open-files cache and the block cache are LRU-backed only when their
+ // configured capacity is positive. The block cache and the buffer pool are
+ // left nil when the session options disable them.
+ func newTableOps(s *session) *tOps {
+ 	var fileCacher cache.Cacher
+ 	if capacity := s.o.GetOpenFilesCacheCapacity(); capacity > 0 {
+ 		fileCacher = cache.NewLRU(capacity)
+ 	}
+
+ 	var blockCache *cache.Cache
+ 	if !s.o.GetDisableBlockCache() {
+ 		var blockCacher cache.Cacher
+ 		if capacity := s.o.GetBlockCacheCapacity(); capacity > 0 {
+ 			blockCacher = cache.NewLRU(capacity)
+ 		}
+ 		blockCache = cache.NewCache(blockCacher)
+ 	}
+
+ 	var pool *util.BufferPool
+ 	if !s.o.GetDisableBufferPool() {
+ 		pool = util.NewBufferPool(s.o.GetBlockSize() + 5)
+ 	}
+
+ 	return &tOps{
+ 		s:      s,
+ 		noSync: s.o.GetNoSync(),
+ 		cache:  cache.NewCache(fileCacher),
+ 		bcache: blockCache,
+ 		bpool:  pool,
+ 	}
+ }
+
+ // tWriter wraps the table writer. It keeps track of the file descriptor
+ // and the added key range.
+ type tWriter struct {
+ t *tOps
+
+ fd storage.FileDesc
+ // w is the underlying storage file; set to nil once closed.
+ w storage.Writer
+ tw *table.Writer
+
+ // first and last are copies of the first and most recent appended keys.
+ first, last []byte
+ }
+
+ // Appends a key/value pair to the table and records the key as the last
+ // (and, for the first call, also the first) key written.
+ func (w *tWriter) append(key, value []byte) error {
+ if w.first == nil {
+ // Copy: the caller may reuse key's backing array.
+ w.first = append([]byte{}, key...)
+ }
+ // Reuse last's backing array to avoid an allocation per entry.
+ w.last = append(w.last[:0], key...)
+ return w.tw.Append(key, value)
+ }
+
+// Returns true if the table is empty.
+func (w *tWriter) empty() bool {
+ return w.first == nil
+}
+
+ // close closes the underlying storage.Writer, if it is still open, and
+ // forgets it so that later calls do nothing.
+ func (w *tWriter) close() {
+ 	if w.w == nil {
+ 		return
+ 	}
+ 	w.w.Close()
+ 	w.w = nil
+ }
+
+ // finish finalizes the table, syncs the file unless the owning tOps has
+ // noSync set, and returns the resulting table file. The storage writer is
+ // closed on every path.
+ func (w *tWriter) finish() (f *tFile, err error) {
+ 	defer w.close()
+
+ 	if err = w.tw.Close(); err != nil {
+ 		return nil, err
+ 	}
+ 	if !w.t.noSync {
+ 		if err = w.w.Sync(); err != nil {
+ 			return nil, err
+ 		}
+ 	}
+
+ 	size := int64(w.tw.BytesLen())
+ 	return newTableFile(w.fd, size, internalKey(w.first), internalKey(w.last)), nil
+ }
+
+ // Drops the table: closes the writer, removes the file from storage, hands
+ // the file number back to the session for reuse and clears the writer's
+ // state.
+ func (w *tWriter) drop() {
+ w.close()
+ // Any error from Remove is ignored here.
+ w.t.s.stor.Remove(w.fd)
+ w.t.s.reuseFileNum(w.fd.Num)
+ w.tw = nil
+ w.first = nil
+ w.last = nil
+ }
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go b/vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go
new file mode 100644
index 000000000..c5be420b3
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go
@@ -0,0 +1,1134 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package table
+
+import (
+ "encoding/binary"
+ "fmt"
+ "io"
+ "sort"
+ "strings"
+ "sync"
+
+ "github.com/golang/snappy"
+
+ "github.com/syndtr/goleveldb/leveldb/cache"
+ "github.com/syndtr/goleveldb/leveldb/comparer"
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/filter"
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// Reader errors.
+var (
+ // ErrNotFound is the same value as errors.ErrNotFound.
+ ErrNotFound = errors.ErrNotFound
+ // ErrReaderReleased is stored as the Reader's error by Reader.Release.
+ ErrReaderReleased = errors.New("leveldb/table: reader released")
+ // ErrIterReleased is set on a block iterator that is used after Release.
+ ErrIterReleased = errors.New("leveldb/table: iterator released")
+)
+
+// ErrCorrupted describes error due to corruption. This error will be wrapped
+// with errors.ErrCorrupted.
+type ErrCorrupted struct {
+ Pos int64 // position of the corrupted region (block offset or footer position)
+ Size int64 // size of the corrupted region
+ Kind string // what was corrupted, e.g. "table-footer" or a Reader.blockKind value
+ Reason string // human-readable description
+}
+
+// Error implements the error interface. The message reports Kind, Pos and
+// Reason; Size is not part of the message.
+func (e *ErrCorrupted) Error() string {
+ return fmt.Sprintf("leveldb/table: corruption on %s (pos=%d): %s", e.Kind, e.Pos, e.Reason)
+}
+
+// max returns the larger of x and y.
+func max(x, y int) int {
+	if y >= x {
+		return y
+	}
+	return x
+}
+
+// block is a decoded (uncompressed) table block together with the location
+// of its restart-point array.
+type block struct {
+ bpool *util.BufferPool // pool that data is returned to on Release
+ bh blockHandle // handle the block was read from
+ data []byte // entries followed by the restart array and its length
+ restartsLen int // number of restart points
+ restartsOffset int // offset of the restart array in data; entries end here
+}
+
+// seek binary-searches the restart points in [rstart, rlimit) and returns
+// the index and entry offset of the last restart point whose key is not
+// greater than key. If every restart key in range is greater than key,
+// rstart is returned. The returned error is always nil.
+func (b *block) seek(cmp comparer.Comparer, rstart, rlimit int, key []byte) (index, offset int, err error) {
+	// greater reports whether the key stored at restart point rstart+i
+	// sorts strictly after the sought key.
+	greater := func(i int) bool {
+		// Skip the one-byte shared length; it is always zero at a
+		// restart point.
+		pos := int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*(rstart+i):])) + 1
+		keyLen, n1 := binary.Uvarint(b.data[pos:])
+		_, n2 := binary.Uvarint(b.data[pos+n1:]) // value length, only its width matters
+		keyStart := pos + n1 + n2
+		return cmp.Compare(b.data[keyStart:keyStart+int(keyLen)], key) > 0
+	}
+	index = rstart + sort.Search(rlimit-rstart, greater) - 1
+	if index < rstart {
+		// The smallest key in range is already greater than key.
+		index = rstart
+	}
+	offset = int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*index:]))
+	return index, offset, nil
+}
+
+// restartIndex returns the index of the last restart point in
+// [rstart, rlimit) whose entry offset is not greater than offset, or
+// rstart-1 if there is none.
+func (b *block) restartIndex(rstart, rlimit, offset int) int {
+	beyond := func(i int) bool {
+		pos := b.restartsOffset + 4*(rstart+i)
+		return int(binary.LittleEndian.Uint32(b.data[pos:])) > offset
+	}
+	return rstart + sort.Search(rlimit-rstart, beyond) - 1
+}
+
+// restartOffset returns the entry offset stored in the restart array at the
+// given index (a little-endian uint32 at restartsOffset+4*index).
+func (b *block) restartOffset(index int) int {
+ return int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*index:]))
+}
+
+// entry decodes the key/value entry that starts at offset.
+//
+// It returns the unshared key suffix, the value, the number of key bytes
+// shared with the previous entry, and the total encoded size n of the
+// entry. When offset is exactly restartsOffset (end of entries) all results
+// are zero and err is nil; callers treat n == 0 as end of block. An offset
+// past restartsOffset, a malformed varint, or an entry that runs into the
+// restart array yields an *ErrCorrupted (with only Reason filled in).
+func (b *block) entry(offset int) (key, value []byte, nShared, n int, err error) {
+ if offset >= b.restartsOffset {
+ if offset != b.restartsOffset {
+ err = &ErrCorrupted{Reason: "entries offset not aligned"}
+ }
+ return
+ }
+ v0, n0 := binary.Uvarint(b.data[offset:]) // Shared prefix length
+ v1, n1 := binary.Uvarint(b.data[offset+n0:]) // Key length
+ v2, n2 := binary.Uvarint(b.data[offset+n0+n1:]) // Value length
+ m := n0 + n1 + n2
+ n = m + int(v1) + int(v2)
+ // Note: n is already assigned here, so it is returned non-zero together
+ // with the error on this path.
+ if n0 <= 0 || n1 <= 0 || n2 <= 0 || offset+n > b.restartsOffset {
+ err = &ErrCorrupted{Reason: "entries corrupted"}
+ return
+ }
+ key = b.data[offset+m : offset+m+int(v1)]
+ value = b.data[offset+m+int(v1) : offset+n]
+ nShared = int(v0)
+ return
+}
+
+// Release hands the block's buffer back to its buffer pool and clears the
+// block's references to both.
+func (b *block) Release() {
+	pool, buf := b.bpool, b.data
+	pool.Put(buf)
+	b.bpool, b.data = nil, nil
+}
+
+// dir is the state/direction of a blockIter.
+type dir int
+
+// The numeric order matters: blockIter.Key and blockIter.Value treat every
+// value <= dirEOI as "not positioned on an entry".
+const (
+ dirReleased dir = iota - 1 // -1: iterator has been released
+ dirSOI // 0: before the first entry (start of iterator)
+ dirEOI // 1: after the last entry (end of iterator)
+ dirBackward // 2: positioned, last move was Prev
+ dirForward // 3: positioned, last move was Next
+)
+
+// blockIter iterates over the entries of a single block, optionally
+// restricted to a slice of it (see Reader.newBlockIter).
+type blockIter struct {
+ tr *Reader
+ block *block
+ blockReleaser util.Releaser // released by Release, before releaser
+ releaser util.Releaser // installed through SetReleaser
+ key, value []byte
+ offset int
+ // Previous offset, only filled by Next.
+ prevOffset int
+ // prevNode and prevKeys cache the entries of the current restart range
+ // while iterating backward; see Prev for the layout.
+ prevNode []int
+ prevKeys []byte
+ restartIndex int
+ // Iterator direction.
+ dir dir
+ // Restart index slice range.
+ riStart int
+ riLimit int
+ // Offset slice range.
+ offsetStart int
+ offsetRealStart int
+ offsetLimit int
+ // Error.
+ err error
+}
+
+// sErr records err on the iterator and drops the current key/value and
+// the backward-iteration cache.
+func (i *blockIter) sErr(err error) {
+	i.key, i.value = nil, nil
+	i.prevNode, i.prevKeys = nil, nil
+	i.err = err
+}
+
+// reset rewinds the iterator to the start of its slice range and puts it
+// in the dirSOI state. The backward cache is truncated if the iterator
+// was moving backward.
+func (i *blockIter) reset() {
+	if i.dir == dirBackward {
+		i.prevNode, i.prevKeys = i.prevNode[:0], i.prevKeys[:0]
+	}
+	i.dir = dirSOI
+	i.restartIndex, i.offset = i.riStart, i.offsetStart
+	i.key, i.value = i.key[:0], nil
+}
+
+// isFirst reports whether the iterator is positioned on the first entry
+// of its range. It is always false when the iterator is not positioned.
+func (i *blockIter) isFirst() bool {
+	if i.dir == dirForward {
+		return i.prevOffset == i.offsetRealStart
+	}
+	if i.dir == dirBackward {
+		return len(i.prevNode) == 1 && i.restartIndex == i.riStart
+	}
+	return false
+}
+
+// isLast reports whether the iterator is positioned on the last entry of
+// its range. It is always false when the iterator is not positioned.
+func (i *blockIter) isLast() bool {
+	positioned := i.dir == dirForward || i.dir == dirBackward
+	return positioned && i.offset == i.offsetLimit
+}
+
+// First moves the iterator to the first entry of its range and reports
+// whether such an entry exists. It fails if the iterator already has an
+// error or has been released.
+func (i *blockIter) First() bool {
+	switch {
+	case i.err != nil:
+		return false
+	case i.dir == dirReleased:
+		i.err = ErrIterReleased
+		return false
+	case i.dir == dirBackward:
+		i.prevNode = i.prevNode[:0]
+		i.prevKeys = i.prevKeys[:0]
+	}
+	i.dir = dirSOI
+	return i.Next()
+}
+
+// Last moves the iterator to the last entry of its range and reports
+// whether such an entry exists. It fails if the iterator already has an
+// error or has been released.
+func (i *blockIter) Last() bool {
+	switch {
+	case i.err != nil:
+		return false
+	case i.dir == dirReleased:
+		i.err = ErrIterReleased
+		return false
+	case i.dir == dirBackward:
+		i.prevNode = i.prevNode[:0]
+		i.prevKeys = i.prevKeys[:0]
+	}
+	i.dir = dirEOI
+	return i.Prev()
+}
+
+// Seek positions the iterator on the first entry whose key is greater
+// than or equal to key and reports whether such an entry exists. It jumps
+// to the closest restart point first and then scans forward with Next.
+func (i *blockIter) Seek(key []byte) bool {
+	switch {
+	case i.err != nil:
+		return false
+	case i.dir == dirReleased:
+		i.err = ErrIterReleased
+		return false
+	}
+
+	restart, off, err := i.block.seek(i.tr.cmp, i.riStart, i.riLimit, key)
+	if err != nil {
+		i.sErr(err)
+		return false
+	}
+	i.restartIndex = restart
+	i.offset = max(i.offsetStart, off)
+	switch i.dir {
+	case dirSOI, dirEOI:
+		i.dir = dirForward
+	}
+	for i.Next() {
+		if i.tr.cmp.Compare(i.key, key) >= 0 {
+			return true
+		}
+	}
+	return false
+}
+
+// Next advances the iterator to the next entry and reports whether one
+// exists. On reaching the end of the range the iterator enters dirEOI;
+// decode errors are recorded through sErr.
+func (i *blockIter) Next() bool {
+ if i.dir == dirEOI || i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if i.dir == dirSOI {
+ i.restartIndex = i.riStart
+ i.offset = i.offsetStart
+ } else if i.dir == dirBackward {
+ i.prevNode = i.prevNode[:0]
+ i.prevKeys = i.prevKeys[:0]
+ }
+ // Entries between offsetStart (a restart point) and offsetRealStart lie
+ // before the slice range; decode them only to rebuild the shared key
+ // prefix, without exposing them.
+ for i.offset < i.offsetRealStart {
+ key, value, nShared, n, err := i.block.entry(i.offset)
+ if err != nil {
+ i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err))
+ return false
+ }
+ if n == 0 {
+ i.dir = dirEOI
+ return false
+ }
+ i.key = append(i.key[:nShared], key...)
+ i.value = value
+ i.offset += n
+ }
+ if i.offset >= i.offsetLimit {
+ i.dir = dirEOI
+ if i.offset != i.offsetLimit {
+ i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned"))
+ }
+ return false
+ }
+ key, value, nShared, n, err := i.block.entry(i.offset)
+ if err != nil {
+ i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err))
+ return false
+ }
+ if n == 0 {
+ i.dir = dirEOI
+ return false
+ }
+ // Rebuild the full key from the shared prefix of the previous key.
+ i.key = append(i.key[:nShared], key...)
+ i.value = value
+ i.prevOffset = i.offset
+ i.offset += n
+ i.dir = dirForward
+ return true
+}
+
+// Prev moves the iterator to the previous entry and reports whether one
+// exists. Because entries are prefix-compressed and can only be decoded
+// forward, the entries of the current restart range are decoded once and
+// cached in prevNode/prevKeys; subsequent calls pop from that cache until
+// the range is exhausted, then the preceding restart range is decoded.
+func (i *blockIter) Prev() bool {
+ if i.dir == dirSOI || i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ var ri int
+ if i.dir == dirForward {
+ // Change direction.
+ i.offset = i.prevOffset
+ if i.offset == i.offsetRealStart {
+ i.dir = dirSOI
+ return false
+ }
+ ri = i.block.restartIndex(i.restartIndex, i.riLimit, i.offset)
+ i.dir = dirBackward
+ } else if i.dir == dirEOI {
+ // At the end of iterator.
+ i.restartIndex = i.riLimit
+ i.offset = i.offsetLimit
+ if i.offset == i.offsetRealStart {
+ i.dir = dirSOI
+ return false
+ }
+ ri = i.riLimit - 1
+ i.dir = dirBackward
+ } else if len(i.prevNode) == 1 {
+ // This is the end of a restart range.
+ i.offset = i.prevNode[0]
+ i.prevNode = i.prevNode[:0]
+ if i.restartIndex == i.riStart {
+ i.dir = dirSOI
+ return false
+ }
+ i.restartIndex--
+ ri = i.restartIndex
+ } else {
+ // In the middle of restart range, get from cache.
+ n := len(i.prevNode) - 3
+ node := i.prevNode[n:]
+ i.prevNode = i.prevNode[:n]
+ // Get the key.
+ ko := node[0]
+ i.key = append(i.key[:0], i.prevKeys[ko:]...)
+ i.prevKeys = i.prevKeys[:ko]
+ // Get the value.
+ vo := node[1]
+ vl := vo + node[2]
+ i.value = i.block.data[vo:vl]
+ i.offset = vl
+ return true
+ }
+ // Build entries cache.
+ i.key = i.key[:0]
+ i.value = nil
+ offset := i.block.restartOffset(ri)
+ if offset == i.offset {
+ ri--
+ if ri < 0 {
+ i.dir = dirSOI
+ return false
+ }
+ offset = i.block.restartOffset(ri)
+ }
+ // prevNode[0] is the offset of the restart point the cache starts at.
+ i.prevNode = append(i.prevNode, offset)
+ for {
+ key, value, nShared, n, err := i.block.entry(offset)
+ if err != nil {
+ i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err))
+ return false
+ }
+ if offset >= i.offsetRealStart {
+ if i.value != nil {
+ // Appends 3 variables:
+ // 1. Previous keys offset
+ // 2. Value offset in the data block
+ // 3. Value length
+ i.prevNode = append(i.prevNode, len(i.prevKeys), offset-len(i.value), len(i.value))
+ i.prevKeys = append(i.prevKeys, i.key...)
+ }
+ i.value = value
+ }
+ i.key = append(i.key[:nShared], key...)
+ offset += n
+ // Stop if target offset reached.
+ if offset >= i.offset {
+ if offset != i.offset {
+ i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned"))
+ return false
+ }
+
+ break
+ }
+ }
+ i.restartIndex = ri
+ i.offset = offset
+ return true
+}
+
+// Key returns the key of the current entry, or nil if the iterator has an
+// error or is not positioned on an entry.
+func (i *blockIter) Key() []byte {
+	if i.err == nil && i.dir > dirEOI {
+		return i.key
+	}
+	return nil
+}
+
+// Value returns the value of the current entry, or nil if the iterator
+// has an error or is not positioned on an entry.
+func (i *blockIter) Value() []byte {
+	if i.err == nil && i.dir > dirEOI {
+		return i.value
+	}
+	return nil
+}
+
+// Release drops all references held by the iterator, marks it released,
+// and then invokes the block releaser followed by the user releaser, if
+// set. Calling Release again is a no-op.
+func (i *blockIter) Release() {
+	if i.dir == dirReleased {
+		return
+	}
+	i.tr, i.block = nil, nil
+	i.prevNode, i.prevKeys = nil, nil
+	i.key, i.value = nil, nil
+	i.dir = dirReleased
+	if rel := i.blockReleaser; rel != nil {
+		rel.Release()
+		i.blockReleaser = nil
+	}
+	if rel := i.releaser; rel != nil {
+		rel.Release()
+		i.releaser = nil
+	}
+}
+
+// SetReleaser installs the releaser that Release will call. It panics
+// with util.ErrReleased if the iterator is already released, and with
+// util.ErrHasReleaser if a releaser is already set and the new one is
+// non-nil. Passing nil clears the current releaser.
+func (i *blockIter) SetReleaser(releaser util.Releaser) {
+	switch {
+	case i.dir == dirReleased:
+		panic(util.ErrReleased)
+	case i.releaser != nil && releaser != nil:
+		panic(util.ErrHasReleaser)
+	}
+	i.releaser = releaser
+}
+
+// Valid reports whether the iterator is error-free and positioned on an
+// entry.
+func (i *blockIter) Valid() bool {
+	if i.err != nil {
+		return false
+	}
+	switch i.dir {
+	case dirBackward, dirForward:
+		return true
+	}
+	return false
+}
+
+// Error returns the error recorded on the iterator, if any.
+func (i *blockIter) Error() error {
+ return i.err
+}
+
+// filterBlock is a decoded filter block.
+type filterBlock struct {
+ bpool *util.BufferPool // pool that data is returned to on Release
+ data []byte // filter data followed by the offsets array and trailer
+ oOffset int // offset of the data-offsets array inside data
+ baseLg uint // last byte of the block; a block offset >> baseLg selects the filter
+ filtersNum int // number of 4-byte entries in the offsets array
+}
+
+// contains reports whether the filter covering the data block at offset
+// may contain key. An empty filter yields false. When no filter covers
+// the offset, or its bounds are inconsistent, it answers true so that the
+// caller falls back to reading the data block.
+func (b *filterBlock) contains(filter filter.Filter, offset uint64, key []byte) bool {
+	idx := int(offset >> b.baseLg)
+	if idx >= b.filtersNum {
+		return true
+	}
+	offsets := b.data[b.oOffset+idx*4:]
+	start := int(binary.LittleEndian.Uint32(offsets))
+	end := int(binary.LittleEndian.Uint32(offsets[4:]))
+	switch {
+	case start < end && end <= b.oOffset:
+		return filter.Contains(b.data[start:end], key)
+	case start == end:
+		return false
+	}
+	return true
+}
+
+// Release hands the filter block's buffer back to its buffer pool and
+// clears the block's references to both.
+func (b *filterBlock) Release() {
+	pool, buf := b.bpool, b.data
+	pool.Put(buf)
+	b.bpool, b.data = nil, nil
+}
+
+// indexIter is a blockIter over the index block whose Get method opens the
+// data block referenced by the current index entry.
+type indexIter struct {
+ *blockIter
+ tr *Reader
+ slice *util.Range // range handed to NewIterator; applied to the first/last data block only
+ // Options
+ fillCache bool
+}
+
+// Get returns an iterator over the data block referenced by the current
+// index entry, or nil when the index iterator is not positioned. The
+// slice range is only forwarded for the first and last index entries,
+// since only those data blocks can be partially covered by it.
+func (i *indexIter) Get() iterator.Iterator {
+	handle := i.Value()
+	if handle == nil {
+		return nil
+	}
+	dataBH, n := decodeBlockHandle(handle)
+	if n == 0 {
+		return iterator.NewEmptyIterator(i.tr.newErrCorruptedBH(i.tr.indexBH, "bad data block handle"))
+	}
+
+	slice := i.slice
+	if slice != nil && !i.blockIter.isFirst() && !i.blockIter.isLast() {
+		slice = nil
+	}
+	return i.tr.getDataIterErr(dataBH, slice, i.tr.verifyChecksum, i.fillCache)
+}
+
+// Reader is a table reader.
+type Reader struct {
+ mu sync.RWMutex // read-locked by the lookup/iterator methods, write-locked by Release
+ fd storage.FileDesc
+ reader io.ReaderAt
+ cache *cache.NamespaceGetter // optional block cache; nil means index/filter blocks are kept locally
+ err error // sticky error; once set, public methods return it
+ bpool *util.BufferPool
+ // Options
+ o *opt.Options
+ cmp comparer.Comparer
+ filter filter.Filter
+ verifyChecksum bool
+
+ dataEnd int64 // offset where data blocks end (start of filter or metaindex block)
+ metaBH, indexBH, filterBH blockHandle
+ indexBlock *block // only set when cache is nil
+ filterBlock *filterBlock // only set when cache is nil and a filter is in use
+}
+
+// blockKind names the kind of block bh refers to by comparing its offset
+// with the known meta, index and filter block offsets. Anything else,
+// including a match against an empty filter handle, is a data block.
+func (r *Reader) blockKind(bh blockHandle) string {
+	switch {
+	case bh.offset == r.metaBH.offset:
+		return "meta-block"
+	case bh.offset == r.indexBH.offset:
+		return "index-block"
+	case bh.offset == r.filterBH.offset && r.filterBH.length > 0:
+		return "filter-block"
+	}
+	return "data-block"
+}
+
+// newErrCorrupted builds a table ErrCorrupted from the given details and
+// wraps it in an errors.ErrCorrupted tagged with the reader's file.
+func (r *Reader) newErrCorrupted(pos, size int64, kind, reason string) error {
+	cause := &ErrCorrupted{Pos: pos, Size: size, Kind: kind, Reason: reason}
+	return &errors.ErrCorrupted{Fd: r.fd, Err: cause}
+}
+
+// newErrCorruptedBH is newErrCorrupted with position, size and kind taken
+// from the block handle.
+func (r *Reader) newErrCorruptedBH(bh blockHandle, reason string) error {
+	pos, size := int64(bh.offset), int64(bh.length)
+	return r.newErrCorrupted(pos, size, r.blockKind(bh), reason)
+}
+
+// fixErrCorruptedBH completes a bare *ErrCorrupted (as produced by
+// block.entry) with the position, size and kind of bh and wraps it in an
+// errors.ErrCorrupted. Any other error is returned unchanged.
+func (r *Reader) fixErrCorruptedBH(bh blockHandle, err error) error {
+	cerr, ok := err.(*ErrCorrupted)
+	if !ok {
+		return err
+	}
+	cerr.Pos = int64(bh.offset)
+	cerr.Size = int64(bh.length)
+	cerr.Kind = r.blockKind(bh)
+	return &errors.ErrCorrupted{Fd: r.fd, Err: cerr}
+}
+
+// readRawBlock reads the block described by bh together with its 5-byte
+// trailer, optionally verifies the checksum, and returns the uncompressed
+// block contents.
+//
+// The returned slice comes from r.bpool and must be handed back to it by
+// the caller once done. On every error path the buffers obtained here are
+// returned to the pool before returning.
+func (r *Reader) readRawBlock(bh blockHandle, verifyChecksum bool) ([]byte, error) {
+	data := r.bpool.Get(int(bh.length + blockTrailerLen))
+	if _, err := r.reader.ReadAt(data, int64(bh.offset)); err != nil && err != io.EOF {
+		r.bpool.Put(data)
+		return nil, err
+	}
+
+	if verifyChecksum {
+		// The checksum covers the block contents plus the compression
+		// type byte.
+		n := bh.length + 1
+		checksum0 := binary.LittleEndian.Uint32(data[n:])
+		checksum1 := util.NewCRC(data[:n]).Value()
+		if checksum0 != checksum1 {
+			r.bpool.Put(data)
+			return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("checksum mismatch, want=%#x got=%#x", checksum0, checksum1))
+		}
+	}
+
+	// Read the compression type before any Put: once the buffer is back
+	// in the pool it may be reused by another goroutine.
+	switch compression := data[bh.length]; compression {
+	case blockTypeNoCompression:
+		data = data[:bh.length]
+	case blockTypeSnappyCompression:
+		decLen, err := snappy.DecodedLen(data[:bh.length])
+		if err != nil {
+			r.bpool.Put(data)
+			return nil, r.newErrCorruptedBH(bh, err.Error())
+		}
+		decBuf := r.bpool.Get(decLen)
+		decData, err := snappy.Decode(decBuf, data[:bh.length])
+		r.bpool.Put(data)
+		if err != nil {
+			// Decode returns a nil slice on error, so give back the
+			// buffer we allocated rather than its result.
+			r.bpool.Put(decBuf)
+			return nil, r.newErrCorruptedBH(bh, err.Error())
+		}
+		data = decData
+	default:
+		r.bpool.Put(data)
+		return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("unknown compression type %#x", compression))
+	}
+	return data, nil
+}
+
+// readBlock reads and decodes the block described by bh.
+//
+// The last four bytes of a block hold the number of restart points; the
+// restart array sits directly before them. Both are validated against the
+// block size so that a corrupted block is reported as a corruption error
+// instead of causing an out-of-range panic or a negative restartsOffset.
+func (r *Reader) readBlock(bh blockHandle, verifyChecksum bool) (*block, error) {
+	data, err := r.readRawBlock(bh, verifyChecksum)
+	if err != nil {
+		return nil, err
+	}
+	if len(data) < 4 {
+		r.bpool.Put(data)
+		return nil, r.newErrCorruptedBH(bh, "block too short")
+	}
+	restartsLen := int(binary.LittleEndian.Uint32(data[len(data)-4:]))
+	// Compare against the number of 4-byte slots that fit before the
+	// length field; this form cannot overflow.
+	if restartsLen < 0 || restartsLen > (len(data)-4)/4 {
+		r.bpool.Put(data)
+		return nil, r.newErrCorruptedBH(bh, "bad restart points length")
+	}
+	b := &block{
+		bpool:          r.bpool,
+		bh:             bh,
+		data:           data,
+		restartsLen:    restartsLen,
+		restartsOffset: len(data) - (restartsLen+1)*4,
+	}
+	return b, nil
+}
+
+// readBlockCached returns the block described by bh, going through the
+// block cache when one is configured.
+//
+// With a cache and fillCache set, a missing block is read and inserted;
+// without fillCache only an existing cache entry is used. If the cache
+// yields nothing (or there is no cache) the block is read directly. The
+// returned releaser is the cache handle or the block itself and must be
+// released by the caller. On error both the block and the releaser are
+// nil.
+func (r *Reader) readBlockCached(bh blockHandle, verifyChecksum, fillCache bool) (*block, util.Releaser, error) {
+	if r.cache != nil {
+		var (
+			err error
+			ch  *cache.Handle
+		)
+		if fillCache {
+			ch = r.cache.Get(bh.offset, func() (size int, value cache.Value) {
+				var b *block
+				// err is captured so a read failure inside the
+				// cache callback reaches the caller.
+				b, err = r.readBlock(bh, verifyChecksum)
+				if err != nil {
+					return 0, nil
+				}
+				return cap(b.data), b
+			})
+		} else {
+			ch = r.cache.Get(bh.offset, nil)
+		}
+		if ch != nil {
+			b, ok := ch.Value().(*block)
+			if !ok {
+				ch.Release()
+				return nil, nil, errors.New("leveldb/table: inconsistent block type")
+			}
+			return b, ch, err
+		} else if err != nil {
+			return nil, nil, err
+		}
+	}
+
+	b, err := r.readBlock(bh, verifyChecksum)
+	if err != nil {
+		// Return an untyped nil releaser: a nil *block stored in the
+		// interface would compare non-nil.
+		return nil, nil, err
+	}
+	return b, b, nil
+}
+
+// readFilterBlock reads and decodes the filter block described by bh. The
+// checksum is always verified.
+//
+// The block ends with a 4-byte offset of the data-offsets array followed
+// by the 1-byte base Lg. A block too short to hold that trailer, or whose
+// offset points past it, is reported as corrupted; in both cases the
+// buffer is handed back to the pool first.
+func (r *Reader) readFilterBlock(bh blockHandle) (*filterBlock, error) {
+	data, err := r.readRawBlock(bh, true)
+	if err != nil {
+		return nil, err
+	}
+	n := len(data)
+	if n < 5 {
+		r.bpool.Put(data)
+		return nil, r.newErrCorruptedBH(bh, "too short")
+	}
+	m := n - 5
+	oOffset := int(binary.LittleEndian.Uint32(data[m:]))
+	if oOffset > m {
+		r.bpool.Put(data)
+		return nil, r.newErrCorruptedBH(bh, "invalid data-offsets offset")
+	}
+	b := &filterBlock{
+		bpool:      r.bpool,
+		data:       data,
+		oOffset:    oOffset,
+		baseLg:     uint(data[n-1]),
+		filtersNum: (m - oOffset) / 4,
+	}
+	return b, nil
+}
+
+// readFilterBlockCached returns the filter block described by bh, going
+// through the block cache when one is configured.
+//
+// With a cache and fillCache set, a missing block is read and inserted;
+// without fillCache only an existing cache entry is used. If the cache
+// yields nothing (or there is no cache) the block is read directly. The
+// returned releaser is the cache handle or the block itself and must be
+// released by the caller. On error both the block and the releaser are
+// nil.
+func (r *Reader) readFilterBlockCached(bh blockHandle, fillCache bool) (*filterBlock, util.Releaser, error) {
+	if r.cache != nil {
+		var (
+			err error
+			ch  *cache.Handle
+		)
+		if fillCache {
+			ch = r.cache.Get(bh.offset, func() (size int, value cache.Value) {
+				var b *filterBlock
+				// err is captured so a read failure inside the
+				// cache callback reaches the caller.
+				b, err = r.readFilterBlock(bh)
+				if err != nil {
+					return 0, nil
+				}
+				return cap(b.data), b
+			})
+		} else {
+			ch = r.cache.Get(bh.offset, nil)
+		}
+		if ch != nil {
+			b, ok := ch.Value().(*filterBlock)
+			if !ok {
+				ch.Release()
+				return nil, nil, errors.New("leveldb/table: inconsistent block type")
+			}
+			return b, ch, err
+		} else if err != nil {
+			return nil, nil, err
+		}
+	}
+
+	b, err := r.readFilterBlock(bh)
+	if err != nil {
+		// Return an untyped nil releaser: a nil *filterBlock stored in
+		// the interface would compare non-nil.
+		return nil, nil, err
+	}
+	return b, b, nil
+}
+
+// getIndexBlock returns the index block: the locally held copy (with a
+// no-op releaser) when there is one, otherwise the block obtained through
+// readBlockCached with checksum verification enabled.
+func (r *Reader) getIndexBlock(fillCache bool) (b *block, rel util.Releaser, err error) {
+	if r.indexBlock != nil {
+		return r.indexBlock, util.NoopReleaser{}, nil
+	}
+	return r.readBlockCached(r.indexBH, true, fillCache)
+}
+
+// getFilterBlock returns the filter block: the locally held copy (with a
+// no-op releaser) when there is one, otherwise the block obtained through
+// readFilterBlockCached.
+func (r *Reader) getFilterBlock(fillCache bool) (*filterBlock, util.Releaser, error) {
+	if r.filterBlock != nil {
+		return r.filterBlock, util.NoopReleaser{}, nil
+	}
+	return r.readFilterBlockCached(r.filterBH, fillCache)
+}
+
+// newBlockIter creates an iterator over block b. bReleaser, if non-nil, is
+// released together with the iterator.
+//
+// When slice is non-nil the iterator is narrowed to it by seeking to
+// slice.Start and slice.Limit and recording the resulting restart-index
+// and offset bounds. With inclLimit set, the entry found for slice.Limit
+// is itself kept inside the range (the limit is pushed one entry further).
+// If the computed start lies beyond the limit, the iterator is returned
+// with an "invalid slice range" error.
+func (r *Reader) newBlockIter(b *block, bReleaser util.Releaser, slice *util.Range, inclLimit bool) *blockIter {
+ bi := &blockIter{
+ tr: r,
+ block: b,
+ blockReleaser: bReleaser,
+ // Valid key should never be nil.
+ key: make([]byte, 0),
+ dir: dirSOI,
+ riStart: 0,
+ riLimit: b.restartsLen,
+ offsetStart: 0,
+ offsetRealStart: 0,
+ offsetLimit: b.restartsOffset,
+ }
+ if slice != nil {
+ if slice.Start != nil {
+ if bi.Seek(slice.Start) {
+ // offsetStart is the restart point at or before the first
+ // in-range entry; offsetRealStart is that entry itself.
+ bi.riStart = b.restartIndex(bi.restartIndex, b.restartsLen, bi.prevOffset)
+ bi.offsetStart = b.restartOffset(bi.riStart)
+ bi.offsetRealStart = bi.prevOffset
+ } else {
+ // No entry >= Start: make the range empty.
+ bi.riStart = b.restartsLen
+ bi.offsetStart = b.restartsOffset
+ bi.offsetRealStart = b.restartsOffset
+ }
+ }
+ if slice.Limit != nil {
+ if bi.Seek(slice.Limit) && (!inclLimit || bi.Next()) {
+ bi.offsetLimit = bi.prevOffset
+ bi.riLimit = bi.restartIndex + 1
+ }
+ }
+ bi.reset()
+ if bi.offsetStart > bi.offsetLimit {
+ bi.sErr(errors.New("leveldb/table: invalid slice range"))
+ }
+ }
+ return bi
+}
+
+// getDataIter returns an iterator over the data block described by
+// dataBH, or an empty iterator carrying the error if the block cannot be
+// read. The caller must hold r.mu.
+func (r *Reader) getDataIter(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator {
+	blk, releaser, err := r.readBlockCached(dataBH, verifyChecksum, fillCache)
+	if err == nil {
+		return r.newBlockIter(blk, releaser, slice, false)
+	}
+	return iterator.NewEmptyIterator(err)
+}
+
+// getDataIterErr is getDataIter for callers that do not hold r.mu: it
+// takes the read lock and returns an empty iterator carrying the reader's
+// sticky error, if one is set.
+func (r *Reader) getDataIterErr(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+
+	if err := r.err; err != nil {
+		return iterator.NewEmptyIterator(err)
+	}
+	return r.getDataIter(dataBH, slice, verifyChecksum, fillCache)
+}
+
+// NewIterator creates an iterator over the table.
+//
+// Slice restricts the iterator to keys within the given range. A nil
+// Range.Start is treated as a key before all keys in the table, and a nil
+// Range.Limit as a key after all keys in the table.
+//
+// The returned iterator is not safe for concurrent use and should be
+// released after use.
+//
+// Also read Iterator documentation of the leveldb/iterator package.
+func (r *Reader) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+
+	if err := r.err; err != nil {
+		return iterator.NewEmptyIterator(err)
+	}
+
+	fillCache := !ro.GetDontFillCache()
+	indexBlock, rel, err := r.getIndexBlock(fillCache)
+	if err != nil {
+		return iterator.NewEmptyIterator(err)
+	}
+	index := &indexIter{
+		blockIter: r.newBlockIter(indexBlock, rel, slice, true),
+		tr:        r,
+		slice:     slice,
+		fillCache: fillCache,
+	}
+	return iterator.NewIndexedIterator(index, opt.GetStrict(r.o, ro, opt.StrictReader))
+}
+
+// find looks up the first key/value pair whose key is greater than or equal
+// to key. It backs Find, FindKey and Get.
+//
+// The index block is searched for the data block that may hold key. If
+// filtered is set and a filter is available, the filter is consulted first
+// and a negative answer returns ErrNotFound immediately. If the candidate
+// data block holds no key >= key, the first entry of the next data block is
+// returned. With noValue set only rkey is filled in.
+//
+// NOTE(review): r.err is assigned below while only the read lock is held,
+// so concurrent callers can race on it — confirm whether this is intended.
+func (r *Reader) find(key []byte, filtered bool, ro *opt.ReadOptions, noValue bool) (rkey, value []byte, err error) {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+
+ if r.err != nil {
+ err = r.err
+ return
+ }
+
+ indexBlock, rel, err := r.getIndexBlock(true)
+ if err != nil {
+ return
+ }
+ defer rel.Release()
+
+ // The block releaser is nil here: the index block is released through
+ // rel above, not by the iterator.
+ index := r.newBlockIter(indexBlock, nil, nil, true)
+ defer index.Release()
+
+ if !index.Seek(key) {
+ if err = index.Error(); err == nil {
+ err = ErrNotFound
+ }
+ return
+ }
+
+ dataBH, n := decodeBlockHandle(index.Value())
+ if n == 0 {
+ r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle")
+ return nil, nil, r.err
+ }
+
+ // The filter should only used for exact match.
+ if filtered && r.filter != nil {
+ filterBlock, frel, ferr := r.getFilterBlock(true)
+ if ferr == nil {
+ if !filterBlock.contains(r.filter, dataBH.offset, key) {
+ frel.Release()
+ return nil, nil, ErrNotFound
+ }
+ frel.Release()
+ } else if !errors.IsCorrupted(ferr) {
+ return nil, nil, ferr
+ }
+ // A corrupted filter block is ignored; the lookup continues unfiltered.
+ }
+
+ data := r.getDataIter(dataBH, nil, r.verifyChecksum, !ro.GetDontFillCache())
+ if !data.Seek(key) {
+ data.Release()
+ if err = data.Error(); err != nil {
+ return
+ }
+
+ // The nearest greater-than key is the first key of the next block.
+ if !index.Next() {
+ if err = index.Error(); err == nil {
+ err = ErrNotFound
+ }
+ return
+ }
+
+ dataBH, n = decodeBlockHandle(index.Value())
+ if n == 0 {
+ r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle")
+ return nil, nil, r.err
+ }
+
+ data = r.getDataIter(dataBH, nil, r.verifyChecksum, !ro.GetDontFillCache())
+ if !data.Next() {
+ data.Release()
+ if err = data.Error(); err == nil {
+ err = ErrNotFound
+ }
+ return
+ }
+ }
+
+ // Key doesn't use block buffer, no need to copy the buffer.
+ rkey = data.Key()
+ if !noValue {
+ if r.bpool == nil {
+ value = data.Value()
+ } else {
+ // Value does use block buffer, and since the buffer will be
+ // recycled, it need to be copied.
+ value = append([]byte{}, data.Value()...)
+ }
+ }
+ data.Release()
+ return
+}
+
+// Find finds the key/value pair whose key is greater than or equal to the
+// given key. It returns ErrNotFound if the table doesn't contain
+// such a pair.
+// If filtered is true then the nearest 'block' will be checked against
+// 'filter data' (if present) and Find will immediately return ErrNotFound
+// if 'filter data' indicates that such a pair doesn't exist.
+//
+// The caller may modify the contents of the returned slice as it is its
+// own copy.
+// It is safe to modify the contents of the argument after Find returns.
+func (r *Reader) Find(key []byte, filtered bool, ro *opt.ReadOptions) (rkey, value []byte, err error) {
+ return r.find(key, filtered, ro, false)
+}
+
+// FindKey finds the key that is greater than or equal to the given key.
+// It returns ErrNotFound if the table doesn't contain such a key.
+// If filtered is true then the nearest 'block' will be checked against
+// 'filter data' (if present) and FindKey will immediately return
+// ErrNotFound if 'filter data' indicates that such a key doesn't exist.
+//
+// The caller may modify the contents of the returned slice as it is its
+// own copy.
+// It is safe to modify the contents of the argument after FindKey returns.
+func (r *Reader) FindKey(key []byte, filtered bool, ro *opt.ReadOptions) (rkey []byte, err error) {
+ // noValue is set, so find skips copying the value.
+ rkey, _, err = r.find(key, filtered, ro, true)
+ return
+}
+
+// Get gets the value for the given key. It returns errors.ErrNotFound
+// if the table does not contain the key.
+//
+// The caller may modify the contents of the returned slice as it is its
+// own copy.
+// It is safe to modify the contents of the argument after Get returns.
+func (r *Reader) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
+	// No locking here: find takes r.mu.RLock and checks r.err itself.
+	// Holding the read lock across that call would be recursive read
+	// locking, which sync.RWMutex prohibits because it can deadlock when
+	// a writer (Release) is waiting between the two acquisitions.
+	rkey, value, err := r.find(key, false, ro, false)
+	if err == nil && r.cmp.Compare(rkey, key) != 0 {
+		// find returns the nearest key >= key; Get wants an exact match.
+		return nil, ErrNotFound
+	}
+	return value, err
+}
+
+// OffsetOf returns the approximate file offset of the given key: the
+// offset of the data block that would contain it, or the end of the data
+// blocks if the key sorts after every key in the table.
+//
+// It is safe to modify the contents of the argument after OffsetOf returns.
+func (r *Reader) OffsetOf(key []byte) (offset int64, err error) {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+
+	if r.err != nil {
+		err = r.err
+		return
+	}
+
+	// Use getIndexBlock so that a locally held index block is reused
+	// instead of being read from the file again.
+	indexBlock, rel, err := r.getIndexBlock(true)
+	if err != nil {
+		return
+	}
+	defer rel.Release()
+
+	index := r.newBlockIter(indexBlock, nil, nil, true)
+	defer index.Release()
+	if index.Seek(key) {
+		dataBH, n := decodeBlockHandle(index.Value())
+		if n == 0 {
+			// NOTE(review): as in find, r.err is written under the
+			// read lock only — confirm whether this is intended.
+			r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle")
+			// Report the corruption to the caller instead of
+			// returning offset 0 with a nil error.
+			return 0, r.err
+		}
+		offset = int64(dataBH.offset)
+		return
+	}
+	err = index.Error()
+	if err == nil {
+		offset = r.dataEnd
+	}
+	return
+}
+
+// Release implements util.Releaser.
+// It closes the underlying file if it is an io.Closer, releases the
+// locally held index and filter blocks, and marks the reader as released
+// so that later calls fail with ErrReaderReleased.
+func (r *Reader) Release() {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	if c, ok := r.reader.(io.Closer); ok {
+		c.Close()
+	}
+	if blk := r.indexBlock; blk != nil {
+		blk.Release()
+		r.indexBlock = nil
+	}
+	if blk := r.filterBlock; blk != nil {
+		blk.Release()
+		r.filterBlock = nil
+	}
+	r.reader, r.cache, r.bpool = nil, nil, nil
+	r.err = ErrReaderReleased
+}
+
+// NewReader creates a new initialized table reader for the file.
+// The cache and bpool are optional and can be nil. (The original comment
+// also listed a parameter "fi", presumably today's fd — TODO confirm
+// whether a zero fd is acceptable.)
+//
+// Corruption detected while opening the table is not returned as an error:
+// the reader is returned with the corruption stored as its sticky error, so
+// every later operation reports it. Other read errors are returned directly.
+//
+// The returned table reader instance is safe for concurrent use.
+func NewReader(f io.ReaderAt, size int64, fd storage.FileDesc, cache *cache.NamespaceGetter, bpool *util.BufferPool, o *opt.Options) (*Reader, error) {
+ if f == nil {
+ return nil, errors.New("leveldb/table: nil file")
+ }
+
+ r := &Reader{
+ fd: fd,
+ reader: f,
+ cache: cache,
+ bpool: bpool,
+ o: o,
+ cmp: o.GetComparer(),
+ verifyChecksum: o.GetStrict(opt.StrictBlockChecksum),
+ }
+
+ if size < footerLen {
+ r.err = r.newErrCorrupted(0, size, "table", "too small")
+ return r, nil
+ }
+
+ footerPos := size - footerLen
+ var footer [footerLen]byte
+ if _, err := r.reader.ReadAt(footer[:], footerPos); err != nil && err != io.EOF {
+ return nil, err
+ }
+ if string(footer[footerLen-len(magic):footerLen]) != magic {
+ r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad magic number")
+ return r, nil
+ }
+
+ var n int
+ // Decode the metaindex block handle.
+ r.metaBH, n = decodeBlockHandle(footer[:])
+ if n == 0 {
+ r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad metaindex block handle")
+ return r, nil
+ }
+
+ // Decode the index block handle.
+ r.indexBH, n = decodeBlockHandle(footer[n:])
+ if n == 0 {
+ r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad index block handle")
+ return r, nil
+ }
+
+ // Read metaindex block.
+ metaBlock, err := r.readBlock(r.metaBH, true)
+ if err != nil {
+ if errors.IsCorrupted(err) {
+ r.err = err
+ return r, nil
+ }
+ return nil, err
+ }
+
+ // Set data end.
+ r.dataEnd = int64(r.metaBH.offset)
+
+ // Read metaindex.
+ // Only "filter.<name>" entries are of interest; the first one whose name
+ // matches the configured filter or one of the alternative filters wins.
+ metaIter := r.newBlockIter(metaBlock, nil, nil, true)
+ for metaIter.Next() {
+ key := string(metaIter.Key())
+ if !strings.HasPrefix(key, "filter.") {
+ continue
+ }
+ fn := key[7:]
+ if f0 := o.GetFilter(); f0 != nil && f0.Name() == fn {
+ r.filter = f0
+ } else {
+ for _, f0 := range o.GetAltFilters() {
+ if f0.Name() == fn {
+ r.filter = f0
+ break
+ }
+ }
+ }
+ if r.filter != nil {
+ filterBH, n := decodeBlockHandle(metaIter.Value())
+ if n == 0 {
+ continue
+ }
+ r.filterBH = filterBH
+ // Update data end.
+ r.dataEnd = int64(filterBH.offset)
+ break
+ }
+ }
+ metaIter.Release()
+ metaBlock.Release()
+
+ // Cache index and filter block locally, since we don't have global cache.
+ if cache == nil {
+ r.indexBlock, err = r.readBlock(r.indexBH, true)
+ if err != nil {
+ if errors.IsCorrupted(err) {
+ r.err = err
+ return r, nil
+ }
+ return nil, err
+ }
+ if r.filter != nil {
+ r.filterBlock, err = r.readFilterBlock(r.filterBH)
+ if err != nil {
+ if !errors.IsCorrupted(err) {
+ return nil, err
+ }
+
+ // Don't use filter then.
+ r.filter = nil
+ }
+ }
+ }
+
+ return r, nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table/table.go b/vendor/github.com/syndtr/goleveldb/leveldb/table/table.go
new file mode 100644
index 000000000..beacdc1f0
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/table/table.go
@@ -0,0 +1,177 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package table allows read and write sorted key/value.
+package table
+
+import (
+ "encoding/binary"
+)
+
+/*
+Table:
+
+A table consists of one or more data blocks, an optional filter block,
+a metaindex block, an index block and a table footer. The metaindex block
+is a special block used to keep parameters of the table, such as the filter
+block name and its block handle. The index block is a special block used to
+keep a record of data block offsets and lengths; the index block uses one as
+its restart interval. The keys used by the index block are the last key of the
+preceding block, a shorter separator of adjacent blocks, or a shorter successor
+of the last key of the last block. The filter block is an optional block that
+contains a sequence of filter data generated by a filter generator.
+
+Table data structure:
+ + optional
+ /
+ +--------------+--------------+--------------+------+-------+-----------------+-------------+--------+
+ | data block 1 | ... | data block n | filter block | metaindex block | index block | footer |
+ +--------------+--------------+--------------+--------------+-----------------+-------------+--------+
+
+ Each block followed by a 5-bytes trailer contains compression type and checksum.
+
+Table block trailer:
+
+ +---------------------------+-------------------+
+ | compression type (1-byte) | checksum (4-byte) |
+ +---------------------------+-------------------+
+
+ The checksum is a CRC-32 computed using Castagnoli's polynomial. Compression
+ type also included in the checksum.
+
+Table footer:
+
+ +------------------- 40-bytes -------------------+
+ / \
+ +------------------------+--------------------+------+-----------------+
+ | metaindex block handle / index block handle / ---- | magic (8-bytes) |
+ +------------------------+--------------------+------+-----------------+
+
+ The magic are first 64-bit of SHA-1 sum of "http://code.google.com/p/leveldb/".
+
+NOTE: All fixed-length integer are little-endian.
+*/
+
+/*
+Block:
+
+A block consists of one or more key/value entries and a block trailer.
+A block entry shares its key prefix with the preceding key until a restart
+point is reached. A block should contain at least one restart point.
+The first restart point is always zero.
+
+Block data structure:
+
+ + restart point + restart point (depends on restart interval)
+ / /
+ +---------------+---------------+---------------+---------------+---------+
+ | block entry 1 | block entry 2 | ... | block entry n | trailer |
+ +---------------+---------------+---------------+---------------+---------+
+
+Key/value entry:
+
+ +---- key len ----+
+ / \
+ +-------+---------+-----------+---------+--------------------+--------------+----------------+
+ | shared (varint) | not shared (varint) | value len (varint) | key (varlen) | value (varlen) |
+ +-----------------+---------------------+--------------------+--------------+----------------+
+
+ Block entry shares key prefix with its preceding key:
+ Conditions:
+ restart_interval=2
+ entry one : key=deck,value=v1
+ entry two : key=dock,value=v2
+ entry three: key=duck,value=v3
+ The entries will be encoded as follow:
+
+ + restart point (offset=0) + restart point (offset=16)
+ / /
+ +-----+-----+-----+----------+--------+-----+-----+-----+---------+--------+-----+-----+-----+----------+--------+
+ | 0 | 4 | 2 | "deck" | "v1" | 1 | 3 | 2 | "ock" | "v2" | 0 | 4 | 2 | "duck" | "v3" |
+ +-----+-----+-----+----------+--------+-----+-----+-----+---------+--------+-----+-----+-----+----------+--------+
+ \ / \ / \ /
+ +----------- entry one -----------+ +----------- entry two ----------+ +---------- entry three ----------+
+
+ The block trailer will contains two restart points:
+
+ +------------+-----------+--------+
+ | 0 | 16 | 2 |
+ +------------+-----------+---+----+
+ \ / \
+ +-- restart points --+ + restart points length
+
+Block trailer:
+
+ +-- 4-bytes --+
+ / \
+ +-----------------+-----------------+-----------------+------------------------------+
+ | restart point 1 | .... | restart point n | restart points len (4-bytes) |
+ +-----------------+-----------------+-----------------+------------------------------+
+
+
+NOTE: All fixed-length integer are little-endian.
+*/
+
+/*
+Filter block:
+
+A filter block consists of one or more filter data and a filter block trailer.
+The trailer contains the filter data offsets, a trailer offset and a 1-byte base Lg.
+
+Filter block data structure:
+
+ + offset 1 + offset 2 + offset n + trailer offset
+ / / / /
+ +---------------+---------------+---------------+---------+
+ | filter data 1 | ... | filter data n | trailer |
+ +---------------+---------------+---------------+---------+
+
+Filter block trailer:
+
+ +- 4-bytes -+
+ / \
+ +---------------+---------------+---------------+-------------------------------+------------------+
+ | data 1 offset | .... | data n offset | data-offsets offset (4-bytes) | base Lg (1-byte) |
+ +-------------- +---------------+---------------+-------------------------------+------------------+
+
+
+NOTE: All fixed-length integer are little-endian.
+*/
+
+const (
+ // blockTrailerLen is the size of the per-block trailer: a 1-byte
+ // compression type followed by a 4-byte checksum.
+ blockTrailerLen = 5
+ // footerLen is the size of the table footer: 40 bytes holding the two
+ // block handles, followed by the 8-byte magic.
+ footerLen = 48
+
+ magic = "\x57\xfb\x80\x8b\x24\x75\x47\xdb"
+
+ // The block type gives the per-block compression format.
+ // These constants are part of the file format and should not be changed.
+ blockTypeNoCompression = 0
+ blockTypeSnappyCompression = 1
+
+ // Generate new filter every 2KB of data
+ filterBaseLg = 11
+ filterBase = 1 << filterBaseLg
+)
+
+// blockHandle locates a block inside a table file by offset and length.
+type blockHandle struct {
+	offset, length uint64
+}
+
+// decodeBlockHandle decodes a block handle (two uvarints: offset, then
+// length) from the start of src. It returns the handle and the number of
+// bytes consumed, or a zero handle and 0 if src is truncated or holds an
+// overflowing varint.
+func decodeBlockHandle(src []byte) (blockHandle, int) {
+	// binary.Uvarint reports a too-small buffer as n == 0 and a 64-bit
+	// overflow as n < 0. Both must be rejected before n is used as a
+	// slice index, otherwise src[n:] panics on corrupted input.
+	offset, n := binary.Uvarint(src)
+	if n <= 0 {
+		return blockHandle{}, 0
+	}
+	length, m := binary.Uvarint(src[n:])
+	if m <= 0 {
+		return blockHandle{}, 0
+	}
+	return blockHandle{offset, length}, n + m
+}
+
+// encodeBlockHandle writes b to dst as two uvarints and returns the number
+// of bytes written. dst must be large enough to hold both values.
+func encodeBlockHandle(dst []byte, b blockHandle) int {
+	n := binary.PutUvarint(dst, b.offset)
+	m := binary.PutUvarint(dst[n:], b.length)
+	return n + m
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table/writer.go b/vendor/github.com/syndtr/goleveldb/leveldb/table/writer.go
new file mode 100644
index 000000000..b96b271d8
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/table/writer.go
@@ -0,0 +1,375 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package table
+
+import (
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "io"
+
+ "github.com/golang/snappy"
+
+ "github.com/syndtr/goleveldb/leveldb/comparer"
+ "github.com/syndtr/goleveldb/leveldb/filter"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// sharedPrefixLen returns the number of leading bytes that a and b have in
+// common.
+func sharedPrefixLen(a, b []byte) int {
+	limit := len(b)
+	if len(a) < limit {
+		limit = len(a)
+	}
+	for pos := 0; pos < limit; pos++ {
+		if a[pos] != b[pos] {
+			return pos
+		}
+	}
+	return limit
+}
+
+// blockWriter accumulates prefix-compressed key/value entries followed by a
+// restart-point array.
+type blockWriter struct {
+ // A restart point (entry stored with no shared prefix) is recorded
+ // whenever nEntries is a multiple of restartInterval.
+ restartInterval int
+ // Encoded entries; finish appends the restart array to it.
+ buf util.Buffer
+ // Number of entries appended since the last reset.
+ nEntries int
+ // Copy of the most recently appended key, used to compute shared prefixes.
+ prevKey []byte
+ // Offsets into buf at which restart entries begin.
+ restarts []uint32
+ // Scratch space for the three uvarint entry headers written by append.
+ scratch []byte
+}
+
+// append encodes one key/value entry into the block. The entry header is
+// three uvarints (shared key length, unshared key length, value length),
+// followed by the unshared key suffix and the value. Entries that fall on a
+// restart interval store the full key and record their offset in restarts.
+func (w *blockWriter) append(key, value []byte) {
+	var shared int
+	if w.nEntries%w.restartInterval != 0 {
+		shared = sharedPrefixLen(w.prevKey, key)
+	} else {
+		// Restart point: remember where this entry begins.
+		w.restarts = append(w.restarts, uint32(w.buf.Len()))
+	}
+
+	hdr := w.scratch
+	hdrLen := binary.PutUvarint(hdr, uint64(shared))
+	hdrLen += binary.PutUvarint(hdr[hdrLen:], uint64(len(key)-shared))
+	hdrLen += binary.PutUvarint(hdr[hdrLen:], uint64(len(value)))
+
+	w.buf.Write(hdr[:hdrLen])
+	w.buf.Write(key[shared:])
+	w.buf.Write(value)
+
+	// Keep a private copy of the key for the next prefix computation.
+	w.prevKey = append(w.prevKey[:0], key...)
+	w.nEntries++
+}
+
+// finish appends the restart array to the block: every restart offset as a
+// little-endian uint32, followed by the number of restart offsets. An empty
+// block still gets a single restart offset of 0.
+func (w *blockWriter) finish() {
+	if w.nEntries == 0 {
+		// Must have at least one restart entry.
+		w.restarts = append(w.restarts, 0)
+	}
+	count := uint32(len(w.restarts))
+	w.restarts = append(w.restarts, count)
+	for i := range w.restarts {
+		binary.LittleEndian.PutUint32(w.buf.Alloc(4), w.restarts[i])
+	}
+}
+
+// reset empties the block so it can be reused. prevKey is left untouched.
+func (w *blockWriter) reset() {
+	w.restarts = w.restarts[:0]
+	w.nEntries = 0
+	w.buf.Reset()
+}
+
+// bytesLen returns the size the block would have after finish: the encoded
+// entries, 4 bytes per restart offset (at least one), and the 4-byte restart
+// count.
+func (w *blockWriter) bytesLen() int {
+	size := w.buf.Len() + 4
+	if n := len(w.restarts); n > 0 {
+		return size + 4*n
+	}
+	return size + 4
+}
+
+// filterWriter builds the filter block of a table. All of its methods are
+// no-ops while generator is nil.
+type filterWriter struct {
+ // Receives keys via Add and emits filter data via Generate.
+ generator filter.FilterGenerator
+ // Generated filter data; finish appends the trailer to it.
+ buf util.Buffer
+ // Keys added since the last generated filter.
+ nKeys int
+ // Offset into buf recorded each time generate is called.
+ offsets []uint32
+}
+
+// add hands key to the filter generator and counts it as pending.
+func (w *filterWriter) add(key []byte) {
+	if w.generator != nil {
+		w.generator.Add(key)
+		w.nKeys++
+	}
+}
+
+// flush generates filters until there is one offset entry for every
+// filterBase bytes of table data below offset.
+func (w *filterWriter) flush(offset uint64) {
+	if w.generator == nil {
+		return
+	}
+	want := int(offset / filterBase)
+	for len(w.offsets) < want {
+		w.generate()
+	}
+}
+
+// finish generates a filter for any pending keys and then appends the
+// trailer: every filter offset and the offset of the offsets array itself as
+// little-endian uint32s, followed by the filterBaseLg byte.
+func (w *filterWriter) finish() {
+	if w.generator == nil {
+		return
+	}
+	// Generate last keys.
+	if w.nKeys > 0 {
+		w.generate()
+	}
+	// The final entry is where the offsets array starts.
+	w.offsets = append(w.offsets, uint32(w.buf.Len()))
+	for i := range w.offsets {
+		binary.LittleEndian.PutUint32(w.buf.Alloc(4), w.offsets[i])
+	}
+	w.buf.WriteByte(filterBaseLg)
+}
+
+// generate records the current end of the filter data as the next filter
+// offset and, when keys are pending, emits their filter into the buffer.
+func (w *filterWriter) generate() {
+	w.offsets = append(w.offsets, uint32(w.buf.Len()))
+	if w.nKeys == 0 {
+		return
+	}
+	w.generator.Generate(&w.buf)
+	w.nKeys = 0
+}
+
+// Writer is a table writer.
+type Writer struct {
+ // Destination of the encoded table.
+ writer io.Writer
+ // Sticky error: once set, Append and Close return it without doing work.
+ // Close also sets it after a successful close.
+ err error
+ // Options
+ cmp comparer.Comparer
+ filter filter.Filter
+ compression opt.Compression
+ // Size at which Append finishes the current data block.
+ blockSize int
+
+ // Current data block; Close reuses it to build the metaindex block.
+ dataBlock blockWriter
+ // One entry (separator key -> block handle) per written data block.
+ indexBlock blockWriter
+ filterBlock filterWriter
+ // Handle of the last written data block that has not yet been added to
+ // the index block; a zero length means none is pending.
+ pendingBH blockHandle
+ // Number of bytes written to writer so far.
+ offset uint64
+ // Number of key/value pairs accepted by Append.
+ nEntries int
+ // Scratch allocated enough for 5 uvarint. Block writer should not use
+ // first 20-bytes since it will be used to encode block handle, which
+ // then passed to the block writer itself.
+ scratch [50]byte
+ // Reused as the destination buffer for comparer Separator/Successor calls.
+ comparerScratch []byte
+ // Reused as the destination buffer for snappy compression.
+ compressionScratch []byte
+}
+
+// writeBlock writes the contents of buf to the underlying writer as one
+// block, optionally snappy-compressed, followed by the block trailer (block
+// type byte plus 4-byte checksum). It returns a handle holding the offset
+// the block was written at and its length without the trailer, and advances
+// w.offset past the block. On a write error the handle is zero and w.offset
+// is unchanged. With no compression the trailer is appended to buf itself.
+func (w *Writer) writeBlock(buf *util.Buffer, compression opt.Compression) (bh blockHandle, err error) {
+ // Compress the buffer if necessary.
+ var b []byte
+ if compression == opt.SnappyCompression {
+ // Allocate scratch enough for compression and block trailer.
+ if n := snappy.MaxEncodedLen(buf.Len()) + blockTrailerLen; len(w.compressionScratch) < n {
+ w.compressionScratch = make([]byte, n)
+ }
+ compressed := snappy.Encode(w.compressionScratch, buf.Bytes())
+ n := len(compressed)
+ // Re-slice past the compressed data to expose room for the trailer;
+ // this relies on the scratch sizing above leaving blockTrailerLen
+ // spare bytes of capacity behind the encoded output.
+ b = compressed[:n+blockTrailerLen]
+ b[n] = blockTypeSnappyCompression
+ } else {
+ tmp := buf.Alloc(blockTrailerLen)
+ tmp[0] = blockTypeNoCompression
+ b = buf.Bytes()
+ }
+
+ // Calculate the checksum.
+ // It covers the block contents and the type byte, and occupies the last
+ // 4 bytes of the trailer.
+ n := len(b) - 4
+ checksum := util.NewCRC(b[:n]).Value()
+ binary.LittleEndian.PutUint32(b[n:], checksum)
+
+ // Write the buffer to the file.
+ _, err = w.writer.Write(b)
+ if err != nil {
+ return
+ }
+ bh = blockHandle{w.offset, uint64(len(b) - blockTrailerLen)}
+ w.offset += uint64(len(b))
+ return
+}
+
+// flushPendingBH adds the pending data block handle, if any, to the index
+// block. The index key is produced by the comparer: a separator between the
+// block's last key and key, or a successor of the last key when key is
+// empty. If the comparer returns nil the last key itself is used.
+func (w *Writer) flushPendingBH(key []byte) {
+	if w.pendingBH.length == 0 {
+		return
+	}
+	last := w.dataBlock.prevKey
+
+	var sep []byte
+	if len(key) > 0 {
+		sep = w.cmp.Separator(w.comparerScratch[:0], last, key)
+	} else {
+		sep = w.cmp.Successor(w.comparerScratch[:0], last)
+	}
+	if sep != nil {
+		// Keep the (possibly grown) buffer for the next call.
+		w.comparerScratch = sep
+	} else {
+		sep = last
+	}
+
+	// Append the block handle to the index block.
+	handleLen := encodeBlockHandle(w.scratch[:20], w.pendingBH)
+	w.indexBlock.append(sep, w.scratch[:handleLen])
+
+	// Reset prev key of the data block and clear the pending handle.
+	w.dataBlock.prevKey = last[:0]
+	w.pendingBH = blockHandle{}
+}
+
+// finishBlock finalizes the current data block and writes it out. On success
+// the block's handle becomes the pending handle, the data block is reset and
+// the filter block is flushed up to the new table offset.
+func (w *Writer) finishBlock() error {
+	w.dataBlock.finish()
+	handle, err := w.writeBlock(&w.dataBlock.buf, w.compression)
+	if err == nil {
+		w.pendingBH = handle
+		w.dataBlock.reset()
+		w.filterBlock.flush(w.offset)
+	}
+	return err
+}
+
+// Append appends key/value pair to the table. The keys passed must
+// be in increasing order; a key that does not compare greater than the
+// previous one puts the writer into a permanent error state.
+//
+// It is safe to modify the contents of the arguments after Append returns.
+func (w *Writer) Append(key, value []byte) error {
+	if w.err != nil {
+		return w.err
+	}
+	if prev := w.dataBlock.prevKey; w.nEntries > 0 && w.cmp.Compare(prev, key) >= 0 {
+		w.err = fmt.Errorf("leveldb/table: Writer: keys are not in increasing order: %q, %q", prev, key)
+		return w.err
+	}
+
+	// Index the previous block (if one is pending), then store the pair
+	// and feed the key to the filter.
+	w.flushPendingBH(key)
+	w.dataBlock.append(key, value)
+	w.filterBlock.add(key)
+
+	// Finish the data block if block size target reached.
+	if w.dataBlock.bytesLen() >= w.blockSize {
+		if w.err = w.finishBlock(); w.err != nil {
+			return w.err
+		}
+	}
+	w.nEntries++
+	return nil
+}
+
+// BlocksLen returns number of blocks written so far: the blocks already in
+// the index plus the pending block, if any.
+func (w *Writer) BlocksLen() int {
+	if w.pendingBH.length > 0 {
+		return w.indexBlock.nEntries + 1
+	}
+	return w.indexBlock.nEntries
+}
+
+// EntriesLen returns number of entries added so far.
+func (w *Writer) EntriesLen() int {
+	return w.nEntries
+}
+
+// BytesLen returns number of bytes written so far.
+func (w *Writer) BytesLen() int {
+	written := w.offset
+	return int(written)
+}
+
+// Close will finalize the table. Calling Append is not possible
+// after Close, but calling BlocksLen, EntriesLen and BytesLen
+// is still possible.
+//
+// It writes, in order: the last (or an empty) data block, the filter block
+// if it has any content, the metaindex block, the index block and the
+// footer. Any error is recorded in the writer and returned; after a
+// successful Close the writer's error is set to a "writer is closed" error
+// so that later Append or Close calls fail.
+func (w *Writer) Close() error {
+ if w.err != nil {
+ return w.err
+ }
+
+ // Write the last data block. Or empty data block if there
+ // aren't any data blocks at all.
+ if w.dataBlock.nEntries > 0 || w.nEntries == 0 {
+ if err := w.finishBlock(); err != nil {
+ w.err = err
+ return w.err
+ }
+ }
+ // An empty key makes the index entry use the comparer's Successor.
+ w.flushPendingBH(nil)
+
+ // Write the filter block.
+ var filterBH blockHandle
+ w.filterBlock.finish()
+ if buf := &w.filterBlock.buf; buf.Len() > 0 {
+ filterBH, w.err = w.writeBlock(buf, opt.NoCompression)
+ if w.err != nil {
+ return w.err
+ }
+ }
+
+ // Write the metaindex block.
+ // The data block writer is reused for it; it was reset by finishBlock.
+ if filterBH.length > 0 {
+ key := []byte("filter." + w.filter.Name())
+ n := encodeBlockHandle(w.scratch[:20], filterBH)
+ w.dataBlock.append(key, w.scratch[:n])
+ }
+ w.dataBlock.finish()
+ metaindexBH, err := w.writeBlock(&w.dataBlock.buf, w.compression)
+ if err != nil {
+ w.err = err
+ return w.err
+ }
+
+ // Write the index block.
+ w.indexBlock.finish()
+ indexBH, err := w.writeBlock(&w.indexBlock.buf, w.compression)
+ if err != nil {
+ w.err = err
+ return w.err
+ }
+
+ // Write the table footer.
+ // Layout: metaindex handle, index handle, zero padding, then magic in
+ // the last len(magic) bytes.
+ footer := w.scratch[:footerLen]
+ for i := range footer {
+ footer[i] = 0
+ }
+ n := encodeBlockHandle(footer, metaindexBH)
+ encodeBlockHandle(footer[n:], indexBH)
+ copy(footer[footerLen-len(magic):], magic)
+ if _, err := w.writer.Write(footer); err != nil {
+ w.err = err
+ return w.err
+ }
+ w.offset += footerLen
+
+ // Mark the writer as closed for subsequent calls.
+ w.err = errors.New("leveldb/table: writer is closed")
+ return nil
+}
+
+// NewWriter creates a new initialized table writer for the file, taking the
+// comparer, filter, compression, block size and restart interval from o.
+//
+// Table writer is not safe for concurrent use.
+func NewWriter(f io.Writer, o *opt.Options) *Writer {
+	w := new(Writer)
+	w.writer = f
+	w.cmp = o.GetComparer()
+	w.filter = o.GetFilter()
+	w.compression = o.GetCompression()
+	w.blockSize = o.GetBlockSize()
+	w.comparerScratch = make([]byte, 0)
+
+	// The first 20-bytes of the scratch are used for encoding block
+	// handles; both block writers share the remainder.
+	blockScratch := w.scratch[20:]
+
+	// data block
+	w.dataBlock.restartInterval = o.GetBlockRestartInterval()
+	w.dataBlock.scratch = blockScratch
+	// index block
+	w.indexBlock.restartInterval = 1
+	w.indexBlock.scratch = blockScratch
+
+	// filter block
+	if w.filter == nil {
+		return w
+	}
+	w.filterBlock.generator = w.filter.NewGenerator()
+	w.filterBlock.flush(0)
+	return w
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util.go b/vendor/github.com/syndtr/goleveldb/leveldb/util.go
new file mode 100644
index 000000000..e572a329e
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util.go
@@ -0,0 +1,98 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "fmt"
+ "sort"
+
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+// shorten abbreviates strings longer than 8 bytes to their first three and
+// last three bytes joined by "..". Shorter strings are returned unchanged.
+func shorten(str string) string {
+	if n := len(str); n > 8 {
+		return str[:3] + ".." + str[n-3:]
+	}
+	return str
+}
+
+// bunits holds the binary unit prefixes used by shortenb and sshortenb,
+// indexed by the number of times the value was divided by 1024.
+var bunits = [...]string{"", "Ki", "Mi", "Gi", "Ti"}
+
+// shortenb formats a byte count using the largest binary unit that keeps the
+// value above 1024, e.g. 2048 -> "2KiB". Division is integral, so the result
+// is truncated. Values beyond the largest unit stay expressed in that unit.
+func shortenb(bytes int) string {
+	i := 0
+	// Stop at the last available unit so the index below is always valid.
+	for ; bytes > 1024 && i < len(bunits)-1; i++ {
+		bytes /= 1024
+	}
+	return fmt.Sprintf("%d%sB", bytes, bunits[i])
+}
+
+// sshortenb is the signed variant of shortenb: it returns "~" for zero and
+// otherwise prefixes the shortened magnitude with "+" or "-".
+func sshortenb(bytes int) string {
+	if bytes == 0 {
+		return "~"
+	}
+	sign := "+"
+	if bytes < 0 {
+		sign = "-"
+		bytes *= -1
+	}
+	return sign + shortenb(bytes)
+}
+
+// sint formats x with an explicit sign: "~" for zero, otherwise "+" or "-"
+// followed by the magnitude.
+func sint(x int) string {
+	switch {
+	case x == 0:
+		return "~"
+	case x < 0:
+		return fmt.Sprintf("%s%d", "-", x*-1)
+	default:
+		return fmt.Sprintf("%s%d", "+", x)
+	}
+}
+
+// minInt returns the smaller of a and b.
+func minInt(a, b int) int {
+	if b <= a {
+		return b
+	}
+	return a
+}
+
+// maxInt returns the larger of a and b.
+func maxInt(a, b int) int {
+	if b >= a {
+		return b
+	}
+	return a
+}
+
+// fdSorter implements sort.Interface over file descriptors, ordering them by
+// ascending Num.
+type fdSorter []storage.FileDesc
+
+// Len returns the number of descriptors.
+func (p fdSorter) Len() int {
+ return len(p)
+}
+
+// Less reports whether descriptor i has a smaller Num than descriptor j.
+func (p fdSorter) Less(i, j int) bool {
+ return p[i].Num < p[j].Num
+}
+
+// Swap exchanges descriptors i and j.
+func (p fdSorter) Swap(i, j int) {
+ p[i], p[j] = p[j], p[i]
+}
+
+// sortFds sorts fds in place by ascending Num.
+func sortFds(fds []storage.FileDesc) {
+ sort.Sort(fdSorter(fds))
+}
+
+// ensureBuffer returns a slice of length n, reusing b's backing array when
+// its capacity suffices and allocating a fresh zeroed slice otherwise.
+func ensureBuffer(b []byte, n int) []byte {
+	if n <= cap(b) {
+		return b[:n]
+	}
+	return make([]byte, n)
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer.go
new file mode 100644
index 000000000..21de24255
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer.go
@@ -0,0 +1,293 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package util
+
+// This a copy of Go std bytes.Buffer with some modification
+// and some features stripped.
+
+import (
+ "bytes"
+ "io"
+)
+
+// A Buffer is a variable-sized buffer of bytes with Read and Write methods.
+// The zero value for Buffer is an empty buffer ready to use.
+//
+// Reads consume from buf[off] and advance off; writes extend buf at its end.
+type Buffer struct {
+ buf []byte // contents are the bytes buf[off : len(buf)]
+ off int // read at &buf[off], write at &buf[len(buf)]
+ bootstrap [64]byte // memory to hold first slice; helps small buffers (Printf) avoid allocation.
+}
+
+// Bytes returns the unread portion of the buffer as a slice that aliases the
+// buffer's storage; len(b.Bytes()) == b.Len(). Changes made through the
+// returned slice are visible in the buffer as long as no other Buffer
+// method is called in between.
+func (b *Buffer) Bytes() []byte {
+	return b.buf[b.off:]
+}
+
+// String returns the unread portion of the buffer as a string. A nil
+// *Buffer yields "<nil>", which is handy when debugging.
+func (b *Buffer) String() string {
+	if b != nil {
+		return string(b.buf[b.off:])
+	}
+	return "<nil>"
+}
+
+// Len returns the number of unread bytes in the buffer;
+// b.Len() == len(b.Bytes()).
+func (b *Buffer) Len() int {
+	return len(b.buf) - b.off
+}
+
+// Truncate keeps only the first n unread bytes of the buffer and discards
+// the rest. It panics if n is negative or exceeds the unread length.
+func (b *Buffer) Truncate(n int) {
+	if n < 0 || n > b.Len() {
+		panic("leveldb/util.Buffer: truncation out of range")
+	}
+	if n == 0 {
+		// Nothing is kept, so the space before off can be reused.
+		b.off = 0
+	}
+	b.buf = b.buf[:b.off+n]
+}
+
+// Reset empties the buffer; it is equivalent to b.Truncate(0).
+func (b *Buffer) Reset() {
+	b.Truncate(0)
+}
+
+// grow grows the buffer to guarantee space for n more bytes.
+// It returns the index where bytes should be written.
+// If the buffer can't grow it will panic with bytes.ErrTooLarge.
+//
+// On return len(b.buf) has already been extended by n, so the caller owns
+// b.buf[returned index:].
+func (b *Buffer) grow(n int) int {
+ m := b.Len()
+ // If buffer is empty, reset to recover space.
+ if m == 0 && b.off != 0 {
+ b.Truncate(0)
+ }
+ if len(b.buf)+n > cap(b.buf) {
+ var buf []byte
+ if b.buf == nil && n <= len(b.bootstrap) {
+ // First use and the request is small: use the embedded array.
+ buf = b.bootstrap[0:]
+ } else if m+n <= cap(b.buf)/2 {
+ // We can slide things down instead of allocating a new
+ // slice. We only need m+n <= cap(b.buf) to slide, but
+ // we instead let capacity get twice as large so we
+ // don't spend all our time copying.
+ copy(b.buf[:], b.buf[b.off:])
+ buf = b.buf[:m]
+ } else {
+ // not enough space anywhere
+ buf = makeSlice(2*cap(b.buf) + n)
+ copy(buf, b.buf[b.off:])
+ }
+ b.buf = buf
+ // Unread data now starts at the beginning of the storage.
+ b.off = 0
+ }
+ b.buf = b.buf[0 : b.off+m+n]
+ return b.off + m
+}
+
+// Alloc extends the buffer by n bytes, growing it as needed, and returns
+// the newly added region for the caller to fill in. It panics if n is
+// negative, and with bytes.ErrTooLarge if the buffer can't grow.
+func (b *Buffer) Alloc(n int) []byte {
+	if n < 0 {
+		panic("leveldb/util.Buffer.Alloc: negative count")
+	}
+	start := b.grow(n)
+	return b.buf[start:]
+}
+
+// Grow makes sure that another n bytes can be written to the buffer without
+// a further allocation; the buffer's length is left unchanged. It panics if
+// n is negative, and with bytes.ErrTooLarge if the buffer can't grow.
+func (b *Buffer) Grow(n int) {
+	if n < 0 {
+		panic("leveldb/util.Buffer.Grow: negative count")
+	}
+	end := b.grow(n)
+	// grow extended the length by n; cut it back to the old end.
+	b.buf = b.buf[:end]
+}
+
+// Write appends p to the buffer, growing the buffer as needed. It returns
+// len(p) and a nil error. If the buffer becomes too large, Write panics
+// with bytes.ErrTooLarge.
+func (b *Buffer) Write(p []byte) (n int, err error) {
+	start := b.grow(len(p))
+	n = copy(b.buf[start:], p)
+	return n, nil
+}
+
+// MinRead is the minimum slice size passed to a Read call by
+// Buffer.ReadFrom. As long as the Buffer has at least MinRead bytes beyond
+// what is required to hold the contents of r, ReadFrom will not grow the
+// underlying buffer.
+const MinRead = 512
+
+// ReadFrom reads data from r until EOF and appends it to the buffer, growing
+// the buffer as needed. The return value n is the number of bytes read. Any
+// error except io.EOF encountered during the read is also returned. If the
+// buffer becomes too large, ReadFrom will panic with bytes.ErrTooLarge.
+func (b *Buffer) ReadFrom(r io.Reader) (n int64, err error) {
+ // If buffer is empty, reset to recover space.
+ if b.off >= len(b.buf) {
+ b.Truncate(0)
+ }
+ for {
+ if free := cap(b.buf) - len(b.buf); free < MinRead {
+ // not enough space at end
+ newBuf := b.buf
+ if b.off+free < MinRead {
+ // not enough space using beginning of buffer;
+ // double buffer capacity
+ newBuf = makeSlice(2*cap(b.buf) + MinRead)
+ }
+ // Move the unread data to the front of newBuf (which may be the
+ // existing storage) and drop the consumed prefix.
+ copy(newBuf, b.buf[b.off:])
+ b.buf = newBuf[:len(b.buf)-b.off]
+ b.off = 0
+ }
+ // Read directly into the unused capacity behind the current contents.
+ m, e := r.Read(b.buf[len(b.buf):cap(b.buf)])
+ b.buf = b.buf[0 : len(b.buf)+m]
+ n += int64(m)
+ if e == io.EOF {
+ break
+ }
+ if e != nil {
+ return n, e
+ }
+ }
+ return n, nil // err is EOF, so return nil explicitly
+}
+
+// makeSlice returns a new byte slice of length n. Any panic raised by the
+// allocation is converted into a panic with bytes.ErrTooLarge.
+func makeSlice(n int) []byte {
+	defer func() {
+		if r := recover(); r != nil {
+			// Replace the runtime's panic with a well-known error.
+			panic(bytes.ErrTooLarge)
+		}
+	}()
+	return make([]byte, n)
+}
+
+// WriteTo writes the unread portion of the buffer to w with a single Write
+// call and consumes whatever was written. It returns the number of bytes
+// written (as int64 to satisfy io.WriterTo) together with the error
+// reported by w, or io.ErrShortWrite if w accepted fewer bytes without
+// reporting an error. The buffer is reset once everything has been written.
+func (b *Buffer) WriteTo(w io.Writer) (n int64, err error) {
+	if pending := b.Len(); pending > 0 {
+		written, werr := w.Write(b.buf[b.off:])
+		if written > pending {
+			panic("leveldb/util.Buffer.WriteTo: invalid Write count")
+		}
+		b.off += written
+		n = int64(written)
+		switch {
+		case werr != nil:
+			return n, werr
+		case written != pending:
+			// A Write that reports no error must consume everything,
+			// by definition of io.Writer.
+			return n, io.ErrShortWrite
+		}
+	}
+	// Buffer is now empty; reset.
+	b.Truncate(0)
+	return
+}
+
+// WriteByte appends the single byte c to the buffer, growing it as needed.
+// The error is always nil; it exists to match bufio.Writer's WriteByte.
+// If the buffer becomes too large, WriteByte panics with
+// bytes.ErrTooLarge.
+func (b *Buffer) WriteByte(c byte) error {
+	pos := b.grow(1)
+	b.buf[pos] = c
+	return nil
+}
+
+// Read copies up to len(p) unread bytes into p and consumes them. It returns
+// the number of bytes copied. When the buffer is empty it is reset and Read
+// returns io.EOF, unless len(p) is zero, in which case the error is nil.
+func (b *Buffer) Read(p []byte) (n int, err error) {
+	if b.off < len(b.buf) {
+		n = copy(p, b.buf[b.off:])
+		b.off += n
+		return n, nil
+	}
+	// Buffer is empty, reset to recover space.
+	b.Truncate(0)
+	if len(p) == 0 {
+		return 0, nil
+	}
+	return 0, io.EOF
+}
+
+// Next consumes the next n unread bytes and returns them as a slice that
+// aliases the buffer's storage. If fewer than n bytes are unread, all of
+// them are returned. The slice is only valid until the next call to a read
+// or write method.
+func (b *Buffer) Next(n int) []byte {
+	if avail := b.Len(); n > avail {
+		n = avail
+	}
+	end := b.off + n
+	chunk := b.buf[b.off:end]
+	b.off = end
+	return chunk
+}
+
+// ReadByte consumes and returns the next unread byte. When the buffer is
+// empty it is reset and io.EOF is returned.
+func (b *Buffer) ReadByte() (c byte, err error) {
+	if b.off < len(b.buf) {
+		c = b.buf[b.off]
+		b.off++
+		return c, nil
+	}
+	// Buffer is empty, reset to recover space.
+	b.Truncate(0)
+	return 0, io.EOF
+}
+
+// ReadBytes consumes data up to and including the first occurrence of delim
+// and returns a copy of it. If delim does not occur, all remaining data is
+// returned together with io.EOF. err is non-nil if and only if the returned
+// data does not end in delim.
+func (b *Buffer) ReadBytes(delim byte) (line []byte, err error) {
+	var ref []byte
+	ref, err = b.readSlice(delim)
+	// Copy the data out: the buffer's backing array may be overwritten by
+	// later calls.
+	line = append(line, ref...)
+	return line, err
+}
+
+// readSlice is like ReadBytes but returns a reference to internal buffer data.
+func (b *Buffer) readSlice(delim byte) (line []byte, err error) {
+	end := len(b.buf)
+	if i := bytes.IndexByte(b.buf[b.off:], delim); i >= 0 {
+		end = b.off + i + 1
+	} else {
+		err = io.EOF
+	}
+	line = b.buf[b.off:end]
+	b.off = end
+	return line, err
+}
+
+// NewBuffer returns a Buffer whose initial contents are buf. Use it to read
+// existing data, or pass a slice with zero length and the desired capacity
+// to pre-size the internal storage for writing.
+//
+// In most cases, new(Buffer) (or just declaring a Buffer variable) is
+// sufficient to initialize a Buffer.
+func NewBuffer(buf []byte) *Buffer {
+	b := new(Buffer)
+	b.buf = buf
+	return b
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go
new file mode 100644
index 000000000..2f3db974a
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go
@@ -0,0 +1,239 @@
+// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package util
+
+import (
+ "fmt"
+ "sync"
+ "sync/atomic"
+ "time"
+)
+
+// buffer pairs a byte slice with a miss counter.
+// NOTE(review): not referenced by the BufferPool code visible in this file;
+// confirm whether it is still used elsewhere in the package.
+type buffer struct {
+ b []byte
+ miss int
+}
+
+// BufferPool is a 'buffer pool'.
+//
+// Buffers are kept in six size classes (see poolNum). The counters below are
+// updated with atomic operations by Get and Put and reported by String.
+type BufferPool struct {
+ // One channel of free buffers per size class; class 0 holds buffers
+ // sized between baseline0/2 (exclusive) and baseline0 (inclusive).
+ pool [6]chan []byte
+ // Per-class (index = class-1) capacity used by Get for new allocations.
+ size [5]uint32
+ // Per-class count of requests larger than size; at 20 size is raised.
+ sizeMiss [5]uint32
+ // Per-class count of pooled buffers at least twice the request; at 20
+ // size is halved.
+ sizeHalf [5]uint32
+ // Upper bounds of classes 1..4; larger requests fall into class 5.
+ baseline [4]int
+ baseline0 int
+
+ // Guards closed: Get/Put hold the read lock, Close the write lock.
+ mu sync.RWMutex
+ closed bool
+ // Signals the drain goroutine to shut down.
+ closeC chan struct{}
+
+ // Statistics.
+ get uint32
+ put uint32
+ half uint32
+ less uint32
+ equal uint32
+ greater uint32
+ miss uint32
+}
+
+// poolNum maps a buffer size to its size class: class 0 for sizes in
+// (baseline0/2, baseline0], otherwise the first class i+1 whose bound
+// baseline[i] is at least n, or len(baseline)+1 when n exceeds every bound.
+func (p *BufferPool) poolNum(n int) int {
+	if half := p.baseline0 / 2; n > half && n <= p.baseline0 {
+		return 0
+	}
+	for i := range p.baseline {
+		if n <= p.baseline[i] {
+			return i + 1
+		}
+	}
+	return len(p.baseline) + 1
+}
+
+// Get returns buffer with length of n.
+//
+// A nil or closed pool simply allocates. Otherwise one buffer is taken,
+// without blocking, from the size class of n and reused if its capacity is
+// at least n but less than 2n; in every other case a new buffer is
+// allocated.
+func (p *BufferPool) Get(n int) []byte {
+ if p == nil {
+ return make([]byte, n)
+ }
+
+ p.mu.RLock()
+ defer p.mu.RUnlock()
+
+ if p.closed {
+ return make([]byte, n)
+ }
+
+ atomic.AddUint32(&p.get, 1)
+
+ poolNum := p.poolNum(n)
+ pool := p.pool[poolNum]
+ if poolNum == 0 {
+ // Fast path.
+ select {
+ case b := <-pool:
+ switch {
+ case cap(b) > n:
+ if cap(b)-n >= n {
+ // Pooled buffer is at least twice the request: put it back
+ // (if there is room) and allocate an exact-size one.
+ atomic.AddUint32(&p.half, 1)
+ select {
+ case pool <- b:
+ default:
+ }
+ return make([]byte, n)
+ } else {
+ atomic.AddUint32(&p.less, 1)
+ return b[:n]
+ }
+ case cap(b) == n:
+ atomic.AddUint32(&p.equal, 1)
+ return b[:n]
+ default:
+ // Pooled buffer is too small; it is dropped.
+ atomic.AddUint32(&p.greater, 1)
+ }
+ default:
+ atomic.AddUint32(&p.miss, 1)
+ }
+
+ return make([]byte, n, p.baseline0)
+ } else {
+ sizePtr := &p.size[poolNum-1]
+
+ select {
+ case b := <-pool:
+ switch {
+ case cap(b) > n:
+ if cap(b)-n >= n {
+ atomic.AddUint32(&p.half, 1)
+ sizeHalfPtr := &p.sizeHalf[poolNum-1]
+ // On the 20th oversized hit, halve the class size and drop
+ // the buffer; otherwise try to return it to the pool.
+ if atomic.AddUint32(sizeHalfPtr, 1) == 20 {
+ atomic.StoreUint32(sizePtr, uint32(cap(b)/2))
+ atomic.StoreUint32(sizeHalfPtr, 0)
+ } else {
+ select {
+ case pool <- b:
+ default:
+ }
+ }
+ return make([]byte, n)
+ } else {
+ atomic.AddUint32(&p.less, 1)
+ return b[:n]
+ }
+ case cap(b) == n:
+ atomic.AddUint32(&p.equal, 1)
+ return b[:n]
+ default:
+ // Pooled buffer is too small for this request; keep it in the
+ // pool only if it still matches the current class size.
+ atomic.AddUint32(&p.greater, 1)
+ if uint32(cap(b)) >= atomic.LoadUint32(sizePtr) {
+ select {
+ case pool <- b:
+ default:
+ }
+ }
+ }
+ default:
+ atomic.AddUint32(&p.miss, 1)
+ }
+
+ if size := atomic.LoadUint32(sizePtr); uint32(n) > size {
+ if size == 0 {
+ // First request for this class seeds the class size.
+ atomic.CompareAndSwapUint32(sizePtr, 0, uint32(n))
+ } else {
+ // On the 20th request above the class size, raise it to n.
+ sizeMissPtr := &p.sizeMiss[poolNum-1]
+ if atomic.AddUint32(sizeMissPtr, 1) == 20 {
+ atomic.StoreUint32(sizePtr, uint32(n))
+ atomic.StoreUint32(sizeMissPtr, 0)
+ }
+ }
+ return make([]byte, n)
+ } else {
+ return make([]byte, n, size)
+ }
+ }
+}
+
+// Put offers b to the pool of its size class (chosen by capacity). The
+// buffer is silently dropped if that pool is full, or if the BufferPool is
+// nil or closed.
+func (p *BufferPool) Put(b []byte) {
+	if p == nil {
+		return
+	}
+
+	p.mu.RLock()
+	defer p.mu.RUnlock()
+
+	if p.closed {
+		return
+	}
+
+	atomic.AddUint32(&p.put, 1)
+
+	class := p.poolNum(cap(b))
+	select {
+	case p.pool[class] <- b:
+	default:
+		// Pool is full; let the garbage collector have the buffer.
+	}
+}
+
+// Close marks the pool as closed and signals the drain goroutine to shut
+// down. Calling Close on a nil or already closed pool is a no-op.
+func (p *BufferPool) Close() {
+	if p == nil {
+		return
+	}
+
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	if p.closed {
+		return
+	}
+	p.closed = true
+	p.closeC <- struct{}{}
+}
+
+// String returns a one-line summary of the pool's baseline, per-class size
+// tables and statistics counters, or "<nil>" for a nil pool.
+func (p *BufferPool) String() string {
+	if p != nil {
+		const layout = "BufferPool{B·%d Z·%v Zm·%v Zh·%v G·%d P·%d H·%d <·%d =·%d >·%d M·%d}"
+		return fmt.Sprintf(layout,
+			p.baseline0, p.size, p.sizeMiss, p.sizeHalf,
+			p.get, p.put, p.half, p.less, p.equal, p.greater, p.miss)
+	}
+	return "<nil>"
+}
+
+// drain runs until the pool is closed. Every two seconds it removes at most
+// one buffer from each size class; on the close signal it closes closeC and
+// every pool channel and returns.
+func (p *BufferPool) drain() {
+	tick := time.NewTicker(2 * time.Second)
+	defer tick.Stop()
+	for {
+		select {
+		case <-p.closeC:
+			close(p.closeC)
+			for i := range p.pool {
+				close(p.pool[i])
+			}
+			return
+		case <-tick.C:
+			for i := range p.pool {
+				select {
+				case <-p.pool[i]:
+				default:
+				}
+			}
+		}
+	}
+}
+
+// NewBufferPool creates a new initialized 'buffer pool' whose size classes
+// are derived from baseline, and starts its background drain goroutine.
+// It panics if baseline is not positive.
+func NewBufferPool(baseline int) *BufferPool {
+	if baseline <= 0 {
+		panic("baseline can't be <= 0")
+	}
+	p := &BufferPool{
+		baseline0: baseline,
+		baseline:  [...]int{baseline / 4, baseline / 2, baseline * 2, baseline * 4},
+		closeC:    make(chan struct{}, 1),
+	}
+	// Channel capacity of each size class.
+	slots := [...]int{2, 2, 4, 4, 2, 1}
+	for i := range p.pool {
+		p.pool[i] = make(chan []byte, slots[i])
+	}
+	go p.drain()
+	return p
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/crc32.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/crc32.go
new file mode 100644
index 000000000..631c9d610
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/crc32.go
@@ -0,0 +1,30 @@
+// Copyright 2011 The LevelDB-Go Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package util
+
+import (
+ "hash/crc32"
+)
+
+// table is the CRC-32 lookup table for Castagnoli's polynomial.
+var table = crc32.MakeTable(crc32.Castagnoli)
+
+// CRC is a CRC-32 checksum computed using Castagnoli's polynomial.
+type CRC uint32
+
+// NewCRC returns the checksum of b.
+func NewCRC(b []byte) CRC {
+	var zero CRC
+	return zero.Update(b)
+}
+
+// Update returns the checksum obtained by extending c with the bytes of b.
+func (c CRC) Update(b []byte) CRC {
+	sum := crc32.Update(uint32(c), table, b)
+	return CRC(sum)
+}
+
+// Value returns a masked crc: the checksum rotated right by 15 bits plus a
+// fixed constant.
+func (c CRC) Value() uint32 {
+	const maskDelta = 0xa282ead8
+	raw := uint32(c)
+	return (raw>>15 | raw<<17) + maskDelta
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go
new file mode 100644
index 000000000..7f3fa4e2c
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go
@@ -0,0 +1,48 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package util
+
+import (
+ "encoding/binary"
+)
+
+// Hash returns a 32-bit hash of data, mixed with seed. The algorithm is
+// similar to murmur hash: the input is consumed as little-endian 32-bit
+// words, and the 1-3 trailing bytes, if any, are folded in at the end.
+func Hash(data []byte, seed uint32) uint32 {
+	const (
+		m = uint32(0xc6a4a793)
+		r = uint32(24)
+	)
+	h := seed ^ (uint32(len(data)) * m)
+
+	// Whole 4-byte words.
+	rest := data
+	for len(rest) >= 4 {
+		h += binary.LittleEndian.Uint32(rest)
+		h *= m
+		h ^= h >> 16
+		rest = rest[4:]
+	}
+
+	// Remaining 0-3 bytes, most significant first.
+	switch len(rest) {
+	case 0:
+	case 3:
+		h += uint32(rest[2]) << 16
+		fallthrough
+	case 2:
+		h += uint32(rest[1]) << 8
+		fallthrough
+	case 1:
+		h += uint32(rest[0])
+		h *= m
+		h ^= h >> r
+	default:
+		panic("not reached")
+	}
+
+	return h
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/range.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/range.go
new file mode 100644
index 000000000..85159583d
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/range.go
@@ -0,0 +1,32 @@
+// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package util
+
+// Range is a key range.
+type Range struct {
+	// Start of the key range, included in the range.
+	Start []byte
+
+	// Limit of the key range, not included in the range.
+	Limit []byte
+}
+
+// BytesPrefix returns the key range that covers every key starting with
+// prefix. Start is prefix itself; Limit is prefix cut after its last byte
+// that is not 0xff, with that byte incremented, or nil when there is no such
+// byte. This is only applicable for the standard 'bytes comparer'.
+func BytesPrefix(prefix []byte) *Range {
+	r := &Range{Start: prefix}
+	for end := len(prefix); end > 0; end-- {
+		last := prefix[end-1]
+		if last == 0xff {
+			continue
+		}
+		upper := make([]byte, end)
+		copy(upper, prefix)
+		upper[end-1] = last + 1
+		r.Limit = upper
+		break
+	}
+	return r
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/util.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/util.go
new file mode 100644
index 000000000..f35976865
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/util.go
@@ -0,0 +1,73 @@
+// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package util provides utilities used throughout leveldb.
+package util
+
+import (
+ "errors"
+)
+
+// Errors used as panic values by BasicReleaser.SetReleaser.
+var (
+	// ErrReleased indicates that the resource has already been released.
+	ErrReleased = errors.New("leveldb: resource already released")
+	// ErrHasReleaser indicates that a releaser is already set.
+	ErrHasReleaser = errors.New("leveldb: releaser already defined")
+)
+
+// Releaser is the interface that wraps the basic Release method.
+type Releaser interface {
+	// Release releases associated resources. Release should always succeed
+	// and can be called multiple times without causing an error.
+	Release()
+}
+
+// ReleaseSetter is the interface that wraps the basic SetReleaser method.
+type ReleaseSetter interface {
+	// SetReleaser associates the given releaser to the resources. The
+	// releaser will be called once the corresponding resources are released.
+	// Calling SetReleaser with nil will clear the releaser.
+	//
+	// This will panic if a releaser is already present or the corresponding
+	// resource is already released. The releaser should be cleared first
+	// before a new one is assigned.
+	SetReleaser(releaser Releaser)
+}
+
+// BasicReleaser provides basic implementation of Releaser and ReleaseSetter.
+// It is not synchronized.
+type BasicReleaser struct {
+	releaser Releaser
+	released bool
+}
+
+// Released returns whether Release method already called.
+func (r *BasicReleaser) Released() bool {
+	return r.released
+}
+
+// Release implements Releaser.Release. The first call invokes the
+// associated releaser, if any, and marks the resource as released; later
+// calls do nothing.
+func (r *BasicReleaser) Release() {
+	if r.released {
+		return
+	}
+	if r.releaser != nil {
+		r.releaser.Release()
+		r.releaser = nil
+	}
+	r.released = true
+}
+
+// SetReleaser implements ReleaseSetter.SetReleaser. It panics with
+// ErrReleased if Release was already called, and with ErrHasReleaser if a
+// non-nil releaser would replace an existing one.
+func (r *BasicReleaser) SetReleaser(releaser Releaser) {
+	if r.released {
+		panic(ErrReleased)
+	}
+	if r.releaser != nil && releaser != nil {
+		panic(ErrHasReleaser)
+	}
+	r.releaser = releaser
+}
+
+// NoopReleaser is a Releaser whose Release method does nothing.
+type NoopReleaser struct{}
+
+// Release implements Releaser.Release; it is a no-op.
+func (NoopReleaser) Release() {}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/version.go b/vendor/github.com/syndtr/goleveldb/leveldb/version.go
new file mode 100644
index 000000000..c60f12c20
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/version.go
@@ -0,0 +1,524 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "fmt"
+ "sync/atomic"
+ "unsafe"
+
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+type tSet struct { // tSet pairs a table file with the level it was found at.
+ level int
+ table *tFile
+}
+
+type version struct { // version is a set of table files organized by level, tied to a session.
+ s *session
+
+ levels []tFiles // table files, indexed by level
+
+ // Level that should be compacted next and its compaction score.
+ // Score < 1 means compaction is not strictly needed. These fields
+ // are initialized by computeCompaction().
+ cLevel int
+ cScore float64
+
+ cSeek unsafe.Pointer // nil, or a *tSet stored with CompareAndSwapPointer by get/sampleSeek
+
+ closing bool // when true, get returns ErrClosed
+ ref int // reference count; decremented by releaseNB
+ // Succeeding version.
+ next *version
+}
+
+func newVersion(s *session) *version { // newVersion returns an empty version bound to session s.
+ return &version{s: s}
+}
+
+func (v *version) releaseNB() { // releaseNB drops one reference without locking s.vmu itself; release and versionReleaser.Release lock it before calling.
+ v.ref--
+ if v.ref > 0 {
+ return
+ }
+ if v.ref < 0 {
+ panic("negative version ref")
+ }
+
+ nextTables := make(map[int64]bool) // file numbers present in the succeeding version
+ for _, tt := range v.next.levels { // NOTE(review): dereferences v.next; presumably always set before the last reference is dropped — confirm
+ for _, t := range tt {
+ num := t.fd.Num
+ nextTables[num] = true
+ }
+ }
+
+ for _, tt := range v.levels {
+ for _, t := range tt {
+ num := t.fd.Num
+ if _, ok := nextTables[num]; !ok {
+ v.s.tops.remove(t) // table is absent from the succeeding version, so pass it to tops.remove
+ }
+ }
+ }
+
+ v.next.releaseNB() // drop one reference on the succeeding version as well
+ v.next = nil
+}
+
+func (v *version) release() { // release drops one reference while holding the session's vmu lock.
+ v.s.vmu.Lock()
+ v.releaseNB()
+ v.s.vmu.Unlock()
+}
+
+func (v *version) walkOverlapping(aux tFiles, ikey internalKey, f func(level int, t *tFile) bool, lf func(level int) bool) { // walkOverlapping calls f for each table that may hold ikey's user key and lf (if non-nil) after each non-empty level; a false return from either stops the walk.
+ ukey := ikey.ukey()
+
+ // Aux level.
+ if aux != nil {
+ for _, t := range aux {
+ if t.overlaps(v.s.icmp, ukey, ukey) {
+ if !f(-1, t) { // aux tables are reported as level -1
+ return
+ }
+ }
+ }
+
+ if lf != nil && !lf(-1) {
+ return
+ }
+ }
+
+ // Walk tables level-by-level.
+ for level, tables := range v.levels {
+ if len(tables) == 0 {
+ continue // empty levels are skipped entirely, including the lf callback
+ }
+
+ if level == 0 {
+ // Level-0 files may overlap each other. Find all files that
+ // overlap ukey.
+ for _, t := range tables {
+ if t.overlaps(v.s.icmp, ukey, ukey) {
+ if !f(level, t) {
+ return
+ }
+ }
+ }
+ } else {
+ if i := tables.searchMax(v.s.icmp, ikey); i < len(tables) { // at most one table is visited on levels above 0
+ t := tables[i]
+ if v.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 { // ukey is not below the table's smallest user key
+ if !f(level, t) {
+ return
+ }
+ }
+ }
+ }
+
+ if lf != nil && !lf(level) {
+ return
+ }
+ }
+}
+
+func (v *version) get(aux tFiles, ikey internalKey, ro *opt.ReadOptions, noValue bool) (value []byte, tcomp bool, err error) { // get looks up ikey in aux and in the version's tables; with noValue only the key is searched (findKey) and value stays nil. tcomp reports whether this call stored a table in cSeek.
+ if v.closing {
+ return nil, false, ErrClosed
+ }
+
+ ukey := ikey.ukey()
+
+ var (
+ tset *tSet
+ tseek bool
+
+ // Best match so far at level <= 0 (aux and level-0).
+ zfound bool
+ zseq uint64
+ zkt keyType
+ zval []byte
+ )
+
+ err = ErrNotFound // default result unless a value is found or another error occurs
+
+ // Since entries never hop across levels, finding the key/value
+ // in a smaller level makes later levels irrelevant.
+ v.walkOverlapping(aux, ikey, func(level int, t *tFile) bool {
+ if level >= 0 && !tseek { // remember the first non-aux table; visiting a second one sets tseek
+ if tset == nil {
+ tset = &tSet{level, t}
+ } else {
+ tseek = true
+ }
+ }
+
+ var (
+ fikey, fval []byte
+ ferr error
+ )
+ if noValue {
+ fikey, ferr = v.s.tops.findKey(t, ikey, ro)
+ } else {
+ fikey, fval, ferr = v.s.tops.find(t, ikey, ro)
+ }
+
+ switch ferr {
+ case nil:
+ case ErrNotFound:
+ return true // not in this table; keep walking
+ default:
+ err = ferr
+ return false
+ }
+
+ if fukey, fseq, fkt, fkerr := parseInternalKey(fikey); fkerr == nil {
+ if v.s.icmp.uCompare(ukey, fukey) == 0 {
+ // Tables at level <= 0 may overlap each other.
+ if level <= 0 {
+ if fseq >= zseq { // keep the entry with the highest sequence number seen so far
+ zfound = true
+ zseq = fseq
+ zkt = fkt
+ zval = fval
+ }
+ } else {
+ switch fkt {
+ case keyTypeVal:
+ value = fval
+ err = nil
+ case keyTypeDel: // deletion marker: stop with err left as ErrNotFound
+ default:
+ panic("leveldb: invalid internalKey type")
+ }
+ return false
+ }
+ }
+ } else {
+ err = fkerr
+ return false
+ }
+
+ return true
+ }, func(level int) bool { // runs after each level: a match collected at level <= 0 ends the search
+ if zfound {
+ switch zkt {
+ case keyTypeVal:
+ value = zval
+ err = nil
+ case keyTypeDel: // deletion marker: err stays ErrNotFound
+ default:
+ panic("leveldb: invalid internalKey type")
+ }
+ return false
+ }
+
+ return true
+ })
+
+ if tseek && tset.table.consumeSeek() <= 0 { // more than one table was visited: charge a seek to the first one
+ tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset)) // stored only if cSeek is still nil
+ }
+
+ return
+}
+
+func (v *version) sampleSeek(ikey internalKey) (tcomp bool) { // sampleSeek charges a seek to the first table reported for ikey when a second one is reported too; tcomp reports whether this call stored that table in cSeek.
+ var tset *tSet
+
+ v.walkOverlapping(nil, ikey, func(level int, t *tFile) bool {
+ if tset == nil {
+ tset = &tSet{level, t} // first table: remember it and keep walking
+ return true
+ }
+ if tset.table.consumeSeek() <= 0 { // second table: the seek is consumed on the first one
+ tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset)) // stored only if cSeek is still nil
+ }
+ return false
+ }, nil)
+
+ return
+}
+
+func (v *version) getIterators(slice *util.Range, ro *opt.ReadOptions) (its []iterator.Iterator) { // getIterators returns one iterator per level-0 table plus one indexed iterator per non-empty higher level.
+ strict := opt.GetStrict(v.s.o.Options, ro, opt.StrictReader)
+ for level, tables := range v.levels {
+ if level == 0 {
+ // Merge all level zero files together since they may overlap.
+ for _, t := range tables {
+ its = append(its, v.s.tops.newIterator(t, slice, ro))
+ }
+ } else if len(tables) != 0 {
+ its = append(its, iterator.NewIndexedIterator(tables.newIndexIterator(v.s.tops, v.s.icmp, slice, ro), strict))
+ }
+ }
+ return
+}
+
+func (v *version) newStaging() *versionStaging { // newStaging returns an empty staging area whose base is v.
+ return &versionStaging{base: v}
+}
+
+// spawn builds a new version by applying session record r on top of this version.
+func (v *version) spawn(r *sessionRecord) *version {
+ staging := v.newStaging()
+ staging.commit(r)
+ return staging.finish()
+}
+
+func (v *version) fillRecord(r *sessionRecord) { // fillRecord adds every table file of v to r together with its level.
+ for level, tables := range v.levels {
+ for _, t := range tables {
+ r.addTableFile(level, t)
+ }
+ }
+}
+
+func (v *version) tLen(level int) int { // tLen returns the number of tables at level, or 0 when level is beyond the last level.
+ if level < len(v.levels) {
+ return len(v.levels[level])
+ }
+ return 0
+}
+
+func (v *version) offsetOf(ikey internalKey) (n int64, err error) { // offsetOf sums the sizes of tables lying entirely before ikey plus the approximate in-table offset for tables containing it; returns 0 and the error if tops.offsetOf fails.
+ for level, tables := range v.levels {
+ for _, t := range tables {
+ if v.s.icmp.Compare(t.imax, ikey) <= 0 {
+ // Entire file is before "ikey", so just add the file size
+ n += t.size
+ } else if v.s.icmp.Compare(t.imin, ikey) > 0 {
+ // Entire file is after "ikey", so ignore
+ if level > 0 {
+ // Files other than level 0 are sorted by meta->min, so
+ // no further files in this level will contain data for
+ // "ikey".
+ break // leaves only the inner per-table loop; the next level is still examined
+ }
+ } else {
+ // "ikey" falls in the range for this table. Add the
+ // approximate offset of "ikey" within the table.
+ if m, err := v.s.tops.offsetOf(t, ikey); err == nil { // this err shadows the named result
+ n += m
+ } else {
+ return 0, err
+ }
+ }
+ }
+ }
+
+ return
+}
+
+func (v *version) pickMemdbLevel(umin, umax []byte, maxLevel int) (level int) { // pickMemdbLevel returns a level in [0, maxLevel] for the user-key range umin..umax; presumably where a flushed memdb table is placed — confirm against the caller.
+ if maxLevel > 0 { // maxLevel <= 0 always yields level 0
+ if len(v.levels) == 0 {
+ return maxLevel // the version has no levels at all
+ }
+ if !v.levels[0].overlaps(v.s.icmp, umin, umax, true) { // level stays 0 if the range overlaps level 0
+ var overlaps tFiles
+ for ; level < maxLevel; level++ {
+ if pLevel := level + 1; pLevel >= len(v.levels) {
+ return maxLevel // no further levels exist below this point
+ } else if v.levels[pLevel].overlaps(v.s.icmp, umin, umax, false) {
+ break // the next level overlaps the range, so stop at the current level
+ }
+ if gpLevel := level + 2; gpLevel < len(v.levels) {
+ overlaps = v.levels[gpLevel].getOverlaps(overlaps, v.s.icmp, umin, umax, false)
+ if overlaps.size() > int64(v.s.o.GetCompactionGPOverlaps(level)) {
+ break // overlap with level+2 exceeds the configured limit
+ }
+ }
+ }
+ }
+ }
+ return
+}
+
+func (v *version) computeCompaction() { // computeCompaction scores every level, stores the highest-scoring level in cLevel/cScore and logs per-level statistics.
+ // Precomputed best level for next compaction
+ bestLevel := int(-1) // both stay -1 when the version has no levels
+ bestScore := float64(-1)
+
+ statFiles := make([]int, len(v.levels))
+ statSizes := make([]string, len(v.levels))
+ statScore := make([]string, len(v.levels))
+ statTotSize := int64(0)
+
+ for level, tables := range v.levels {
+ var score float64
+ size := tables.size()
+ if level == 0 {
+ // We treat level-0 specially by bounding the number of files
+ // instead of number of bytes for two reasons:
+ //
+ // (1) With larger write-buffer sizes, it is nice not to do too
+ // many level-0 compaction.
+ //
+ // (2) The files in level-0 are merged on every read and
+ // therefore we wish to avoid too many files when the individual
+ // file size is small (perhaps because of a small write-buffer
+ // setting, or very high compression ratios, or lots of
+ // overwrites/deletions).
+ score = float64(len(tables)) / float64(v.s.o.GetCompactionL0Trigger())
+ } else {
+ score = float64(size) / float64(v.s.o.GetCompactionTotalSize(level))
+ }
+
+ if score > bestScore { // strict comparison: on equal scores the lower level is kept
+ bestLevel = level
+ bestScore = score
+ }
+
+ statFiles[level] = len(tables)
+ statSizes[level] = shortenb(int(size))
+ statScore[level] = fmt.Sprintf("%.2f", score)
+ statTotSize += size
+ }
+
+ v.cLevel = bestLevel
+ v.cScore = bestScore
+
+ v.s.logf("version@stat F·%v S·%s%v Sc·%v", statFiles, shortenb(int(statTotSize)), statSizes, statScore)
+}
+
+func (v *version) needCompaction() bool { // needCompaction reports whether cScore has reached 1 or a table set is recorded in cSeek.
+ return v.cScore >= 1 || atomic.LoadPointer(&v.cSeek) != nil
+}
+
+type tablesScratch struct { // tablesScratch collects one level's staged table additions and deletions, keyed by file number.
+ added map[int64]atRecord
+ deleted map[int64]struct{}
+}
+
+type versionStaging struct { // versionStaging accumulates session records on top of base; finish turns them into a new version.
+ base *version
+ levels []tablesScratch // staged changes, indexed by level; grown on demand by getScratch
+}
+
+func (p *versionStaging) getScratch(level int) *tablesScratch { // getScratch returns the scratch for level, growing p.levels first if it is too short.
+ if level >= len(p.levels) {
+ newLevels := make([]tablesScratch, level+1)
+ copy(newLevels, p.levels)
+ p.levels = newLevels
+ }
+ return &(p.levels[level]) // pointer into the slice, so callers modify the stored scratch in place
+}
+
+func (p *versionStaging) commit(r *sessionRecord) { // commit folds r's deleted and added tables into the per-level scratch; deletions are processed before additions.
+ // Deleted tables.
+ for _, r := range r.deletedTables { // the loop variable r shadows the session record
+ scratch := p.getScratch(r.level)
+ if r.level < len(p.base.levels) && len(p.base.levels[r.level]) > 0 { // record the deletion only if the base has tables at this level
+ if scratch.deleted == nil {
+ scratch.deleted = make(map[int64]struct{})
+ }
+ scratch.deleted[r.num] = struct{}{}
+ }
+ if scratch.added != nil {
+ delete(scratch.added, r.num) // a deletion cancels an earlier staged addition
+ }
+ }
+
+ // New tables.
+ for _, r := range r.addedTables {
+ scratch := p.getScratch(r.level)
+ if scratch.added == nil {
+ scratch.added = make(map[int64]atRecord)
+ }
+ scratch.added[r.num] = r
+ if scratch.deleted != nil {
+ delete(scratch.deleted, r.num) // an addition cancels an earlier staged deletion
+ }
+ }
+}
+
+func (p *versionStaging) finish() *version { // finish builds the new version from base plus the staged changes, sorts each changed level, trims trailing empty levels and computes the compaction score.
+ // Build new version.
+ nv := newVersion(p.base.s)
+ numLevel := len(p.levels)
+ if len(p.base.levels) > numLevel {
+ numLevel = len(p.base.levels)
+ }
+ nv.levels = make([]tFiles, numLevel)
+ for level := 0; level < numLevel; level++ {
+ var baseTabels tFiles
+ if level < len(p.base.levels) {
+ baseTabels = p.base.levels[level]
+ }
+
+ if level < len(p.levels) {
+ scratch := p.levels[level]
+
+ var nt tFiles
+ // Prealloc list if possible.
+ if n := len(baseTabels) + len(scratch.added) - len(scratch.deleted); n > 0 {
+ nt = make(tFiles, 0, n)
+ }
+
+ // Base tables.
+ for _, t := range baseTabels {
+ if _, ok := scratch.deleted[t.fd.Num]; ok {
+ continue
+ }
+ if _, ok := scratch.added[t.fd.Num]; ok {
+ continue // re-added tables are rebuilt from their record in the loop below
+ }
+ nt = append(nt, t)
+ }
+
+ // New tables.
+ for _, r := range scratch.added {
+ nt = append(nt, tableFileFromRecord(r))
+ }
+
+ if len(nt) != 0 {
+ // Sort tables.
+ if level == 0 {
+ nt.sortByNum()
+ } else {
+ nt.sortByKey(p.base.s.icmp)
+ }
+
+ nv.levels[level] = nt
+ }
+ } else {
+ nv.levels[level] = baseTabels // no staged changes for this level: reuse the base slice as-is
+ }
+ }
+
+ // Trim levels.
+ n := len(nv.levels)
+ for ; n > 0 && nv.levels[n-1] == nil; n-- { // count back over trailing nil levels
+ }
+ nv.levels = nv.levels[:n]
+
+ // Compute compaction score for new version.
+ nv.computeCompaction()
+
+ return nv
+}
+
+type versionReleaser struct { // versionReleaser releases one reference on v, at most once.
+ v *version
+ once bool // true after Release has dropped the reference
+}
+
+func (vr *versionReleaser) Release() { // Release drops the version reference on the first call only; the check and the release both happen under s.vmu.
+ v := vr.v
+ v.s.vmu.Lock()
+ if !vr.once {
+ v.releaseNB()
+ vr.once = true
+ }
+ v.s.vmu.Unlock()
+}