aboutsummaryrefslogtreecommitdiffstats
path: root/Godeps/_workspace/src/github.com/syndtr
diff options
context:
space:
mode:
author Péter Szilágyi <peterke@gmail.com> 2015-04-28 17:18:01 +0800
committer Péter Szilágyi <peterke@gmail.com> 2015-04-28 17:18:01 +0800
commit 7e3b080f8517731db774d5d2587b9ded4f9716e0 (patch)
tree c27488e8e84dacaece8b07458e187906b7940384 /Godeps/_workspace/src/github.com/syndtr
parent 182d484aa70bcd5b22117f02333b1fd3b1535dcb (diff)
download go-tangerine-7e3b080f8517731db774d5d2587b9ded4f9716e0.tar
go-tangerine-7e3b080f8517731db774d5d2587b9ded4f9716e0.tar.gz
go-tangerine-7e3b080f8517731db774d5d2587b9ded4f9716e0.tar.bz2
go-tangerine-7e3b080f8517731db774d5d2587b9ded4f9716e0.tar.lz
go-tangerine-7e3b080f8517731db774d5d2587b9ded4f9716e0.tar.xz
go-tangerine-7e3b080f8517731db774d5d2587b9ded4f9716e0.tar.zst
go-tangerine-7e3b080f8517731db774d5d2587b9ded4f9716e0.zip
godeps: update leveldb and snappy, dump serpent-go
Diffstat (limited to 'Godeps/_workspace/src/github.com/syndtr')
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch.go 228
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch_test.go 26
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/bench2_test.go 58
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/bench_test.go 15
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/bench2_test.go 30
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache.go 713
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache_test.go 564
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/empty_cache.go 246
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/lru.go 195
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/lru_cache.go 354
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/config.go 40
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/corrupt_test.go 76
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db.go 574
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_compaction.go 767
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_iter.go 108
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_snapshot.go 148
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_state.go 207
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_test.go 959
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_util.go 51
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_write.go 180
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/doc.go 10
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors.go (renamed from Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/error.go) 24
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors/errors.go 76
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/external_test.go 24
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go 30
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go 73
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter_test.go 2
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter.go 27
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter_suite_test.go 8
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go 29
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal.go 115
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal_test.go 490
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key.go 133
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key_test.go 94
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/leveldb_suite_test.go 13
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go 32
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_suite_test.go 10
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_test.go 2
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt/options.go 409
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/options.go 81
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session.go 306
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record.go 225
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record_test.go 18
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_util.go 86
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go 2
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go 68
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go 2
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/storage.go 34
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage_test.go 158
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table.go 317
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/block_test.go 30
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/reader.go 701
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table.go 6
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_suite_test.go 8
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_test.go 15
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/writer.go 2
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/db.go 8
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/ginkgo.go 21
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/kvtest.go 141
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/storage.go 1
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/util.go 14
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil_test.go 5
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util.go 4
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go 238
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/pool.go 21
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/pool_legacy.go 33
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/range.go 16
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/util.go 32
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/version.go 365
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/decode.go 292
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/encode.go 258
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/snappy.go 68
-rw-r--r-- Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/snappy_test.go 364
73 files changed, 7989 insertions, 3091 deletions
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch.go
index 0d7911eca..ccf390c9c 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch.go
@@ -8,65 +8,84 @@ package leveldb
import (
"encoding/binary"
- "errors"
+ "fmt"
+ "github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/memdb"
)
-var (
- errBatchTooShort = errors.New("leveldb: batch is too short")
- errBatchBadRecord = errors.New("leveldb: bad record in batch")
-)
+type ErrBatchCorrupted struct {
+ Reason string
+}
+
+func (e *ErrBatchCorrupted) Error() string {
+ return fmt.Sprintf("leveldb: batch corrupted: %s", e.Reason)
+}
+
+func newErrBatchCorrupted(reason string) error {
+ return errors.NewErrCorrupted(nil, &ErrBatchCorrupted{reason})
+}
-const kBatchHdrLen = 8 + 4
+const (
+ batchHdrLen = 8 + 4
+ batchGrowRec = 3000
+)
-type batchReplay interface {
- put(key, value []byte, seq uint64)
- delete(key []byte, seq uint64)
+type BatchReplay interface {
+ Put(key, value []byte)
+ Delete(key []byte)
}
// Batch is a write batch.
type Batch struct {
- buf []byte
+ data []byte
rLen, bLen int
seq uint64
sync bool
}
func (b *Batch) grow(n int) {
- off := len(b.buf)
+ off := len(b.data)
if off == 0 {
- // include headers
- off = kBatchHdrLen
- n += off
+ off = batchHdrLen
+ if b.data != nil {
+ b.data = b.data[:off]
+ }
}
- if cap(b.buf)-off >= n {
- return
+ if cap(b.data)-off < n {
+ if b.data == nil {
+ b.data = make([]byte, off, off+n)
+ } else {
+ odata := b.data
+ div := 1
+ if b.rLen > batchGrowRec {
+ div = b.rLen / batchGrowRec
+ }
+ b.data = make([]byte, off, off+n+(off-batchHdrLen)/div)
+ copy(b.data, odata)
+ }
}
- buf := make([]byte, 2*cap(b.buf)+n)
- copy(buf, b.buf)
- b.buf = buf[:off]
}
-func (b *Batch) appendRec(t vType, key, value []byte) {
+func (b *Batch) appendRec(kt kType, key, value []byte) {
n := 1 + binary.MaxVarintLen32 + len(key)
- if t == tVal {
+ if kt == ktVal {
n += binary.MaxVarintLen32 + len(value)
}
b.grow(n)
- off := len(b.buf)
- buf := b.buf[:off+n]
- buf[off] = byte(t)
+ off := len(b.data)
+ data := b.data[:off+n]
+ data[off] = byte(kt)
off += 1
- off += binary.PutUvarint(buf[off:], uint64(len(key)))
- copy(buf[off:], key)
+ off += binary.PutUvarint(data[off:], uint64(len(key)))
+ copy(data[off:], key)
off += len(key)
- if t == tVal {
- off += binary.PutUvarint(buf[off:], uint64(len(value)))
- copy(buf[off:], value)
+ if kt == ktVal {
+ off += binary.PutUvarint(data[off:], uint64(len(value)))
+ copy(data[off:], value)
off += len(value)
}
- b.buf = buf[:off]
+ b.data = data[:off]
b.rLen++
// Include 8-byte ikey header
b.bLen += len(key) + len(value) + 8
@@ -75,18 +94,51 @@ func (b *Batch) appendRec(t vType, key, value []byte) {
// Put appends 'put operation' of the given key/value pair to the batch.
// It is safe to modify the contents of the argument after Put returns.
func (b *Batch) Put(key, value []byte) {
- b.appendRec(tVal, key, value)
+ b.appendRec(ktVal, key, value)
}
// Delete appends 'delete operation' of the given key to the batch.
// It is safe to modify the contents of the argument after Delete returns.
func (b *Batch) Delete(key []byte) {
- b.appendRec(tDel, key, nil)
+ b.appendRec(ktDel, key, nil)
+}
+
+// Dump dumps batch contents. The returned slice can be loaded into the
+// batch using Load method.
+// The returned slice is not its own copy, so the contents should not be
+// modified.
+func (b *Batch) Dump() []byte {
+ return b.encode()
+}
+
+// Load loads given slice into the batch. Previous contents of the batch
+// will be discarded.
+// The given slice will not be copied and will be used as batch buffer, so
+// it is not safe to modify the contents of the slice.
+func (b *Batch) Load(data []byte) error {
+ return b.decode(0, data)
+}
+
+// Replay replays batch contents.
+func (b *Batch) Replay(r BatchReplay) error {
+ return b.decodeRec(func(i int, kt kType, key, value []byte) {
+ switch kt {
+ case ktVal:
+ r.Put(key, value)
+ case ktDel:
+ r.Delete(key)
+ }
+ })
+}
+
+// Len returns number of records in the batch.
+func (b *Batch) Len() int {
+ return b.rLen
}
// Reset resets the batch.
func (b *Batch) Reset() {
- b.buf = nil
+ b.data = b.data[:0]
b.seq = 0
b.rLen = 0
b.bLen = 0
@@ -97,24 +149,10 @@ func (b *Batch) init(sync bool) {
b.sync = sync
}
-func (b *Batch) put(key, value []byte, seq uint64) {
- if b.rLen == 0 {
- b.seq = seq
- }
- b.Put(key, value)
-}
-
-func (b *Batch) delete(key []byte, seq uint64) {
- if b.rLen == 0 {
- b.seq = seq
- }
- b.Delete(key)
-}
-
func (b *Batch) append(p *Batch) {
if p.rLen > 0 {
- b.grow(len(p.buf) - kBatchHdrLen)
- b.buf = append(b.buf, p.buf[kBatchHdrLen:]...)
+ b.grow(len(p.data) - batchHdrLen)
+ b.data = append(b.data, p.data[batchHdrLen:]...)
b.rLen += p.rLen
}
if p.sync {
@@ -122,95 +160,93 @@ func (b *Batch) append(p *Batch) {
}
}
-func (b *Batch) len() int {
- return b.rLen
-}
-
+// size returns the sum of key/value pair lengths plus an 8-byte ikey header per record.
func (b *Batch) size() int {
return b.bLen
}
func (b *Batch) encode() []byte {
b.grow(0)
- binary.LittleEndian.PutUint64(b.buf, b.seq)
- binary.LittleEndian.PutUint32(b.buf[8:], uint32(b.rLen))
+ binary.LittleEndian.PutUint64(b.data, b.seq)
+ binary.LittleEndian.PutUint32(b.data[8:], uint32(b.rLen))
- return b.buf
+ return b.data
}
-func (b *Batch) decode(buf []byte) error {
- if len(buf) < kBatchHdrLen {
- return errBatchTooShort
+func (b *Batch) decode(prevSeq uint64, data []byte) error {
+ if len(data) < batchHdrLen {
+ return newErrBatchCorrupted("too short")
}
- b.seq = binary.LittleEndian.Uint64(buf)
- b.rLen = int(binary.LittleEndian.Uint32(buf[8:]))
+ b.seq = binary.LittleEndian.Uint64(data)
+ if b.seq < prevSeq {
+ return newErrBatchCorrupted("invalid sequence number")
+ }
+ b.rLen = int(binary.LittleEndian.Uint32(data[8:]))
+ if b.rLen < 0 {
+ return newErrBatchCorrupted("invalid records length")
+ }
// No need to be precise at this point, it won't be used anyway
- b.bLen = len(buf) - kBatchHdrLen
- b.buf = buf
+ b.bLen = len(data) - batchHdrLen
+ b.data = data
return nil
}
-func (b *Batch) decodeRec(f func(i int, t vType, key, value []byte)) error {
- off := kBatchHdrLen
+func (b *Batch) decodeRec(f func(i int, kt kType, key, value []byte)) (err error) {
+ off := batchHdrLen
for i := 0; i < b.rLen; i++ {
- if off >= len(b.buf) {
- return errors.New("leveldb: invalid batch record length")
+ if off >= len(b.data) {
+ return newErrBatchCorrupted("invalid records length")
}
- t := vType(b.buf[off])
- if t > tVal {
- return errors.New("leveldb: invalid batch record type in batch")
+ kt := kType(b.data[off])
+ if kt > ktVal {
+ return newErrBatchCorrupted("bad record: invalid type")
}
off += 1
- x, n := binary.Uvarint(b.buf[off:])
+ x, n := binary.Uvarint(b.data[off:])
off += n
- if n <= 0 || off+int(x) > len(b.buf) {
- return errBatchBadRecord
+ if n <= 0 || off+int(x) > len(b.data) {
+ return newErrBatchCorrupted("bad record: invalid key length")
}
- key := b.buf[off : off+int(x)]
+ key := b.data[off : off+int(x)]
off += int(x)
-
var value []byte
- if t == tVal {
- x, n := binary.Uvarint(b.buf[off:])
+ if kt == ktVal {
+ x, n := binary.Uvarint(b.data[off:])
off += n
- if n <= 0 || off+int(x) > len(b.buf) {
- return errBatchBadRecord
+ if n <= 0 || off+int(x) > len(b.data) {
+ return newErrBatchCorrupted("bad record: invalid value length")
}
- value = b.buf[off : off+int(x)]
+ value = b.data[off : off+int(x)]
off += int(x)
}
- f(i, t, key, value)
+ f(i, kt, key, value)
}
return nil
}
-func (b *Batch) replay(to batchReplay) error {
- return b.decodeRec(func(i int, t vType, key, value []byte) {
- switch t {
- case tVal:
- to.put(key, value, b.seq+uint64(i))
- case tDel:
- to.delete(key, b.seq+uint64(i))
- }
- })
-}
-
func (b *Batch) memReplay(to *memdb.DB) error {
- return b.decodeRec(func(i int, t vType, key, value []byte) {
- ikey := newIKey(key, b.seq+uint64(i), t)
+ return b.decodeRec(func(i int, kt kType, key, value []byte) {
+ ikey := newIkey(key, b.seq+uint64(i), kt)
to.Put(ikey, value)
})
}
+func (b *Batch) memDecodeAndReplay(prevSeq uint64, data []byte, to *memdb.DB) error {
+ if err := b.decode(prevSeq, data); err != nil {
+ return err
+ }
+ return b.memReplay(to)
+}
+
func (b *Batch) revertMemReplay(to *memdb.DB) error {
- return b.decodeRec(func(i int, t vType, key, value []byte) {
- ikey := newIKey(key, b.seq+uint64(i), t)
+ return b.decodeRec(func(i int, kt kType, key, value []byte) {
+ ikey := newIkey(key, b.seq+uint64(i), kt)
to.Delete(ikey)
})
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch_test.go
index 19b749b8f..7fc842f4f 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch_test.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch_test.go
@@ -15,7 +15,7 @@ import (
)
type tbRec struct {
- t vType
+ kt kType
key, value []byte
}
@@ -23,39 +23,39 @@ type testBatch struct {
rec []*tbRec
}
-func (p *testBatch) put(key, value []byte, seq uint64) {
- p.rec = append(p.rec, &tbRec{tVal, key, value})
+func (p *testBatch) Put(key, value []byte) {
+ p.rec = append(p.rec, &tbRec{ktVal, key, value})
}
-func (p *testBatch) delete(key []byte, seq uint64) {
- p.rec = append(p.rec, &tbRec{tDel, key, nil})
+func (p *testBatch) Delete(key []byte) {
+ p.rec = append(p.rec, &tbRec{ktDel, key, nil})
}
func compareBatch(t *testing.T, b1, b2 *Batch) {
if b1.seq != b2.seq {
t.Errorf("invalid seq number want %d, got %d", b1.seq, b2.seq)
}
- if b1.len() != b2.len() {
- t.Fatalf("invalid record length want %d, got %d", b1.len(), b2.len())
+ if b1.Len() != b2.Len() {
+ t.Fatalf("invalid record length want %d, got %d", b1.Len(), b2.Len())
}
p1, p2 := new(testBatch), new(testBatch)
- err := b1.replay(p1)
+ err := b1.Replay(p1)
if err != nil {
t.Fatal("error when replaying batch 1: ", err)
}
- err = b2.replay(p2)
+ err = b2.Replay(p2)
if err != nil {
t.Fatal("error when replaying batch 2: ", err)
}
for i := range p1.rec {
r1, r2 := p1.rec[i], p2.rec[i]
- if r1.t != r2.t {
- t.Errorf("invalid type on record '%d' want %d, got %d", i, r1.t, r2.t)
+ if r1.kt != r2.kt {
+ t.Errorf("invalid type on record '%d' want %d, got %d", i, r1.kt, r2.kt)
}
if !bytes.Equal(r1.key, r2.key) {
t.Errorf("invalid key on record '%d' want %s, got %s", i, string(r1.key), string(r2.key))
}
- if r1.t == tVal {
+ if r1.kt == ktVal {
if !bytes.Equal(r1.value, r2.value) {
t.Errorf("invalid value on record '%d' want %s, got %s", i, string(r1.value), string(r2.value))
}
@@ -75,7 +75,7 @@ func TestBatch_EncodeDecode(t *testing.T) {
b1.Delete([]byte("k"))
buf := b1.encode()
b2 := new(Batch)
- err := b2.decode(buf)
+ err := b2.decode(0, buf)
if err != nil {
t.Error("error when decoding batch: ", err)
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/bench2_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/bench2_test.go
new file mode 100644
index 000000000..0dd60fd82
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/bench2_test.go
@@ -0,0 +1,58 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// +build !go1.2
+
+package leveldb
+
+import (
+ "sync/atomic"
+ "testing"
+)
+
+func BenchmarkDBReadConcurrent(b *testing.B) {
+ p := openDBBench(b, false)
+ p.populate(b.N)
+ p.fill()
+ p.gc()
+ defer p.close()
+
+ b.ResetTimer()
+ b.SetBytes(116)
+
+ b.RunParallel(func(pb *testing.PB) {
+ iter := p.newIter()
+ defer iter.Release()
+ for pb.Next() && iter.Next() {
+ }
+ })
+}
+
+func BenchmarkDBReadConcurrent2(b *testing.B) {
+ p := openDBBench(b, false)
+ p.populate(b.N)
+ p.fill()
+ p.gc()
+ defer p.close()
+
+ b.ResetTimer()
+ b.SetBytes(116)
+
+ var dir uint32
+ b.RunParallel(func(pb *testing.PB) {
+ iter := p.newIter()
+ defer iter.Release()
+ if atomic.AddUint32(&dir, 1)%2 == 0 {
+ for pb.Next() && iter.Next() {
+ }
+ } else {
+ if pb.Next() && iter.Last() {
+ for pb.Next() && iter.Prev() {
+ }
+ }
+ }
+ })
+}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/bench_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/bench_test.go
index ea6801a89..91b426709 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/bench_test.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/bench_test.go
@@ -170,7 +170,7 @@ func (p *dbBench) writes(perBatch int) {
b.SetBytes(116)
}
-func (p *dbBench) drop() {
+func (p *dbBench) gc() {
p.keys, p.values = nil, nil
runtime.GC()
}
@@ -249,6 +249,9 @@ func (p *dbBench) newIter() iterator.Iterator {
}
func (p *dbBench) close() {
+ if bp, err := p.db.GetProperty("leveldb.blockpool"); err == nil {
+ p.b.Log("Block pool stats: ", bp)
+ }
p.db.Close()
p.stor.Close()
os.RemoveAll(benchDB)
@@ -331,7 +334,7 @@ func BenchmarkDBRead(b *testing.B) {
p := openDBBench(b, false)
p.populate(b.N)
p.fill()
- p.drop()
+ p.gc()
iter := p.newIter()
b.ResetTimer()
@@ -362,7 +365,7 @@ func BenchmarkDBReadUncompressed(b *testing.B) {
p := openDBBench(b, true)
p.populate(b.N)
p.fill()
- p.drop()
+ p.gc()
iter := p.newIter()
b.ResetTimer()
@@ -379,7 +382,7 @@ func BenchmarkDBReadTable(b *testing.B) {
p.populate(b.N)
p.fill()
p.reopen()
- p.drop()
+ p.gc()
iter := p.newIter()
b.ResetTimer()
@@ -395,7 +398,7 @@ func BenchmarkDBReadReverse(b *testing.B) {
p := openDBBench(b, false)
p.populate(b.N)
p.fill()
- p.drop()
+ p.gc()
iter := p.newIter()
b.ResetTimer()
@@ -413,7 +416,7 @@ func BenchmarkDBReadReverseTable(b *testing.B) {
p.populate(b.N)
p.fill()
p.reopen()
- p.drop()
+ p.gc()
iter := p.newIter()
b.ResetTimer()
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/bench2_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/bench2_test.go
new file mode 100644
index 000000000..175e22203
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/bench2_test.go
@@ -0,0 +1,30 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// +build !go1.2
+
+package cache
+
+import (
+ "math/rand"
+ "testing"
+)
+
+func BenchmarkLRUCache(b *testing.B) {
+ c := NewCache(NewLRU(10000))
+
+ b.SetParallelism(10)
+ b.RunParallel(func(pb *testing.PB) {
+ r := rand.New(rand.NewSource(time.Now().UnixNano()))
+
+ for pb.Next() {
+ key := uint64(r.Intn(1000000))
+ c.Get(0, key, func() (int, Value) {
+ return 1, key
+ }).Release()
+ }
+ })
+}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache.go
index 9b6a74977..c9670de5d 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache.go
@@ -8,118 +8,669 @@
package cache
import (
+ "sync"
"sync/atomic"
+ "unsafe"
+
+ "github.com/syndtr/goleveldb/leveldb/util"
)
-// SetFunc used by Namespace.Get method to create a cache object. SetFunc
-// may return ok false, in that case the cache object will not be created.
-type SetFunc func() (ok bool, value interface{}, charge int, fin SetFin)
+// Cacher provides an interface for implementing caching functionality.
+// An implementation must be goroutine-safe.
+type Cacher interface {
+ // Capacity returns cache capacity.
+ Capacity() int
-// SetFin will be called when corresponding cache object are released.
-type SetFin func()
+ // SetCapacity sets cache capacity.
+ SetCapacity(capacity int)
-// DelFin will be called when corresponding cache object are released.
-// DelFin will be called after SetFin. The exist is true if the corresponding
-// cache object is actually exist in the cache tree.
-type DelFin func(exist bool)
+ // Promote promotes the 'cache node'.
+ Promote(n *Node)
-// PurgeFin will be called when corresponding cache object are released.
-// PurgeFin will be called after SetFin. If PurgeFin present DelFin will
-// not be executed but passed to the PurgeFin, it is up to the caller
-// to call it or not.
-type PurgeFin func(ns, key uint64, delfin DelFin)
+ // Ban evicts the 'cache node' and prevents subsequent 'promote'.
+ Ban(n *Node)
-// Cache is a cache tree.
-type Cache interface {
- // SetCapacity sets cache capacity.
- SetCapacity(capacity int)
+ // Evict evicts the 'cache node'.
+ Evict(n *Node)
- // GetNamespace gets or creates a cache namespace for the given id.
- GetNamespace(id uint64) Namespace
+ // EvictNS evicts 'cache node' with the given namespace.
+ EvictNS(ns uint64)
- // Purge purges all cache namespaces, read Namespace.Purge method documentation.
- Purge(fin PurgeFin)
+ // EvictAll evicts all 'cache node'.
+ EvictAll()
+
+ // Close closes the 'cache tree'
+ Close() error
+}
+
+// Value is a 'cacheable object'. It may implement util.Releaser; if
+// so, the Release method will be called once the object is released.
+type Value interface{}
+
+type CacheGetter struct {
+ Cache *Cache
+ NS uint64
+}
- // Zap zaps all cache namespaces, read Namespace.Zap method documentation.
- Zap(closed bool)
+func (g *CacheGetter) Get(key uint64, setFunc func() (size int, value Value)) *Handle {
+ return g.Cache.Get(g.NS, key, setFunc)
}
-// Namespace is a cache namespace.
-type Namespace interface {
- // Get gets cache object for the given key. The given SetFunc (if not nil) will
- // be called if the given key does not exist.
- // If the given key does not exist, SetFunc is nil or SetFunc return ok false, Get
- // will return ok false.
- Get(key uint64, setf SetFunc) (obj Object, ok bool)
+// The hash tables implementation is based on:
+// "Dynamic-Sized Nonblocking Hash Tables", by Yujie Liu, Kunlong Zhang, and Michael Spear. ACM Symposium on Principles of Distributed Computing, Jul 2014.
- // Get deletes cache object for the given key. If exist the cache object will
- // be deleted later when all of its handles have been released (i.e. no one use
- // it anymore) and the given DelFin (if not nil) will finally be executed. If
- // such cache object does not exist the given DelFin will be executed anyway.
- //
- // Delete returns true if such cache object exist.
- Delete(key uint64, fin DelFin) bool
+const (
+ mInitialSize = 1 << 4
+ mOverflowThreshold = 1 << 5
+ mOverflowGrowThreshold = 1 << 7
+)
- // Purge deletes all cache objects, read Delete method documentation.
- Purge(fin PurgeFin)
+type mBucket struct {
+ mu sync.Mutex
+ node []*Node
+ frozen bool
+}
- // Zap detaches the namespace from the cache tree and delete all its cache
- // objects. The cache objects deletion and finalizers execution are happen
- // immediately, even if its existing handles haven't yet been released.
- // A zapped namespace can't never be filled again.
- // If closed is false then the Get function will always call the given SetFunc
- // if it is not nil, but resultant of the SetFunc will not be cached.
- Zap(closed bool)
+func (b *mBucket) freeze() []*Node {
+ b.mu.Lock()
+ defer b.mu.Unlock()
+ if !b.frozen {
+ b.frozen = true
+ }
+ return b.node
}
-// Object is a cache object.
-type Object interface {
- // Release releases the cache object. Other methods should not be called
- // after the cache object has been released.
- Release()
+func (b *mBucket) get(r *Cache, h *mNode, hash uint32, ns, key uint64, noset bool) (done, added bool, n *Node) {
+ b.mu.Lock()
+
+ if b.frozen {
+ b.mu.Unlock()
+ return
+ }
+
+ // Scan the node.
+ for _, n := range b.node {
+ if n.hash == hash && n.ns == ns && n.key == key {
+ atomic.AddInt32(&n.ref, 1)
+ b.mu.Unlock()
+ return true, false, n
+ }
+ }
+
+ // Get only.
+ if noset {
+ b.mu.Unlock()
+ return true, false, nil
+ }
+
+ // Create node.
+ n = &Node{
+ r: r,
+ hash: hash,
+ ns: ns,
+ key: key,
+ ref: 1,
+ }
+ // Add node to bucket.
+ b.node = append(b.node, n)
+ bLen := len(b.node)
+ b.mu.Unlock()
+
+ // Update counter.
+ grow := atomic.AddInt32(&r.nodes, 1) >= h.growThreshold
+ if bLen > mOverflowThreshold {
+ grow = grow || atomic.AddInt32(&h.overflow, 1) >= mOverflowGrowThreshold
+ }
- // Value returns value of the cache object.
- Value() interface{}
+ // Grow.
+ if grow && atomic.CompareAndSwapInt32(&h.resizeInProgess, 0, 1) {
+ nhLen := len(h.buckets) << 1
+ nh := &mNode{
+ buckets: make([]unsafe.Pointer, nhLen),
+ mask: uint32(nhLen) - 1,
+ pred: unsafe.Pointer(h),
+ growThreshold: int32(nhLen * mOverflowThreshold),
+ shrinkThreshold: int32(nhLen >> 1),
+ }
+ ok := atomic.CompareAndSwapPointer(&r.mHead, unsafe.Pointer(h), unsafe.Pointer(nh))
+ if !ok {
+ panic("BUG: failed swapping head")
+ }
+ go nh.initBuckets()
+ }
+
+ return true, true, n
}
-// Namespace state.
-type nsState int
+func (b *mBucket) delete(r *Cache, h *mNode, hash uint32, ns, key uint64) (done, deleted bool) {
+ b.mu.Lock()
-const (
- nsEffective nsState = iota
- nsZapped
- nsClosed
-)
+ if b.frozen {
+ b.mu.Unlock()
+ return
+ }
-// Node state.
-type nodeState int
+ // Scan the node.
+ var (
+ n *Node
+ bLen int
+ )
+ for i := range b.node {
+ n = b.node[i]
+ if n.ns == ns && n.key == key {
+ if atomic.LoadInt32(&n.ref) == 0 {
+ deleted = true
-const (
- nodeEffective nodeState = iota
- nodeEvicted
- nodeRemoved
-)
+ // Call releaser.
+ if n.value != nil {
+ if r, ok := n.value.(util.Releaser); ok {
+ r.Release()
+ }
+ n.value = nil
+ }
+
+ // Remove node from bucket.
+ b.node = append(b.node[:i], b.node[i+1:]...)
+ bLen = len(b.node)
+ }
+ break
+ }
+ }
+ b.mu.Unlock()
-// Fake object.
-type fakeObject struct {
- value interface{}
- fin func()
- once uint32
+ if deleted {
+ // Call OnDel.
+ for _, f := range n.onDel {
+ f()
+ }
+
+ // Update counter.
+ atomic.AddInt32(&r.size, int32(n.size)*-1)
+ shrink := atomic.AddInt32(&r.nodes, -1) < h.shrinkThreshold
+ if bLen >= mOverflowThreshold {
+ atomic.AddInt32(&h.overflow, -1)
+ }
+
+ // Shrink.
+ if shrink && len(h.buckets) > mInitialSize && atomic.CompareAndSwapInt32(&h.resizeInProgess, 0, 1) {
+ nhLen := len(h.buckets) >> 1
+ nh := &mNode{
+ buckets: make([]unsafe.Pointer, nhLen),
+ mask: uint32(nhLen) - 1,
+ pred: unsafe.Pointer(h),
+ growThreshold: int32(nhLen * mOverflowThreshold),
+ shrinkThreshold: int32(nhLen >> 1),
+ }
+ ok := atomic.CompareAndSwapPointer(&r.mHead, unsafe.Pointer(h), unsafe.Pointer(nh))
+ if !ok {
+ panic("BUG: failed swapping head")
+ }
+ go nh.initBuckets()
+ }
+ }
+
+ return true, deleted
}
-func (o *fakeObject) Value() interface{} {
- if atomic.LoadUint32(&o.once) == 0 {
- return o.value
+type mNode struct {
+ buckets []unsafe.Pointer // []*mBucket
+ mask uint32
+ pred unsafe.Pointer // *mNode
+ resizeInProgess int32
+
+ overflow int32
+ growThreshold int32
+ shrinkThreshold int32
+}
+
+func (n *mNode) initBucket(i uint32) *mBucket {
+ if b := (*mBucket)(atomic.LoadPointer(&n.buckets[i])); b != nil {
+ return b
+ }
+
+ p := (*mNode)(atomic.LoadPointer(&n.pred))
+ if p != nil {
+ var node []*Node
+ if n.mask > p.mask {
+ // Grow.
+ pb := (*mBucket)(atomic.LoadPointer(&p.buckets[i&p.mask]))
+ if pb == nil {
+ pb = p.initBucket(i & p.mask)
+ }
+ m := pb.freeze()
+ // Split nodes.
+ for _, x := range m {
+ if x.hash&n.mask == i {
+ node = append(node, x)
+ }
+ }
+ } else {
+ // Shrink.
+ pb0 := (*mBucket)(atomic.LoadPointer(&p.buckets[i]))
+ if pb0 == nil {
+ pb0 = p.initBucket(i)
+ }
+ pb1 := (*mBucket)(atomic.LoadPointer(&p.buckets[i+uint32(len(n.buckets))]))
+ if pb1 == nil {
+ pb1 = p.initBucket(i + uint32(len(n.buckets)))
+ }
+ m0 := pb0.freeze()
+ m1 := pb1.freeze()
+ // Merge nodes.
+ node = make([]*Node, 0, len(m0)+len(m1))
+ node = append(node, m0...)
+ node = append(node, m1...)
+ }
+ b := &mBucket{node: node}
+ if atomic.CompareAndSwapPointer(&n.buckets[i], nil, unsafe.Pointer(b)) {
+ if len(node) > mOverflowThreshold {
+ atomic.AddInt32(&n.overflow, int32(len(node)-mOverflowThreshold))
+ }
+ return b
+ }
+ }
+
+ return (*mBucket)(atomic.LoadPointer(&n.buckets[i]))
+}
+
+func (n *mNode) initBuckets() {
+ for i := range n.buckets {
+ n.initBucket(uint32(i))
+ }
+ atomic.StorePointer(&n.pred, nil)
+}
+
+// Cache is a 'cache map'.
+type Cache struct {
+ mu sync.RWMutex
+ mHead unsafe.Pointer // *mNode
+ nodes int32
+ size int32
+ cacher Cacher
+ closed bool
+}
+
+// NewCache builds an empty 'cache map' whose initial table has mInitialSize
+// pre-allocated buckets. The cacher is optional and may be nil.
+func NewCache(cacher Cacher) *Cache {
+ buckets := make([]unsafe.Pointer, mInitialSize)
+ for i := range buckets {
+ buckets[i] = unsafe.Pointer(&mBucket{})
+ }
+ head := &mNode{
+ buckets: buckets,
+ mask: mInitialSize - 1,
+ growThreshold: int32(mInitialSize * mOverflowThreshold),
+ shrinkThreshold: 0,
+ }
+ return &Cache{
+ mHead: unsafe.Pointer(head),
+ cacher: cacher,
+ }
+}
+
+// getBucket returns the current table together with the bucket that hash
+// maps to, initialising the bucket when its slot is still empty.
+func (r *Cache) getBucket(hash uint32) (*mNode, *mBucket) {
+ head := (*mNode)(atomic.LoadPointer(&r.mHead))
+ idx := hash & head.mask
+ if bucket := (*mBucket)(atomic.LoadPointer(&head.buckets[idx])); bucket != nil {
+ return head, bucket
+ }
+ return head, head.initBucket(idx)
+}
+
+// delete removes n from the bucket it hashes to and reports whether the
+// node was actually deleted. The bucket is looked up again and the attempt
+// retried for as long as the bucket reports the operation as not done.
+func (r *Cache) delete(n *Node) bool {
+ // The loop only exits through the return below, so no trailing return
+ // statement is needed (it would be unreachable code).
+ for {
+ h, b := r.getBucket(n.hash)
+ done, deleted := b.delete(r, h, n.hash, n.ns, n.key)
+ if done {
+ return deleted
+ }
+ }
+}
+
+// Nodes returns the number of 'cache node' in the map. The counter is read
+// atomically.
+func (r *Cache) Nodes() int {
+ return int(atomic.LoadInt32(&r.nodes))
+}
+
+// Size returns the sum of the sizes of all 'cache node' in the map. The
+// counter is read atomically.
+func (r *Cache) Size() int {
+ return int(atomic.LoadInt32(&r.size))
+}
+
+// Capacity reports the capacity of the underlying Cacher, or 0 when the
+// cache was created without one.
+func (r *Cache) Capacity() int {
+ if c := r.cacher; c != nil {
+ return c.Capacity()
+ }
+ return 0
+}
+
+// SetCapacity forwards the new capacity to the underlying Cacher. It does
+// nothing when the cache was created without one.
+func (r *Cache) SetCapacity(capacity int) {
+ if r.cacher == nil {
+ return
+ }
+ r.cacher.SetCapacity(capacity)
+}
+
+// Get gets the 'cache node' with the given namespace and key.
+// If the cache node is not found and setFunc is not nil, Get atomically
+// creates the 'cache node' by calling setFunc. Otherwise Get returns nil.
+// Get also returns nil when the cache is closed or when setFunc yields a
+// nil value.
+//
+// The returned 'cache handle' should be released after use by calling its
+// Release method.
+func (r *Cache) Get(ns, key uint64, setFunc func() (size int, value Value)) *Handle {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed {
+ return nil
+ }
+
+ hash := murmur32(ns, key, 0xf00)
+ for {
+ h, b := r.getBucket(hash)
+ // NOTE(review): the last argument presumably tells the bucket not to
+ // create a missing node; confirm against mBucket.get.
+ done, _, n := b.get(r, h, hash, ns, key, setFunc == nil)
+ if done {
+ if n != nil {
+ n.mu.Lock()
+ if n.value == nil {
+ // The node has no value yet: either fill it or give it up.
+ if setFunc == nil {
+ n.mu.Unlock()
+ n.unref()
+ return nil
+ }
+
+ n.size, n.value = setFunc()
+ if n.value == nil {
+ // setFunc declined to produce a value; drop the node.
+ n.size = 0
+ n.mu.Unlock()
+ n.unref()
+ return nil
+ }
+ atomic.AddInt32(&r.size, int32(n.size))
+ }
+ n.mu.Unlock()
+ if r.cacher != nil {
+ r.cacher.Promote(n)
+ }
+ return &Handle{unsafe.Pointer(n)}
+ }
+
+ break
+ }
}
return nil
}
-func (o *fakeObject) Release() {
- if !atomic.CompareAndSwapUint32(&o.once, 0, 1) {
+// Delete removes and bans the 'cache node' with the given namespace and key.
+// A banned 'cache node' will never be inserted into the 'cache tree'. The
+// ban is attributed only to that particular 'cache node', so when a
+// 'cache node' is recreated it will not be banned.
+//
+// If onDel is not nil, it is executed immediately when no such 'cache node'
+// exists, or otherwise once the 'cache node' is released. onDel is not
+// called when the cache is already closed.
+//
+// Delete returns true if such a 'cache node' exists.
+func (r *Cache) Delete(ns, key uint64, onDel func()) bool {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed {
+ return false
+ }
+
+ hash := murmur32(ns, key, 0xf00)
+ for {
+ h, b := r.getBucket(hash)
+ done, _, n := b.get(r, h, hash, ns, key, true)
+ if done {
+ if n != nil {
+ if onDel != nil {
+ // Queue the callback on the node; n.mu guards onDel.
+ n.mu.Lock()
+ n.onDel = append(n.onDel, onDel)
+ n.mu.Unlock()
+ }
+ if r.cacher != nil {
+ r.cacher.Ban(n)
+ }
+ // Drop the reference acquired by b.get above.
+ n.unref()
+ return true
+ }
+
+ break
+ }
+ }
+
+ // No such node: run the callback right away.
+ if onDel != nil {
+ onDel()
+ }
+
+ return false
+}
+
+// Evict evicts the 'cache node' with the given namespace and key. This
+// simply calls Cacher.Evict when a Cacher is configured.
+//
+// Evict returns true if such a 'cache node' exists, and false when it does
+// not or when the cache is closed.
+func (r *Cache) Evict(ns, key uint64) bool {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed {
+ return false
+ }
+
+ hash := murmur32(ns, key, 0xf00)
+ for {
+ h, b := r.getBucket(hash)
+ done, _, n := b.get(r, h, hash, ns, key, true)
+ if done {
+ if n != nil {
+ if r.cacher != nil {
+ r.cacher.Evict(n)
+ }
+ // Drop the reference acquired by b.get above.
+ n.unref()
+ return true
+ }
+
+ break
+ }
+ }
+
+ return false
+}
+
+// EvictNS evicts every 'cache node' of the given namespace by delegating to
+// Cacher.EvictNS. It does nothing when the cache is closed or has no Cacher.
+func (r *Cache) EvictNS(ns uint64) {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed || r.cacher == nil {
+ return
+ }
+ r.cacher.EvictNS(ns)
+}
+
+// EvictAll evicts every 'cache node' by delegating to Cacher.EvictAll. It
+// does nothing when the cache is closed or has no Cacher.
+func (r *Cache) EvictAll() {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed {
return
}
- if o.fin != nil {
- o.fin()
- o.fin = nil
+
+ if r.cacher != nil {
+ r.cacher.EvictAll()
+ }
+}
+
+// Close closes the 'cache map' and releases all 'cache node': the value of
+// every node is released when it implements util.Releaser, and every
+// pending OnDel callback is run. Only the first call does any work; later
+// calls return nil.
+//
+// If the Cacher fails to close, its error is returned and the nodes are not
+// released; the cache is still marked closed.
+func (r *Cache) Close() error {
+ r.mu.Lock()
+ // Deferred so the write lock is also released on the early error return;
+ // otherwise every later operation on the cache would block forever.
+ defer r.mu.Unlock()
+ if !r.closed {
+ r.closed = true
+
+ if r.cacher != nil {
+ if err := r.cacher.Close(); err != nil {
+ return err
+ }
+ }
+
+ // Finish any pending bucket migration so that every node is
+ // reachable from the head table.
+ h := (*mNode)(r.mHead)
+ h.initBuckets()
+
+ for i := range h.buckets {
+ b := (*mBucket)(h.buckets[i])
+ for _, n := range b.node {
+ // Call releaser.
+ if n.value != nil {
+ if rel, ok := n.value.(util.Releaser); ok {
+ rel.Release()
+ }
+ n.value = nil
+ }
+
+ // Call OnDel.
+ for _, f := range n.onDel {
+ f()
+ }
+ }
+ }
}
+ return nil
+}
+
+// Node is a 'cache node': one entry of the 'cache map', identified by a
+// namespace and a key.
+type Node struct {
+ r *Cache // owning cache; used to delete the node when its ref count hits zero
+
+ hash uint32 // hash used to locate the node's bucket
+ ns, key uint64
+
+ mu sync.Mutex // held while value/size are being set and while onDel is appended
+ size int
+ value Value
+
+ ref int32 // reference counter, updated atomically
+ onDel []func() // callbacks registered through Cache.Delete
+
+ CacheData unsafe.Pointer // opaque slot for the Cacher's own per-node state
+}
+
+// NS returns the namespace of this 'cache node'.
+func (n *Node) NS() uint64 {
+ return n.ns
+}
+
+// Key returns the key of this 'cache node'.
+func (n *Node) Key() uint64 {
+ return n.key
+}
+
+// Size returns the size of this 'cache node'. The field is read without
+// taking the node's mutex.
+func (n *Node) Size() int {
+ return n.size
+}
+
+// Value returns the value of this 'cache node'. The field is read without
+// taking the node's mutex.
+func (n *Node) Value() Value {
+ return n.value
+}
+
+// Ref returns the current reference counter of this 'cache node', read
+// atomically.
+func (n *Node) Ref() int32 {
+ return atomic.LoadInt32(&n.ref)
+}
+
+// GetHandle takes an additional reference on this 'cache node' and returns
+// a handle for it. It panics when the node had no reference before the call.
+func (n *Node) GetHandle() *Handle {
+ if ref := atomic.AddInt32(&n.ref, 1); ref <= 1 {
+ panic("BUG: Node.GetHandle on zero ref")
+ }
+ return &Handle{unsafe.Pointer(n)}
+}
+
+// unref drops one reference and deletes the node from its cache when the
+// counter reaches zero. It does not take the cache lock itself; the callers
+// visible in this file (Get, Delete, Evict) already hold r.mu for reading.
+func (n *Node) unref() {
+ if atomic.AddInt32(&n.ref, -1) == 0 {
+ n.r.delete(n)
+ }
+}
+
+// unrefLocked drops one reference and, when the counter reaches zero, takes
+// the cache's read lock itself before deleting the node. The delete is
+// skipped once the cache has been closed.
+func (n *Node) unrefLocked() {
+ if atomic.AddInt32(&n.ref, -1) == 0 {
+ n.r.mu.RLock()
+ if !n.r.closed {
+ n.r.delete(n)
+ }
+ n.r.mu.RUnlock()
+ }
+}
+
+// Handle is a 'cache handle': a releasable reference to a 'cache node'.
+type Handle struct {
+ n unsafe.Pointer // *Node; set to nil once the handle has been released
+}
+
+// Value returns the value of the underlying 'cache node', or nil once the
+// handle has been released.
+func (h *Handle) Value() Value {
+ n := (*Node)(atomic.LoadPointer(&h.n))
+ if n == nil {
+ return nil
+ }
+ return n.value
+}
+
+// Release drops the reference held by this handle. Only the call that wins
+// the compare-and-swap clearing h.n unreferences the node, so releasing the
+// same handle again is a no-op.
+func (h *Handle) Release() {
+ nPtr := atomic.LoadPointer(&h.n)
+ if nPtr != nil && atomic.CompareAndSwapPointer(&h.n, nPtr, nil) {
+ n := (*Node)(nPtr)
+ n.unrefLocked()
+ }
+}
+
+// murmur32 hashes the (ns, key) pair into 32 bits, starting from seed. The
+// two 64-bit inputs are consumed as four 32-bit words, high word first.
+func murmur32(ns, key uint64, seed uint32) uint32 {
+ const (
+ m = uint32(0x5bd1e995)
+ r = 24
+ )
+
+ words := [4]uint32{uint32(ns >> 32), uint32(ns), uint32(key >> 32), uint32(key)}
+
+ h := seed
+ for _, k := range words {
+ // Scramble the word on its own.
+ k *= m
+ k ^= k >> r
+ k *= m
+
+ // Fold it into the running hash.
+ h *= m
+ h ^= k
+ }
+
+ // Final avalanche.
+ h ^= h >> 13
+ h *= m
+ h ^= h >> 15
+
+ return h
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache_test.go
index 07a9939b2..c2a50156f 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache_test.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache_test.go
@@ -8,17 +8,289 @@ package cache
import (
"math/rand"
+ "runtime"
+ "sync"
+ "sync/atomic"
"testing"
+ "time"
+ "unsafe"
)
-func set(ns Namespace, key uint64, value interface{}, charge int, fin func()) Object {
- obj, _ := ns.Get(key, func() (bool, interface{}, int, SetFin) {
- return true, value, charge, fin
+// int32o is a test object whose value is its own reference count: it may be
+// acquired exactly once and must be released exactly once.
+type int32o int32
+
+// acquire bumps the counter and panics unless this was the first reference.
+func (o *int32o) acquire() {
+ ref := atomic.AddInt32((*int32)(o), 1)
+ if ref != 1 {
+ panic("BUG: invalid ref")
+ }
+}
+
+// Release drops the counter and panics unless it returns to zero.
+func (o *int32o) Release() {
+ ref := atomic.AddInt32((*int32)(o), -1)
+ if ref != 0 {
+ panic("BUG: invalid ref")
+ }
+}
+
+// releaserFunc wraps a value together with a callback that is run when the
+// value is released.
+type releaserFunc struct {
+ fn func()
+ value Value
+}
+
+// Release runs the callback, if one was supplied.
+func (r releaserFunc) Release() {
+ if r.fn == nil {
+ return
+ }
+ r.fn()
+}
+
+// set stores value under (ns, key) with the given charge via Cache.Get and
+// returns the resulting handle. When relf is not nil the value is wrapped in
+// a releaserFunc so that relf runs when the value is released.
+func set(c *Cache, ns, key uint64, value Value, charge int, relf func()) *Handle {
+ return c.Get(ns, key, func() (int, Value) {
+ if relf != nil {
+ return charge, releaserFunc{relf, value}
+ }
+ return charge, value
+ })
+}
+
+// TestCacheMap hammers a Cacher-less Cache from many goroutines. Workers get
+// random objects and park or release their handles, per-namespace goroutines
+// release parked handles at random, and one goroutine churns an unrelated
+// namespace. At the end every object must be back at a zero ref count.
+func TestCacheMap(t *testing.T) {
+ runtime.GOMAXPROCS(runtime.NumCPU())
+
+ nsx := []struct {
+ nobjects, nhandles, concurrent, repeat int
+ }{
+ {10000, 400, 50, 3},
+ {100000, 1000, 100, 10},
+ }
+
+ var (
+ objects [][]int32o
+ handles [][]unsafe.Pointer
+ )
+
+ for _, x := range nsx {
+ objects = append(objects, make([]int32o, x.nobjects))
+ handles = append(handles, make([]unsafe.Pointer, x.nhandles))
+ }
+
+ c := NewCache(nil)
+
+ wg := new(sync.WaitGroup)
+ var done int32
+
+ for ns, x := range nsx {
+ for i := 0; i < x.concurrent; i++ {
+ wg.Add(1)
+ go func(ns, i, repeat int, objects []int32o, handles []unsafe.Pointer) {
+ defer wg.Done()
+ r := rand.New(rand.NewSource(time.Now().UnixNano()))
+
+ for j := len(objects) * repeat; j >= 0; j-- {
+ key := uint64(r.Intn(len(objects)))
+ h := c.Get(uint64(ns), key, func() (int, Value) {
+ o := &objects[key]
+ o.acquire()
+ return 1, o
+ })
+ // FailNow/Fatalf must only be called from the goroutine
+ // running the test, so failures are reported with Errorf
+ // and the worker stops after releasing its handle.
+ if v := h.Value().(*int32o); v != &objects[key] {
+ t.Errorf("#%d invalid value: want=%p got=%p", ns, &objects[key], v)
+ h.Release()
+ return
+ }
+ if objects[key] != 1 {
+ t.Errorf("#%d invalid object %d: %d", ns, key, objects[key])
+ h.Release()
+ return
+ }
+ // Park the handle in a random slot, or release it when the
+ // slot is already taken.
+ if !atomic.CompareAndSwapPointer(&handles[r.Intn(len(handles))], nil, unsafe.Pointer(h)) {
+ h.Release()
+ }
+ }
+ }(ns, i, x.repeat, objects[ns], handles[ns])
+ }
+
+ // Releases parked handles at random until the test is done.
+ go func(handles []unsafe.Pointer) {
+ r := rand.New(rand.NewSource(time.Now().UnixNano()))
+
+ for atomic.LoadInt32(&done) == 0 {
+ i := r.Intn(len(handles))
+ h := (*Handle)(atomic.LoadPointer(&handles[i]))
+ if h != nil && atomic.CompareAndSwapPointer(&handles[i], unsafe.Pointer(h), nil) {
+ h.Release()
+ }
+ time.Sleep(time.Millisecond)
+ }
+ }(handles[ns])
+ }
+
+ // Churns an unrelated namespace to add load on the map.
+ go func() {
+ handles := make([]*Handle, 100000)
+ for atomic.LoadInt32(&done) == 0 {
+ for i := range handles {
+ handles[i] = c.Get(999999999, uint64(i), func() (int, Value) {
+ return 1, 1
+ })
+ }
+ for _, h := range handles {
+ h.Release()
+ }
+ }
+ }()
+
+ wg.Wait()
+
+ atomic.StoreInt32(&done, 1)
+
+ // Release whatever is still parked.
+ for _, handles0 := range handles {
+ for i := range handles0 {
+ h := (*Handle)(atomic.LoadPointer(&handles0[i]))
+ if h != nil && atomic.CompareAndSwapPointer(&handles0[i], unsafe.Pointer(h), nil) {
+ h.Release()
+ }
+ }
+ }
+
+ for ns, objects0 := range objects {
+ for i, o := range objects0 {
+ if o != 0 {
+ t.Fatalf("invalid object #%d.%d: ref=%d", ns, i, o)
+ }
+ }
+ }
+}
+
+// TestCacheMap_NodesAndSize checks the node and size counters of a
+// Cacher-less Cache, first empty and then after four entries with charges
+// 1, 2, 3 and 1 have been stored.
+func TestCacheMap_NodesAndSize(t *testing.T) {
+ c := NewCache(nil)
+ if c.Nodes() != 0 {
+ t.Errorf("invalid nodes counter: want=%d got=%d", 0, c.Nodes())
+ }
+ if c.Size() != 0 {
+ t.Errorf("invalid size counter: want=%d got=%d", 0, c.Size())
+ }
+ set(c, 0, 1, 1, 1, nil)
+ set(c, 0, 2, 2, 2, nil)
+ set(c, 1, 1, 3, 3, nil)
+ set(c, 2, 1, 4, 1, nil)
+ if c.Nodes() != 4 {
+ t.Errorf("invalid nodes counter: want=%d got=%d", 4, c.Nodes())
+ }
+ // The charges add up to 1+2+3+1 = 7; the message reports that same value.
+ if c.Size() != 7 {
+ t.Errorf("invalid size counter: want=%d got=%d", 7, c.Size())
+ }
+}
+
+// TestLRUCache_Capacity fills an LRU-backed Cache of capacity 10 past its
+// limit, then lowers the capacity to 9, checking the capacity, node count
+// and total size after each step.
+func TestLRUCache_Capacity(t *testing.T) {
+ c := NewCache(NewLRU(10))
+ if got := c.Capacity(); got != 10 {
+ t.Errorf("invalid capacity: want=%d got=%d", 10, got)
+ }
+
+ entries := []struct {
+ ns, key uint64
+ value, charge int
+ }{
+ {0, 1, 1, 1},
+ {0, 2, 2, 2},
+ {1, 1, 3, 3},
+ {2, 1, 4, 1},
+ {2, 2, 5, 1},
+ {2, 3, 6, 1},
+ {2, 4, 7, 1},
+ {2, 5, 8, 1},
+ }
+ for _, e := range entries {
+ set(c, e.ns, e.key, e.value, e.charge, nil).Release()
+ }
+
+ if got := c.Nodes(); got != 7 {
+ t.Errorf("invalid nodes counter: want=%d got=%d", 7, got)
+ }
+ if got := c.Size(); got != 10 {
+ t.Errorf("invalid size counter: want=%d got=%d", 10, got)
+ }
+
+ c.SetCapacity(9)
+ if got := c.Capacity(); got != 9 {
+ t.Errorf("invalid capacity: want=%d got=%d", 9, got)
+ }
+ if got := c.Nodes(); got != 6 {
+ t.Errorf("invalid nodes counter: want=%d got=%d", 6, got)
+ }
+ if got := c.Size(); got != 8 {
+ t.Errorf("invalid size counter: want=%d got=%d", 8, got)
+ }
+}
+
+// TestCacheMap_NilValue checks that a setFunc returning a nil value yields
+// no handle and leaves both the node and size counters at zero.
+func TestCacheMap_NilValue(t *testing.T) {
+ c := NewCache(NewLRU(10))
+ h := c.Get(0, 0, func() (size int, value Value) {
+ return 1, nil
})
- return obj
+ if h != nil {
+ t.Error("cache handle is non-nil")
+ }
+ if c.Nodes() != 0 {
+ t.Errorf("invalid nodes counter: want=%d got=%d", 0, c.Nodes())
+ }
+ if c.Size() != 0 {
+ t.Errorf("invalid size counter: want=%d got=%d", 0, c.Size())
+ }
}
-func TestCache_HitMiss(t *testing.T) {
+// TestLRUCache_GetLatency runs concurrentSet goroutines whose setFunc sleeps
+// for delay alongside concurrentGet goroutines doing lookups without a
+// setFunc, for the given duration. It fails when the average lookup latency
+// exceeds delay/3, i.e. when lookups are held up by the slow setters.
+func TestLRUCache_GetLatency(t *testing.T) {
+ runtime.GOMAXPROCS(runtime.NumCPU())
+
+ const (
+ concurrentSet = 30
+ concurrentGet = 3
+ duration = 3 * time.Second
+ delay = 3 * time.Millisecond
+ maxkey = 100000
+ )
+
+ var (
+ set, getHit, getAll int32
+ getMaxLatency, getDuration int64
+ )
+
+ c := NewCache(NewLRU(5000))
+ wg := &sync.WaitGroup{}
+ until := time.Now().Add(duration)
+ // Setters: every miss costs `delay` inside setFunc.
+ for i := 0; i < concurrentSet; i++ {
+ wg.Add(1)
+ go func(i int) {
+ defer wg.Done()
+ r := rand.New(rand.NewSource(time.Now().UnixNano()))
+ for time.Now().Before(until) {
+ c.Get(0, uint64(r.Intn(maxkey)), func() (int, Value) {
+ time.Sleep(delay)
+ atomic.AddInt32(&set, 1)
+ return 1, 1
+ }).Release()
+ }
+ }(i)
+ }
+ // Getters: lookups only, timing each call.
+ for i := 0; i < concurrentGet; i++ {
+ wg.Add(1)
+ go func(i int) {
+ defer wg.Done()
+ r := rand.New(rand.NewSource(time.Now().UnixNano()))
+ for {
+ mark := time.Now()
+ if mark.Before(until) {
+ h := c.Get(0, uint64(r.Intn(maxkey)), nil)
+ latency := int64(time.Now().Sub(mark))
+ // Best-effort maximum: a lost compare-and-swap is not retried.
+ m := atomic.LoadInt64(&getMaxLatency)
+ if latency > m {
+ atomic.CompareAndSwapInt64(&getMaxLatency, m, latency)
+ }
+ atomic.AddInt64(&getDuration, latency)
+ if h != nil {
+ atomic.AddInt32(&getHit, 1)
+ h.Release()
+ }
+ atomic.AddInt32(&getAll, 1)
+ } else {
+ break
+ }
+ }
+ }(i)
+ }
+
+ wg.Wait()
+ // NOTE(review): this divides by getAll, which is zero if no lookup
+ // completed within the duration.
+ getAvglatency := time.Duration(getDuration) / time.Duration(getAll)
+ t.Logf("set=%d getHit=%d getAll=%d getMaxLatency=%v getAvgLatency=%v",
+ set, getHit, getAll, time.Duration(getMaxLatency), getAvglatency)
+
+ if getAvglatency > delay/3 {
+ t.Errorf("get avg latency > %v: got=%v", delay/3, getAvglatency)
+ }
+}
+
+func TestLRUCache_HitMiss(t *testing.T) {
cases := []struct {
key uint64
value string
@@ -36,36 +308,37 @@ func TestCache_HitMiss(t *testing.T) {
}
setfin := 0
- c := NewLRUCache(1000)
- ns := c.GetNamespace(0)
+ c := NewCache(NewLRU(1000))
for i, x := range cases {
- set(ns, x.key, x.value, len(x.value), func() {
+ set(c, 0, x.key, x.value, len(x.value), func() {
setfin++
}).Release()
for j, y := range cases {
- r, ok := ns.Get(y.key, nil)
+ h := c.Get(0, y.key, nil)
if j <= i {
// should hit
- if !ok {
+ if h == nil {
t.Errorf("case '%d' iteration '%d' is miss", i, j)
- } else if r.Value().(string) != y.value {
- t.Errorf("case '%d' iteration '%d' has invalid value got '%s', want '%s'", i, j, r.Value().(string), y.value)
+ } else {
+ if x := h.Value().(releaserFunc).value.(string); x != y.value {
+ t.Errorf("case '%d' iteration '%d' has invalid value got '%s', want '%s'", i, j, x, y.value)
+ }
}
} else {
// should miss
- if ok {
- t.Errorf("case '%d' iteration '%d' is hit , value '%s'", i, j, r.Value().(string))
+ if h != nil {
+ t.Errorf("case '%d' iteration '%d' is hit , value '%s'", i, j, h.Value().(releaserFunc).value.(string))
}
}
- if ok {
- r.Release()
+ if h != nil {
+ h.Release()
}
}
}
for i, x := range cases {
finalizerOk := false
- ns.Delete(x.key, func(exist bool) {
+ c.Delete(0, x.key, func() {
finalizerOk = true
})
@@ -74,22 +347,24 @@ func TestCache_HitMiss(t *testing.T) {
}
for j, y := range cases {
- r, ok := ns.Get(y.key, nil)
+ h := c.Get(0, y.key, nil)
if j > i {
// should hit
- if !ok {
+ if h == nil {
t.Errorf("case '%d' iteration '%d' is miss", i, j)
- } else if r.Value().(string) != y.value {
- t.Errorf("case '%d' iteration '%d' has invalid value got '%s', want '%s'", i, j, r.Value().(string), y.value)
+ } else {
+ if x := h.Value().(releaserFunc).value.(string); x != y.value {
+ t.Errorf("case '%d' iteration '%d' has invalid value got '%s', want '%s'", i, j, x, y.value)
+ }
}
} else {
// should miss
- if ok {
- t.Errorf("case '%d' iteration '%d' is hit, value '%s'", i, j, r.Value().(string))
+ if h != nil {
+ t.Errorf("case '%d' iteration '%d' is hit, value '%s'", i, j, h.Value().(releaserFunc).value.(string))
}
}
- if ok {
- r.Release()
+ if h != nil {
+ h.Release()
}
}
}
@@ -100,137 +375,180 @@ func TestCache_HitMiss(t *testing.T) {
}
func TestLRUCache_Eviction(t *testing.T) {
- c := NewLRUCache(12)
- ns := c.GetNamespace(0)
- o1 := set(ns, 1, 1, 1, nil)
- set(ns, 2, 2, 1, nil).Release()
- set(ns, 3, 3, 1, nil).Release()
- set(ns, 4, 4, 1, nil).Release()
- set(ns, 5, 5, 1, nil).Release()
- if r, ok := ns.Get(2, nil); ok { // 1,3,4,5,2
- r.Release()
- }
- set(ns, 9, 9, 10, nil).Release() // 5,2,9
-
- for _, x := range []uint64{9, 2, 5, 1} {
- r, ok := ns.Get(x, nil)
- if !ok {
- t.Errorf("miss for key '%d'", x)
+ c := NewCache(NewLRU(12))
+ o1 := set(c, 0, 1, 1, 1, nil)
+ set(c, 0, 2, 2, 1, nil).Release()
+ set(c, 0, 3, 3, 1, nil).Release()
+ set(c, 0, 4, 4, 1, nil).Release()
+ set(c, 0, 5, 5, 1, nil).Release()
+ if h := c.Get(0, 2, nil); h != nil { // 1,3,4,5,2
+ h.Release()
+ }
+ set(c, 0, 9, 9, 10, nil).Release() // 5,2,9
+
+ for _, key := range []uint64{9, 2, 5, 1} {
+ h := c.Get(0, key, nil)
+ if h == nil {
+ t.Errorf("miss for key '%d'", key)
} else {
- if r.Value().(int) != int(x) {
- t.Errorf("invalid value for key '%d' want '%d', got '%d'", x, x, r.Value().(int))
+ if x := h.Value().(int); x != int(key) {
+ t.Errorf("invalid value for key '%d' want '%d', got '%d'", key, key, x)
}
- r.Release()
+ h.Release()
}
}
o1.Release()
- for _, x := range []uint64{1, 2, 5} {
- r, ok := ns.Get(x, nil)
- if !ok {
- t.Errorf("miss for key '%d'", x)
+ for _, key := range []uint64{1, 2, 5} {
+ h := c.Get(0, key, nil)
+ if h == nil {
+ t.Errorf("miss for key '%d'", key)
} else {
- if r.Value().(int) != int(x) {
- t.Errorf("invalid value for key '%d' want '%d', got '%d'", x, x, r.Value().(int))
+ if x := h.Value().(int); x != int(key) {
+ t.Errorf("invalid value for key '%d' want '%d', got '%d'", key, key, x)
}
- r.Release()
+ h.Release()
}
}
- for _, x := range []uint64{3, 4, 9} {
- r, ok := ns.Get(x, nil)
- if ok {
- t.Errorf("hit for key '%d'", x)
- if r.Value().(int) != int(x) {
- t.Errorf("invalid value for key '%d' want '%d', got '%d'", x, x, r.Value().(int))
+ for _, key := range []uint64{3, 4, 9} {
+ h := c.Get(0, key, nil)
+ if h != nil {
+ t.Errorf("hit for key '%d'", key)
+ if x := h.Value().(int); x != int(key) {
+ t.Errorf("invalid value for key '%d' want '%d', got '%d'", key, key, x)
}
- r.Release()
+ h.Release()
}
}
}
-func TestLRUCache_SetGet(t *testing.T) {
- c := NewLRUCache(13)
- ns := c.GetNamespace(0)
- for i := 0; i < 200; i++ {
- n := uint64(rand.Intn(99999) % 20)
- set(ns, n, n, 1, nil).Release()
- if p, ok := ns.Get(n, nil); ok {
- if p.Value() == nil {
- t.Errorf("key '%d' contains nil value", n)
+func TestLRUCache_Evict(t *testing.T) {
+ c := NewCache(NewLRU(6))
+ set(c, 0, 1, 1, 1, nil).Release()
+ set(c, 0, 2, 2, 1, nil).Release()
+ set(c, 1, 1, 4, 1, nil).Release()
+ set(c, 1, 2, 5, 1, nil).Release()
+ set(c, 2, 1, 6, 1, nil).Release()
+ set(c, 2, 2, 7, 1, nil).Release()
+
+ for ns := 0; ns < 3; ns++ {
+ for key := 1; key < 3; key++ {
+ if h := c.Get(uint64(ns), uint64(key), nil); h != nil {
+ h.Release()
} else {
- got := p.Value().(uint64)
- if got != n {
- t.Errorf("invalid value for key '%d' want '%d', got '%d'", n, n, got)
- }
+ t.Errorf("Cache.Get on #%d.%d return nil", ns, key)
}
- p.Release()
- } else {
- t.Errorf("key '%d' doesn't exist", n)
}
}
-}
-func TestLRUCache_Purge(t *testing.T) {
- c := NewLRUCache(3)
- ns1 := c.GetNamespace(0)
- o1 := set(ns1, 1, 1, 1, nil)
- o2 := set(ns1, 2, 2, 1, nil)
- ns1.Purge(nil)
- set(ns1, 3, 3, 1, nil).Release()
- for _, x := range []uint64{1, 2, 3} {
- r, ok := ns1.Get(x, nil)
- if !ok {
- t.Errorf("miss for key '%d'", x)
- } else {
- if r.Value().(int) != int(x) {
- t.Errorf("invalid value for key '%d' want '%d', got '%d'", x, x, r.Value().(int))
+ if ok := c.Evict(0, 1); !ok {
+ t.Error("first Cache.Evict on #0.1 return false")
+ }
+ if ok := c.Evict(0, 1); ok {
+ t.Error("second Cache.Evict on #0.1 return true")
+ }
+ if h := c.Get(0, 1, nil); h != nil {
+ t.Errorf("Cache.Get on #0.1 return non-nil: %v", h.Value())
+ }
+
+ c.EvictNS(1)
+ if h := c.Get(1, 1, nil); h != nil {
+ t.Errorf("Cache.Get on #1.1 return non-nil: %v", h.Value())
+ }
+ if h := c.Get(1, 2, nil); h != nil {
+ t.Errorf("Cache.Get on #1.2 return non-nil: %v", h.Value())
+ }
+
+ c.EvictAll()
+ for ns := 0; ns < 3; ns++ {
+ for key := 1; key < 3; key++ {
+ if h := c.Get(uint64(ns), uint64(key), nil); h != nil {
+ t.Errorf("Cache.Get on #%d.%d return non-nil: %v", ns, key, h.Value())
}
- r.Release()
}
}
- o1.Release()
- o2.Release()
- for _, x := range []uint64{1, 2} {
- r, ok := ns1.Get(x, nil)
- if ok {
- t.Errorf("hit for key '%d'", x)
- if r.Value().(int) != int(x) {
- t.Errorf("invalid value for key '%d' want '%d', got '%d'", x, x, r.Value().(int))
- }
- r.Release()
+}
+
+func TestLRUCache_Delete(t *testing.T) {
+ delFuncCalled := 0
+ delFunc := func() {
+ delFuncCalled++
+ }
+
+ c := NewCache(NewLRU(2))
+ set(c, 0, 1, 1, 1, nil).Release()
+ set(c, 0, 2, 2, 1, nil).Release()
+
+ if ok := c.Delete(0, 1, delFunc); !ok {
+ t.Error("Cache.Delete on #1 return false")
+ }
+ if h := c.Get(0, 1, nil); h != nil {
+ t.Errorf("Cache.Get on #1 return non-nil: %v", h.Value())
+ }
+ if ok := c.Delete(0, 1, delFunc); ok {
+ t.Error("Cache.Delete on #1 return true")
+ }
+
+ h2 := c.Get(0, 2, nil)
+ if h2 == nil {
+ t.Error("Cache.Get on #2 return nil")
+ }
+ if ok := c.Delete(0, 2, delFunc); !ok {
+ t.Error("(1) Cache.Delete on #2 return false")
+ }
+ if ok := c.Delete(0, 2, delFunc); !ok {
+ t.Error("(2) Cache.Delete on #2 return false")
+ }
+
+ set(c, 0, 3, 3, 1, nil).Release()
+ set(c, 0, 4, 4, 1, nil).Release()
+ c.Get(0, 2, nil).Release()
+
+ for key := 2; key <= 4; key++ {
+ if h := c.Get(0, uint64(key), nil); h != nil {
+ h.Release()
+ } else {
+ t.Errorf("Cache.Get on #%d return nil", key)
}
}
-}
-func BenchmarkLRUCache_SetRelease(b *testing.B) {
- capacity := b.N / 100
- if capacity <= 0 {
- capacity = 10
+ h2.Release()
+ if h := c.Get(0, 2, nil); h != nil {
+ t.Errorf("Cache.Get on #2 return non-nil: %v", h.Value())
}
- c := NewLRUCache(capacity)
- ns := c.GetNamespace(0)
- b.ResetTimer()
- for i := uint64(0); i < uint64(b.N); i++ {
- set(ns, i, nil, 1, nil).Release()
+
+ if delFuncCalled != 4 {
+ t.Errorf("delFunc isn't called 4 times: got=%d", delFuncCalled)
}
}
-func BenchmarkLRUCache_SetReleaseTwice(b *testing.B) {
- capacity := b.N / 100
- if capacity <= 0 {
- capacity = 10
+func TestLRUCache_Close(t *testing.T) {
+ relFuncCalled := 0
+ relFunc := func() {
+ relFuncCalled++
+ }
+ delFuncCalled := 0
+ delFunc := func() {
+ delFuncCalled++
}
- c := NewLRUCache(capacity)
- ns := c.GetNamespace(0)
- b.ResetTimer()
- na := b.N / 2
- nb := b.N - na
+ c := NewCache(NewLRU(2))
+ set(c, 0, 1, 1, 1, relFunc).Release()
+ set(c, 0, 2, 2, 1, relFunc).Release()
- for i := uint64(0); i < uint64(na); i++ {
- set(ns, i, nil, 1, nil).Release()
+ h3 := set(c, 0, 3, 3, 1, relFunc)
+ if h3 == nil {
+ t.Error("Cache.Get on #3 return nil")
}
+ if ok := c.Delete(0, 3, delFunc); !ok {
+ t.Error("Cache.Delete on #3 return false")
+ }
+
+ c.Close()
- for i := uint64(0); i < uint64(nb); i++ {
- set(ns, i, nil, 1, nil).Release()
+ if relFuncCalled != 3 {
+ t.Errorf("relFunc isn't called 3 times: got=%d", relFuncCalled)
+ }
+ if delFuncCalled != 1 {
+ t.Errorf("delFunc isn't called 1 times: got=%d", delFuncCalled)
}
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/empty_cache.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/empty_cache.go
deleted file mode 100644
index 1fbf81459..000000000
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/empty_cache.go
+++ /dev/null
@@ -1,246 +0,0 @@
-// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com>
-// All rights reserved.
-//
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-package cache
-
-import (
- "sync"
- "sync/atomic"
-)
-
-type emptyCache struct {
- sync.Mutex
- table map[uint64]*emptyNS
-}
-
-// NewEmptyCache creates a new initialized empty cache.
-func NewEmptyCache() Cache {
- return &emptyCache{
- table: make(map[uint64]*emptyNS),
- }
-}
-
-func (c *emptyCache) GetNamespace(id uint64) Namespace {
- c.Lock()
- defer c.Unlock()
-
- if ns, ok := c.table[id]; ok {
- return ns
- }
-
- ns := &emptyNS{
- cache: c,
- id: id,
- table: make(map[uint64]*emptyNode),
- }
- c.table[id] = ns
- return ns
-}
-
-func (c *emptyCache) Purge(fin PurgeFin) {
- c.Lock()
- for _, ns := range c.table {
- ns.purgeNB(fin)
- }
- c.Unlock()
-}
-
-func (c *emptyCache) Zap(closed bool) {
- c.Lock()
- for _, ns := range c.table {
- ns.zapNB(closed)
- }
- c.table = make(map[uint64]*emptyNS)
- c.Unlock()
-}
-
-func (*emptyCache) SetCapacity(capacity int) {}
-
-type emptyNS struct {
- cache *emptyCache
- id uint64
- table map[uint64]*emptyNode
- state nsState
-}
-
-func (ns *emptyNS) Get(key uint64, setf SetFunc) (o Object, ok bool) {
- ns.cache.Lock()
-
- switch ns.state {
- case nsZapped:
- ns.cache.Unlock()
- if setf == nil {
- return
- }
-
- var value interface{}
- var fin func()
- ok, value, _, fin = setf()
- if ok {
- o = &fakeObject{
- value: value,
- fin: fin,
- }
- }
- return
- case nsClosed:
- ns.cache.Unlock()
- return
- }
-
- n, ok := ns.table[key]
- if ok {
- n.ref++
- } else {
- if setf == nil {
- ns.cache.Unlock()
- return
- }
-
- var value interface{}
- var fin func()
- ok, value, _, fin = setf()
- if !ok {
- ns.cache.Unlock()
- return
- }
-
- n = &emptyNode{
- ns: ns,
- key: key,
- value: value,
- setfin: fin,
- ref: 1,
- }
- ns.table[key] = n
- }
-
- ns.cache.Unlock()
- o = &emptyObject{node: n}
- return
-}
-
-func (ns *emptyNS) Delete(key uint64, fin DelFin) bool {
- ns.cache.Lock()
-
- if ns.state != nsEffective {
- ns.cache.Unlock()
- if fin != nil {
- fin(false)
- }
- return false
- }
-
- n, ok := ns.table[key]
- if !ok {
- ns.cache.Unlock()
- if fin != nil {
- fin(false)
- }
- return false
- }
- n.delfin = fin
- ns.cache.Unlock()
- return true
-}
-
-func (ns *emptyNS) purgeNB(fin PurgeFin) {
- if ns.state != nsEffective {
- return
- }
- for _, n := range ns.table {
- n.purgefin = fin
- }
-}
-
-func (ns *emptyNS) Purge(fin PurgeFin) {
- ns.cache.Lock()
- ns.purgeNB(fin)
- ns.cache.Unlock()
-}
-
-func (ns *emptyNS) zapNB(closed bool) {
- if ns.state != nsEffective {
- return
- }
- for _, n := range ns.table {
- n.execFin()
- }
- if closed {
- ns.state = nsClosed
- } else {
- ns.state = nsZapped
- }
- ns.table = nil
-}
-
-func (ns *emptyNS) Zap(closed bool) {
- ns.cache.Lock()
- ns.zapNB(closed)
- delete(ns.cache.table, ns.id)
- ns.cache.Unlock()
-}
-
-type emptyNode struct {
- ns *emptyNS
- key uint64
- value interface{}
- ref int
- setfin SetFin
- delfin DelFin
- purgefin PurgeFin
-}
-
-func (n *emptyNode) execFin() {
- if n.setfin != nil {
- n.setfin()
- n.setfin = nil
- }
- if n.purgefin != nil {
- n.purgefin(n.ns.id, n.key, n.delfin)
- n.delfin = nil
- n.purgefin = nil
- } else if n.delfin != nil {
- n.delfin(true)
- n.delfin = nil
- }
-}
-
-func (n *emptyNode) evict() {
- n.ns.cache.Lock()
- n.ref--
- if n.ref == 0 {
- if n.ns.state == nsEffective {
- // Remove elem.
- delete(n.ns.table, n.key)
- // Execute finalizer.
- n.execFin()
- }
- } else if n.ref < 0 {
- panic("leveldb/cache: emptyNode: negative node reference")
- }
- n.ns.cache.Unlock()
-}
-
-type emptyObject struct {
- node *emptyNode
- once uint32
-}
-
-func (o *emptyObject) Value() interface{} {
- if atomic.LoadUint32(&o.once) == 0 {
- return o.node.value
- }
- return nil
-}
-
-func (o *emptyObject) Release() {
- if !atomic.CompareAndSwapUint32(&o.once, 0, 1) {
- return
- }
- o.node.evict()
- o.node = nil
-}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/lru.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/lru.go
new file mode 100644
index 000000000..d9a84cde1
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/lru.go
@@ -0,0 +1,195 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package cache
+
+import (
+ "sync"
+ "unsafe"
+)
+
+// lruNode is the LRU's per-node record, stored in Node.CacheData. Records
+// that are not banned are linked into the circular recency list.
+type lruNode struct {
+ n *Node // the 'cache node' this record belongs to
+ h *Handle // handle keeping n referenced while it sits in the list
+ ban bool // set by Ban; a banned record is not promoted or evicted
+
+ next, prev *lruNode // list links; nil once removed
+}
+
+// insert links n into the list immediately after at.
+func (n *lruNode) insert(at *lruNode) {
+ succ := at.next
+ at.next, n.prev = n, at
+ n.next, succ.prev = succ, n
+}
+
+// remove unlinks n from its list and clears its links. It panics when n has
+// already been removed.
+func (n *lruNode) remove() {
+ if n.prev == nil {
+ panic("BUG: removing removed node")
+ }
+ n.prev.next = n.next
+ n.next.prev = n.prev
+ n.prev, n.next = nil, nil
+}
+
+// lru is a Cacher that keeps nodes referenced in least-recently-used order
+// until their combined size exceeds the capacity.
+type lru struct {
+ mu sync.Mutex // guards all fields below and the list links
+ capacity int // maximum combined size of listed nodes
+ used int // combined size of the nodes currently listed
+ recent lruNode // list sentinel: next is the most recent entry, prev the least recent
+}
+
+// reset empties the recency list by pointing the sentinel at itself and
+// zeroes the usage counter.
+func (r *lru) reset() {
+ sentinel := &r.recent
+ sentinel.next, sentinel.prev = sentinel, sentinel
+ r.used = 0
+}
+
+// Capacity returns the configured capacity.
+func (r *lru) Capacity() int {
+ r.mu.Lock()
+ capacity := r.capacity
+ r.mu.Unlock()
+ return capacity
+}
+
+// SetCapacity changes the capacity and evicts entries from the
+// least-recently-used end of the list until the usage fits again.
+func (r *lru) SetCapacity(capacity int) {
+ var evicted []*lruNode
+
+ r.mu.Lock()
+ r.capacity = capacity
+ for r.used > r.capacity {
+ rn := r.recent.prev
+ if rn == nil {
+ panic("BUG: invalid LRU used or capacity counter")
+ }
+ rn.remove()
+ rn.n.CacheData = nil
+ r.used -= rn.n.Size()
+ evicted = append(evicted, rn)
+ }
+ r.mu.Unlock()
+
+ // The handles are released only after r.mu has been unlocked.
+ for _, rn := range evicted {
+ rn.h.Release()
+ }
+}
+
+// Promote marks n as most recently used. A node the LRU has no record of is
+// inserted (provided its size fits the capacity at all), after which entries
+// are evicted from the least-recently-used end until the usage fits. A node
+// that already has a record is moved to the front unless it is banned.
+func (r *lru) Promote(n *Node) {
+ var evicted []*lruNode
+
+ r.mu.Lock()
+ if n.CacheData == nil {
+ if n.Size() <= r.capacity {
+ // The record takes its own handle, keeping n referenced while listed.
+ rn := &lruNode{n: n, h: n.GetHandle()}
+ rn.insert(&r.recent)
+ n.CacheData = unsafe.Pointer(rn)
+ r.used += n.Size()
+
+ for r.used > r.capacity {
+ rn := r.recent.prev
+ if rn == nil {
+ panic("BUG: invalid LRU used or capacity counter")
+ }
+ rn.remove()
+ rn.n.CacheData = nil
+ r.used -= rn.n.Size()
+ evicted = append(evicted, rn)
+ }
+ }
+ } else {
+ rn := (*lruNode)(n.CacheData)
+ if !rn.ban {
+ // Move to the front of the list.
+ rn.remove()
+ rn.insert(&r.recent)
+ }
+ }
+ r.mu.Unlock()
+
+ // The handles are released only after r.mu has been unlocked.
+ for _, rn := range evicted {
+ rn.h.Release()
+ }
+}
+
+// Ban marks n as banned. A node without a record gets a banned record that
+// is never linked into the list; a listed node is unlinked, its size is
+// subtracted from the usage and its handle is released. Banning an already
+// banned node is a no-op.
+func (r *lru) Ban(n *Node) {
+ r.mu.Lock()
+ if n.CacheData == nil {
+ n.CacheData = unsafe.Pointer(&lruNode{n: n, ban: true})
+ } else {
+ rn := (*lruNode)(n.CacheData)
+ if !rn.ban {
+ rn.remove()
+ rn.ban = true
+ r.used -= rn.n.Size()
+ r.mu.Unlock()
+
+ // Released after unlocking r.mu.
+ rn.h.Release()
+ rn.h = nil
+ return
+ }
+ }
+ r.mu.Unlock()
+}
+
+// Evict drops n from the LRU: its record is unlinked from the recency list,
+// its size is subtracted from the usage and the LRU's handle on it is
+// released. Nodes without a record and banned nodes are left untouched.
+func (r *lru) Evict(n *Node) {
+ r.mu.Lock()
+ rn := (*lruNode)(n.CacheData)
+ if rn == nil || rn.ban {
+ r.mu.Unlock()
+ return
+ }
+ // Unlink the record and give its size back. Without this the stale
+ // record stays in the list, keeps counting against the capacity and,
+ // when eventually evicted, clears the CacheData of a node that may
+ // have been promoted again in the meantime.
+ rn.remove()
+ r.used -= n.Size()
+ n.CacheData = nil
+ r.mu.Unlock()
+
+ // Released after unlocking r.mu.
+ rn.h.Release()
+}
+
+// EvictNS evicts every listed node that belongs to namespace ns, walking the
+// list from the least-recently-used end.
+func (r *lru) EvictNS(ns uint64) {
+ var evicted []*lruNode
+
+ r.mu.Lock()
+ for e := r.recent.prev; e != &r.recent; {
+ rn := e
+ // Advance before a possible remove, which clears rn's links.
+ e = e.prev
+ if rn.n.NS() == ns {
+ rn.remove()
+ rn.n.CacheData = nil
+ r.used -= rn.n.Size()
+ evicted = append(evicted, rn)
+ }
+ }
+ r.mu.Unlock()
+
+ // The handles are released only after r.mu has been unlocked.
+ for _, rn := range evicted {
+ rn.h.Release()
+ }
+}
+
+// EvictAll evicts every listed node. Under the lock it clears each node's
+// CacheData and resets the list; the handles are then released after
+// unlocking.
+func (r *lru) EvictAll() {
+ r.mu.Lock()
+ back := r.recent.prev
+ for rn := back; rn != &r.recent; rn = rn.prev {
+ rn.n.CacheData = nil
+ }
+ r.reset()
+ r.mu.Unlock()
+
+ // reset only rewires the sentinel, so the old chain can still be walked
+ // through the records' own prev links until it reaches the sentinel.
+ for rn := back; rn != &r.recent; rn = rn.prev {
+ rn.h.Release()
+ }
+}
+
+// Close does nothing and always returns nil.
+func (r *lru) Close() error {
+ return nil
+}
+
+// NewLRU creates a new LRU Cacher with the given capacity and an empty
+// recency list.
+func NewLRU(capacity int) Cacher {
+ cacher := new(lru)
+ cacher.capacity = capacity
+ cacher.reset()
+ return cacher
+}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/lru_cache.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/lru_cache.go
deleted file mode 100644
index 3c98e076b..000000000
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/lru_cache.go
+++ /dev/null
@@ -1,354 +0,0 @@
-// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
-// All rights reserved.
-//
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-package cache
-
-import (
- "sync"
- "sync/atomic"
-)
-
-// lruCache represent a LRU cache state.
-type lruCache struct {
- sync.Mutex
-
- recent lruNode
- table map[uint64]*lruNs
- capacity int
- size int
-}
-
-// NewLRUCache creates a new initialized LRU cache with the given capacity.
-func NewLRUCache(capacity int) Cache {
- c := &lruCache{
- table: make(map[uint64]*lruNs),
- capacity: capacity,
- }
- c.recent.rNext = &c.recent
- c.recent.rPrev = &c.recent
- return c
-}
-
-// SetCapacity set cache capacity.
-func (c *lruCache) SetCapacity(capacity int) {
- c.Lock()
- c.capacity = capacity
- c.evict()
- c.Unlock()
-}
-
-// GetNamespace return namespace object for given id.
-func (c *lruCache) GetNamespace(id uint64) Namespace {
- c.Lock()
- defer c.Unlock()
-
- if p, ok := c.table[id]; ok {
- return p
- }
-
- p := &lruNs{
- lru: c,
- id: id,
- table: make(map[uint64]*lruNode),
- }
- c.table[id] = p
- return p
-}
-
-// Purge purge entire cache.
-func (c *lruCache) Purge(fin PurgeFin) {
- c.Lock()
- for _, ns := range c.table {
- ns.purgeNB(fin)
- }
- c.Unlock()
-}
-
-func (c *lruCache) Zap(closed bool) {
- c.Lock()
- for _, ns := range c.table {
- ns.zapNB(closed)
- }
- c.table = make(map[uint64]*lruNs)
- c.Unlock()
-}
-
-func (c *lruCache) evict() {
- top := &c.recent
- for n := c.recent.rPrev; c.size > c.capacity && n != top; {
- n.state = nodeEvicted
- n.rRemove()
- n.evictNB()
- c.size -= n.charge
- n = c.recent.rPrev
- }
-}
-
-type lruNs struct {
- lru *lruCache
- id uint64
- table map[uint64]*lruNode
- state nsState
-}
-
-func (ns *lruNs) Get(key uint64, setf SetFunc) (o Object, ok bool) {
- lru := ns.lru
- lru.Lock()
-
- switch ns.state {
- case nsZapped:
- lru.Unlock()
- if setf == nil {
- return
- }
-
- var value interface{}
- var fin func()
- ok, value, _, fin = setf()
- if ok {
- o = &fakeObject{
- value: value,
- fin: fin,
- }
- }
- return
- case nsClosed:
- lru.Unlock()
- return
- }
-
- n, ok := ns.table[key]
- if ok {
- switch n.state {
- case nodeEvicted:
- // Insert to recent list.
- n.state = nodeEffective
- n.ref++
- lru.size += n.charge
- lru.evict()
- fallthrough
- case nodeEffective:
- // Bump to front
- n.rRemove()
- n.rInsert(&lru.recent)
- }
- n.ref++
- } else {
- if setf == nil {
- lru.Unlock()
- return
- }
-
- var value interface{}
- var charge int
- var fin func()
- ok, value, charge, fin = setf()
- if !ok {
- lru.Unlock()
- return
- }
-
- n = &lruNode{
- ns: ns,
- key: key,
- value: value,
- charge: charge,
- setfin: fin,
- ref: 2,
- }
- ns.table[key] = n
- n.rInsert(&lru.recent)
-
- lru.size += charge
- lru.evict()
- }
-
- lru.Unlock()
- o = &lruObject{node: n}
- return
-}
-
-func (ns *lruNs) Delete(key uint64, fin DelFin) bool {
- lru := ns.lru
- lru.Lock()
-
- if ns.state != nsEffective {
- lru.Unlock()
- if fin != nil {
- fin(false)
- }
- return false
- }
-
- n, ok := ns.table[key]
- if !ok {
- lru.Unlock()
- if fin != nil {
- fin(false)
- }
- return false
- }
-
- n.delfin = fin
- switch n.state {
- case nodeRemoved:
- lru.Unlock()
- return false
- case nodeEffective:
- lru.size -= n.charge
- n.rRemove()
- n.evictNB()
- }
- n.state = nodeRemoved
-
- lru.Unlock()
- return true
-}
-
-func (ns *lruNs) purgeNB(fin PurgeFin) {
- lru := ns.lru
- if ns.state != nsEffective {
- return
- }
-
- for _, n := range ns.table {
- n.purgefin = fin
- if n.state == nodeEffective {
- lru.size -= n.charge
- n.rRemove()
- n.evictNB()
- }
- n.state = nodeRemoved
- }
-}
-
-func (ns *lruNs) Purge(fin PurgeFin) {
- ns.lru.Lock()
- ns.purgeNB(fin)
- ns.lru.Unlock()
-}
-
-func (ns *lruNs) zapNB(closed bool) {
- lru := ns.lru
- if ns.state != nsEffective {
- return
- }
-
- if closed {
- ns.state = nsClosed
- } else {
- ns.state = nsZapped
- }
- for _, n := range ns.table {
- if n.state == nodeEffective {
- lru.size -= n.charge
- n.rRemove()
- }
- n.state = nodeRemoved
- n.execFin()
- }
- ns.table = nil
-}
-
-func (ns *lruNs) Zap(closed bool) {
- ns.lru.Lock()
- ns.zapNB(closed)
- delete(ns.lru.table, ns.id)
- ns.lru.Unlock()
-}
-
-type lruNode struct {
- ns *lruNs
-
- rNext, rPrev *lruNode
-
- key uint64
- value interface{}
- charge int
- ref int
- state nodeState
- setfin SetFin
- delfin DelFin
- purgefin PurgeFin
-}
-
-func (n *lruNode) rInsert(at *lruNode) {
- x := at.rNext
- at.rNext = n
- n.rPrev = at
- n.rNext = x
- x.rPrev = n
-}
-
-func (n *lruNode) rRemove() bool {
- // only remove if not already removed
- if n.rPrev == nil {
- return false
- }
-
- n.rPrev.rNext = n.rNext
- n.rNext.rPrev = n.rPrev
- n.rPrev = nil
- n.rNext = nil
-
- return true
-}
-
-func (n *lruNode) execFin() {
- if n.setfin != nil {
- n.setfin()
- n.setfin = nil
- }
- if n.purgefin != nil {
- n.purgefin(n.ns.id, n.key, n.delfin)
- n.delfin = nil
- n.purgefin = nil
- } else if n.delfin != nil {
- n.delfin(true)
- n.delfin = nil
- }
-}
-
-func (n *lruNode) evictNB() {
- n.ref--
- if n.ref == 0 {
- if n.ns.state == nsEffective {
- // remove elem
- delete(n.ns.table, n.key)
- // execute finalizer
- n.execFin()
- }
- } else if n.ref < 0 {
- panic("leveldb/cache: lruCache: negative node reference")
- }
-}
-
-func (n *lruNode) evict() {
- n.ns.lru.Lock()
- n.evictNB()
- n.ns.lru.Unlock()
-}
-
-type lruObject struct {
- node *lruNode
- once uint32
-}
-
-func (o *lruObject) Value() interface{} {
- if atomic.LoadUint32(&o.once) == 0 {
- return o.node.value
- }
- return nil
-}
-
-func (o *lruObject) Release() {
- if !atomic.CompareAndSwapUint32(&o.once, 0, 1) {
- return
- }
-
- o.node.evict()
- o.node = nil
-}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/config.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/config.go
deleted file mode 100644
index 511058897..000000000
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/config.go
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
-// All rights reserved.
-//
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-package leveldb
-
-const (
- kNumLevels = 7
-
- // Level-0 compaction is started when we hit this many files.
- kL0_CompactionTrigger float64 = 4
-
- // Soft limit on number of level-0 files. We slow down writes at this point.
- kL0_SlowdownWritesTrigger = 8
-
- // Maximum number of level-0 files. We stop writes at this point.
- kL0_StopWritesTrigger = 12
-
- // Maximum level to which a new compacted memdb is pushed if it
- // does not create overlap. We try to push to level 2 to avoid the
- // relatively expensive level 0=>1 compactions and to avoid some
- // expensive manifest file operations. We do not push all the way to
- // the largest level since that can generate a lot of wasted disk
- // space if the same key space is being repeatedly overwritten.
- kMaxMemCompactLevel = 2
-
- // Maximum size of a table.
- kMaxTableSize = 2 * 1048576
-
- // Maximum bytes of overlaps in grandparent (i.e., level+2) before we
- // stop building a single file in a level->level+1 compaction.
- kMaxGrandParentOverlapBytes = 10 * kMaxTableSize
-
- // Maximum number of bytes in all compacted files. We avoid expanding
- // the lower level file set of a compaction if it would make the
- // total compaction cover more than this many bytes.
- kExpCompactionMaxBytes = 25 * kMaxTableSize
-)
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/corrupt_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/corrupt_test.go
index a036e0893..a351874ed 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/corrupt_test.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/corrupt_test.go
@@ -9,13 +9,12 @@ package leveldb
import (
"bytes"
"fmt"
+ "github.com/syndtr/goleveldb/leveldb/filter"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
"io"
"math/rand"
"testing"
-
- "github.com/syndtr/goleveldb/leveldb/cache"
- "github.com/syndtr/goleveldb/leveldb/opt"
- "github.com/syndtr/goleveldb/leveldb/storage"
)
const ctValSize = 1000
@@ -32,8 +31,8 @@ func newDbCorruptHarnessWopt(t *testing.T, o *opt.Options) *dbCorruptHarness {
func newDbCorruptHarness(t *testing.T) *dbCorruptHarness {
return newDbCorruptHarnessWopt(t, &opt.Options{
- BlockCache: cache.NewLRUCache(100),
- Strict: opt.StrictJournalChecksum,
+ BlockCacheCapacity: 100,
+ Strict: opt.StrictJournalChecksum,
})
}
@@ -96,21 +95,22 @@ func (h *dbCorruptHarness) deleteRand(n, max int, rnd *rand.Rand) {
}
}
-func (h *dbCorruptHarness) corrupt(ft storage.FileType, offset, n int) {
+func (h *dbCorruptHarness) corrupt(ft storage.FileType, fi, offset, n int) {
p := &h.dbHarness
t := p.t
- var file storage.File
ff, _ := p.stor.GetFiles(ft)
- for _, f := range ff {
- if file == nil || f.Num() > file.Num() {
- file = f
- }
+ sff := files(ff)
+ sff.sort()
+ if fi < 0 {
+ fi = len(sff) - 1
}
- if file == nil {
- t.Fatalf("no such file with type %q", ft)
+ if fi >= len(sff) {
+ t.Fatalf("no such file with type %q with index %d", ft, fi)
}
+ file := sff[fi]
+
r, err := file.Open()
if err != nil {
t.Fatal("cannot open file: ", err)
@@ -225,8 +225,8 @@ func TestCorruptDB_Journal(t *testing.T) {
h.build(100)
h.check(100, 100)
h.closeDB()
- h.corrupt(storage.TypeJournal, 19, 1)
- h.corrupt(storage.TypeJournal, 32*1024+1000, 1)
+ h.corrupt(storage.TypeJournal, -1, 19, 1)
+ h.corrupt(storage.TypeJournal, -1, 32*1024+1000, 1)
h.openDB()
h.check(36, 36)
@@ -242,7 +242,7 @@ func TestCorruptDB_Table(t *testing.T) {
h.compactRangeAt(0, "", "")
h.compactRangeAt(1, "", "")
h.closeDB()
- h.corrupt(storage.TypeTable, 100, 1)
+ h.corrupt(storage.TypeTable, -1, 100, 1)
h.openDB()
h.check(99, 99)
@@ -256,7 +256,7 @@ func TestCorruptDB_TableIndex(t *testing.T) {
h.build(10000)
h.compactMem()
h.closeDB()
- h.corrupt(storage.TypeTable, -2000, 500)
+ h.corrupt(storage.TypeTable, -1, -2000, 500)
h.openDB()
h.check(5000, 9999)
@@ -267,9 +267,9 @@ func TestCorruptDB_TableIndex(t *testing.T) {
func TestCorruptDB_MissingManifest(t *testing.T) {
rnd := rand.New(rand.NewSource(0x0badda7a))
h := newDbCorruptHarnessWopt(t, &opt.Options{
- BlockCache: cache.NewLRUCache(100),
- Strict: opt.StrictJournalChecksum,
- WriteBuffer: 1000 * 60,
+ BlockCacheCapacity: 100,
+ Strict: opt.StrictJournalChecksum,
+ WriteBuffer: 1000 * 60,
})
h.build(1000)
@@ -355,7 +355,7 @@ func TestCorruptDB_CorruptedManifest(t *testing.T) {
h.compactMem()
h.compactRange("", "")
h.closeDB()
- h.corrupt(storage.TypeManifest, 0, 1000)
+ h.corrupt(storage.TypeManifest, -1, 0, 1000)
h.openAssert(false)
h.recover()
@@ -370,7 +370,7 @@ func TestCorruptDB_CompactionInputError(t *testing.T) {
h.build(10)
h.compactMem()
h.closeDB()
- h.corrupt(storage.TypeTable, 100, 1)
+ h.corrupt(storage.TypeTable, -1, 100, 1)
h.openDB()
h.check(9, 9)
@@ -387,7 +387,7 @@ func TestCorruptDB_UnrelatedKeys(t *testing.T) {
h.build(10)
h.compactMem()
h.closeDB()
- h.corrupt(storage.TypeTable, 100, 1)
+ h.corrupt(storage.TypeTable, -1, 100, 1)
h.openDB()
h.put(string(tkey(1000)), string(tval(1000, ctValSize)))
@@ -470,3 +470,31 @@ func TestCorruptDB_MissingTableFiles(t *testing.T) {
h.close()
}
+
+// TestCorruptDB_RecoverTable corrupts several table files of a closed DB,
+// runs recovery, and verifies that the sequence number is preserved and that
+// the expected number of entries is still readable.
+func TestCorruptDB_RecoverTable(t *testing.T) {
+ h := newDbCorruptHarnessWopt(t, &opt.Options{
+ WriteBuffer: 112 * opt.KiB,
+ CompactionTableSize: 90 * opt.KiB,
+ Filter: filter.NewBloomFilter(10),
+ })
+
+ h.build(1000)
+ h.compactMem()
+ h.compactRangeAt(0, "", "")
+ h.compactRangeAt(1, "", "")
+ // Remember the sequence number so it can be compared after recovery.
+ seq := h.db.seq
+ h.closeDB()
+ // The second argument selects the table file by its index in sorted
+ // order; -1 selects the last file.
+ h.corrupt(storage.TypeTable, 0, 1000, 1)
+ h.corrupt(storage.TypeTable, 3, 10000, 1)
+ // Corrupted filter shouldn't affect recovery.
+ h.corrupt(storage.TypeTable, 3, 113888, 10)
+ h.corrupt(storage.TypeTable, -1, 20000, 1)
+
+ h.recover()
+ // Recovery must leave the sequence number unchanged.
+ if h.db.seq != seq {
+ t.Errorf("invalid seq, want=%d got=%d", seq, h.db.seq)
+ }
+ // NOTE(review): presumably the min/max number of entries expected to
+ // survive - confirm against the harness's check method.
+ h.check(985, 985)
+
+ h.close()
+}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db.go
index 8e975dc63..323353b2a 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db.go
@@ -7,15 +7,17 @@
package leveldb
import (
- "errors"
+ "container/list"
"fmt"
"io"
"os"
"runtime"
"strings"
"sync"
+ "sync/atomic"
"time"
+ "github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/journal"
"github.com/syndtr/goleveldb/leveldb/memdb"
@@ -30,41 +32,46 @@ type DB struct {
// Need 64-bit alignment.
seq uint64
+ // Session.
s *session
- // MemDB
+ // MemDB.
memMu sync.RWMutex
- mem *memdb.DB
- frozenMem *memdb.DB
+ memPool chan *memdb.DB
+ mem, frozenMem *memDB
journal *journal.Writer
journalWriter storage.Writer
journalFile storage.File
frozenJournalFile storage.File
frozenSeq uint64
- // Snapshot
+ // Snapshot.
snapsMu sync.Mutex
- snapsRoot snapshotElement
+ snapsList *list.List
- // Write
+ // Stats.
+ aliveSnaps, aliveIters int32
+
+ // Write.
writeC chan *Batch
writeMergedC chan bool
writeLockC chan struct{}
writeAckC chan error
+ writeDelay time.Duration
+ writeDelayN int
journalC chan *Batch
journalAckC chan error
- // Compaction
- tcompCmdC chan cCmd
- tcompPauseC chan chan<- struct{}
- tcompTriggerC chan struct{}
- mcompCmdC chan cCmd
- mcompTriggerC chan struct{}
- compErrC chan error
- compErrSetC chan error
- compStats [kNumLevels]cStats
-
- // Close
+ // Compaction.
+ tcompCmdC chan cCmd
+ tcompPauseC chan chan<- struct{}
+ mcompCmdC chan cCmd
+ compErrC chan error
+ compPerErrC chan error
+ compErrSetC chan error
+ compStats []cStats
+
+ // Close.
closeW sync.WaitGroup
closeC chan struct{}
closed uint32
@@ -77,7 +84,11 @@ func openDB(s *session) (*DB, error) {
db := &DB{
s: s,
// Initial sequence
- seq: s.stSeq,
+ seq: s.stSeqNum,
+ // MemDB
+ memPool: make(chan *memdb.DB, 1),
+ // Snapshot
+ snapsList: list.New(),
// Write
writeC: make(chan *Batch),
writeMergedC: make(chan bool),
@@ -86,17 +97,16 @@ func openDB(s *session) (*DB, error) {
journalC: make(chan *Batch),
journalAckC: make(chan error),
// Compaction
- tcompCmdC: make(chan cCmd),
- tcompPauseC: make(chan chan<- struct{}),
- tcompTriggerC: make(chan struct{}, 1),
- mcompCmdC: make(chan cCmd),
- mcompTriggerC: make(chan struct{}, 1),
- compErrC: make(chan error),
- compErrSetC: make(chan error),
+ tcompCmdC: make(chan cCmd),
+ tcompPauseC: make(chan chan<- struct{}),
+ mcompCmdC: make(chan cCmd),
+ compErrC: make(chan error),
+ compPerErrC: make(chan error),
+ compErrSetC: make(chan error),
+ compStats: make([]cStats, s.o.GetNumLevel()),
// Close
closeC: make(chan struct{}),
}
- db.initSnapshot()
if err := db.recoverJournal(); err != nil {
return nil, err
@@ -112,8 +122,9 @@ func openDB(s *session) (*DB, error) {
return nil, err
}
- // Don't include compaction error goroutine into wait group.
+ // Doesn't need to be included in the wait group.
go db.compactionError()
+ go db.mpoolDrain()
db.closeW.Add(3)
go db.tCompaction()
@@ -135,9 +146,10 @@ func openDB(s *session) (*DB, error) {
// detected in the DB. Corrupted DB can be recovered with Recover
// function.
//
+// The returned DB instance is goroutine-safe.
// The DB must be closed after use, by calling Close method.
-func Open(p storage.Storage, o *opt.Options) (db *DB, err error) {
- s, err := newSession(p, o)
+func Open(stor storage.Storage, o *opt.Options) (db *DB, err error) {
+ s, err := newSession(stor, o)
if err != nil {
return
}
@@ -177,6 +189,7 @@ func Open(p storage.Storage, o *opt.Options) (db *DB, err error) {
// detected in the DB. Corrupted DB can be recovered with Recover
// function.
//
+// The returned DB instance is goroutine-safe.
// The DB must be closed after use, by calling Close method.
func OpenFile(path string, o *opt.Options) (db *DB, err error) {
stor, err := storage.OpenFile(path)
@@ -197,9 +210,10 @@ func OpenFile(path string, o *opt.Options) (db *DB, err error) {
// The DB must already exist or it will returns an error.
// Also, Recover will ignore ErrorIfMissing and ErrorIfExist options.
//
+// The returned DB instance is goroutine-safe.
// The DB must be closed after use, by calling Close method.
-func Recover(p storage.Storage, o *opt.Options) (db *DB, err error) {
- s, err := newSession(p, o)
+func Recover(stor storage.Storage, o *opt.Options) (db *DB, err error) {
+ s, err := newSession(stor, o)
if err != nil {
return
}
@@ -225,6 +239,7 @@ func Recover(p storage.Storage, o *opt.Options) (db *DB, err error) {
// RecoverFile uses standard file-system backed storage implementation as described
// in the leveldb/storage package.
//
+// The returned DB instance is goroutine-safe.
// The DB must be closed after use, by calling Close method.
func RecoverFile(path string, o *opt.Options) (db *DB, err error) {
stor, err := storage.OpenFile(path)
@@ -241,16 +256,28 @@ func RecoverFile(path string, o *opt.Options) (db *DB, err error) {
}
func recoverTable(s *session, o *opt.Options) error {
- ff0, err := s.getFiles(storage.TypeTable)
+ o = dupOptions(o)
+ // Mask StrictReader and let StrictRecovery do its job.
+ o.Strict &= ^opt.StrictReader
+
+ // Get all tables and sort it by file number.
+ tableFiles_, err := s.getFiles(storage.TypeTable)
if err != nil {
return err
}
- ff1 := files(ff0)
- ff1.sort()
+ tableFiles := files(tableFiles_)
+ tableFiles.sort()
- var mSeq uint64
- var good, corrupted int
- rec := new(sessionRecord)
+ var (
+ maxSeq uint64
+ recoveredKey, goodKey, corruptedKey, corruptedBlock, droppedTable int
+
+ // We will drop corrupted table.
+ strict = o.GetStrict(opt.StrictRecovery)
+
+ rec = &sessionRecord{numLevel: o.GetNumLevel()}
+ bpool = util.NewBufferPool(o.GetBlockSize() + 5)
+ )
buildTable := func(iter iterator.Iterator) (tmp storage.File, size int64, err error) {
tmp = s.newTemp()
writer, err := tmp.Create()
@@ -264,8 +291,9 @@ func recoverTable(s *session, o *opt.Options) error {
tmp = nil
}
}()
+
+ // Copy entries.
tw := table.NewWriter(writer, o)
- // Copy records.
for iter.Next() {
key := iter.Key()
if validIkey(key) {
@@ -296,45 +324,73 @@ func recoverTable(s *session, o *opt.Options) error {
if err != nil {
return err
}
- defer reader.Close()
+ var closed bool
+ defer func() {
+ if !closed {
+ reader.Close()
+ }
+ }()
+
// Get file size.
size, err := reader.Seek(0, 2)
if err != nil {
return err
}
- var tSeq uint64
- var tgood, tcorrupted, blockerr int
- var min, max []byte
- tr := table.NewReader(reader, size, nil, o)
+
+ var (
+ tSeq uint64
+ tgoodKey, tcorruptedKey, tcorruptedBlock int
+ imin, imax []byte
+ )
+ tr, err := table.NewReader(reader, size, storage.NewFileInfo(file), nil, bpool, o)
+ if err != nil {
+ return err
+ }
iter := tr.NewIterator(nil, nil)
- iter.(iterator.ErrorCallbackSetter).SetErrorCallback(func(err error) {
- s.logf("table@recovery found error @%d %q", file.Num(), err)
- blockerr++
- })
+ if itererr, ok := iter.(iterator.ErrorCallbackSetter); ok {
+ itererr.SetErrorCallback(func(err error) {
+ if errors.IsCorrupted(err) {
+ s.logf("table@recovery block corruption @%d %q", file.Num(), err)
+ tcorruptedBlock++
+ }
+ })
+ }
+
// Scan the table.
for iter.Next() {
key := iter.Key()
- _, seq, _, ok := parseIkey(key)
- if !ok {
- tcorrupted++
+ _, seq, _, kerr := parseIkey(key)
+ if kerr != nil {
+ tcorruptedKey++
continue
}
- tgood++
+ tgoodKey++
if seq > tSeq {
tSeq = seq
}
- if min == nil {
- min = append([]byte{}, key...)
+ if imin == nil {
+ imin = append([]byte{}, key...)
}
- max = append(max[:0], key...)
+ imax = append(imax[:0], key...)
}
if err := iter.Error(); err != nil {
iter.Release()
return err
}
iter.Release()
- if tgood > 0 {
- if tcorrupted > 0 || blockerr > 0 {
+
+ goodKey += tgoodKey
+ corruptedKey += tcorruptedKey
+ corruptedBlock += tcorruptedBlock
+
+ if strict && (tcorruptedKey > 0 || tcorruptedBlock > 0) {
+ droppedTable++
+ s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", file.Num(), tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
+ return nil
+ }
+
+ if tgoodKey > 0 {
+ if tcorruptedKey > 0 || tcorruptedBlock > 0 {
// Rebuild the table.
s.logf("table@recovery rebuilding @%d", file.Num())
iter := tr.NewIterator(nil, nil)
@@ -343,62 +399,77 @@ func recoverTable(s *session, o *opt.Options) error {
if err != nil {
return err
}
+ closed = true
reader.Close()
if err := file.Replace(tmp); err != nil {
return err
}
size = newSize
}
- if tSeq > mSeq {
- mSeq = tSeq
+ if tSeq > maxSeq {
+ maxSeq = tSeq
}
+ recoveredKey += tgoodKey
// Add table to level 0.
- rec.addTable(0, file.Num(), uint64(size), min, max)
- s.logf("table@recovery recovered @%d N·%d C·%d B·%d S·%d Q·%d", file.Num(), tgood, tcorrupted, blockerr, size, tSeq)
+ rec.addTable(0, file.Num(), uint64(size), imin, imax)
+ s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", file.Num(), tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
} else {
- s.logf("table@recovery unrecoverable @%d C·%d B·%d S·%d", file.Num(), tcorrupted, blockerr, size)
+ droppedTable++
+ s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", file.Num(), tcorruptedKey, tcorruptedBlock, size)
}
- good += tgood
- corrupted += tcorrupted
-
return nil
}
+
// Recover all tables.
- if len(ff1) > 0 {
- s.logf("table@recovery F·%d", len(ff1))
- s.markFileNum(ff1[len(ff1)-1].Num())
- for _, file := range ff1 {
+ if len(tableFiles) > 0 {
+ s.logf("table@recovery F·%d", len(tableFiles))
+
+ // Mark file number as used.
+ s.markFileNum(tableFiles[len(tableFiles)-1].Num())
+
+ for _, file := range tableFiles {
if err := recoverTable(file); err != nil {
return err
}
}
- s.logf("table@recovery recovered F·%d N·%d C·%d Q·%d", len(ff1), good, corrupted, mSeq)
+
+ s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(tableFiles), recoveredKey, goodKey, corruptedKey, maxSeq)
}
+
// Set sequence number.
- rec.setSeq(mSeq + 1)
+ rec.setSeqNum(maxSeq)
+
// Create new manifest.
if err := s.create(); err != nil {
return err
}
+
// Commit.
return s.commit(rec)
}
-func (d *DB) recoverJournal() error {
- s := d.s
-
- ff0, err := s.getFiles(storage.TypeJournal)
+func (db *DB) recoverJournal() error {
+ // Get all journals and sort them by file number.
+ journalFiles_, err := db.s.getFiles(storage.TypeJournal)
if err != nil {
return err
}
- ff1 := files(ff0)
- ff1.sort()
- ff2 := make([]storage.File, 0, len(ff1))
- for _, file := range ff1 {
- if file.Num() >= s.stJournalNum || file.Num() == s.stPrevJournalNum {
- s.markFileNum(file.Num())
- ff2 = append(ff2, file)
+ journalFiles := files(journalFiles_)
+ journalFiles.sort()
+
+ // Discard older journal.
+ prev := -1
+ for i, file := range journalFiles {
+ if file.Num() >= db.s.stJournalNum {
+ if prev >= 0 {
+ i--
+ journalFiles[i] = journalFiles[prev]
+ }
+ journalFiles = journalFiles[i:]
+ break
+ } else if file.Num() == db.s.stPrevJournalNum {
+ prev = i
}
}
@@ -406,38 +477,43 @@ func (d *DB) recoverJournal() error {
var of storage.File
var mem *memdb.DB
batch := new(Batch)
- cm := newCMem(s)
+ cm := newCMem(db.s)
buf := new(util.Buffer)
// Options.
- strict := s.o.GetStrict(opt.StrictJournal)
- checksum := s.o.GetStrict(opt.StrictJournalChecksum)
- writeBuffer := s.o.GetWriteBuffer()
+ strict := db.s.o.GetStrict(opt.StrictJournal)
+ checksum := db.s.o.GetStrict(opt.StrictJournalChecksum)
+ writeBuffer := db.s.o.GetWriteBuffer()
recoverJournal := func(file storage.File) error {
- s.logf("journal@recovery recovering @%d", file.Num())
+ db.logf("journal@recovery recovering @%d", file.Num())
reader, err := file.Open()
if err != nil {
return err
}
defer reader.Close()
+
+ // Create/reset journal reader instance.
if jr == nil {
- jr = journal.NewReader(reader, dropper{s, file}, strict, checksum)
+ jr = journal.NewReader(reader, dropper{db.s, file}, strict, checksum)
} else {
- jr.Reset(reader, dropper{s, file}, strict, checksum)
+ jr.Reset(reader, dropper{db.s, file}, strict, checksum)
}
+
+ // Flush memdb and remove obsolete journal file.
if of != nil {
if mem.Len() > 0 {
if err := cm.flush(mem, 0); err != nil {
return err
}
}
- if err := cm.commit(file.Num(), d.seq); err != nil {
+ if err := cm.commit(file.Num(), db.seq); err != nil {
return err
}
cm.reset()
of.Remove()
of = nil
}
- // Reset memdb.
+
+ // Replay journal to memdb.
mem.Reset()
for {
r, err := jr.Next()
@@ -445,43 +521,58 @@ func (d *DB) recoverJournal() error {
if err == io.EOF {
break
}
- return err
+ return errors.SetFile(err, file)
}
+
buf.Reset()
if _, err := buf.ReadFrom(r); err != nil {
- if strict {
- return err
+ if err == io.ErrUnexpectedEOF {
+ // This is error returned due to corruption, with strict == false.
+ continue
+ } else {
+ return errors.SetFile(err, file)
}
- continue
}
- if err := batch.decode(buf.Bytes()); err != nil {
- return err
- }
- if err := batch.memReplay(mem); err != nil {
- return err
+ if err := batch.memDecodeAndReplay(db.seq, buf.Bytes(), mem); err != nil {
+ if strict || !errors.IsCorrupted(err) {
+ return errors.SetFile(err, file)
+ } else {
+ db.s.logf("journal error: %v (skipped)", err)
+ // We won't apply sequence number as it might be corrupted.
+ continue
+ }
}
- d.seq = batch.seq + uint64(batch.len())
+
+ // Save sequence number.
+ db.seq = batch.seq + uint64(batch.Len())
+
+ // Flush it if large enough.
if mem.Size() >= writeBuffer {
- // Large enough, flush it.
if err := cm.flush(mem, 0); err != nil {
return err
}
- // Reset memdb.
mem.Reset()
}
}
+
of = file
return nil
}
+
// Recover all journals.
- if len(ff2) > 0 {
- s.logf("journal@recovery F·%d", len(ff2))
- mem = memdb.New(s.icmp, writeBuffer)
- for _, file := range ff2 {
+ if len(journalFiles) > 0 {
+ db.logf("journal@recovery F·%d", len(journalFiles))
+
+ // Mark file number as used.
+ db.s.markFileNum(journalFiles[len(journalFiles)-1].Num())
+
+ mem = memdb.New(db.s.icmp, writeBuffer)
+ for _, file := range journalFiles {
if err := recoverJournal(file); err != nil {
return err
}
}
+
// Flush the last journal.
if mem.Len() > 0 {
if err := cm.flush(mem, 0); err != nil {
@@ -489,72 +580,140 @@ func (d *DB) recoverJournal() error {
}
}
}
+
// Create a new journal.
- if _, err := d.newMem(0); err != nil {
+ if _, err := db.newMem(0); err != nil {
return err
}
+
// Commit.
- if err := cm.commit(d.journalFile.Num(), d.seq); err != nil {
+ if err := cm.commit(db.journalFile.Num(), db.seq); err != nil {
// Close journal.
- if d.journal != nil {
- d.journal.Close()
- d.journalWriter.Close()
+ if db.journal != nil {
+ db.journal.Close()
+ db.journalWriter.Close()
}
return err
}
- // Remove the last journal.
+
+ // Remove the last obsolete journal file.
if of != nil {
of.Remove()
}
+
return nil
}
-func (d *DB) get(key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) {
- s := d.s
+func (db *DB) get(key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) {
+ ikey := newIkey(key, seq, ktSeek)
- ikey := newIKey(key, seq, tSeek)
-
- em, fm := d.getMems()
- for _, m := range [...]*memdb.DB{em, fm} {
+ em, fm := db.getMems()
+ for _, m := range [...]*memDB{em, fm} {
if m == nil {
continue
}
- mk, mv, me := m.Find(ikey)
+ defer m.decref()
+
+ mk, mv, me := m.mdb.Find(ikey)
if me == nil {
- ukey, _, t, ok := parseIkey(mk)
- if ok && s.icmp.uCompare(ukey, key) == 0 {
- if t == tDel {
+ ukey, _, kt, kerr := parseIkey(mk)
+ if kerr != nil {
+ // Shouldn't have had happen.
+ panic(kerr)
+ }
+ if db.s.icmp.uCompare(ukey, key) == 0 {
+ if kt == ktDel {
return nil, ErrNotFound
}
- return mv, nil
+ return append([]byte{}, mv...), nil
}
} else if me != ErrNotFound {
return nil, me
}
}
- v := s.version()
- value, cSched, err := v.get(ikey, ro)
+ v := db.s.version()
+ value, cSched, err := v.get(ikey, ro, false)
v.release()
if cSched {
// Trigger table compaction.
- d.compTrigger(d.tcompTriggerC)
+ db.compSendTrigger(db.tcompCmdC)
+ }
+ return
+}
+
+// has reports whether key is present as of sequence number seq.
+//
+// The memdbs returned by getMems are consulted first: an exact user-key match
+// there decides the result, with a deletion marker yielding false. Otherwise
+// the current version is consulted, and ErrNotFound from that lookup is
+// translated into (false, nil).
+func (db *DB) has(key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err error) {
+ ikey := newIkey(key, seq, ktSeek)
+
+ em, fm := db.getMems()
+ for _, m := range [...]*memDB{em, fm} {
+ if m == nil {
+ continue
+ }
+ // Deferred until has returns; at most two memdbs are visited.
+ defer m.decref()
+
+ mk, _, me := m.mdb.Find(ikey)
+ if me == nil {
+ ukey, _, kt, kerr := parseIkey(mk)
+ if kerr != nil {
+ // Not expected to happen; treated as a fatal bug.
+ panic(kerr)
+ }
+ if db.s.icmp.uCompare(ukey, key) == 0 {
+ if kt == ktDel {
+ return false, nil
+ }
+ return true, nil
+ }
+ } else if me != ErrNotFound {
+ return false, me
+ }
+ }
+
+ v := db.s.version()
+ // The last argument is true here, whereas get passes false.
+ // NOTE(review): presumably it avoids loading the value - confirm against
+ // version.get.
+ _, cSched, err := v.get(ikey, ro, true)
+ v.release()
+ if cSched {
+ // Trigger table compaction.
+ db.compSendTrigger(db.tcompCmdC)
+ }
+ if err == nil {
+ ret = true
+ } else if err == ErrNotFound {
+ err = nil
+ }
return
}
// Get gets the value for the given key. It returns ErrNotFound if the
-// DB does not contain the key.
+// DB does not contain the key.
//
-// The caller should not modify the contents of the returned slice, but
-// it is safe to modify the contents of the argument after Get returns.
-func (d *DB) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
- err = d.ok()
+// The returned slice is its own copy, it is safe to modify the contents
+// of the returned slice.
+// It is safe to modify the contents of the argument after Get returns.
+func (db *DB) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
+ err = db.ok()
if err != nil {
return
}
- return d.get(key, d.getSeq(), ro)
+ se := db.acquireSnapshot()
+ defer db.releaseSnapshot(se)
+ return db.get(key, se.seq, ro)
+}
+
+// Has reports whether the DB contains the given key.
+//
+// It is safe to modify the contents of the argument after Has returns.
+func (db *DB) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) {
+	if err = db.ok(); err != nil {
+		return
+	}
+
+	// Read at the sequence number of a snapshot held for this call.
+	snap := db.acquireSnapshot()
+	defer db.releaseSnapshot(snap)
+	return db.has(key, snap.seq, ro)
+}
// NewIterator returns an iterator for the latest snapshot of the
@@ -573,14 +732,16 @@ func (d *DB) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
// The iterator must be released after use, by calling Release method.
//
// Also read Iterator documentation of the leveldb/iterator package.
-func (d *DB) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
- if err := d.ok(); err != nil {
+func (db *DB) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+ if err := db.ok(); err != nil {
return iterator.NewEmptyIterator(err)
}
- p := d.newSnapshot()
- defer p.Release()
- return p.NewIterator(slice, ro)
+ se := db.acquireSnapshot()
+ defer db.releaseSnapshot(se)
+ // Iterator holds 'version' lock, 'version' is immutable so snapshot
+ // can be released after iterator created.
+ return db.newIterator(se.seq, slice, ro)
}
// GetSnapshot returns a latest snapshot of the underlying DB. A snapshot
@@ -588,25 +749,35 @@ func (d *DB) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterat
// content of snapshot are guaranteed to be consistent.
//
// The snapshot must be released after use, by calling Release method.
-func (d *DB) GetSnapshot() (*Snapshot, error) {
- if err := d.ok(); err != nil {
+func (db *DB) GetSnapshot() (*Snapshot, error) {
+ if err := db.ok(); err != nil {
return nil, err
}
- return d.newSnapshot(), nil
+ return db.newSnapshot(), nil
}
// GetProperty returns value of the given property name.
//
// Property names:
// leveldb.num-files-at-level{n}
-// Returns the number of filer at level 'n'.
+// Returns the number of files at level 'n'.
// leveldb.stats
// Returns statistics of the underlying DB.
// leveldb.sstables
// Returns sstables list for each level.
-func (d *DB) GetProperty(name string) (value string, err error) {
- err = d.ok()
+// leveldb.blockpool
+// Returns block pool stats.
+// leveldb.cachedblock
+// Returns size of cached block.
+// leveldb.openedtables
+// Returns number of opened tables.
+// leveldb.alivesnaps
+// Returns number of alive snapshots.
+// leveldb.aliveiters
+// Returns number of alive iterators.
+func (db *DB) GetProperty(name string) (value string, err error) {
+ err = db.ok()
if err != nil {
return
}
@@ -615,19 +786,18 @@ func (d *DB) GetProperty(name string) (value string, err error) {
if !strings.HasPrefix(name, prefix) {
return "", errors.New("leveldb: GetProperty: unknown property: " + name)
}
-
p := name[len(prefix):]
- s := d.s
- v := s.version()
+ v := db.s.version()
defer v.release()
+ numFilesPrefix := "num-files-at-level"
switch {
- case strings.HasPrefix(p, "num-files-at-level"):
+ case strings.HasPrefix(p, numFilesPrefix):
var level uint
var rest string
- n, _ := fmt.Scanf("%d%s", &level, &rest)
- if n != 1 || level >= kNumLevels {
+ n, _ := fmt.Sscanf(p[len(numFilesPrefix):], "%d%s", &level, &rest)
+ if n != 1 || int(level) >= db.s.o.GetNumLevel() {
err = errors.New("leveldb: GetProperty: invalid property: " + name)
} else {
value = fmt.Sprint(v.tLen(int(level)))
@@ -636,22 +806,36 @@ func (d *DB) GetProperty(name string) (value string, err error) {
value = "Compactions\n" +
" Level | Tables | Size(MB) | Time(sec) | Read(MB) | Write(MB)\n" +
"-------+------------+---------------+---------------+---------------+---------------\n"
- for level, tt := range v.tables {
- duration, read, write := d.compStats[level].get()
- if len(tt) == 0 && duration == 0 {
+ for level, tables := range v.tables {
+ duration, read, write := db.compStats[level].get()
+ if len(tables) == 0 && duration == 0 {
continue
}
value += fmt.Sprintf(" %3d | %10d | %13.5f | %13.5f | %13.5f | %13.5f\n",
- level, len(tt), float64(tt.size())/1048576.0, duration.Seconds(),
+ level, len(tables), float64(tables.size())/1048576.0, duration.Seconds(),
float64(read)/1048576.0, float64(write)/1048576.0)
}
case p == "sstables":
- for level, tt := range v.tables {
+ for level, tables := range v.tables {
value += fmt.Sprintf("--- level %d ---\n", level)
- for _, t := range tt {
- value += fmt.Sprintf("%d:%d[%q .. %q]\n", t.file.Num(), t.size, t.min, t.max)
+ for _, t := range tables {
+ value += fmt.Sprintf("%d:%d[%q .. %q]\n", t.file.Num(), t.size, t.imin, t.imax)
}
}
+ case p == "blockpool":
+ value = fmt.Sprintf("%v", db.s.tops.bpool)
+ case p == "cachedblock":
+ if db.s.tops.bcache != nil {
+ value = fmt.Sprintf("%d", db.s.tops.bcache.Size())
+ } else {
+ value = "<nil>"
+ }
+ case p == "openedtables":
+ value = fmt.Sprintf("%d", db.s.tops.cache.Size())
+ case p == "alivesnaps":
+ value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveSnaps))
+ case p == "aliveiters":
+ value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveIters))
default:
err = errors.New("leveldb: GetProperty: unknown property: " + name)
}
@@ -665,23 +849,23 @@ func (d *DB) GetProperty(name string) (value string, err error) {
// data compresses by a factor of ten, the returned sizes will be one-tenth
// the size of the corresponding user data size.
// The results may not include the sizes of recently written data.
-func (d *DB) SizeOf(ranges []util.Range) (Sizes, error) {
- if err := d.ok(); err != nil {
+func (db *DB) SizeOf(ranges []util.Range) (Sizes, error) {
+ if err := db.ok(); err != nil {
return nil, err
}
- v := d.s.version()
+ v := db.s.version()
defer v.release()
sizes := make(Sizes, 0, len(ranges))
for _, r := range ranges {
- min := newIKey(r.Start, kMaxSeq, tSeek)
- max := newIKey(r.Limit, kMaxSeq, tSeek)
- start, err := v.offsetOf(min)
+ imin := newIkey(r.Start, kMaxSeq, ktSeek)
+ imax := newIkey(r.Limit, kMaxSeq, ktSeek)
+ start, err := v.offsetOf(imin)
if err != nil {
return nil, err
}
- limit, err := v.offsetOf(max)
+ limit, err := v.offsetOf(imax)
if err != nil {
return nil, err
}
@@ -695,61 +879,67 @@ func (d *DB) SizeOf(ranges []util.Range) (Sizes, error) {
return sizes, nil
}
-// Close closes the DB. This will also releases any outstanding snapshot.
+// Close closes the DB. This will also release any outstanding snapshot and
+// abort any in-flight compaction.
//
// It is not safe to close a DB until all outstanding iterators are released.
// It is valid to call Close multiple times. Other methods should not be
// called after the DB has been closed.
-func (d *DB) Close() error {
- if !d.setClosed() {
+func (db *DB) Close() error {
+ if !db.setClosed() {
return ErrClosed
}
- s := d.s
start := time.Now()
- s.log("db@close closing")
+ db.log("db@close closing")
// Clear the finalizer.
- runtime.SetFinalizer(d, nil)
+ runtime.SetFinalizer(db, nil)
// Get compaction error.
var err error
select {
- case err = <-d.compErrC:
+ case err = <-db.compErrC:
default:
}
- close(d.closeC)
+ // Signal all goroutines.
+ close(db.closeC)
- // Wait for the close WaitGroup.
- d.closeW.Wait()
+ // Wait for all goroutines to exit.
+ db.closeW.Wait()
+
+ // Lock writer and close journal.
+ db.writeLockC <- struct{}{}
+ if db.journal != nil {
+ db.journal.Close()
+ db.journalWriter.Close()
+ }
- // Close journal.
- if d.journal != nil {
- d.journal.Close()
- d.journalWriter.Close()
+ if db.writeDelayN > 0 {
+ db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay)
}
// Close session.
- s.close()
- s.logf("db@close done T·%v", time.Since(start))
- s.release()
+ db.s.close()
+ db.logf("db@close done T·%v", time.Since(start))
+ db.s.release()
- if d.closer != nil {
- if err1 := d.closer.Close(); err == nil {
+ if db.closer != nil {
+ if err1 := db.closer.Close(); err == nil {
err = err1
}
}
- d.s = nil
- d.mem = nil
- d.frozenMem = nil
- d.journal = nil
- d.journalWriter = nil
- d.journalFile = nil
- d.frozenJournalFile = nil
- d.snapsRoot = snapshotElement{}
- d.closer = nil
+ // NIL'ing pointers.
+ db.s = nil
+ db.mem = nil
+ db.frozenMem = nil
+ db.journal = nil
+ db.journalWriter = nil
+ db.journalFile = nil
+ db.frozenJournalFile = nil
+ db.closer = nil
return err
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_compaction.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_compaction.go
index c82bd9f28..447407aba 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_compaction.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_compaction.go
@@ -7,11 +7,12 @@
package leveldb
import (
- "errors"
"sync"
"time"
+ "github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/memdb"
+ "github.com/syndtr/goleveldb/leveldb/opt"
)
var (
@@ -68,13 +69,13 @@ type cMem struct {
}
func newCMem(s *session) *cMem {
- return &cMem{s: s, rec: new(sessionRecord)}
+ return &cMem{s: s, rec: &sessionRecord{numLevel: s.o.GetNumLevel()}}
}
func (c *cMem) flush(mem *memdb.DB, level int) error {
s := c.s
- // Write memdb to table
+ // Write memdb to table.
iter := mem.NewIterator(nil)
defer iter.Release()
t, n, err := s.tops.createFrom(iter)
@@ -82,51 +83,85 @@ func (c *cMem) flush(mem *memdb.DB, level int) error {
return err
}
+ // Pick level.
if level < 0 {
- level = s.version_NB().pickLevel(t.min.ukey(), t.max.ukey())
+ v := s.version()
+ level = v.pickLevel(t.imin.ukey(), t.imax.ukey())
+ v.release()
}
c.rec.addTableFile(level, t)
- s.logf("mem@flush created L%d@%d N·%d S·%s %q:%q", level, t.file.Num(), n, shortenb(int(t.size)), t.min, t.max)
+ s.logf("mem@flush created L%d@%d N·%d S·%s %q:%q", level, t.file.Num(), n, shortenb(int(t.size)), t.imin, t.imax)
c.level = level
return nil
}
func (c *cMem) reset() {
- c.rec = new(sessionRecord)
+ c.rec = &sessionRecord{numLevel: c.s.o.GetNumLevel()}
}
func (c *cMem) commit(journal, seq uint64) error {
c.rec.setJournalNum(journal)
- c.rec.setSeq(seq)
- // Commit changes
+ c.rec.setSeqNum(seq)
+
+ // Commit changes.
return c.s.commit(c.rec)
}
-func (d *DB) compactionError() {
- var err error
+func (db *DB) compactionError() {
+ var (
+ err error
+ wlocked bool
+ )
noerr:
+ // No error.
for {
select {
- case _, _ = <-d.closeC:
- return
- case err = <-d.compErrSetC:
- if err != nil {
+ case err = <-db.compErrSetC:
+ switch {
+ case err == nil:
+ case errors.IsCorrupted(err):
+ goto hasperr
+ default:
goto haserr
}
+ case _, _ = <-db.closeC:
+ return
}
}
haserr:
+ // Transient error.
for {
select {
- case _, _ = <-d.closeC:
- return
- case err = <-d.compErrSetC:
- if err == nil {
+ case db.compErrC <- err:
+ case err = <-db.compErrSetC:
+ switch {
+ case err == nil:
goto noerr
+ case errors.IsCorrupted(err):
+ goto hasperr
+ default:
}
- case d.compErrC <- err:
+ case _, _ = <-db.closeC:
+ return
+ }
+ }
+hasperr:
+ // Persistent error.
+ for {
+ select {
+ case db.compErrC <- err:
+ case db.compPerErrC <- err:
+ case db.writeLockC <- struct{}{}:
+ // Hold write lock, so that write won't pass-through.
+ wlocked = true
+ case _, _ = <-db.closeC:
+ if wlocked {
+ // We should release the lock or Close will hang.
+ <-db.writeLockC
+ }
+ return
}
}
}
@@ -137,114 +172,159 @@ func (cnt *compactionTransactCounter) incr() {
*cnt++
}
-func (d *DB) compactionTransact(name string, exec func(cnt *compactionTransactCounter) error, rollback func() error) {
- s := d.s
+type compactionTransactInterface interface {
+ run(cnt *compactionTransactCounter) error
+ revert() error
+}
+
+func (db *DB) compactionTransact(name string, t compactionTransactInterface) {
defer func() {
if x := recover(); x != nil {
- if x == errCompactionTransactExiting && rollback != nil {
- if err := rollback(); err != nil {
- s.logf("%s rollback error %q", name, err)
+ if x == errCompactionTransactExiting {
+ if err := t.revert(); err != nil {
+ db.logf("%s revert error %q", name, err)
}
}
panic(x)
}
}()
+
const (
backoffMin = 1 * time.Second
backoffMax = 8 * time.Second
backoffMul = 2 * time.Second
)
- backoff := backoffMin
- backoffT := time.NewTimer(backoff)
- lastCnt := compactionTransactCounter(0)
+ var (
+ backoff = backoffMin
+ backoffT = time.NewTimer(backoff)
+ lastCnt = compactionTransactCounter(0)
+
+ disableBackoff = db.s.o.GetDisableCompactionBackoff()
+ )
for n := 0; ; n++ {
// Check wether the DB is closed.
- if d.isClosed() {
- s.logf("%s exiting", name)
- d.compactionExitTransact()
+ if db.isClosed() {
+ db.logf("%s exiting", name)
+ db.compactionExitTransact()
} else if n > 0 {
- s.logf("%s retrying N·%d", name, n)
+ db.logf("%s retrying N·%d", name, n)
}
// Execute.
cnt := compactionTransactCounter(0)
- err := exec(&cnt)
+ err := t.run(&cnt)
+ if err != nil {
+ db.logf("%s error I·%d %q", name, cnt, err)
+ }
// Set compaction error status.
select {
- case d.compErrSetC <- err:
- case _, _ = <-d.closeC:
- s.logf("%s exiting", name)
- d.compactionExitTransact()
+ case db.compErrSetC <- err:
+ case perr := <-db.compPerErrC:
+ if err != nil {
+ db.logf("%s exiting (persistent error %q)", name, perr)
+ db.compactionExitTransact()
+ }
+ case _, _ = <-db.closeC:
+ db.logf("%s exiting", name)
+ db.compactionExitTransact()
}
if err == nil {
return
}
- s.logf("%s error I·%d %q", name, cnt, err)
-
- // Reset backoff duration if counter is advancing.
- if cnt > lastCnt {
- backoff = backoffMin
- lastCnt = cnt
+ if errors.IsCorrupted(err) {
+ db.logf("%s exiting (corruption detected)", name)
+ db.compactionExitTransact()
}
- // Backoff.
- backoffT.Reset(backoff)
- if backoff < backoffMax {
- backoff *= backoffMul
- if backoff > backoffMax {
- backoff = backoffMax
+ if !disableBackoff {
+ // Reset backoff duration if counter is advancing.
+ if cnt > lastCnt {
+ backoff = backoffMin
+ lastCnt = cnt
+ }
+
+ // Backoff.
+ backoffT.Reset(backoff)
+ if backoff < backoffMax {
+ backoff *= backoffMul
+ if backoff > backoffMax {
+ backoff = backoffMax
+ }
+ }
+ select {
+ case <-backoffT.C:
+ case _, _ = <-db.closeC:
+ db.logf("%s exiting", name)
+ db.compactionExitTransact()
}
}
- select {
- case <-backoffT.C:
- case _, _ = <-d.closeC:
- s.logf("%s exiting", name)
- d.compactionExitTransact()
- }
}
}
-func (d *DB) compactionExitTransact() {
+type compactionTransactFunc struct {
+ runFunc func(cnt *compactionTransactCounter) error
+ revertFunc func() error
+}
+
+func (t *compactionTransactFunc) run(cnt *compactionTransactCounter) error {
+ return t.runFunc(cnt)
+}
+
+func (t *compactionTransactFunc) revert() error {
+ if t.revertFunc != nil {
+ return t.revertFunc()
+ }
+ return nil
+}
+
+func (db *DB) compactionTransactFunc(name string, run func(cnt *compactionTransactCounter) error, revert func() error) {
+ db.compactionTransact(name, &compactionTransactFunc{run, revert})
+}
+
+func (db *DB) compactionExitTransact() {
panic(errCompactionTransactExiting)
}
-func (d *DB) memCompaction() {
- mem := d.getFrozenMem()
+func (db *DB) memCompaction() {
+ mem := db.getFrozenMem()
if mem == nil {
return
}
+ defer mem.decref()
- s := d.s
- c := newCMem(s)
+ c := newCMem(db.s)
stats := new(cStatsStaging)
- s.logf("mem@flush N·%d S·%s", mem.Len(), shortenb(mem.Size()))
+ db.logf("mem@flush N·%d S·%s", mem.mdb.Len(), shortenb(mem.mdb.Size()))
// Don't compact empty memdb.
- if mem.Len() == 0 {
- s.logf("mem@flush skipping")
+ if mem.mdb.Len() == 0 {
+ db.logf("mem@flush skipping")
// drop frozen mem
- d.dropFrozenMem()
+ db.dropFrozenMem()
return
}
// Pause table compaction.
- ch := make(chan struct{})
+ resumeC := make(chan struct{})
select {
- case d.tcompPauseC <- (chan<- struct{})(ch):
- case _, _ = <-d.closeC:
+ case db.tcompPauseC <- (chan<- struct{})(resumeC):
+ case <-db.compPerErrC:
+ close(resumeC)
+ resumeC = nil
+ case _, _ = <-db.closeC:
return
}
- d.compactionTransact("mem@flush", func(cnt *compactionTransactCounter) (err error) {
+ db.compactionTransactFunc("mem@flush", func(cnt *compactionTransactCounter) (err error) {
stats.startTimer()
defer stats.stopTimer()
- return c.flush(mem, -1)
+ return c.flush(mem.mdb, -1)
}, func() error {
for _, r := range c.rec.addedTables {
- s.logf("mem@flush rollback @%d", r.num)
- f := s.getTableFile(r.num)
+ db.logf("mem@flush revert @%d", r.num)
+ f := db.s.getTableFile(r.num)
if err := f.Remove(); err != nil {
return err
}
@@ -252,279 +332,327 @@ func (d *DB) memCompaction() {
return nil
})
- d.compactionTransact("mem@commit", func(cnt *compactionTransactCounter) (err error) {
+ db.compactionTransactFunc("mem@commit", func(cnt *compactionTransactCounter) (err error) {
stats.startTimer()
defer stats.stopTimer()
- return c.commit(d.journalFile.Num(), d.frozenSeq)
+ return c.commit(db.journalFile.Num(), db.frozenSeq)
}, nil)
- s.logf("mem@flush commited F·%d T·%v", len(c.rec.addedTables), stats.duration)
+ db.logf("mem@flush committed F·%d T·%v", len(c.rec.addedTables), stats.duration)
for _, r := range c.rec.addedTables {
stats.write += r.size
}
- d.compStats[c.level].add(stats)
+ db.compStats[c.level].add(stats)
// Drop frozen mem.
- d.dropFrozenMem()
+ db.dropFrozenMem()
// Resume table compaction.
- select {
- case <-ch:
- case _, _ = <-d.closeC:
- return
+ if resumeC != nil {
+ select {
+ case <-resumeC:
+ close(resumeC)
+ case _, _ = <-db.closeC:
+ return
+ }
}
// Trigger table compaction.
- d.compTrigger(d.mcompTriggerC)
+ db.compSendTrigger(db.tcompCmdC)
}
-func (d *DB) tableCompaction(c *compaction, noTrivial bool) {
- s := d.s
+type tableCompactionBuilder struct {
+ db *DB
+ s *session
+ c *compaction
+ rec *sessionRecord
+ stat0, stat1 *cStatsStaging
- rec := new(sessionRecord)
- rec.addCompactionPointer(c.level, c.max)
+ snapHasLastUkey bool
+ snapLastUkey []byte
+ snapLastSeq uint64
+ snapIter int
+ snapKerrCnt int
+ snapDropCnt int
- if !noTrivial && c.trivial() {
- t := c.tables[0][0]
- s.logf("table@move L%d@%d -> L%d", c.level, t.file.Num(), c.level+1)
- rec.deleteTable(c.level, t.file.Num())
- rec.addTableFile(c.level+1, t)
- d.compactionTransact("table@move", func(cnt *compactionTransactCounter) (err error) {
- return s.commit(rec)
- }, nil)
- return
- }
+ kerrCnt int
+ dropCnt int
- var stats [2]cStatsStaging
- for i, tt := range c.tables {
- for _, t := range tt {
- stats[i].read += t.size
- // Insert deleted tables into record
- rec.deleteTable(c.level+i, t.file.Num())
- }
- }
- sourceSize := int(stats[0].read + stats[1].read)
- minSeq := d.minSeq()
- s.logf("table@compaction L%d·%d -> L%d·%d S·%s Q·%d", c.level, len(c.tables[0]), c.level+1, len(c.tables[1]), shortenb(sourceSize), minSeq)
-
- var snapUkey []byte
- var snapHasUkey bool
- var snapSeq uint64
- var snapIter int
- var snapDropCnt int
- var dropCnt int
- d.compactionTransact("table@build", func(cnt *compactionTransactCounter) (err error) {
- ukey := append([]byte{}, snapUkey...)
- hasUkey := snapHasUkey
- lseq := snapSeq
- dropCnt = snapDropCnt
- snapSched := snapIter == 0
-
- var tw *tWriter
- finish := func() error {
- t, err := tw.finish()
- if err != nil {
- return err
+ minSeq uint64
+ strict bool
+ tableSize int
+
+ tw *tWriter
+}
+
+func (b *tableCompactionBuilder) appendKV(key, value []byte) error {
+ // Create new table if not already.
+ if b.tw == nil {
+ // Check for pause event.
+ if b.db != nil {
+ select {
+ case ch := <-b.db.tcompPauseC:
+ b.db.pauseCompaction(ch)
+ case _, _ = <-b.db.closeC:
+ b.db.compactionExitTransact()
+ default:
}
- rec.addTableFile(c.level+1, t)
- stats[1].write += t.size
- s.logf("table@build created L%d@%d N·%d S·%s %q:%q", c.level+1, t.file.Num(), tw.tw.EntriesLen(), shortenb(int(t.size)), t.min, t.max)
- return nil
}
- defer func() {
- stats[1].stopTimer()
- if tw != nil {
- tw.drop()
- tw = nil
- }
- }()
+ // Create new table.
+ var err error
+ b.tw, err = b.s.tops.create()
+ if err != nil {
+ return err
+ }
+ }
- stats[1].startTimer()
- iter := c.newIterator()
- defer iter.Release()
- for i := 0; iter.Next(); i++ {
- // Incr transact counter.
- cnt.incr()
-
- // Skip until last state.
- if i < snapIter {
- continue
- }
+ // Write key/value into table.
+ return b.tw.append(key, value)
+}
- key := iKey(iter.Key())
+func (b *tableCompactionBuilder) needFlush() bool {
+ return b.tw.tw.BytesLen() >= b.tableSize
+}
- if c.shouldStopBefore(key) && tw != nil {
- err = finish()
- if err != nil {
- return
- }
- snapSched = true
- tw = nil
- }
+func (b *tableCompactionBuilder) flush() error {
+ t, err := b.tw.finish()
+ if err != nil {
+ return err
+ }
+ b.rec.addTableFile(b.c.level+1, t)
+ b.stat1.write += t.size
+ b.s.logf("table@build created L%d@%d N·%d S·%s %q:%q", b.c.level+1, t.file.Num(), b.tw.tw.EntriesLen(), shortenb(int(t.size)), t.imin, t.imax)
+ b.tw = nil
+ return nil
+}
- // Scheduled for snapshot, snapshot will used to retry compaction
- // if error occured.
- if snapSched {
- snapUkey = append(snapUkey[:0], ukey...)
- snapHasUkey = hasUkey
- snapSeq = lseq
- snapIter = i
- snapDropCnt = dropCnt
- snapSched = false
- }
+func (b *tableCompactionBuilder) cleanup() {
+ if b.tw != nil {
+ b.tw.drop()
+ b.tw = nil
+ }
+}
- if seq, t, ok := key.parseNum(); !ok {
- // Don't drop error keys
- ukey = ukey[:0]
- hasUkey = false
- lseq = kMaxSeq
- } else {
- if !hasUkey || s.icmp.uCompare(key.ukey(), ukey) != 0 {
- // First occurrence of this user key
- ukey = append(ukey[:0], key.ukey()...)
- hasUkey = true
- lseq = kMaxSeq
- }
+func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error {
+ snapResumed := b.snapIter > 0
+ hasLastUkey := b.snapHasLastUkey // The key might have zero length, so this is necessary.
+ lastUkey := append([]byte{}, b.snapLastUkey...)
+ lastSeq := b.snapLastSeq
+ b.kerrCnt = b.snapKerrCnt
+ b.dropCnt = b.snapDropCnt
+ // Restore compaction state.
+ b.c.restore()
- drop := false
- if lseq <= minSeq {
- // Dropped because newer entry for same user key exist
- drop = true // (A)
- } else if t == tDel && seq <= minSeq && c.isBaseLevelForKey(ukey) {
- // For this user key:
- // (1) there is no data in higher levels
- // (2) data in lower levels will have larger seq numbers
- // (3) data in layers that are being compacted here and have
- // smaller seq numbers will be dropped in the next
- // few iterations of this loop (by rule (A) above).
- // Therefore this deletion marker is obsolete and can be dropped.
- drop = true
- }
+ defer b.cleanup()
- lseq = seq
- if drop {
- dropCnt++
- continue
- }
- }
+ b.stat1.startTimer()
+ defer b.stat1.stopTimer()
- // Create new table if not already
- if tw == nil {
- // Check for pause event.
- select {
- case ch := <-d.tcompPauseC:
- d.pauseCompaction(ch)
- case _, _ = <-d.closeC:
- d.compactionExitTransact()
- default:
- }
+ iter := b.c.newIterator()
+ defer iter.Release()
+ for i := 0; iter.Next(); i++ {
+ // Incr transact counter.
+ cnt.incr()
+
+ // Skip until last state.
+ if i < b.snapIter {
+ continue
+ }
- // Create new table.
- tw, err = s.tops.create()
- if err != nil {
- return
+ resumed := false
+ if snapResumed {
+ resumed = true
+ snapResumed = false
+ }
+
+ ikey := iter.Key()
+ ukey, seq, kt, kerr := parseIkey(ikey)
+
+ if kerr == nil {
+ shouldStop := !resumed && b.c.shouldStopBefore(ikey)
+
+ if !hasLastUkey || b.s.icmp.uCompare(lastUkey, ukey) != 0 {
+ // First occurrence of this user key.
+
+ // Only rotate tables if ukey doesn't hop across.
+ if b.tw != nil && (shouldStop || b.needFlush()) {
+ if err := b.flush(); err != nil {
+ return err
+ }
+
+ // Creates snapshot of the state.
+ b.c.save()
+ b.snapHasLastUkey = hasLastUkey
+ b.snapLastUkey = append(b.snapLastUkey[:0], lastUkey...)
+ b.snapLastSeq = lastSeq
+ b.snapIter = i
+ b.snapKerrCnt = b.kerrCnt
+ b.snapDropCnt = b.dropCnt
}
- }
- // Write key/value into table
- err = tw.add(key, iter.Value())
- if err != nil {
- return
+ hasLastUkey = true
+ lastUkey = append(lastUkey[:0], ukey...)
+ lastSeq = kMaxSeq
}
- // Finish table if it is big enough
- if tw.tw.BytesLen() >= kMaxTableSize {
- err = finish()
- if err != nil {
- return
- }
- snapSched = true
- tw = nil
+ switch {
+ case lastSeq <= b.minSeq:
+ // Dropped because newer entry for same user key exists
+ fallthrough // (A)
+ case kt == ktDel && seq <= b.minSeq && b.c.baseLevelForKey(lastUkey):
+ // For this user key:
+ // (1) there is no data in higher levels
+ // (2) data in lower levels will have larger seq numbers
+ // (3) data in layers that are being compacted here and have
+ // smaller seq numbers will be dropped in the next
+ // few iterations of this loop (by rule (A) above).
+ // Therefore this deletion marker is obsolete and can be dropped.
+ lastSeq = seq
+ b.dropCnt++
+ continue
+ default:
+ lastSeq = seq
+ }
+ } else {
+ if b.strict {
+ return kerr
}
+
+ // Don't drop corrupted keys.
+ hasLastUkey = false
+ lastUkey = lastUkey[:0]
+ lastSeq = kMaxSeq
+ b.kerrCnt++
}
- err = iter.Error()
- if err != nil {
- return
+ if err := b.appendKV(ikey, iter.Value()); err != nil {
+ return err
}
+ }
- // Finish last table
- if tw != nil && !tw.empty() {
- err = finish()
- if err != nil {
- return
- }
- tw = nil
+ if err := iter.Error(); err != nil {
+ return err
+ }
+
+ // Finish last table.
+ if b.tw != nil && !b.tw.empty() {
+ return b.flush()
+ }
+ return nil
+}
+
+func (b *tableCompactionBuilder) revert() error {
+ for _, at := range b.rec.addedTables {
+ b.s.logf("table@build revert @%d", at.num)
+ f := b.s.getTableFile(at.num)
+ if err := f.Remove(); err != nil {
+ return err
}
+ }
+ return nil
+}
+
+func (db *DB) tableCompaction(c *compaction, noTrivial bool) {
+ defer c.release()
+
+ rec := &sessionRecord{numLevel: db.s.o.GetNumLevel()}
+ rec.addCompPtr(c.level, c.imax)
+
+ if !noTrivial && c.trivial() {
+ t := c.tables[0][0]
+ db.logf("table@move L%d@%d -> L%d", c.level, t.file.Num(), c.level+1)
+ rec.delTable(c.level, t.file.Num())
+ rec.addTableFile(c.level+1, t)
+ db.compactionTransactFunc("table@move", func(cnt *compactionTransactCounter) (err error) {
+ return db.s.commit(rec)
+ }, nil)
return
- }, func() error {
- for _, r := range rec.addedTables {
- s.logf("table@build rollback @%d", r.num)
- f := s.getTableFile(r.num)
- if err := f.Remove(); err != nil {
- return err
- }
+ }
+
+ var stats [2]cStatsStaging
+ for i, tables := range c.tables {
+ for _, t := range tables {
+ stats[i].read += t.size
+ // Insert deleted tables into record
+ rec.delTable(c.level+i, t.file.Num())
}
- return nil
- })
+ }
+ sourceSize := int(stats[0].read + stats[1].read)
+ minSeq := db.minSeq()
+ db.logf("table@compaction L%d·%d -> L%d·%d S·%s Q·%d", c.level, len(c.tables[0]), c.level+1, len(c.tables[1]), shortenb(sourceSize), minSeq)
+
+ b := &tableCompactionBuilder{
+ db: db,
+ s: db.s,
+ c: c,
+ rec: rec,
+ stat1: &stats[1],
+ minSeq: minSeq,
+ strict: db.s.o.GetStrict(opt.StrictCompaction),
+ tableSize: db.s.o.GetCompactionTableSize(c.level + 1),
+ }
+ db.compactionTransact("table@build", b)
// Commit changes
- d.compactionTransact("table@commit", func(cnt *compactionTransactCounter) (err error) {
+ db.compactionTransactFunc("table@commit", func(cnt *compactionTransactCounter) (err error) {
stats[1].startTimer()
defer stats[1].stopTimer()
- return s.commit(rec)
+ return db.s.commit(rec)
}, nil)
- resultSize := int(int(stats[1].write))
- s.logf("table@compaction commited F%s S%s D·%d T·%v", sint(len(rec.addedTables)-len(rec.deletedTables)), sshortenb(resultSize-sourceSize), dropCnt, stats[1].duration)
+ resultSize := int(stats[1].write)
+ db.logf("table@compaction committed F%s S%s Ke·%d D·%d T·%v", sint(len(rec.addedTables)-len(rec.deletedTables)), sshortenb(resultSize-sourceSize), b.kerrCnt, b.dropCnt, stats[1].duration)
// Save compaction stats
for i := range stats {
- d.compStats[c.level+1].add(&stats[i])
+ db.compStats[c.level+1].add(&stats[i])
}
}
-func (d *DB) tableRangeCompaction(level int, min, max []byte) {
- s := d.s
- s.logf("table@compaction range L%d %q:%q", level, min, max)
+func (db *DB) tableRangeCompaction(level int, umin, umax []byte) {
+ db.logf("table@compaction range L%d %q:%q", level, umin, umax)
if level >= 0 {
- if c := s.getCompactionRange(level, min, max); c != nil {
- d.tableCompaction(c, true)
+ if c := db.s.getCompactionRange(level, umin, umax); c != nil {
+ db.tableCompaction(c, true)
}
} else {
- v := s.version_NB()
+ v := db.s.version()
m := 1
for i, t := range v.tables[1:] {
- if t.isOverlaps(min, max, true, s.icmp) {
+ if t.overlaps(db.s.icmp, umin, umax, false) {
m = i + 1
}
}
+ v.release()
+
for level := 0; level < m; level++ {
- if c := s.getCompactionRange(level, min, max); c != nil {
- d.tableCompaction(c, true)
+ if c := db.s.getCompactionRange(level, umin, umax); c != nil {
+ db.tableCompaction(c, true)
}
}
}
}
-func (d *DB) tableAutoCompaction() {
- if c := d.s.pickCompaction(); c != nil {
- d.tableCompaction(c, false)
+func (db *DB) tableAutoCompaction() {
+ if c := db.s.pickCompaction(); c != nil {
+ db.tableCompaction(c, false)
}
}
-func (d *DB) tableNeedCompaction() bool {
- return d.s.version_NB().needCompaction()
+func (db *DB) tableNeedCompaction() bool {
+ v := db.s.version()
+ defer v.release()
+ return v.needCompaction()
}
-func (d *DB) pauseCompaction(ch chan<- struct{}) {
+func (db *DB) pauseCompaction(ch chan<- struct{}) {
select {
case ch <- struct{}{}:
- case _, _ = <-d.closeC:
- d.compactionExitTransact()
+ case _, _ = <-db.closeC:
+ db.compactionExitTransact()
}
}
@@ -537,7 +665,12 @@ type cIdle struct {
}
func (r cIdle) ack(err error) {
- r.ackC <- err
+ if r.ackC != nil {
+ defer func() {
+ recover()
+ }()
+ r.ackC <- err
+ }
}
type cRange struct {
@@ -547,56 +680,67 @@ type cRange struct {
}
func (r cRange) ack(err error) {
- defer func() {
- recover()
- }()
if r.ackC != nil {
+ defer func() {
+ recover()
+ }()
r.ackC <- err
}
}
-func (d *DB) compSendIdle(compC chan<- cCmd) error {
+// This will trigger auto compaction and/or wait for all compaction to be done.
+func (db *DB) compSendIdle(compC chan<- cCmd) (err error) {
ch := make(chan error)
defer close(ch)
// Send cmd.
select {
case compC <- cIdle{ch}:
- case err := <-d.compErrC:
- return err
- case _, _ = <-d.closeC:
+ case err = <-db.compErrC:
+ return
+ case _, _ = <-db.closeC:
return ErrClosed
}
// Wait cmd.
- return <-ch
+ select {
+ case err = <-ch:
+ case err = <-db.compErrC:
+ case _, _ = <-db.closeC:
+ return ErrClosed
+ }
+ return err
}
-func (d *DB) compSendRange(compC chan<- cCmd, level int, min, max []byte) (err error) {
+// This will trigger auto compaction but will not wait for it.
+func (db *DB) compSendTrigger(compC chan<- cCmd) {
+ select {
+ case compC <- cIdle{}:
+ default:
+ }
+}
+
+// Send range compaction request.
+func (db *DB) compSendRange(compC chan<- cCmd, level int, min, max []byte) (err error) {
ch := make(chan error)
defer close(ch)
// Send cmd.
select {
case compC <- cRange{level, min, max, ch}:
- case err := <-d.compErrC:
+ case err := <-db.compErrC:
return err
- case _, _ = <-d.closeC:
+ case _, _ = <-db.closeC:
return ErrClosed
}
// Wait cmd.
select {
- case err = <-d.compErrC:
case err = <-ch:
+ case err = <-db.compErrC:
+ case _, _ = <-db.closeC:
+ return ErrClosed
}
return err
}
-func (d *DB) compTrigger(compTriggerC chan struct{}) {
- select {
- case compTriggerC <- struct{}{}:
- default:
- }
-}
-
-func (d *DB) mCompaction() {
+func (db *DB) mCompaction() {
var x cCmd
defer func() {
@@ -608,24 +752,27 @@ func (d *DB) mCompaction() {
if x != nil {
x.ack(ErrClosed)
}
- d.closeW.Done()
+ db.closeW.Done()
}()
for {
select {
- case _, _ = <-d.closeC:
+ case x = <-db.mcompCmdC:
+ switch x.(type) {
+ case cIdle:
+ db.memCompaction()
+ x.ack(nil)
+ x = nil
+ default:
+ panic("leveldb: unknown command")
+ }
+ case _, _ = <-db.closeC:
return
- case x = <-d.mcompCmdC:
- d.memCompaction()
- x.ack(nil)
- x = nil
- case <-d.mcompTriggerC:
- d.memCompaction()
}
}
}
-func (d *DB) tCompaction() {
+func (db *DB) tCompaction() {
var x cCmd
var ackQ []cCmd
@@ -642,19 +789,18 @@ func (d *DB) tCompaction() {
if x != nil {
x.ack(ErrClosed)
}
- d.closeW.Done()
+ db.closeW.Done()
}()
for {
- if d.tableNeedCompaction() {
+ if db.tableNeedCompaction() {
select {
- case x = <-d.tcompCmdC:
- case <-d.tcompTriggerC:
- case _, _ = <-d.closeC:
- return
- case ch := <-d.tcompPauseC:
- d.pauseCompaction(ch)
+ case x = <-db.tcompCmdC:
+ case ch := <-db.tcompPauseC:
+ db.pauseCompaction(ch)
continue
+ case _, _ = <-db.closeC:
+ return
default:
}
} else {
@@ -664,12 +810,11 @@ func (d *DB) tCompaction() {
}
ackQ = ackQ[:0]
select {
- case x = <-d.tcompCmdC:
- case <-d.tcompTriggerC:
- case ch := <-d.tcompPauseC:
- d.pauseCompaction(ch)
+ case x = <-db.tcompCmdC:
+ case ch := <-db.tcompPauseC:
+ db.pauseCompaction(ch)
continue
- case _, _ = <-d.closeC:
+ case _, _ = <-db.closeC:
return
}
}
@@ -678,11 +823,13 @@ func (d *DB) tCompaction() {
case cIdle:
ackQ = append(ackQ, x)
case cRange:
- d.tableRangeCompaction(cmd.level, cmd.min, cmd.max)
+ db.tableRangeCompaction(cmd.level, cmd.min, cmd.max)
x.ack(nil)
+ default:
+ panic("leveldb: unknown command")
}
x = nil
}
- d.tableAutoCompaction()
+ db.tableAutoCompaction()
}
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_iter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_iter.go
index 9973a8fef..011a94a35 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_iter.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_iter.go
@@ -8,7 +8,10 @@ package leveldb
import (
"errors"
+ "math/rand"
"runtime"
+ "sync"
+ "sync/atomic"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/opt"
@@ -19,50 +22,69 @@ var (
errInvalidIkey = errors.New("leveldb: Iterator: invalid internal key")
)
-func (db *DB) newRawIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
- s := db.s
+type memdbReleaser struct {
+ once sync.Once
+ m *memDB
+}
+
+func (mr *memdbReleaser) Release() {
+ mr.once.Do(func() {
+ mr.m.decref()
+ })
+}
+func (db *DB) newRawIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
em, fm := db.getMems()
- v := s.version()
+ v := db.s.version()
ti := v.getIterators(slice, ro)
n := len(ti) + 2
i := make([]iterator.Iterator, 0, n)
- i = append(i, em.NewIterator(slice))
+ emi := em.mdb.NewIterator(slice)
+ emi.SetReleaser(&memdbReleaser{m: em})
+ i = append(i, emi)
if fm != nil {
- i = append(i, fm.NewIterator(slice))
+ fmi := fm.mdb.NewIterator(slice)
+ fmi.SetReleaser(&memdbReleaser{m: fm})
+ i = append(i, fmi)
}
i = append(i, ti...)
- strict := s.o.GetStrict(opt.StrictIterator) || ro.GetStrict(opt.StrictIterator)
- mi := iterator.NewMergedIterator(i, s.icmp, strict)
+ strict := opt.GetStrict(db.s.o.Options, ro, opt.StrictReader)
+ mi := iterator.NewMergedIterator(i, db.s.icmp, strict)
mi.SetReleaser(&versionReleaser{v: v})
return mi
}
func (db *DB) newIterator(seq uint64, slice *util.Range, ro *opt.ReadOptions) *dbIter {
- var slice_ *util.Range
+ var islice *util.Range
if slice != nil {
- slice_ = &util.Range{}
+ islice = &util.Range{}
if slice.Start != nil {
- slice_.Start = newIKey(slice.Start, kMaxSeq, tSeek)
+ islice.Start = newIkey(slice.Start, kMaxSeq, ktSeek)
}
if slice.Limit != nil {
- slice_.Limit = newIKey(slice.Limit, kMaxSeq, tSeek)
+ islice.Limit = newIkey(slice.Limit, kMaxSeq, ktSeek)
}
}
- rawIter := db.newRawIterator(slice_, ro)
+ rawIter := db.newRawIterator(islice, ro)
iter := &dbIter{
+ db: db,
icmp: db.s.icmp,
iter: rawIter,
seq: seq,
- strict: db.s.o.GetStrict(opt.StrictIterator) || ro.GetStrict(opt.StrictIterator),
+ strict: opt.GetStrict(db.s.o.Options, ro, opt.StrictReader),
key: make([]byte, 0),
value: make([]byte, 0),
}
+ atomic.AddInt32(&db.aliveIters, 1)
runtime.SetFinalizer(iter, (*dbIter).Release)
return iter
}
+func (db *DB) iterSamplingRate() int {
+ return rand.Intn(2 * db.s.o.GetIteratorSamplingRate())
+}
+
type dir int
const (
@@ -75,16 +97,27 @@ const (
// dbIter represents an iterator state over a database session.
type dbIter struct {
+ db *DB
icmp *iComparer
iter iterator.Iterator
seq uint64
strict bool
- dir dir
- key []byte
- value []byte
- err error
- releaser util.Releaser
+ smaplingGap int
+ dir dir
+ key []byte
+ value []byte
+ err error
+ releaser util.Releaser
+}
+
+func (i *dbIter) sampleSeek() {
+ ikey := i.iter.Key()
+ i.smaplingGap -= len(ikey) + len(i.iter.Value())
+ for i.smaplingGap < 0 {
+ i.smaplingGap += i.db.iterSamplingRate()
+ i.db.sampleSeek(ikey)
+ }
}
func (i *dbIter) setErr(err error) {
@@ -144,7 +177,7 @@ func (i *dbIter) Seek(key []byte) bool {
return false
}
- ikey := newIKey(key, i.seq, tSeek)
+ ikey := newIkey(key, i.seq, ktSeek)
if i.iter.Seek(ikey) {
i.dir = dirSOI
return i.next()
@@ -156,15 +189,15 @@ func (i *dbIter) Seek(key []byte) bool {
func (i *dbIter) next() bool {
for {
- ukey, seq, t, ok := parseIkey(i.iter.Key())
- if ok {
+ if ukey, seq, kt, kerr := parseIkey(i.iter.Key()); kerr == nil {
+ i.sampleSeek()
if seq <= i.seq {
- switch t {
- case tDel:
+ switch kt {
+ case ktDel:
// Skip deleted key.
i.key = append(i.key[:0], ukey...)
i.dir = dirForward
- case tVal:
+ case ktVal:
if i.dir == dirSOI || i.icmp.uCompare(ukey, i.key) > 0 {
i.key = append(i.key[:0], ukey...)
i.value = append(i.value[:0], i.iter.Value()...)
@@ -174,7 +207,7 @@ func (i *dbIter) next() bool {
}
}
} else if i.strict {
- i.setErr(errInvalidIkey)
+ i.setErr(kerr)
break
}
if !i.iter.Next() {
@@ -207,20 +240,20 @@ func (i *dbIter) prev() bool {
del := true
if i.iter.Valid() {
for {
- ukey, seq, t, ok := parseIkey(i.iter.Key())
- if ok {
+ if ukey, seq, kt, kerr := parseIkey(i.iter.Key()); kerr == nil {
+ i.sampleSeek()
if seq <= i.seq {
if !del && i.icmp.uCompare(ukey, i.key) < 0 {
return true
}
- del = (t == tDel)
+ del = (kt == ktDel)
if !del {
i.key = append(i.key[:0], ukey...)
i.value = append(i.value[:0], i.iter.Value()...)
}
}
} else if i.strict {
- i.setErr(errInvalidIkey)
+ i.setErr(kerr)
return false
}
if !i.iter.Prev() {
@@ -249,13 +282,13 @@ func (i *dbIter) Prev() bool {
return i.Last()
case dirForward:
for i.iter.Prev() {
- ukey, _, _, ok := parseIkey(i.iter.Key())
- if ok {
+ if ukey, _, _, kerr := parseIkey(i.iter.Key()); kerr == nil {
+ i.sampleSeek()
if i.icmp.uCompare(ukey, i.key) < 0 {
goto cont
}
} else if i.strict {
- i.setErr(errInvalidIkey)
+ i.setErr(kerr)
return false
}
}
@@ -289,6 +322,7 @@ func (i *dbIter) Release() {
if i.releaser != nil {
i.releaser.Release()
+ i.releaser = nil
}
i.dir = dirReleased
@@ -296,13 +330,19 @@ func (i *dbIter) Release() {
i.value = nil
i.iter.Release()
i.iter = nil
+ atomic.AddInt32(&i.db.aliveIters, -1)
+ i.db = nil
}
}
func (i *dbIter) SetReleaser(releaser util.Releaser) {
- if i.dir != dirReleased {
- i.releaser = releaser
+ if i.dir == dirReleased {
+ panic(util.ErrReleased)
+ }
+ if i.releaser != nil && releaser != nil {
+ panic(util.ErrHasReleaser)
}
+ i.releaser = releaser
}
func (i *dbIter) Error() error {
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_snapshot.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_snapshot.go
index 225b7cd5e..0372848ff 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_snapshot.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_snapshot.go
@@ -7,8 +7,11 @@
package leveldb
import (
+ "container/list"
+ "fmt"
"runtime"
"sync"
+ "sync/atomic"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/opt"
@@ -18,51 +21,41 @@ import (
type snapshotElement struct {
seq uint64
ref int
- // Next and previous pointers in the doubly-linked list of elements.
- next, prev *snapshotElement
-}
-
-// Initialize the snapshot.
-func (db *DB) initSnapshot() {
- db.snapsRoot.next = &db.snapsRoot
- db.snapsRoot.prev = &db.snapsRoot
+ e *list.Element
}
// Acquires a snapshot, based on latest sequence.
func (db *DB) acquireSnapshot() *snapshotElement {
db.snapsMu.Lock()
+ defer db.snapsMu.Unlock()
+
seq := db.getSeq()
- elem := db.snapsRoot.prev
- if elem == &db.snapsRoot || elem.seq != seq {
- at := db.snapsRoot.prev
- next := at.next
- elem = &snapshotElement{
- seq: seq,
- prev: at,
- next: next,
+
+ if e := db.snapsList.Back(); e != nil {
+ se := e.Value.(*snapshotElement)
+ if se.seq == seq {
+ se.ref++
+ return se
+ } else if seq < se.seq {
+ panic("leveldb: sequence number is not increasing")
}
- at.next = elem
- next.prev = elem
}
- elem.ref++
- db.snapsMu.Unlock()
- return elem
+ se := &snapshotElement{seq: seq, ref: 1}
+ se.e = db.snapsList.PushBack(se)
+ return se
}
// Releases given snapshot element.
-func (db *DB) releaseSnapshot(elem *snapshotElement) {
- if !db.isClosed() {
- db.snapsMu.Lock()
- elem.ref--
- if elem.ref == 0 {
- elem.prev.next = elem.next
- elem.next.prev = elem.prev
- elem.next = nil
- elem.prev = nil
- } else if elem.ref < 0 {
- panic("leveldb: Snapshot: negative element reference")
- }
- db.snapsMu.Unlock()
+func (db *DB) releaseSnapshot(se *snapshotElement) {
+ db.snapsMu.Lock()
+ defer db.snapsMu.Unlock()
+
+ se.ref--
+ if se.ref == 0 {
+ db.snapsList.Remove(se.e)
+ se.e = nil
+ } else if se.ref < 0 {
+ panic("leveldb: Snapshot: negative element reference")
}
}
@@ -70,10 +63,11 @@ func (db *DB) releaseSnapshot(elem *snapshotElement) {
func (db *DB) minSeq() uint64 {
db.snapsMu.Lock()
defer db.snapsMu.Unlock()
- elem := db.snapsRoot.prev
- if elem != &db.snapsRoot {
- return elem.seq
+
+ if e := db.snapsList.Front(); e != nil {
+ return e.Value.(*snapshotElement).seq
}
+
return db.getSeq()
}
@@ -81,38 +75,59 @@ func (db *DB) minSeq() uint64 {
type Snapshot struct {
db *DB
elem *snapshotElement
- mu sync.Mutex
+ mu sync.RWMutex
released bool
}
// Creates new snapshot object.
func (db *DB) newSnapshot() *Snapshot {
- p := &Snapshot{
+ snap := &Snapshot{
db: db,
elem: db.acquireSnapshot(),
}
- runtime.SetFinalizer(p, (*Snapshot).Release)
- return p
+ atomic.AddInt32(&db.aliveSnaps, 1)
+ runtime.SetFinalizer(snap, (*Snapshot).Release)
+ return snap
+}
+
+func (snap *Snapshot) String() string {
+ return fmt.Sprintf("leveldb.Snapshot{%d}", snap.elem.seq)
}
// Get gets the value for the given key. It returns ErrNotFound if
-// the DB does not contain the key.
+// the DB does not contain the key.
//
// The caller should not modify the contents of the returned slice, but
// it is safe to modify the contents of the argument after Get returns.
-func (p *Snapshot) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
- db := p.db
- err = db.ok()
- if err != nil {
- return
- }
- p.mu.Lock()
- defer p.mu.Unlock()
- if p.released {
+func (snap *Snapshot) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
+ snap.mu.RLock()
+ defer snap.mu.RUnlock()
+ if snap.released { // must be checked before any use of snap.db: Release sets it to nil
err = ErrSnapshotReleased
return
}
- return db.get(key, p.elem.seq, ro)
+ err = snap.db.ok()
+ if err != nil {
+ return
+ }
+ return snap.db.get(key, snap.elem.seq, ro)
+}
+
+// Has returns true if the DB contains the given key. It returns
+// ErrSnapshotReleased if the snapshot has already been released.
+// It is safe to modify the contents of the argument after Has returns.
+func (snap *Snapshot) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) {
+ snap.mu.RLock()
+ defer snap.mu.RUnlock()
+ if snap.released { // must be checked before any use of snap.db: Release sets it to nil
+ err = ErrSnapshotReleased
+ return
+ }
+ err = snap.db.ok()
+ if err != nil {
+ return
+ }
+ return snap.db.has(key, snap.elem.seq, ro)
}
// NewIterator returns an iterator for the snapshot of the underlying DB.
@@ -132,17 +147,18 @@ func (p *Snapshot) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error
// iterator would be still valid until released.
//
// Also read Iterator documentation of the leveldb/iterator package.
-func (p *Snapshot) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
- db := p.db
- if err := db.ok(); err != nil {
+func (snap *Snapshot) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+ if err := snap.db.ok(); err != nil {
return iterator.NewEmptyIterator(err)
}
- p.mu.Lock()
- defer p.mu.Unlock()
- if p.released {
+ snap.mu.Lock()
+ defer snap.mu.Unlock()
+ if snap.released {
return iterator.NewEmptyIterator(ErrSnapshotReleased)
}
- return db.newIterator(p.elem.seq, slice, ro)
+ // Since the iterator already holds a version ref, it doesn't need to
+ // hold a snapshot ref.
+ return snap.db.newIterator(snap.elem.seq, slice, ro)
}
// Release releases the snapshot. This will not release any returned
@@ -150,16 +166,18 @@ func (p *Snapshot) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.
// underlying DB is closed.
//
// Other methods should not be called after the snapshot has been released.
-func (p *Snapshot) Release() {
- p.mu.Lock()
- if !p.released {
+func (snap *Snapshot) Release() {
+ snap.mu.Lock()
+ defer snap.mu.Unlock()
+
+ if !snap.released {
// Clear the finalizer.
- runtime.SetFinalizer(p, nil)
+ runtime.SetFinalizer(snap, nil)
- p.released = true
- p.db.releaseSnapshot(p.elem)
- p.db = nil
- p.elem = nil
+ snap.released = true
+ snap.db.releaseSnapshot(snap.elem)
+ atomic.AddInt32(&snap.db.aliveSnaps, -1)
+ snap.db = nil
+ snap.elem = nil
}
- p.mu.Unlock()
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_state.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_state.go
index a13706142..d4db9d6dd 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_state.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_state.go
@@ -8,106 +8,203 @@ package leveldb
import (
"sync/atomic"
+ "time"
"github.com/syndtr/goleveldb/leveldb/journal"
"github.com/syndtr/goleveldb/leveldb/memdb"
)
+type memDB struct {
+ db *DB
+ mdb *memdb.DB
+ ref int32
+}
+
+func (m *memDB) incref() {
+ atomic.AddInt32(&m.ref, 1)
+}
+
+func (m *memDB) decref() {
+ if ref := atomic.AddInt32(&m.ref, -1); ref == 0 {
+ // Only put back memdb with std capacity.
+ if m.mdb.Capacity() == m.db.s.o.GetWriteBuffer() {
+ m.mdb.Reset()
+ m.db.mpoolPut(m.mdb)
+ }
+ m.db = nil
+ m.mdb = nil
+ } else if ref < 0 {
+ panic("negative memdb ref")
+ }
+}
+
// Get latest sequence number.
-func (d *DB) getSeq() uint64 {
- return atomic.LoadUint64(&d.seq)
+func (db *DB) getSeq() uint64 {
+ return atomic.LoadUint64(&db.seq)
}
// Atomically adds delta to seq.
-func (d *DB) addSeq(delta uint64) {
- atomic.AddUint64(&d.seq, delta)
+func (db *DB) addSeq(delta uint64) {
+ atomic.AddUint64(&db.seq, delta)
+}
+
+func (db *DB) sampleSeek(ikey iKey) {
+ v := db.s.version()
+ if v.sampleSeek(ikey) {
+ // Trigger table compaction.
+ db.compSendTrigger(db.tcompCmdC)
+ }
+ v.release()
+}
+
+func (db *DB) mpoolPut(mem *memdb.DB) {
+ defer func() {
+ recover()
+ }()
+ select {
+ case db.memPool <- mem:
+ default:
+ }
+}
+
+func (db *DB) mpoolGet() *memdb.DB {
+ select {
+ case mem := <-db.memPool:
+ return mem
+ default:
+ return nil
+ }
+}
+
+func (db *DB) mpoolDrain() {
+ ticker := time.NewTicker(30 * time.Second)
+ for {
+ select {
+ case <-ticker.C:
+ select {
+ case <-db.memPool:
+ default:
+ }
+ case _, _ = <-db.closeC:
+ close(db.memPool)
+ return
+ }
+ }
}
// Create a new memdb and freeze the old one; needs external synchronization.
// newMem is only called synchronously by the writer.
-func (d *DB) newMem(n int) (mem *memdb.DB, err error) {
- s := d.s
-
- num := s.allocFileNum()
- file := s.getJournalFile(num)
+func (db *DB) newMem(n int) (mem *memDB, err error) {
+ num := db.s.allocFileNum()
+ file := db.s.getJournalFile(num)
w, err := file.Create()
if err != nil {
- s.reuseFileNum(num)
+ db.s.reuseFileNum(num)
return
}
- d.memMu.Lock()
- if d.journal == nil {
- d.journal = journal.NewWriter(w)
+
+ db.memMu.Lock()
+ defer db.memMu.Unlock()
+
+ if db.frozenMem != nil {
+ panic("still has frozen mem")
+ }
+
+ if db.journal == nil {
+ db.journal = journal.NewWriter(w)
} else {
- d.journal.Reset(w)
- d.journalWriter.Close()
- d.frozenJournalFile = d.journalFile
- }
- d.journalWriter = w
- d.journalFile = file
- d.frozenMem = d.mem
- d.mem = memdb.New(s.icmp, maxInt(d.s.o.GetWriteBuffer(), n))
- mem = d.mem
- // The seq only incremented by the writer.
- d.frozenSeq = d.seq
- d.memMu.Unlock()
+ db.journal.Reset(w)
+ db.journalWriter.Close()
+ db.frozenJournalFile = db.journalFile
+ }
+ db.journalWriter = w
+ db.journalFile = file
+ db.frozenMem = db.mem
+ mdb := db.mpoolGet()
+ if mdb == nil || mdb.Capacity() < n {
+ mdb = memdb.New(db.s.icmp, maxInt(db.s.o.GetWriteBuffer(), n))
+ }
+ mem = &memDB{
+ db: db,
+ mdb: mdb,
+ ref: 2,
+ }
+ db.mem = mem
+ // The seq is only incremented by the writer, and whoever calls newMem
+ // should hold the write lock, so no additional synchronization is needed here.
+ db.frozenSeq = db.seq
return
}
// Get all memdbs.
-func (d *DB) getMems() (e *memdb.DB, f *memdb.DB) {
- d.memMu.RLock()
- defer d.memMu.RUnlock()
- return d.mem, d.frozenMem
+func (db *DB) getMems() (e, f *memDB) {
+ db.memMu.RLock()
+ defer db.memMu.RUnlock()
+ if db.mem == nil {
+ panic("nil effective mem")
+ }
+ db.mem.incref()
+ if db.frozenMem != nil {
+ db.frozenMem.incref()
+ }
+ return db.mem, db.frozenMem
}
// Get effective memdb.
-func (d *DB) getEffectiveMem() *memdb.DB {
- d.memMu.RLock()
- defer d.memMu.RUnlock()
- return d.mem
+func (db *DB) getEffectiveMem() *memDB {
+ db.memMu.RLock()
+ defer db.memMu.RUnlock()
+ if db.mem == nil {
+ panic("nil effective mem")
+ }
+ db.mem.incref()
+ return db.mem
}
// Check whether we have a frozen memdb.
-func (d *DB) hasFrozenMem() bool {
- d.memMu.RLock()
- defer d.memMu.RUnlock()
- return d.frozenMem != nil
+func (db *DB) hasFrozenMem() bool {
+ db.memMu.RLock()
+ defer db.memMu.RUnlock()
+ return db.frozenMem != nil
}
// Get frozen memdb.
-func (d *DB) getFrozenMem() *memdb.DB {
- d.memMu.RLock()
- defer d.memMu.RUnlock()
- return d.frozenMem
+func (db *DB) getFrozenMem() *memDB {
+ db.memMu.RLock()
+ defer db.memMu.RUnlock()
+ if db.frozenMem != nil {
+ db.frozenMem.incref()
+ }
+ return db.frozenMem
}
// Drop frozen memdb; assume that frozen memdb isn't nil.
-func (d *DB) dropFrozenMem() {
- d.memMu.Lock()
- if err := d.frozenJournalFile.Remove(); err != nil {
- d.s.logf("journal@remove removing @%d %q", d.frozenJournalFile.Num(), err)
+func (db *DB) dropFrozenMem() {
+ db.memMu.Lock()
+ if err := db.frozenJournalFile.Remove(); err != nil {
+ db.logf("journal@remove removing @%d %q", db.frozenJournalFile.Num(), err)
} else {
- d.s.logf("journal@remove removed @%d", d.frozenJournalFile.Num())
+ db.logf("journal@remove removed @%d", db.frozenJournalFile.Num())
}
- d.frozenJournalFile = nil
- d.frozenMem = nil
- d.memMu.Unlock()
+ db.frozenJournalFile = nil
+ db.frozenMem.decref()
+ db.frozenMem = nil
+ db.memMu.Unlock()
}
// Set closed flag; return true if not already closed.
-func (d *DB) setClosed() bool {
- return atomic.CompareAndSwapUint32(&d.closed, 0, 1)
+func (db *DB) setClosed() bool {
+ return atomic.CompareAndSwapUint32(&db.closed, 0, 1)
}
// Check whether DB was closed.
-func (d *DB) isClosed() bool {
- return atomic.LoadUint32(&d.closed) != 0
+func (db *DB) isClosed() bool {
+ return atomic.LoadUint32(&db.closed) != 0
}
// Check read ok status.
-func (d *DB) ok() error {
- if d.isClosed() {
+func (db *DB) ok() error {
+ if db.isClosed() {
return ErrClosed
}
return nil
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_test.go
index 5de7d9723..38bfbf1ea 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_test.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_test.go
@@ -7,6 +7,10 @@
package leveldb
import (
+ "bytes"
+ "container/list"
+ crand "crypto/rand"
+ "encoding/binary"
"fmt"
"math/rand"
"os"
@@ -20,6 +24,7 @@ import (
"unsafe"
"github.com/syndtr/goleveldb/leveldb/comparer"
+ "github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/filter"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/opt"
@@ -148,25 +153,29 @@ func (h *dbHarness) maxNextLevelOverlappingBytes(want uint64) {
t := h.t
db := h.db
- var res uint64
+ var (
+ maxOverlaps uint64
+ maxLevel int
+ )
v := db.s.version()
for i, tt := range v.tables[1 : len(v.tables)-1] {
level := i + 1
next := v.tables[level+1]
for _, t := range tt {
- var r tFiles
- min, max := t.min.ukey(), t.max.ukey()
- next.getOverlaps(min, max, &r, true, db.s.icmp.ucmp)
+ r := next.getOverlaps(nil, db.s.icmp, t.imin.ukey(), t.imax.ukey(), false)
sum := r.size()
- if sum > res {
- res = sum
+ if sum > maxOverlaps {
+ maxOverlaps = sum
+ maxLevel = level
}
}
}
v.release()
- if res > want {
- t.Errorf("next level overlapping bytes is more than %d, got=%d", want, res)
+ if maxOverlaps > want {
+ t.Errorf("next level most overlapping bytes is more than %d, got=%d level=%d", want, maxOverlaps, maxLevel)
+ } else {
+ t.Logf("next level most overlapping bytes is %d, level=%d want=%d", maxOverlaps, maxLevel, want)
}
}
@@ -239,7 +248,7 @@ func (h *dbHarness) allEntriesFor(key, want string) {
db := h.db
s := db.s
- ikey := newIKey([]byte(key), kMaxSeq, tVal)
+ ikey := newIkey([]byte(key), kMaxSeq, ktVal)
iter := db.newRawIterator(nil, nil)
if !iter.Seek(ikey) && iter.Error() != nil {
t.Error("AllEntries: error during seek, err: ", iter.Error())
@@ -248,19 +257,18 @@ func (h *dbHarness) allEntriesFor(key, want string) {
res := "[ "
first := true
for iter.Valid() {
- rkey := iKey(iter.Key())
- if _, t, ok := rkey.parseNum(); ok {
- if s.icmp.uCompare(ikey.ukey(), rkey.ukey()) != 0 {
+ if ukey, _, kt, kerr := parseIkey(iter.Key()); kerr == nil {
+ if s.icmp.uCompare(ikey.ukey(), ukey) != 0 {
break
}
if !first {
res += ", "
}
first = false
- switch t {
- case tVal:
+ switch kt {
+ case ktVal:
res += string(iter.Value())
- case tDel:
+ case ktDel:
res += "DEL"
}
} else {
@@ -325,6 +333,8 @@ func (h *dbHarness) compactMem() {
t := h.t
db := h.db
+ t.Log("starting memdb compaction")
+
db.writeLockC <- struct{}{}
defer func() {
<-db.writeLockC
@@ -340,6 +350,8 @@ func (h *dbHarness) compactMem() {
if h.totalTables() == 0 {
t.Error("zero tables after mem compaction")
}
+
+ t.Log("memdb compaction done")
}
func (h *dbHarness) compactRangeAtErr(level int, min, max string, wanterr bool) {
@@ -354,6 +366,8 @@ func (h *dbHarness) compactRangeAtErr(level int, min, max string, wanterr bool)
_max = []byte(max)
}
+ t.Logf("starting table range compaction: level=%d, min=%q, max=%q", level, min, max)
+
if err := db.compSendRange(db.tcompCmdC, level, _min, _max); err != nil {
if wanterr {
t.Log("CompactRangeAt: got error (expected): ", err)
@@ -363,6 +377,8 @@ func (h *dbHarness) compactRangeAtErr(level int, min, max string, wanterr bool)
} else if wanterr {
t.Error("CompactRangeAt: expect error")
}
+
+ t.Log("table range compaction done")
}
func (h *dbHarness) compactRangeAt(level int, min, max string) {
@@ -373,6 +389,8 @@ func (h *dbHarness) compactRange(min, max string) {
t := h.t
db := h.db
+ t.Logf("starting DB range compaction: min=%q, max=%q", min, max)
+
var r util.Range
if min != "" {
r.Start = []byte(min)
@@ -383,21 +401,25 @@ func (h *dbHarness) compactRange(min, max string) {
if err := db.CompactRange(r); err != nil {
t.Error("CompactRange: got error: ", err)
}
-}
-func (h *dbHarness) sizeAssert(start, limit string, low, hi uint64) {
- t := h.t
- db := h.db
+ t.Log("DB range compaction done")
+}
- s, err := db.SizeOf([]util.Range{
+func (h *dbHarness) sizeOf(start, limit string) uint64 {
+ sz, err := h.db.SizeOf([]util.Range{
{[]byte(start), []byte(limit)},
})
if err != nil {
- t.Error("SizeOf: got error: ", err)
+ h.t.Error("SizeOf: got error: ", err)
}
- if s.Sum() < low || s.Sum() > hi {
- t.Errorf("sizeof %q to %q not in range, want %d - %d, got %d",
- shorten(start), shorten(limit), low, hi, s.Sum())
+ return sz.Sum()
+}
+
+func (h *dbHarness) sizeAssert(start, limit string, low, hi uint64) {
+ sz := h.sizeOf(start, limit)
+ if sz < low || sz > hi {
+ h.t.Errorf("sizeOf %q to %q not in range, want %d - %d, got %d",
+ shorten(start), shorten(limit), low, hi, sz)
}
}
@@ -504,13 +526,13 @@ func Test_FieldsAligned(t *testing.T) {
p1 := new(DB)
testAligned(t, "DB.seq", unsafe.Offsetof(p1.seq))
p2 := new(session)
- testAligned(t, "session.stFileNum", unsafe.Offsetof(p2.stFileNum))
+ testAligned(t, "session.stNextFileNum", unsafe.Offsetof(p2.stNextFileNum))
testAligned(t, "session.stJournalNum", unsafe.Offsetof(p2.stJournalNum))
testAligned(t, "session.stPrevJournalNum", unsafe.Offsetof(p2.stPrevJournalNum))
- testAligned(t, "session.stSeq", unsafe.Offsetof(p2.stSeq))
+ testAligned(t, "session.stSeqNum", unsafe.Offsetof(p2.stSeqNum))
}
-func TestDb_Locking(t *testing.T) {
+func TestDB_Locking(t *testing.T) {
h := newDbHarness(t)
defer h.stor.Close()
h.openAssert(false)
@@ -518,7 +540,7 @@ func TestDb_Locking(t *testing.T) {
h.openAssert(true)
}
-func TestDb_Empty(t *testing.T) {
+func TestDB_Empty(t *testing.T) {
trun(t, func(h *dbHarness) {
h.get("foo", false)
@@ -527,7 +549,7 @@ func TestDb_Empty(t *testing.T) {
})
}
-func TestDb_ReadWrite(t *testing.T) {
+func TestDB_ReadWrite(t *testing.T) {
trun(t, func(h *dbHarness) {
h.put("foo", "v1")
h.getVal("foo", "v1")
@@ -542,7 +564,7 @@ func TestDb_ReadWrite(t *testing.T) {
})
}
-func TestDb_PutDeleteGet(t *testing.T) {
+func TestDB_PutDeleteGet(t *testing.T) {
trun(t, func(h *dbHarness) {
h.put("foo", "v1")
h.getVal("foo", "v1")
@@ -556,7 +578,7 @@ func TestDb_PutDeleteGet(t *testing.T) {
})
}
-func TestDb_EmptyBatch(t *testing.T) {
+func TestDB_EmptyBatch(t *testing.T) {
h := newDbHarness(t)
defer h.close()
@@ -568,7 +590,7 @@ func TestDb_EmptyBatch(t *testing.T) {
h.get("foo", false)
}
-func TestDb_GetFromFrozen(t *testing.T) {
+func TestDB_GetFromFrozen(t *testing.T) {
h := newDbHarnessWopt(t, &opt.Options{WriteBuffer: 100100})
defer h.close()
@@ -594,7 +616,7 @@ func TestDb_GetFromFrozen(t *testing.T) {
h.get("k2", true)
}
-func TestDb_GetFromTable(t *testing.T) {
+func TestDB_GetFromTable(t *testing.T) {
trun(t, func(h *dbHarness) {
h.put("foo", "v1")
h.compactMem()
@@ -602,7 +624,7 @@ func TestDb_GetFromTable(t *testing.T) {
})
}
-func TestDb_GetSnapshot(t *testing.T) {
+func TestDB_GetSnapshot(t *testing.T) {
trun(t, func(h *dbHarness) {
bar := strings.Repeat("b", 200)
h.put("foo", "v1")
@@ -636,7 +658,7 @@ func TestDb_GetSnapshot(t *testing.T) {
})
}
-func TestDb_GetLevel0Ordering(t *testing.T) {
+func TestDB_GetLevel0Ordering(t *testing.T) {
trun(t, func(h *dbHarness) {
for i := 0; i < 4; i++ {
h.put("bar", fmt.Sprintf("b%d", i))
@@ -659,7 +681,7 @@ func TestDb_GetLevel0Ordering(t *testing.T) {
})
}
-func TestDb_GetOrderedByLevels(t *testing.T) {
+func TestDB_GetOrderedByLevels(t *testing.T) {
trun(t, func(h *dbHarness) {
h.put("foo", "v1")
h.compactMem()
@@ -671,7 +693,7 @@ func TestDb_GetOrderedByLevels(t *testing.T) {
})
}
-func TestDb_GetPicksCorrectFile(t *testing.T) {
+func TestDB_GetPicksCorrectFile(t *testing.T) {
trun(t, func(h *dbHarness) {
// Arrange to have multiple files in a non-level-0 level.
h.put("a", "va")
@@ -695,7 +717,7 @@ func TestDb_GetPicksCorrectFile(t *testing.T) {
})
}
-func TestDb_GetEncountersEmptyLevel(t *testing.T) {
+func TestDB_GetEncountersEmptyLevel(t *testing.T) {
trun(t, func(h *dbHarness) {
// Arrange for the following to happen:
// * sstable A in level 0
@@ -750,7 +772,7 @@ func TestDb_GetEncountersEmptyLevel(t *testing.T) {
})
}
-func TestDb_IterMultiWithDelete(t *testing.T) {
+func TestDB_IterMultiWithDelete(t *testing.T) {
trun(t, func(h *dbHarness) {
h.put("a", "va")
h.put("b", "vb")
@@ -776,7 +798,7 @@ func TestDb_IterMultiWithDelete(t *testing.T) {
})
}
-func TestDb_IteratorPinsRef(t *testing.T) {
+func TestDB_IteratorPinsRef(t *testing.T) {
h := newDbHarness(t)
defer h.close()
@@ -800,7 +822,7 @@ func TestDb_IteratorPinsRef(t *testing.T) {
iter.Release()
}
-func TestDb_Recover(t *testing.T) {
+func TestDB_Recover(t *testing.T) {
trun(t, func(h *dbHarness) {
h.put("foo", "v1")
h.put("baz", "v5")
@@ -822,7 +844,7 @@ func TestDb_Recover(t *testing.T) {
})
}
-func TestDb_RecoverWithEmptyJournal(t *testing.T) {
+func TestDB_RecoverWithEmptyJournal(t *testing.T) {
trun(t, func(h *dbHarness) {
h.put("foo", "v1")
h.put("foo", "v2")
@@ -836,7 +858,7 @@ func TestDb_RecoverWithEmptyJournal(t *testing.T) {
})
}
-func TestDb_RecoverDuringMemtableCompaction(t *testing.T) {
+func TestDB_RecoverDuringMemtableCompaction(t *testing.T) {
truno(t, &opt.Options{WriteBuffer: 1000000}, func(h *dbHarness) {
h.stor.DelaySync(storage.TypeTable)
@@ -852,7 +874,7 @@ func TestDb_RecoverDuringMemtableCompaction(t *testing.T) {
})
}
-func TestDb_MinorCompactionsHappen(t *testing.T) {
+func TestDB_MinorCompactionsHappen(t *testing.T) {
h := newDbHarnessWopt(t, &opt.Options{WriteBuffer: 10000})
defer h.close()
@@ -876,7 +898,7 @@ func TestDb_MinorCompactionsHappen(t *testing.T) {
}
}
-func TestDb_RecoverWithLargeJournal(t *testing.T) {
+func TestDB_RecoverWithLargeJournal(t *testing.T) {
h := newDbHarness(t)
defer h.close()
@@ -901,7 +923,7 @@ func TestDb_RecoverWithLargeJournal(t *testing.T) {
v.release()
}
-func TestDb_CompactionsGenerateMultipleFiles(t *testing.T) {
+func TestDB_CompactionsGenerateMultipleFiles(t *testing.T) {
h := newDbHarnessWopt(t, &opt.Options{
WriteBuffer: 10000000,
Compression: opt.NoCompression,
@@ -939,11 +961,11 @@ func TestDb_CompactionsGenerateMultipleFiles(t *testing.T) {
}
}
-func TestDb_RepeatedWritesToSameKey(t *testing.T) {
+func TestDB_RepeatedWritesToSameKey(t *testing.T) {
h := newDbHarnessWopt(t, &opt.Options{WriteBuffer: 100000})
defer h.close()
- maxTables := kNumLevels + kL0_StopWritesTrigger
+ maxTables := h.o.GetNumLevel() + h.o.GetWriteL0PauseTrigger()
value := strings.Repeat("v", 2*h.o.GetWriteBuffer())
for i := 0; i < 5*maxTables; i++ {
@@ -955,13 +977,13 @@ func TestDb_RepeatedWritesToSameKey(t *testing.T) {
}
}
-func TestDb_RepeatedWritesToSameKeyAfterReopen(t *testing.T) {
+func TestDB_RepeatedWritesToSameKeyAfterReopen(t *testing.T) {
h := newDbHarnessWopt(t, &opt.Options{WriteBuffer: 100000})
defer h.close()
h.reopenDB()
- maxTables := kNumLevels + kL0_StopWritesTrigger
+ maxTables := h.o.GetNumLevel() + h.o.GetWriteL0PauseTrigger()
value := strings.Repeat("v", 2*h.o.GetWriteBuffer())
for i := 0; i < 5*maxTables; i++ {
@@ -973,11 +995,11 @@ func TestDb_RepeatedWritesToSameKeyAfterReopen(t *testing.T) {
}
}
-func TestDb_SparseMerge(t *testing.T) {
+func TestDB_SparseMerge(t *testing.T) {
h := newDbHarnessWopt(t, &opt.Options{Compression: opt.NoCompression})
defer h.close()
- h.putMulti(kNumLevels, "A", "Z")
+ h.putMulti(h.o.GetNumLevel(), "A", "Z")
// Suppose there is:
// small amount of data with prefix A
@@ -1001,6 +1023,7 @@ func TestDb_SparseMerge(t *testing.T) {
h.put("C", "vc2")
h.compactMem()
+ h.waitCompaction()
h.maxNextLevelOverlappingBytes(20 * 1048576)
h.compactRangeAt(0, "", "")
h.waitCompaction()
@@ -1010,7 +1033,7 @@ func TestDb_SparseMerge(t *testing.T) {
h.maxNextLevelOverlappingBytes(20 * 1048576)
}
-func TestDb_SizeOf(t *testing.T) {
+func TestDB_SizeOf(t *testing.T) {
h := newDbHarnessWopt(t, &opt.Options{
Compression: opt.NoCompression,
WriteBuffer: 10000000,
@@ -1060,7 +1083,7 @@ func TestDb_SizeOf(t *testing.T) {
}
}
-func TestDb_SizeOf_MixOfSmallAndLarge(t *testing.T) {
+func TestDB_SizeOf_MixOfSmallAndLarge(t *testing.T) {
h := newDbHarnessWopt(t, &opt.Options{Compression: opt.NoCompression})
defer h.close()
@@ -1098,7 +1121,7 @@ func TestDb_SizeOf_MixOfSmallAndLarge(t *testing.T) {
}
}
-func TestDb_Snapshot(t *testing.T) {
+func TestDB_Snapshot(t *testing.T) {
trun(t, func(h *dbHarness) {
h.put("foo", "v1")
s1 := h.getSnapshot()
@@ -1127,13 +1150,51 @@ func TestDb_Snapshot(t *testing.T) {
})
}
-func TestDb_HiddenValuesAreRemoved(t *testing.T) {
+func TestDB_SnapshotList(t *testing.T) {
+ db := &DB{snapsList: list.New()}
+ e0a := db.acquireSnapshot()
+ e0b := db.acquireSnapshot()
+ db.seq = 1
+ e1 := db.acquireSnapshot()
+ db.seq = 2
+ e2 := db.acquireSnapshot()
+
+ if db.minSeq() != 0 {
+ t.Fatalf("invalid sequence number, got=%d", db.minSeq())
+ }
+ db.releaseSnapshot(e0a)
+ if db.minSeq() != 0 {
+ t.Fatalf("invalid sequence number, got=%d", db.minSeq())
+ }
+ db.releaseSnapshot(e2)
+ if db.minSeq() != 0 {
+ t.Fatalf("invalid sequence number, got=%d", db.minSeq())
+ }
+ db.releaseSnapshot(e0b)
+ if db.minSeq() != 1 {
+ t.Fatalf("invalid sequence number, got=%d", db.minSeq())
+ }
+ e2 = db.acquireSnapshot()
+ if db.minSeq() != 1 {
+ t.Fatalf("invalid sequence number, got=%d", db.minSeq())
+ }
+ db.releaseSnapshot(e1)
+ if db.minSeq() != 2 {
+ t.Fatalf("invalid sequence number, got=%d", db.minSeq())
+ }
+ db.releaseSnapshot(e2)
+ if db.minSeq() != 2 {
+ t.Fatalf("invalid sequence number, got=%d", db.minSeq())
+ }
+}
+
+func TestDB_HiddenValuesAreRemoved(t *testing.T) {
trun(t, func(h *dbHarness) {
s := h.db.s
h.put("foo", "v1")
h.compactMem()
- m := kMaxMemCompactLevel
+ m := h.o.GetMaxMemCompationLevel()
v := s.version()
num := v.tLen(m)
v.release()
@@ -1170,14 +1231,14 @@ func TestDb_HiddenValuesAreRemoved(t *testing.T) {
})
}
-func TestDb_DeletionMarkers2(t *testing.T) {
+func TestDB_DeletionMarkers2(t *testing.T) {
h := newDbHarness(t)
defer h.close()
s := h.db.s
h.put("foo", "v1")
h.compactMem()
- m := kMaxMemCompactLevel
+ m := h.o.GetMaxMemCompationLevel()
v := s.version()
num := v.tLen(m)
v.release()
@@ -1211,8 +1272,8 @@ func TestDb_DeletionMarkers2(t *testing.T) {
h.allEntriesFor("foo", "[ ]")
}
-func TestDb_CompactionTableOpenError(t *testing.T) {
- h := newDbHarnessWopt(t, &opt.Options{MaxOpenFiles: 0})
+func TestDB_CompactionTableOpenError(t *testing.T) {
+ h := newDbHarnessWopt(t, &opt.Options{OpenFilesCacheCapacity: -1})
defer h.close()
im := 10
@@ -1230,14 +1291,14 @@ func TestDb_CompactionTableOpenError(t *testing.T) {
t.Errorf("total tables is %d, want %d", n, im)
}
- h.stor.SetOpenErr(storage.TypeTable)
+ h.stor.SetEmuErr(storage.TypeTable, tsOpOpen)
go h.db.CompactRange(util.Range{})
if err := h.db.compSendIdle(h.db.tcompCmdC); err != nil {
t.Log("compaction error: ", err)
}
h.closeDB0()
h.openDB()
- h.stor.SetOpenErr(0)
+ h.stor.SetEmuErr(0, tsOpOpen)
for i := 0; i < im; i++ {
for j := 0; j < jm; j++ {
@@ -1246,9 +1307,9 @@ func TestDb_CompactionTableOpenError(t *testing.T) {
}
}
-func TestDb_OverlapInLevel0(t *testing.T) {
+func TestDB_OverlapInLevel0(t *testing.T) {
trun(t, func(h *dbHarness) {
- if kMaxMemCompactLevel != 2 {
+ if h.o.GetMaxMemCompationLevel() != 2 {
t.Fatal("fix test to reflect the config")
}
@@ -1289,7 +1350,7 @@ func TestDb_OverlapInLevel0(t *testing.T) {
})
}
-func TestDb_L0_CompactionBug_Issue44_a(t *testing.T) {
+func TestDB_L0_CompactionBug_Issue44_a(t *testing.T) {
h := newDbHarness(t)
defer h.close()
@@ -1309,7 +1370,7 @@ func TestDb_L0_CompactionBug_Issue44_a(t *testing.T) {
h.getKeyVal("(a->v)")
}
-func TestDb_L0_CompactionBug_Issue44_b(t *testing.T) {
+func TestDB_L0_CompactionBug_Issue44_b(t *testing.T) {
h := newDbHarness(t)
defer h.close()
@@ -1338,7 +1399,7 @@ func TestDb_L0_CompactionBug_Issue44_b(t *testing.T) {
h.getKeyVal("(->)(c->cv)")
}
-func TestDb_SingleEntryMemCompaction(t *testing.T) {
+func TestDB_SingleEntryMemCompaction(t *testing.T) {
trun(t, func(h *dbHarness) {
for i := 0; i < 10; i++ {
h.put("big", strings.Repeat("v", opt.DefaultWriteBuffer))
@@ -1355,7 +1416,7 @@ func TestDb_SingleEntryMemCompaction(t *testing.T) {
})
}
-func TestDb_ManifestWriteError(t *testing.T) {
+func TestDB_ManifestWriteError(t *testing.T) {
for i := 0; i < 2; i++ {
func() {
h := newDbHarness(t)
@@ -1368,23 +1429,23 @@ func TestDb_ManifestWriteError(t *testing.T) {
h.compactMem()
h.getVal("foo", "bar")
v := h.db.s.version()
- if n := v.tLen(kMaxMemCompactLevel); n != 1 {
+ if n := v.tLen(h.o.GetMaxMemCompationLevel()); n != 1 {
t.Errorf("invalid total tables, want=1 got=%d", n)
}
v.release()
if i == 0 {
- h.stor.SetWriteErr(storage.TypeManifest)
+ h.stor.SetEmuErr(storage.TypeManifest, tsOpWrite)
} else {
- h.stor.SetSyncErr(storage.TypeManifest)
+ h.stor.SetEmuErr(storage.TypeManifest, tsOpSync)
}
// Merging compaction (will fail)
- h.compactRangeAtErr(kMaxMemCompactLevel, "", "", true)
+ h.compactRangeAtErr(h.o.GetMaxMemCompationLevel(), "", "", true)
h.db.Close()
- h.stor.SetWriteErr(0)
- h.stor.SetSyncErr(0)
+ h.stor.SetEmuErr(0, tsOpWrite)
+ h.stor.SetEmuErr(0, tsOpSync)
// Should not lose data
h.openDB()
@@ -1405,7 +1466,7 @@ func assertErr(t *testing.T, err error, wanterr bool) {
}
}
-func TestDb_ClosedIsClosed(t *testing.T) {
+func TestDB_ClosedIsClosed(t *testing.T) {
h := newDbHarness(t)
db := h.db
@@ -1500,7 +1561,7 @@ func (p numberComparer) Compare(a, b []byte) int {
func (numberComparer) Separator(dst, a, b []byte) []byte { return nil }
func (numberComparer) Successor(dst, b []byte) []byte { return nil }
-func TestDb_CustomComparer(t *testing.T) {
+func TestDB_CustomComparer(t *testing.T) {
h := newDbHarnessWopt(t, &opt.Options{
Comparer: numberComparer{},
WriteBuffer: 1000,
@@ -1530,11 +1591,11 @@ func TestDb_CustomComparer(t *testing.T) {
}
}
-func TestDb_ManualCompaction(t *testing.T) {
+func TestDB_ManualCompaction(t *testing.T) {
h := newDbHarness(t)
defer h.close()
- if kMaxMemCompactLevel != 2 {
+ if h.o.GetMaxMemCompationLevel() != 2 {
t.Fatal("fix test to reflect the config")
}
@@ -1568,10 +1629,10 @@ func TestDb_ManualCompaction(t *testing.T) {
h.tablesPerLevel("0,0,1")
}
-func TestDb_BloomFilter(t *testing.T) {
+func TestDB_BloomFilter(t *testing.T) {
h := newDbHarnessWopt(t, &opt.Options{
- BlockCache: opt.NoCache,
- Filter: filter.NewBloomFilter(10),
+ DisableBlockCache: true,
+ Filter: filter.NewBloomFilter(10),
})
defer h.close()
@@ -1579,7 +1640,7 @@ func TestDb_BloomFilter(t *testing.T) {
return fmt.Sprintf("key%06d", i)
}
- n := 10000
+ const n = 10000
// Populate multiple layers
for i := 0; i < n; i++ {
@@ -1621,7 +1682,7 @@ func TestDb_BloomFilter(t *testing.T) {
h.stor.ReleaseSync(storage.TypeTable)
}
-func TestDb_Concurrent(t *testing.T) {
+func TestDB_Concurrent(t *testing.T) {
const n, secs, maxkey = 4, 2, 1000
runtime.GOMAXPROCS(n)
@@ -1686,7 +1747,7 @@ func TestDb_Concurrent(t *testing.T) {
runtime.GOMAXPROCS(1)
}
-func TestDb_Concurrent2(t *testing.T) {
+func TestDB_Concurrent2(t *testing.T) {
const n, n2 = 4, 4000
runtime.GOMAXPROCS(n*2 + 2)
@@ -1757,7 +1818,7 @@ func TestDb_Concurrent2(t *testing.T) {
runtime.GOMAXPROCS(1)
}
-func TestDb_CreateReopenDbOnFile(t *testing.T) {
+func TestDB_CreateReopenDbOnFile(t *testing.T) {
dbpath := filepath.Join(os.TempDir(), fmt.Sprintf("goleveldbtestCreateReopenDbOnFile-%d", os.Getuid()))
if err := os.RemoveAll(dbpath); err != nil {
t.Fatal("cannot remove old db: ", err)
@@ -1785,7 +1846,7 @@ func TestDb_CreateReopenDbOnFile(t *testing.T) {
}
}
-func TestDb_CreateReopenDbOnFile2(t *testing.T) {
+func TestDB_CreateReopenDbOnFile2(t *testing.T) {
dbpath := filepath.Join(os.TempDir(), fmt.Sprintf("goleveldbtestCreateReopenDbOnFile2-%d", os.Getuid()))
if err := os.RemoveAll(dbpath); err != nil {
t.Fatal("cannot remove old db: ", err)
@@ -1806,7 +1867,7 @@ func TestDb_CreateReopenDbOnFile2(t *testing.T) {
}
}
-func TestDb_DeletionMarkersOnMemdb(t *testing.T) {
+func TestDB_DeletionMarkersOnMemdb(t *testing.T) {
h := newDbHarness(t)
defer h.close()
@@ -1817,8 +1878,8 @@ func TestDb_DeletionMarkersOnMemdb(t *testing.T) {
h.getKeyVal("")
}
-func TestDb_LeveldbIssue178(t *testing.T) {
- nKeys := (kMaxTableSize / 30) * 5
+func TestDB_LeveldbIssue178(t *testing.T) {
+ nKeys := (opt.DefaultCompactionTableSize / 30) * 5
key1 := func(i int) string {
return fmt.Sprintf("my_key_%d", i)
}
@@ -1860,7 +1921,7 @@ func TestDb_LeveldbIssue178(t *testing.T) {
h.assertNumKeys(nKeys)
}
-func TestDb_LeveldbIssue200(t *testing.T) {
+func TestDB_LeveldbIssue200(t *testing.T) {
h := newDbHarness(t)
defer h.close()
@@ -1886,3 +1947,719 @@ func TestDb_LeveldbIssue200(t *testing.T) {
iter.Next()
assertBytes(t, []byte("5"), iter.Key())
}
+
+// TestDB_GoleveldbIssue74 runs one writer and one reader goroutine against
+// the same DB for a fixed duration (named after goleveldb issue 74).
+//
+// The writer repeatedly writes n KEY/PTR pairs in a single batch, checks them
+// through a snapshot, and then deletes them in a second batch. The reader
+// iterates the "PTR" prefix in its own snapshots and verifies that every
+// pointer resolves, that all values seen in one snapshot share the same
+// suffix, and that a snapshot holds either no pointers or exactly n of them.
+//
+// Failures inside the goroutines are reported with t.Errorf followed by
+// return: t.Fatalf must only be called from the goroutine running the test.
+// The deferred handlers still set done and release the WaitGroup.
+func TestDB_GoleveldbIssue74(t *testing.T) {
+	h := newDbHarnessWopt(t, &opt.Options{
+		WriteBuffer: 1 * opt.MiB,
+	})
+	defer h.close()
+
+	const n, dur = 10000, 5 * time.Second
+
+	runtime.GOMAXPROCS(runtime.NumCPU())
+
+	until := time.Now().Add(dur)
+	wg := new(sync.WaitGroup)
+	wg.Add(2)
+	// done is set by whichever goroutine exits first so the other one stops.
+	var done uint32
+	go func() {
+		var i int
+		defer func() {
+			t.Logf("WRITER DONE #%d", i)
+			atomic.StoreUint32(&done, 1)
+			wg.Done()
+		}()
+
+		b := new(Batch)
+		for ; time.Now().Before(until) && atomic.LoadUint32(&done) == 0; i++ {
+			// Every value written in this round carries the same suffix.
+			iv := fmt.Sprintf("VAL%010d", i)
+			for k := 0; k < n; k++ {
+				key := fmt.Sprintf("KEY%06d", k)
+				b.Put([]byte(key), []byte(key+iv))
+				b.Put([]byte(fmt.Sprintf("PTR%06d", k)), []byte(key))
+			}
+			h.write(b)
+
+			// Reuse the batch to collect the deletions for this round.
+			b.Reset()
+			snap := h.getSnapshot()
+			iter := snap.NewIterator(util.BytesPrefix([]byte("PTR")), nil)
+			var k int
+			for ; iter.Next(); k++ {
+				ptrKey := iter.Key()
+				key := iter.Value()
+
+				if _, err := snap.Get(ptrKey, nil); err != nil {
+					t.Errorf("WRITER #%d snapshot.Get %q: %v", i, ptrKey, err)
+					return
+				}
+				value, err := snap.Get(key, nil)
+				if err != nil {
+					t.Errorf("WRITER #%d snapshot.Get %q: %v", i, key, err)
+					return
+				}
+				if string(value) != string(key)+iv {
+					t.Errorf("WRITER #%d snapshot.Get %q got invalid value, want %q got %q", i, key, string(key)+iv, value)
+					return
+				}
+
+				b.Delete(key)
+				b.Delete(ptrKey)
+			}
+			h.write(b)
+			iter.Release()
+			snap.Release()
+			if k != n {
+				t.Errorf("#%d %d != %d", i, k, n)
+				return
+			}
+		}
+	}()
+	go func() {
+		var i int
+		defer func() {
+			t.Logf("READER DONE #%d", i)
+			atomic.StoreUint32(&done, 1)
+			wg.Done()
+		}()
+		for ; time.Now().Before(until) && atomic.LoadUint32(&done) == 0; i++ {
+			snap := h.getSnapshot()
+			iter := snap.NewIterator(util.BytesPrefix([]byte("PTR")), nil)
+			// prevValue holds the value suffix seen for the previous key;
+			// all keys in a single snapshot must share it.
+			var prevValue string
+			var k int
+			for ; iter.Next(); k++ {
+				ptrKey := iter.Key()
+				key := iter.Value()
+
+				if _, err := snap.Get(ptrKey, nil); err != nil {
+					t.Errorf("READER #%d snapshot.Get %q: %v", i, ptrKey, err)
+					return
+				}
+
+				value, err := snap.Get(key, nil)
+				if err != nil {
+					t.Errorf("READER #%d snapshot.Get %q: %v", i, key, err)
+					return
+				}
+				if prevValue != "" && string(value) != string(key)+prevValue {
+					t.Errorf("READER #%d snapshot.Get %q got invalid value, want %q got %q", i, key, string(key)+prevValue, value)
+					return
+				}
+				prevValue = string(value[len(key):])
+			}
+			iter.Release()
+			snap.Release()
+			// A snapshot taken between rounds may be empty; otherwise it
+			// must contain the full set of n pointers.
+			if k > 0 && k != n {
+				t.Errorf("#%d %d != %d", i, k, n)
+				return
+			}
+		}
+	}()
+	wg.Wait()
+}
+
+// TestDB_GetProperties checks how GetProperty handles the level suffix of the
+// "leveldb.num-files-at-level" property: a missing level and a malformed
+// level must both produce an error, while a valid level must not.
+func TestDB_GetProperties(t *testing.T) {
+	h := newDbHarness(t)
+	defer h.close()
+
+	// No level number at all.
+	if _, err := h.db.GetProperty("leveldb.num-files-at-level"); err == nil {
+		t.Error("GetProperty() failed to detect missing level")
+	}
+
+	// Well-formed level number.
+	if _, err := h.db.GetProperty("leveldb.num-files-at-level0"); err != nil {
+		t.Error("got unexpected error", err)
+	}
+
+	// Trailing garbage after the level number.
+	if _, err := h.db.GetProperty("leveldb.num-files-at-level0x"); err == nil {
+		t.Error("GetProperty() failed to detect invalid level")
+	}
+}
+
+// TestDB_GoleveldbIssue72and83 stresses concurrent writes and reads with a
+// very small open-files cache (named after goleveldb issues 72 and 83).
+//
+// Records are self-checking: randomData produces a prefix byte, random
+// payload, the write iteration number (uint32 at len-8) and a CRC of all
+// preceding bytes (uint32 at len-4). The writer performs wn batches; each
+// batch stores, for every fixed key k1, a fresh k2 record plus k1 -> k2.
+// READER0 walks the k1 prefix in a snapshot and validates checksums, the
+// write iteration and that k2 is readable. READER1 walks the whole DB
+// backwards and checks the total entry count.
+//
+// Failures inside the goroutines are reported with t.Errorf followed by
+// return: t.Fatalf must only be called from the goroutine running the test.
+func TestDB_GoleveldbIssue72and83(t *testing.T) {
+	h := newDbHarnessWopt(t, &opt.Options{
+		WriteBuffer:            1 * opt.MiB,
+		OpenFilesCacheCapacity: 3,
+	})
+	defer h.close()
+
+	const n, wn, dur = 10000, 100, 30 * time.Second
+
+	runtime.GOMAXPROCS(runtime.NumCPU())
+
+	randomData := func(prefix byte, i int) []byte {
+		data := make([]byte, 1+4+32+64+32)
+		_, err := crand.Reader.Read(data[1 : len(data)-8])
+		if err != nil {
+			panic(err)
+		}
+		data[0] = prefix
+		binary.LittleEndian.PutUint32(data[len(data)-8:], uint32(i))
+		binary.LittleEndian.PutUint32(data[len(data)-4:], util.NewCRC(data[:len(data)-4]).Value())
+		return data
+	}
+
+	keys := make([][]byte, n)
+	for i := range keys {
+		keys[i] = randomData(1, 0)
+	}
+
+	until := time.Now().Add(dur)
+	wg := new(sync.WaitGroup)
+	wg.Add(3)
+	// done is raised on a writer error or when any reader exits, so the
+	// remaining goroutines stop early.
+	var done uint32
+	go func() {
+		i := 0
+		defer func() {
+			t.Logf("WRITER DONE #%d", i)
+			wg.Done()
+		}()
+
+		b := new(Batch)
+		for ; i < wn && atomic.LoadUint32(&done) == 0; i++ {
+			b.Reset()
+			for _, k1 := range keys {
+				k2 := randomData(2, i)
+				b.Put(k2, randomData(42, i))
+				b.Put(k1, k2)
+			}
+			if err := h.db.Write(b, h.wo); err != nil {
+				atomic.StoreUint32(&done, 1)
+				t.Errorf("WRITER #%d db.Write: %v", i, err)
+				return
+			}
+		}
+	}()
+	go func() {
+		var i int
+		defer func() {
+			t.Logf("READER0 DONE #%d", i)
+			atomic.StoreUint32(&done, 1)
+			wg.Done()
+		}()
+		for ; time.Now().Before(until) && atomic.LoadUint32(&done) == 0; i++ {
+			snap := h.getSnapshot()
+			seq := snap.elem.seq
+			if seq == 0 {
+				// Nothing written yet.
+				snap.Release()
+				continue
+			}
+			iter := snap.NewIterator(util.BytesPrefix([]byte{1}), nil)
+			// Each batch holds 2*n puts, so the sequence number identifies
+			// the last write iteration visible in this snapshot.
+			writei := int(seq/(n*2) - 1)
+			var k int
+			for ; iter.Next(); k++ {
+				k1 := iter.Key()
+				k2 := iter.Value()
+				k1checksum0 := binary.LittleEndian.Uint32(k1[len(k1)-4:])
+				k1checksum1 := util.NewCRC(k1[:len(k1)-4]).Value()
+				if k1checksum0 != k1checksum1 {
+					t.Errorf("READER0 #%d.%d W#%d invalid K1 checksum: %#x != %#x", i, k, writei, k1checksum0, k1checksum1)
+					return
+				}
+				k2checksum0 := binary.LittleEndian.Uint32(k2[len(k2)-4:])
+				k2checksum1 := util.NewCRC(k2[:len(k2)-4]).Value()
+				if k2checksum0 != k2checksum1 {
+					t.Errorf("READER0 #%d.%d W#%d invalid K2 checksum: %#x != %#x", i, k, writei, k2checksum0, k2checksum1)
+					return
+				}
+				kwritei := int(binary.LittleEndian.Uint32(k2[len(k2)-8:]))
+				if writei != kwritei {
+					t.Errorf("READER0 #%d.%d W#%d invalid write iteration num: %d", i, k, writei, kwritei)
+					return
+				}
+				if _, err := snap.Get(k2, nil); err != nil {
+					t.Errorf("READER0 #%d.%d W#%d snap.Get: %v\nk1: %x\n -> k2: %x", i, k, writei, err, k1, k2)
+					return
+				}
+			}
+			if err := iter.Error(); err != nil {
+				t.Errorf("READER0 #%d.%d W#%d snap.Iterator: %v", i, k, writei, err)
+				return
+			}
+			iter.Release()
+			snap.Release()
+			if k > 0 && k != n {
+				t.Errorf("READER0 #%d W#%d short read, got=%d want=%d", i, writei, k, n)
+				return
+			}
+		}
+	}()
+	go func() {
+		var i int
+		defer func() {
+			t.Logf("READER1 DONE #%d", i)
+			atomic.StoreUint32(&done, 1)
+			wg.Done()
+		}()
+		for ; time.Now().Before(until) && atomic.LoadUint32(&done) == 0; i++ {
+			iter := h.db.NewIterator(nil, nil)
+			seq := iter.(*dbIter).seq
+			if seq == 0 {
+				iter.Release()
+				continue
+			}
+			writei := int(seq/(n*2) - 1)
+			var k int
+			for ok := iter.Last(); ok; ok = iter.Prev() {
+				k++
+			}
+			if err := iter.Error(); err != nil {
+				t.Errorf("READER1 #%d.%d W#%d db.Iterator: %v", i, k, writei, err)
+				return
+			}
+			iter.Release()
+			// n fixed k1 keys plus n k2 records per completed write iteration.
+			if m := (writei+1)*n + n; k != m {
+				t.Errorf("READER1 #%d W#%d short read, got=%d want=%d", i, writei, k, m)
+				return
+			}
+		}
+	}()
+
+	wg.Wait()
+}
+
+// TestDB_TransientError checks that emulated random storage errors on table
+// files do not cause lost or corrupted data.
+//
+// Phase 1 (test goroutine): for each of nSnap rounds, write nKey keys while
+// random open/read errors are enabled for table files, take a snapshot, then
+// delete all keys again. A failed write is retried with the emulated errors
+// switched off; an error reported as corruption fails the test.
+//
+// Phase 2: for every snapshot, one goroutine reads all keys with Get in a
+// random order and another walks the snapshot with an iterator; both expect
+// exactly the values written in that snapshot's round.
+//
+// Failures inside the goroutines are reported with t.Errorf followed by
+// return: t.Fatalf must only be called from the goroutine running the test.
+func TestDB_TransientError(t *testing.T) {
+	h := newDbHarnessWopt(t, &opt.Options{
+		WriteBuffer:              128 * opt.KiB,
+		OpenFilesCacheCapacity:   3,
+		DisableCompactionBackoff: true,
+	})
+	defer h.close()
+
+	const (
+		nSnap = 20
+		nKey  = 10000
+	)
+
+	var (
+		snaps [nSnap]*Snapshot
+		b     = &Batch{}
+	)
+	for i := range snaps {
+		vtail := fmt.Sprintf("VAL%030d", i)
+		b.Reset()
+		for k := 0; k < nKey; k++ {
+			key := fmt.Sprintf("KEY%8d", k)
+			b.Put([]byte(key), []byte(key+vtail))
+		}
+		h.stor.SetEmuRandErr(storage.TypeTable, tsOpOpen, tsOpRead, tsOpReadAt)
+		if err := h.db.Write(b, nil); err != nil {
+			t.Logf("WRITE #%d error: %v", i, err)
+			h.stor.SetEmuRandErr(0, tsOpOpen, tsOpRead, tsOpReadAt, tsOpWrite)
+			// Retry until the write goes through; corruption is fatal.
+			for {
+				if err := h.db.Write(b, nil); err == nil {
+					break
+				} else if errors.IsCorrupted(err) {
+					t.Fatalf("WRITE #%d corrupted: %v", i, err)
+				}
+			}
+		}
+
+		snaps[i] = h.db.newSnapshot()
+		b.Reset()
+		for k := 0; k < nKey; k++ {
+			key := fmt.Sprintf("KEY%8d", k)
+			b.Delete([]byte(key))
+		}
+		h.stor.SetEmuRandErr(storage.TypeTable, tsOpOpen, tsOpRead, tsOpReadAt)
+		if err := h.db.Write(b, nil); err != nil {
+			t.Logf("WRITE #%d error: %v", i, err)
+			h.stor.SetEmuRandErr(0, tsOpOpen, tsOpRead, tsOpReadAt)
+			for {
+				if err := h.db.Write(b, nil); err == nil {
+					break
+				} else if errors.IsCorrupted(err) {
+					t.Fatalf("WRITE #%d corrupted: %v", i, err)
+				}
+			}
+		}
+	}
+	// Verification below runs without emulated errors.
+	h.stor.SetEmuRandErr(0, tsOpOpen, tsOpRead, tsOpReadAt)
+
+	runtime.GOMAXPROCS(runtime.NumCPU())
+
+	rnd := rand.New(rand.NewSource(0xecafdaed))
+	wg := &sync.WaitGroup{}
+	for i, snap := range snaps {
+		wg.Add(2)
+
+		// Random-order point lookups. rnd.Perm is evaluated here, in the
+		// test goroutine, so rnd is never shared between goroutines.
+		go func(i int, snap *Snapshot, sk []int) {
+			defer wg.Done()
+
+			vtail := fmt.Sprintf("VAL%030d", i)
+			for _, k := range sk {
+				key := fmt.Sprintf("KEY%8d", k)
+				xvalue, err := snap.Get([]byte(key), nil)
+				if err != nil {
+					t.Errorf("READER_GET #%d SEQ=%d K%d error: %v", i, snap.elem.seq, k, err)
+					return
+				}
+				value := key + vtail
+				if !bytes.Equal([]byte(value), xvalue) {
+					t.Errorf("READER_GET #%d SEQ=%d K%d invalid value: want %q, got %q", i, snap.elem.seq, k, value, xvalue)
+					return
+				}
+			}
+		}(i, snap, rnd.Perm(nKey))
+
+		// Ordered scan through an iterator.
+		go func(i int, snap *Snapshot) {
+			defer wg.Done()
+
+			vtail := fmt.Sprintf("VAL%030d", i)
+			iter := snap.NewIterator(nil, nil)
+			defer iter.Release()
+			for k := 0; k < nKey; k++ {
+				if !iter.Next() {
+					if err := iter.Error(); err != nil {
+						t.Errorf("READER_ITER #%d K%d error: %v", i, k, err)
+					} else {
+						t.Errorf("READER_ITER #%d K%d eoi", i, k)
+					}
+					return
+				}
+				key := fmt.Sprintf("KEY%8d", k)
+				xkey := iter.Key()
+				if !bytes.Equal([]byte(key), xkey) {
+					t.Errorf("READER_ITER #%d K%d invalid key: want %q, got %q", i, k, key, xkey)
+					return
+				}
+				value := key + vtail
+				xvalue := iter.Value()
+				if !bytes.Equal([]byte(value), xvalue) {
+					t.Errorf("READER_ITER #%d K%d invalid value: want %q, got %q", i, k, value, xvalue)
+					return
+				}
+			}
+		}(i, snap)
+	}
+
+	wg.Wait()
+}
+
+// TestDB_UkeyShouldntHopAcrossTable writes nSnap versions of the same nKey
+// user keys (each round: put all keys, snapshot, delete all keys), forces
+// memdb and range compactions with small table sizes, and then verifies that
+// every snapshot still reads back exactly the values written in its round.
+//
+// Failures inside the reader goroutines are reported with t.Errorf followed
+// by return: t.Fatalf must only be called from the goroutine running the
+// test.
+func TestDB_UkeyShouldntHopAcrossTable(t *testing.T) {
+	h := newDbHarnessWopt(t, &opt.Options{
+		WriteBuffer:                 112 * opt.KiB,
+		CompactionTableSize:         90 * opt.KiB,
+		CompactionExpandLimitFactor: 1,
+	})
+	defer h.close()
+
+	const (
+		nSnap = 190
+		nKey  = 140
+	)
+
+	var (
+		snaps [nSnap]*Snapshot
+		b     = &Batch{}
+	)
+	for i := range snaps {
+		vtail := fmt.Sprintf("VAL%030d", i)
+		b.Reset()
+		for k := 0; k < nKey; k++ {
+			key := fmt.Sprintf("KEY%08d", k)
+			b.Put([]byte(key), []byte(key+vtail))
+		}
+		if err := h.db.Write(b, nil); err != nil {
+			t.Fatalf("WRITE #%d error: %v", i, err)
+		}
+
+		snaps[i] = h.db.newSnapshot()
+		b.Reset()
+		for k := 0; k < nKey; k++ {
+			key := fmt.Sprintf("KEY%08d", k)
+			b.Delete([]byte(key))
+		}
+		if err := h.db.Write(b, nil); err != nil {
+			t.Fatalf("WRITE #%d error: %v", i, err)
+		}
+	}
+
+	// logTables dumps the key range of every table in the current version,
+	// level by level, to help diagnose a failure.
+	logTables := func() {
+		for level, tables := range h.db.s.stVersion.tables {
+			for _, table := range tables {
+				t.Logf("L%d@%d %q:%q", level, table.file.Num(), table.imin, table.imax)
+			}
+		}
+	}
+
+	h.compactMem()
+
+	h.waitCompaction()
+	logTables()
+
+	h.compactRangeAt(0, "", "")
+	h.waitCompaction()
+	logTables()
+
+	h.compactRangeAt(1, "", "")
+	h.waitCompaction()
+	logTables()
+
+	runtime.GOMAXPROCS(runtime.NumCPU())
+
+	wg := &sync.WaitGroup{}
+	for i, snap := range snaps {
+		wg.Add(1)
+
+		go func(i int, snap *Snapshot) {
+			defer wg.Done()
+
+			vtail := fmt.Sprintf("VAL%030d", i)
+			for k := 0; k < nKey; k++ {
+				key := fmt.Sprintf("KEY%08d", k)
+				xvalue, err := snap.Get([]byte(key), nil)
+				if err != nil {
+					t.Errorf("READER_GET #%d SEQ=%d K%d error: %v", i, snap.elem.seq, k, err)
+					return
+				}
+				value := key + vtail
+				if !bytes.Equal([]byte(value), xvalue) {
+					t.Errorf("READER_GET #%d SEQ=%d K%d invalid value: want %q, got %q", i, snap.elem.seq, k, value, xvalue)
+					return
+				}
+			}
+		}(i, snap)
+	}
+
+	wg.Wait()
+}
+
+// TestDB_TableCompactionBuilder drives tableCompactionBuilder directly on a
+// bare session (no DB). It builds tables by hand, runs three compactions, and
+// checks the resulting table layout, that no user key ends one level-1 table
+// and starts the next, and that a compaction retried under emulated storage
+// errors still produces output matching its input key for key.
+func TestDB_TableCompactionBuilder(t *testing.T) {
+ stor := newTestStorage(t)
+ defer stor.Close()
+
+ const nSeq = 99
+
+ o := &opt.Options{
+ WriteBuffer: 112 * opt.KiB,
+ CompactionTableSize: 43 * opt.KiB,
+ CompactionExpandLimitFactor: 1,
+ CompactionGPOverlapsFactor: 1,
+ DisableBlockCache: true,
+ }
+ s, err := newSession(stor, o)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if err := s.create(); err != nil {
+ t.Fatal(err)
+ }
+ defer s.close()
+ var (
+ seq uint64
+ targetSize = 5 * o.CompactionTableSize
+ value = bytes.Repeat([]byte{'0'}, 100)
+ )
+ // Write two tables of at least targetSize bytes: the first is added at
+ // level 0, the second at level 1. Every user key is appended nSeq times
+ // with descending sequence numbers (seq-x).
+ for i := 0; i < 2; i++ {
+ tw, err := s.tops.create()
+ if err != nil {
+ t.Fatal(err)
+ }
+ for k := 0; tw.tw.BytesLen() < targetSize; k++ {
+ key := []byte(fmt.Sprintf("%09d", k))
+ seq += nSeq - 1
+ for x := uint64(0); x < nSeq; x++ {
+ if err := tw.append(newIkey(key, seq-x, ktVal), value); err != nil {
+ t.Fatal(err)
+ }
+ }
+ }
+ tf, err := tw.finish()
+ if err != nil {
+ t.Fatal(err)
+ }
+ rec := &sessionRecord{numLevel: s.o.GetNumLevel()}
+ rec.addTableFile(i, tf)
+ if err := s.commit(rec); err != nil {
+ t.Fatal(err)
+ }
+ }
+
+ // Build grandparent.
+ // Compacts the level-1 tables with a reduced table size; the source
+ // tables are deleted from c.level afterwards.
+ v := s.version()
+ c := newCompaction(s, v, 1, append(tFiles{}, v.tables[1]...))
+ rec := &sessionRecord{numLevel: s.o.GetNumLevel()}
+ b := &tableCompactionBuilder{
+ s: s,
+ c: c,
+ rec: rec,
+ stat1: new(cStatsStaging),
+ minSeq: 0,
+ strict: true,
+ tableSize: o.CompactionTableSize/3 + 961,
+ }
+ if err := b.run(new(compactionTransactCounter)); err != nil {
+ t.Fatal(err)
+ }
+ // NOTE: the loop variable t shadows the *testing.T inside this loop.
+ for _, t := range c.tables[0] {
+ rec.delTable(c.level, t.file.Num())
+ }
+ if err := s.commit(rec); err != nil {
+ t.Fatal(err)
+ }
+ c.release()
+
+ // Build level-1.
+ // Compacts the level-0 tables using the configured table size.
+ v = s.version()
+ c = newCompaction(s, v, 0, append(tFiles{}, v.tables[0]...))
+ rec = &sessionRecord{numLevel: s.o.GetNumLevel()}
+ b = &tableCompactionBuilder{
+ s: s,
+ c: c,
+ rec: rec,
+ stat1: new(cStatsStaging),
+ minSeq: 0,
+ strict: true,
+ tableSize: o.CompactionTableSize,
+ }
+ if err := b.run(new(compactionTransactCounter)); err != nil {
+ t.Fatal(err)
+ }
+ for _, t := range c.tables[0] {
+ rec.delTable(c.level, t.file.Num())
+ }
+ // Move grandparent to level-3
+ for _, t := range v.tables[2] {
+ rec.delTable(2, t.file.Num())
+ rec.addTableFile(3, t)
+ }
+ if err := s.commit(rec); err != nil {
+ t.Fatal(err)
+ }
+ c.release()
+
+ // Expected layout: only levels 1 and 3 hold tables.
+ v = s.version()
+ for level, want := range []bool{false, true, false, true, false} {
+ got := len(v.tables[level]) > 0
+ if want != got {
+ t.Fatalf("invalid level-%d tables len: want %v, got %v", level, want, got)
+ }
+ }
+ // A user key must not be the last key of one level-1 table and the first
+ // key of the next one.
+ for i, f := range v.tables[1][:len(v.tables[1])-1] {
+ nf := v.tables[1][i+1]
+ if bytes.Equal(f.imax.ukey(), nf.imin.ukey()) {
+ t.Fatalf("KEY %q hop across table %d .. %d", f.imax.ukey(), f.file.Num(), nf.file.Num())
+ }
+ }
+ v.release()
+
+ // Compaction with transient error.
+ // One sync error plus random read/write errors are emulated on table
+ // files; b.run is retried until it succeeds.
+ v = s.version()
+ c = newCompaction(s, v, 1, append(tFiles{}, v.tables[1]...))
+ rec = &sessionRecord{numLevel: s.o.GetNumLevel()}
+ b = &tableCompactionBuilder{
+ s: s,
+ c: c,
+ rec: rec,
+ stat1: new(cStatsStaging),
+ minSeq: 0,
+ strict: true,
+ tableSize: o.CompactionTableSize,
+ }
+ stor.SetEmuErrOnce(storage.TypeTable, tsOpSync)
+ stor.SetEmuRandErr(storage.TypeTable, tsOpRead, tsOpReadAt, tsOpWrite)
+ stor.SetEmuRandErrProb(0xf0)
+ for {
+ if err := b.run(new(compactionTransactCounter)); err != nil {
+ t.Logf("(expected) b.run: %v", err)
+ } else {
+ break
+ }
+ }
+ // The source tables are not deleted from the record here, so after the
+ // commit the level-1 inputs remain alongside the compaction output
+ // (presumably at level 2, which is what the comparison below reads).
+ if err := s.commit(rec); err != nil {
+ t.Fatal(err)
+ }
+ c.release()
+
+ stor.SetEmuErrOnce(0, tsOpSync)
+ stor.SetEmuRandErr(0, tsOpRead, tsOpReadAt, tsOpWrite)
+
+ // Level 1 and level 2 must now hold the same number of tables with
+ // identical key sequences.
+ v = s.version()
+ if len(v.tables[1]) != len(v.tables[2]) {
+ t.Fatalf("invalid tables length, want %d, got %d", len(v.tables[1]), len(v.tables[2]))
+ }
+ for i, f0 := range v.tables[1] {
+ f1 := v.tables[2][i]
+ iter0 := s.tops.newIterator(f0, nil, nil)
+ iter1 := s.tops.newIterator(f1, nil, nil)
+ for j := 0; true; j++ {
+ next0 := iter0.Next()
+ next1 := iter1.Next()
+ if next0 != next1 {
+ t.Fatalf("#%d.%d invalid eoi: want %v, got %v", i, j, next0, next1)
+ }
+ key0 := iter0.Key()
+ key1 := iter1.Key()
+ if !bytes.Equal(key0, key1) {
+ t.Fatalf("#%d.%d invalid key: want %q, got %q", i, j, key0, key1)
+ }
+ if next0 == false {
+ break
+ }
+ }
+ iter0.Release()
+ iter1.Release()
+ }
+ v.release()
+}
+
+// testDB_IterTriggeredCompaction verifies that merely iterating over a range
+// full of deleted entries eventually triggers compaction of that range.
+//
+// It fills the DB with n large values, deletes them all, and then repeatedly
+// iterates over the keys below key(n/limitDiv) until the on-disk size of that
+// range drops below a tenth of its initial size, failing after mIter rounds.
+// When part of the key space lies outside the iterated range (limitDiv > 1),
+// the size of that remainder is asserted to stay between a quarter of its
+// initial size minus 1 MiB and its initial size plus 1 MiB.
+//
+// limitDiv must be at least 1.
+func testDB_IterTriggeredCompaction(t *testing.T, limitDiv int) {
+	const (
+		vSize = 200 * opt.KiB
+		tSize = 100 * opt.MiB
+		mIter = 100
+		n     = tSize / vSize
+	)
+
+	// Guard against a division by zero (or a negative limit) below.
+	if limitDiv < 1 {
+		t.Fatalf("invalid limitDiv %d, must be >= 1", limitDiv)
+	}
+
+	h := newDbHarnessWopt(t, &opt.Options{
+		Compression:       opt.NoCompression,
+		DisableBlockCache: true,
+	})
+	defer h.close()
+
+	key := func(x int) string {
+		return fmt.Sprintf("v%06d", x)
+	}
+
+	// Fill.
+	value := strings.Repeat("x", vSize)
+	for i := 0; i < n; i++ {
+		h.put(key(i), value)
+	}
+	h.compactMem()
+
+	// Delete all.
+	for i := 0; i < n; i++ {
+		h.delete(key(i))
+	}
+	h.compactMem()
+
+	var (
+		limit = n / limitDiv
+
+		startKey = key(0)
+		limitKey = key(limit)
+		maxKey   = key(n)
+		slice    = &util.Range{Limit: []byte(limitKey)}
+
+		initialSize0 = h.sizeOf(startKey, limitKey)
+		initialSize1 = h.sizeOf(limitKey, maxKey)
+	)
+
+	t.Logf("initial size %s [rest %s]", shortenb(int(initialSize0)), shortenb(int(initialSize1)))
+
+	for r := 0; true; r++ {
+		if r >= mIter {
+			t.Fatal("taking too long to compact")
+		}
+
+		// Iterates. Every entry is deleted, so the loop body is empty; the
+		// scan itself is what is expected to trigger compaction.
+		iter := h.db.NewIterator(slice, h.ro)
+		for iter.Next() {
+		}
+		if err := iter.Error(); err != nil {
+			t.Fatalf("Iter err: %v", err)
+		}
+		iter.Release()
+
+		// Wait compaction.
+		h.waitCompaction()
+
+		// Check size.
+		size0 := h.sizeOf(startKey, limitKey)
+		size1 := h.sizeOf(limitKey, maxKey)
+		t.Logf("#%03d size %s [rest %s]", r, shortenb(int(size0)), shortenb(int(size1)))
+		if size0 < initialSize0/10 {
+			break
+		}
+	}
+
+	if initialSize1 > 0 {
+		h.sizeAssert(limitKey, maxKey, initialSize1/4-opt.MiB, initialSize1+opt.MiB)
+	}
+}
+
+// TestDB_IterTriggeredCompaction runs the shared helper with limitDiv = 1, so
+// the iterated range is limited at key(n) and covers every key written.
+func TestDB_IterTriggeredCompaction(t *testing.T) {
+ testDB_IterTriggeredCompaction(t, 1)
+}
+
+// TestDB_IterTriggeredCompactionHalf runs the shared helper with limitDiv = 2,
+// so only the keys below key(n/2) are iterated and the rest is left untouched.
+func TestDB_IterTriggeredCompactionHalf(t *testing.T) {
+ testDB_IterTriggeredCompaction(t, 2)
+}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_util.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_util.go
index 2db007e27..a8a2bdf72 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_util.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_util.go
@@ -7,8 +7,7 @@
package leveldb
import (
- "errors"
-
+ "github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/storage"
@@ -32,40 +31,44 @@ func (p Sizes) Sum() (n uint64) {
return n
}
+// Logging helpers: log and logf forward their arguments to db.s.log and db.s.logf.
+func (db *DB) log(v ...interface{}) { db.s.log(v...) }
+func (db *DB) logf(format string, v ...interface{}) { db.s.logf(format, v...) }
+
// Check and clean files.
-func (d *DB) checkAndCleanFiles() error {
- s := d.s
+func (db *DB) checkAndCleanFiles() error {
+ v := db.s.version()
+ defer v.release()
- v := s.version_NB()
- tables := make(map[uint64]bool)
- for _, tt := range v.tables {
- for _, t := range tt {
- tables[t.file.Num()] = false
+ tablesMap := make(map[uint64]bool)
+ for _, tables := range v.tables {
+ for _, t := range tables {
+ tablesMap[t.file.Num()] = false
}
}
- ff, err := s.getFiles(storage.TypeAll)
+ files, err := db.s.getFiles(storage.TypeAll)
if err != nil {
return err
}
var nTables int
var rem []storage.File
- for _, f := range ff {
+ for _, f := range files {
keep := true
switch f.Type() {
case storage.TypeManifest:
- keep = f.Num() >= s.manifestFile.Num()
+ keep = f.Num() >= db.s.manifestFile.Num()
case storage.TypeJournal:
- if d.frozenJournalFile != nil {
- keep = f.Num() >= d.frozenJournalFile.Num()
+ if db.frozenJournalFile != nil {
+ keep = f.Num() >= db.frozenJournalFile.Num()
} else {
- keep = f.Num() >= d.journalFile.Num()
+ keep = f.Num() >= db.journalFile.Num()
}
case storage.TypeTable:
- _, keep = tables[f.Num()]
+ _, keep = tablesMap[f.Num()]
if keep {
- tables[f.Num()] = true
+ tablesMap[f.Num()] = true
nTables++
}
}
@@ -75,18 +78,20 @@ func (d *DB) checkAndCleanFiles() error {
}
}
- if nTables != len(tables) {
- for num, present := range tables {
+ if nTables != len(tablesMap) {
+ var missing []*storage.FileInfo
+ for num, present := range tablesMap {
if !present {
- s.logf("db@janitor table missing @%d", num)
+ missing = append(missing, &storage.FileInfo{Type: storage.TypeTable, Num: num})
+ db.logf("db@janitor table missing @%d", num)
}
}
- return ErrCorrupted{Type: MissingFiles, Err: errors.New("leveldb: table files missing")}
+ return errors.NewErrCorrupted(nil, &errors.ErrMissingFiles{Files: missing})
}
- s.logf("db@janitor F·%d G·%d", len(ff), len(rem))
+ db.logf("db@janitor F·%d G·%d", len(files), len(rem))
for _, f := range rem {
- s.logf("db@janitor removing %s-%d", f.Type(), f.Num())
+ db.logf("db@janitor removing %s-%d", f.Type(), f.Num())
if err := f.Remove(); err != nil {
return err
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_write.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_write.go
index 4660e840c..e1cf30c53 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_write.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_write.go
@@ -14,84 +14,93 @@ import (
"github.com/syndtr/goleveldb/leveldb/util"
)
-func (d *DB) writeJournal(b *Batch) error {
- w, err := d.journal.Next()
+func (db *DB) writeJournal(b *Batch) error {
+ w, err := db.journal.Next()
if err != nil {
return err
}
if _, err := w.Write(b.encode()); err != nil {
return err
}
- if err := d.journal.Flush(); err != nil {
+ if err := db.journal.Flush(); err != nil {
return err
}
if b.sync {
- return d.journalWriter.Sync()
+ return db.journalWriter.Sync()
}
return nil
}
-func (d *DB) jWriter() {
- defer d.closeW.Done()
+func (db *DB) jWriter() {
+ defer db.closeW.Done()
for {
select {
- case b := <-d.journalC:
+ case b := <-db.journalC:
if b != nil {
- d.journalAckC <- d.writeJournal(b)
+ db.journalAckC <- db.writeJournal(b)
}
- case _, _ = <-d.closeC:
+ case _, _ = <-db.closeC:
return
}
}
}
-func (d *DB) rotateMem(n int) (mem *memdb.DB, err error) {
+func (db *DB) rotateMem(n int) (mem *memDB, err error) {
// Wait for pending memdb compaction.
- err = d.compSendIdle(d.mcompCmdC)
+ err = db.compSendIdle(db.mcompCmdC)
if err != nil {
return
}
// Create new memdb and journal.
- mem, err = d.newMem(n)
+ mem, err = db.newMem(n)
if err != nil {
return
}
// Schedule memdb compaction.
- d.compTrigger(d.mcompTriggerC)
+ db.compSendTrigger(db.mcompCmdC)
return
}
-func (d *DB) flush(n int) (mem *memdb.DB, nn int, err error) {
- s := d.s
-
+func (db *DB) flush(n int) (mem *memDB, nn int, err error) {
delayed := false
- flush := func() bool {
- v := s.version()
+ flush := func() (retry bool) {
+ v := db.s.version()
defer v.release()
- mem = d.getEffectiveMem()
- nn = mem.Free()
+ mem = db.getEffectiveMem()
+ defer func() {
+ if retry {
+ mem.decref()
+ mem = nil
+ }
+ }()
+ nn = mem.mdb.Free()
switch {
- case v.tLen(0) >= kL0_SlowdownWritesTrigger && !delayed:
+ case v.tLen(0) >= db.s.o.GetWriteL0SlowdownTrigger() && !delayed:
delayed = true
time.Sleep(time.Millisecond)
case nn >= n:
return false
- case v.tLen(0) >= kL0_StopWritesTrigger:
+ case v.tLen(0) >= db.s.o.GetWriteL0PauseTrigger():
delayed = true
- err = d.compSendIdle(d.tcompCmdC)
+ err = db.compSendIdle(db.tcompCmdC)
if err != nil {
return false
}
default:
// Allow memdb to grow if it has no entry.
- if mem.Len() == 0 {
+ if mem.mdb.Len() == 0 {
nn = n
- return false
+ } else {
+ mem.decref()
+ mem, err = db.rotateMem(n)
+ if err == nil {
+ nn = mem.mdb.Free()
+ } else {
+ nn = 0
+ }
}
- mem, err = d.rotateMem(n)
- nn = mem.Free()
return false
}
return true
@@ -100,7 +109,12 @@ func (d *DB) flush(n int) (mem *memdb.DB, nn int, err error) {
for flush() {
}
if delayed {
- s.logf("db@write delayed T·%v", time.Since(start))
+ db.writeDelay += time.Since(start)
+ db.writeDelayN++
+ } else if db.writeDelayN > 0 {
+ db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay)
+ db.writeDelay = 0
+ db.writeDelayN = 0
}
return
}
@@ -109,39 +123,45 @@ func (d *DB) flush(n int) (mem *memdb.DB, nn int, err error) {
// sequentially.
//
// It is safe to modify the contents of the arguments after Write returns.
-func (d *DB) Write(b *Batch, wo *opt.WriteOptions) (err error) {
- err = d.ok()
- if err != nil || b == nil || b.len() == 0 {
+func (db *DB) Write(b *Batch, wo *opt.WriteOptions) (err error) {
+ err = db.ok()
+ if err != nil || b == nil || b.Len() == 0 {
return
}
b.init(wo.GetSync())
// The write happen synchronously.
-retry:
select {
- case d.writeC <- b:
- if <-d.writeMergedC {
- return <-d.writeAckC
+ case db.writeC <- b:
+ if <-db.writeMergedC {
+ return <-db.writeAckC
}
- goto retry
- case d.writeLockC <- struct{}{}:
- case _, _ = <-d.closeC:
+ case db.writeLockC <- struct{}{}:
+ case err = <-db.compPerErrC:
+ return
+ case _, _ = <-db.closeC:
return ErrClosed
}
merged := 0
+ danglingMerge := false
defer func() {
- <-d.writeLockC
+ if danglingMerge {
+ db.writeMergedC <- false
+ } else {
+ <-db.writeLockC
+ }
for i := 0; i < merged; i++ {
- d.writeAckC <- err
+ db.writeAckC <- err
}
}()
- mem, memFree, err := d.flush(b.size())
+ mem, memFree, err := db.flush(b.size())
if err != nil {
return
}
+ defer mem.decref()
// Calculate maximum size of the batch.
m := 1 << 20
@@ -154,13 +174,13 @@ retry:
drain:
for b.size() < m && !b.sync {
select {
- case nb := <-d.writeC:
+ case nb := <-db.writeC:
if b.size()+nb.size() <= m {
b.append(nb)
- d.writeMergedC <- true
+ db.writeMergedC <- true
merged++
} else {
- d.writeMergedC <- false
+ danglingMerge = true
break drain
}
default:
@@ -169,44 +189,52 @@ drain:
}
// Set batch first seq number relative from last seq.
- b.seq = d.seq + 1
+ b.seq = db.seq + 1
// Write journal concurrently if it is large enough.
if b.size() >= (128 << 10) {
// Push the write batch to the journal writer
select {
- case _, _ = <-d.closeC:
+ case db.journalC <- b:
+ // Write into memdb
+ if berr := b.memReplay(mem.mdb); berr != nil {
+ panic(berr)
+ }
+ case err = <-db.compPerErrC:
+ return
+ case _, _ = <-db.closeC:
err = ErrClosed
return
- case d.journalC <- b:
- // Write into memdb
- b.memReplay(mem)
}
// Wait for journal writer
select {
- case _, _ = <-d.closeC:
- err = ErrClosed
- return
- case err = <-d.journalAckC:
+ case err = <-db.journalAckC:
if err != nil {
// Revert memdb if error detected
- b.revertMemReplay(mem)
+ if berr := b.revertMemReplay(mem.mdb); berr != nil {
+ panic(berr)
+ }
return
}
+ case _, _ = <-db.closeC:
+ err = ErrClosed
+ return
}
} else {
- err = d.writeJournal(b)
+ err = db.writeJournal(b)
if err != nil {
return
}
- b.memReplay(mem)
+ if berr := b.memReplay(mem.mdb); berr != nil {
+ panic(berr)
+ }
}
// Set last seq number.
- d.addSeq(uint64(b.len()))
+ db.addSeq(uint64(b.Len()))
if b.size() >= memFree {
- d.rotateMem(0)
+ db.rotateMem(0)
}
return
}
@@ -215,20 +243,20 @@ drain:
// for that key; a DB is not a multi-map.
//
// It is safe to modify the contents of the arguments after Put returns.
-func (d *DB) Put(key, value []byte, wo *opt.WriteOptions) error {
+func (db *DB) Put(key, value []byte, wo *opt.WriteOptions) error {
b := new(Batch)
b.Put(key, value)
- return d.Write(b, wo)
+ return db.Write(b, wo)
}
// Delete deletes the value for the given key. It does not return an error
// if the DB does not contain the key.
//
// It is safe to modify the contents of the arguments after Delete returns.
-func (d *DB) Delete(key []byte, wo *opt.WriteOptions) error {
+func (db *DB) Delete(key []byte, wo *opt.WriteOptions) error {
b := new(Batch)
b.Delete(key)
- return d.Write(b, wo)
+ return db.Write(b, wo)
}
func isMemOverlaps(icmp *iComparer, mem *memdb.DB, min, max []byte) bool {
@@ -247,33 +275,37 @@ func isMemOverlaps(icmp *iComparer, mem *memdb.DB, min, max []byte) bool {
// A nil Range.Start is treated as a key before all keys in the DB.
// And a nil Range.Limit is treated as a key after all keys in the DB.
// Therefore if both is nil then it will compact entire DB.
-func (d *DB) CompactRange(r util.Range) error {
- if err := d.ok(); err != nil {
+func (db *DB) CompactRange(r util.Range) error {
+ if err := db.ok(); err != nil {
return err
}
+ // Lock writer.
select {
- case d.writeLockC <- struct{}{}:
- case _, _ = <-d.closeC:
+ case db.writeLockC <- struct{}{}:
+ case err := <-db.compPerErrC:
+ return err
+ case _, _ = <-db.closeC:
return ErrClosed
}
// Check for overlaps in memdb.
- mem := d.getEffectiveMem()
- if isMemOverlaps(d.s.icmp, mem, r.Start, r.Limit) {
+ mem := db.getEffectiveMem()
+ defer mem.decref()
+ if isMemOverlaps(db.s.icmp, mem.mdb, r.Start, r.Limit) {
// Memdb compaction.
- if _, err := d.rotateMem(0); err != nil {
- <-d.writeLockC
+ if _, err := db.rotateMem(0); err != nil {
+ <-db.writeLockC
return err
}
- <-d.writeLockC
- if err := d.compSendIdle(d.mcompCmdC); err != nil {
+ <-db.writeLockC
+ if err := db.compSendIdle(db.mcompCmdC); err != nil {
return err
}
} else {
- <-d.writeLockC
+ <-db.writeLockC
}
// Table compaction.
- return d.compSendRange(d.tcompCmdC, -1, r.Start, r.Limit)
+ return db.compSendRange(db.tcompCmdC, -1, r.Start, r.Limit)
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/doc.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/doc.go
index ac9ea3d0c..53f13bb24 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/doc.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/doc.go
@@ -37,6 +37,16 @@
// err = iter.Error()
// ...
//
+// Iterate over a subset of the database content with a particular prefix:
+// iter := db.NewIterator(util.BytesPrefix([]byte("foo-")), nil)
+// for iter.Next() {
+// // Use key/value.
+// ...
+// }
+// iter.Release()
+// err = iter.Error()
+// ...
+//
// Seek-then-Iterate:
//
// iter := db.NewIterator(nil, nil)
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/error.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors.go
index 8066bd9a4..29d0d2f27 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/error.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors.go
@@ -7,32 +7,12 @@
package leveldb
import (
- "errors"
-
- "github.com/syndtr/goleveldb/leveldb/util"
+ "github.com/syndtr/goleveldb/leveldb/errors"
)
var (
- ErrNotFound = util.ErrNotFound
+ ErrNotFound = errors.ErrNotFound
ErrSnapshotReleased = errors.New("leveldb: snapshot released")
ErrIterReleased = errors.New("leveldb: iterator released")
ErrClosed = errors.New("leveldb: closed")
)
-
-type CorruptionType int
-
-const (
- CorruptedManifest CorruptionType = iota
- MissingFiles
-)
-
-// ErrCorrupted is the type that wraps errors that indicate corruption in
-// the database.
-type ErrCorrupted struct {
- Type CorruptionType
- Err error
-}
-
-func (e ErrCorrupted) Error() string {
- return e.Err.Error()
-}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors/errors.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors/errors.go
new file mode 100644
index 000000000..84b5d6b7b
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors/errors.go
@@ -0,0 +1,76 @@
+// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package errors provides common error types used throughout leveldb.
+package errors
+
+import (
+ "errors"
+ "fmt"
+
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+var (
+ ErrNotFound = New("leveldb: not found")
+ ErrReleased = util.ErrReleased
+ ErrHasReleaser = util.ErrHasReleaser
+)
+
+// New returns an error that formats as the given text.
+func New(text string) error {
+ return errors.New(text)
+}
+
+// ErrCorrupted is the type that wraps errors that indicate corruption in
+// the database.
+type ErrCorrupted struct {
+ File *storage.FileInfo
+ Err error
+}
+
+func (e *ErrCorrupted) Error() string {
+ if e.File != nil {
+ return fmt.Sprintf("%v [file=%v]", e.Err, e.File)
+ } else {
+ return e.Err.Error()
+ }
+}
+
+// NewErrCorrupted creates a new ErrCorrupted error.
+func NewErrCorrupted(f storage.File, err error) error {
+ return &ErrCorrupted{storage.NewFileInfo(f), err}
+}
+
+// IsCorrupted returns a boolean indicating whether the error indicates
+// a corruption.
+func IsCorrupted(err error) bool {
+ switch err.(type) {
+ case *ErrCorrupted:
+ return true
+ }
+ return false
+}
+
+// ErrMissingFiles is the type that indicates a corruption due to missing
+// files.
+type ErrMissingFiles struct {
+ Files []*storage.FileInfo
+}
+
+func (e *ErrMissingFiles) Error() string { return "file missing" }
+
+// SetFile sets the 'file info' of the given error with the given file.
+// Currently only ErrCorrupted is supported; any other error is returned unchanged.
+func SetFile(err error, f storage.File) error {
+ switch x := err.(type) {
+ case *ErrCorrupted:
+ x.File = storage.NewFileInfo(f)
+ return x
+ }
+ return err
+}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/external_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/external_test.go
index d7dff04b6..b328ece4e 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/external_test.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/external_test.go
@@ -17,13 +17,14 @@ import (
var _ = testutil.Defer(func() {
Describe("Leveldb external", func() {
o := &opt.Options{
- BlockCache: opt.NoCache,
- BlockRestartInterval: 5,
- BlockSize: 50,
- Compression: opt.NoCompression,
- MaxOpenFiles: 0,
- Strict: opt.StrictAll,
- WriteBuffer: 1000,
+ DisableBlockCache: true,
+ BlockRestartInterval: 5,
+ BlockSize: 80,
+ Compression: opt.NoCompression,
+ OpenFilesCacheCapacity: -1,
+ Strict: opt.StrictAll,
+ WriteBuffer: 1000,
+ CompactionTableSize: 2000,
}
Describe("write test", func() {
@@ -36,22 +37,21 @@ var _ = testutil.Defer(func() {
testutil.DoDBTesting(&t)
db.TestClose()
done <- true
- }, 9.0)
+ }, 20.0)
})
Describe("read test", func() {
- testutil.AllKeyValueTesting(nil, func(kv testutil.KeyValue) testutil.DB {
+ testutil.AllKeyValueTesting(nil, nil, func(kv testutil.KeyValue) testutil.DB {
// Building the DB.
db := newTestingDB(o, nil, nil)
kv.IterateShuffled(nil, func(i int, key, value []byte) {
err := db.TestPut(key, value)
Expect(err).NotTo(HaveOccurred())
})
- testutil.Defer("teardown", func() {
- db.TestClose()
- })
return db
+ }, func(db testutil.DB) {
+ db.(*testingDB).TestClose()
})
})
})
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go
index 9b4b72741..a23ab05f7 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go
@@ -40,13 +40,19 @@ type basicArrayIterator struct {
util.BasicReleaser
array BasicArray
pos int
+ err error
}
func (i *basicArrayIterator) Valid() bool {
- return i.pos >= 0 && i.pos < i.array.Len()
+ return i.pos >= 0 && i.pos < i.array.Len() && !i.Released()
}
func (i *basicArrayIterator) First() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
if i.array.Len() == 0 {
i.pos = -1
return false
@@ -56,6 +62,11 @@ func (i *basicArrayIterator) First() bool {
}
func (i *basicArrayIterator) Last() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
n := i.array.Len()
if n == 0 {
i.pos = 0
@@ -66,6 +77,11 @@ func (i *basicArrayIterator) Last() bool {
}
func (i *basicArrayIterator) Seek(key []byte) bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
n := i.array.Len()
if n == 0 {
i.pos = 0
@@ -79,6 +95,11 @@ func (i *basicArrayIterator) Seek(key []byte) bool {
}
func (i *basicArrayIterator) Next() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
i.pos++
if n := i.array.Len(); i.pos >= n {
i.pos = n
@@ -88,6 +109,11 @@ func (i *basicArrayIterator) Next() bool {
}
func (i *basicArrayIterator) Prev() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
i.pos--
if i.pos < 0 {
i.pos = -1
@@ -96,7 +122,7 @@ func (i *basicArrayIterator) Prev() bool {
return true
}
-func (i *basicArrayIterator) Error() error { return nil }
+func (i *basicArrayIterator) Error() error { return i.err }
type arrayIterator struct {
basicArrayIterator
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go
index 1e99a2bf6..939adbb93 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go
@@ -7,6 +7,7 @@
package iterator
import (
+ "github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/util"
)
@@ -22,13 +23,13 @@ type IteratorIndexer interface {
type indexedIterator struct {
util.BasicReleaser
- index IteratorIndexer
- strict bool
- strictGet bool
+ index IteratorIndexer
+ strict bool
- data Iterator
- err error
- errf func(err error)
+ data Iterator
+ err error
+ errf func(err error)
+ closed bool
}
func (i *indexedIterator) setData() {
@@ -36,11 +37,6 @@ func (i *indexedIterator) setData() {
i.data.Release()
}
i.data = i.index.Get()
- if i.strictGet {
- if err := i.data.Error(); err != nil {
- i.err = err
- }
- }
}
func (i *indexedIterator) clearData() {
@@ -50,14 +46,21 @@ func (i *indexedIterator) clearData() {
i.data = nil
}
-func (i *indexedIterator) dataErr() bool {
- if i.errf != nil {
- if err := i.data.Error(); err != nil {
+func (i *indexedIterator) indexErr() {
+ if err := i.index.Error(); err != nil {
+ if i.errf != nil {
i.errf(err)
}
+ i.err = err
}
- if i.strict {
- if err := i.data.Error(); err != nil {
+}
+
+func (i *indexedIterator) dataErr() bool {
+ if err := i.data.Error(); err != nil {
+ if i.errf != nil {
+ i.errf(err)
+ }
+ if i.strict || !errors.IsCorrupted(err) {
i.err = err
return true
}
@@ -72,9 +75,13 @@ func (i *indexedIterator) Valid() bool {
func (i *indexedIterator) First() bool {
if i.err != nil {
return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
}
if !i.index.First() {
+ i.indexErr()
i.clearData()
return false
}
@@ -85,9 +92,13 @@ func (i *indexedIterator) First() bool {
func (i *indexedIterator) Last() bool {
if i.err != nil {
return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
}
if !i.index.Last() {
+ i.indexErr()
i.clearData()
return false
}
@@ -105,9 +116,13 @@ func (i *indexedIterator) Last() bool {
func (i *indexedIterator) Seek(key []byte) bool {
if i.err != nil {
return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
}
if !i.index.Seek(key) {
+ i.indexErr()
i.clearData()
return false
}
@@ -125,6 +140,9 @@ func (i *indexedIterator) Seek(key []byte) bool {
func (i *indexedIterator) Next() bool {
if i.err != nil {
return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
}
switch {
@@ -136,6 +154,7 @@ func (i *indexedIterator) Next() bool {
fallthrough
case i.data == nil:
if !i.index.Next() {
+ i.indexErr()
return false
}
i.setData()
@@ -147,6 +166,9 @@ func (i *indexedIterator) Next() bool {
func (i *indexedIterator) Prev() bool {
if i.err != nil {
return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
}
switch {
@@ -158,6 +180,7 @@ func (i *indexedIterator) Prev() bool {
fallthrough
case i.data == nil:
if !i.index.Prev() {
+ i.indexErr()
return false
}
i.setData()
@@ -206,16 +229,14 @@ func (i *indexedIterator) SetErrorCallback(f func(err error)) {
i.errf = f
}
-// NewIndexedIterator returns an indexed iterator. An index is iterator
-// that returns another iterator, a data iterator. A data iterator is the
+// NewIndexedIterator returns an 'indexed iterator'. An index is an iterator
+// that returns another iterator, a 'data iterator'. A 'data iterator' is the
// iterator that contains actual key/value pairs.
//
-// If strict is true then error yield by data iterator will halt the indexed
-// iterator, on contrary if strict is false then the indexed iterator will
-// ignore those error and move on to the next index. If strictGet is true and
-// index.Get() yield an 'error iterator' then the indexed iterator will be halted.
-// An 'error iterator' is iterator which its Error() method always return non-nil
-// even before any 'seeks method' is called.
-func NewIndexedIterator(index IteratorIndexer, strict, strictGet bool) Iterator {
- return &indexedIterator{index: index, strict: strict, strictGet: strictGet}
+// If strict is true then any 'corruption errors' (i.e. errors.IsCorrupted(err) == true)
+// won't be ignored and will halt the 'indexed iterator', otherwise the iterator will
+// continue to the next 'data iterator'. Corruption on the 'index iterator' will not be
+// ignored and will halt the iterator.
+func NewIndexedIterator(index IteratorIndexer, strict bool) Iterator {
+ return &indexedIterator{index: index, strict: strict}
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter_test.go
index 6a89b3830..72a797892 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter_test.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter_test.go
@@ -65,7 +65,7 @@ var _ = testutil.Defer(func() {
// Test the iterator.
t := testutil.IteratorTesting{
KeyValue: kv.Clone(),
- Iter: NewIndexedIterator(NewArrayIndexer(index), true, true),
+ Iter: NewIndexedIterator(NewArrayIndexer(index), true),
}
testutil.DoIteratorTesting(&t)
done <- true
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter.go
index 1b80184e8..c2522860b 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter.go
@@ -14,6 +14,10 @@ import (
"github.com/syndtr/goleveldb/leveldb/util"
)
+var (
+ ErrIterReleased = errors.New("leveldb/iterator: iterator released")
+)
+
// IteratorSeeker is the interface that wraps the 'seeks method'.
type IteratorSeeker interface {
// First moves the iterator to the first key/value pair. If the iterator
@@ -100,28 +104,13 @@ type ErrorCallbackSetter interface {
}
type emptyIterator struct {
- releaser util.Releaser
- released bool
- err error
+ util.BasicReleaser
+ err error
}
func (i *emptyIterator) rErr() {
- if i.err == nil && i.released {
- i.err = errors.New("leveldb/iterator: iterator released")
- }
-}
-
-func (i *emptyIterator) Release() {
- if i.releaser != nil {
- i.releaser.Release()
- i.releaser = nil
- }
- i.released = true
-}
-
-func (i *emptyIterator) SetReleaser(releaser util.Releaser) {
- if !i.released {
- i.releaser = releaser
+ if i.err == nil && i.Released() {
+ i.err = ErrIterReleased
}
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter_suite_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter_suite_test.go
index 7ec2fc6f2..5ef8d5baf 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter_suite_test.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter_suite_test.go
@@ -3,15 +3,9 @@ package iterator_test
import (
"testing"
- . "github.com/onsi/ginkgo"
- . "github.com/onsi/gomega"
-
"github.com/syndtr/goleveldb/leveldb/testutil"
)
func TestIterator(t *testing.T) {
- testutil.RunDefer()
-
- RegisterFailHandler(Fail)
- RunSpecs(t, "Iterator Suite")
+ testutil.RunSuite(t, "Iterator Suite")
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go
index c8314c4e5..1a7e29df8 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go
@@ -7,16 +7,11 @@
package iterator
import (
- "errors"
-
"github.com/syndtr/goleveldb/leveldb/comparer"
+ "github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/util"
)
-var (
- ErrIterReleased = errors.New("leveldb/iterator: iterator released")
-)
-
type dir int
const (
@@ -48,13 +43,11 @@ func assertKey(key []byte) []byte {
}
func (i *mergedIterator) iterErr(iter Iterator) bool {
- if i.errf != nil {
- if err := iter.Error(); err != nil {
+ if err := iter.Error(); err != nil {
+ if i.errf != nil {
i.errf(err)
}
- }
- if i.strict {
- if err := iter.Error(); err != nil {
+ if i.strict || !errors.IsCorrupted(err) {
i.err = err
return true
}
@@ -274,9 +267,13 @@ func (i *mergedIterator) Release() {
}
func (i *mergedIterator) SetReleaser(releaser util.Releaser) {
- if i.dir != dirReleased {
- i.releaser = releaser
+ if i.dir == dirReleased {
+ panic(util.ErrReleased)
+ }
+ if i.releaser != nil && releaser != nil {
+ panic(util.ErrHasReleaser)
}
+ i.releaser = releaser
}
func (i *mergedIterator) Error() error {
@@ -294,9 +291,9 @@ func (i *mergedIterator) SetErrorCallback(f func(err error)) {
// keys: if iters[i] contains a key k then iters[j] will not contain that key k.
// None of the iters may be nil.
//
-// If strict is true then error yield by any iterators will halt the merged
-// iterator, on contrary if strict is false then the merged iterator will
-// ignore those error and move on to the next iterator.
+// If strict is true then any 'corruption errors' (i.e. errors.IsCorrupted(err) == true)
+// won't be ignored and will halt the 'merged iterator', otherwise the iterator will
+// continue to the next 'input iterator'.
func NewMergedIterator(iters []Iterator, cmp comparer.Comparer, strict bool) Iterator {
return &mergedIterator{
iters: iters,
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal.go
index b522c76e6..6519ec660 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal.go
@@ -79,10 +79,10 @@ package journal
import (
"encoding/binary"
- "errors"
"fmt"
"io"
+ "github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/util"
)
@@ -103,18 +103,18 @@ type flusher interface {
Flush() error
}
-// DroppedError is the error type that passed to Dropper.Drop method.
-type DroppedError struct {
+// ErrCorrupted is the error type generated by a corrupted block or chunk.
+type ErrCorrupted struct {
Size int
Reason string
}
-func (e DroppedError) Error() string {
- return fmt.Sprintf("leveldb/journal: dropped %d bytes: %s", e.Size, e.Reason)
+func (e *ErrCorrupted) Error() string {
+ return fmt.Sprintf("leveldb/journal: block/chunk corrupted: %s (%d bytes)", e.Reason, e.Size)
}
// Dropper is the interface that wrap simple Drop method. The Drop
-// method will be called when the journal reader dropping a chunk.
+// method will be called when the journal reader drops a block or chunk.
type Dropper interface {
Drop(err error)
}
@@ -158,76 +158,78 @@ func NewReader(r io.Reader, dropper Dropper, strict, checksum bool) *Reader {
}
}
+var errSkip = errors.New("leveldb/journal: skipped")
+
+func (r *Reader) corrupt(n int, reason string, skip bool) error {
+ if r.dropper != nil {
+ r.dropper.Drop(&ErrCorrupted{n, reason})
+ }
+ if r.strict && !skip {
+ r.err = errors.NewErrCorrupted(nil, &ErrCorrupted{n, reason})
+ return r.err
+ }
+ return errSkip
+}
+
// nextChunk sets r.buf[r.i:r.j] to hold the next chunk's payload, reading the
// next block into the buffer if necessary.
-func (r *Reader) nextChunk(wantFirst, skip bool) error {
+func (r *Reader) nextChunk(first bool) error {
for {
if r.j+headerSize <= r.n {
checksum := binary.LittleEndian.Uint32(r.buf[r.j+0 : r.j+4])
length := binary.LittleEndian.Uint16(r.buf[r.j+4 : r.j+6])
chunkType := r.buf[r.j+6]
- var err error
if checksum == 0 && length == 0 && chunkType == 0 {
// Drop entire block.
- err = DroppedError{r.n - r.j, "zero header"}
+ m := r.n - r.j
r.i = r.n
r.j = r.n
+ return r.corrupt(m, "zero header", false)
} else {
m := r.n - r.j
r.i = r.j + headerSize
r.j = r.j + headerSize + int(length)
if r.j > r.n {
// Drop entire block.
- err = DroppedError{m, "chunk length overflows block"}
r.i = r.n
r.j = r.n
+ return r.corrupt(m, "chunk length overflows block", false)
} else if r.checksum && checksum != util.NewCRC(r.buf[r.i-1:r.j]).Value() {
// Drop entire block.
- err = DroppedError{m, "checksum mismatch"}
r.i = r.n
r.j = r.n
+ return r.corrupt(m, "checksum mismatch", false)
}
}
- if wantFirst && err == nil && chunkType != fullChunkType && chunkType != firstChunkType {
- if skip {
- // The chunk are intentionally skipped.
- if chunkType == lastChunkType {
- skip = false
- }
- continue
- } else {
- // Drop the chunk.
- err = DroppedError{r.j - r.i + headerSize, "orphan chunk"}
- }
- }
- if err == nil {
- r.last = chunkType == fullChunkType || chunkType == lastChunkType
- } else {
- if r.dropper != nil {
- r.dropper.Drop(err)
- }
- if r.strict {
- r.err = err
- }
+ if first && chunkType != fullChunkType && chunkType != firstChunkType {
+ m := r.j - r.i
+ r.i = r.j
+ // Report the error, but skip it.
+ return r.corrupt(m+headerSize, "orphan chunk", true)
}
- return err
+ r.last = chunkType == fullChunkType || chunkType == lastChunkType
+ return nil
}
+
+ // The last block.
if r.n < blockSize && r.n > 0 {
- // This is the last block.
- if r.j != r.n {
- r.err = io.ErrUnexpectedEOF
- } else {
- r.err = io.EOF
+ if !first {
+ return r.corrupt(0, "missing chunk part", false)
}
+ r.err = io.EOF
return r.err
}
+
+ // Read block.
n, err := io.ReadFull(r.r, r.buf[:])
- if err != nil && err != io.ErrUnexpectedEOF {
- r.err = err
- return r.err
+ if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
+ return err
}
if n == 0 {
+ if !first {
+ return r.corrupt(0, "missing chunk part", false)
+ }
r.err = io.EOF
return r.err
}
@@ -237,29 +239,26 @@ func (r *Reader) nextChunk(wantFirst, skip bool) error {
// Next returns a reader for the next journal. It returns io.EOF if there are no
// more journals. The reader returned becomes stale after the next Next call,
-// and should no longer be used.
+// and should no longer be used. If strict is false, the reader will return
+// io.ErrUnexpectedEOF error when it finds a corrupted journal.
func (r *Reader) Next() (io.Reader, error) {
r.seq++
if r.err != nil {
return nil, r.err
}
- skip := !r.last
+ r.i = r.j
for {
- r.i = r.j
- if r.nextChunk(true, skip) != nil {
- // So that 'orphan chunk' drop will be reported.
- skip = false
- } else {
+ if err := r.nextChunk(true); err == nil {
break
- }
- if r.err != nil {
- return nil, r.err
+ } else if err != errSkip {
+ return nil, err
}
}
return &singleReader{r, r.seq, nil}, nil
}
-// Reset resets the journal reader, allows reuse of the journal reader.
+// Reset resets the journal reader, allowing the journal reader to be reused. Reset returns
+// the last accumulated error.
func (r *Reader) Reset(reader io.Reader, dropper Dropper, strict, checksum bool) error {
r.seq++
err := r.err
@@ -296,7 +295,11 @@ func (x *singleReader) Read(p []byte) (int, error) {
if r.last {
return 0, io.EOF
}
- if x.err = r.nextChunk(false, false); x.err != nil {
+ x.err = r.nextChunk(false)
+ if x.err != nil {
+ if x.err == errSkip {
+ x.err = io.ErrUnexpectedEOF
+ }
return 0, x.err
}
}
@@ -320,7 +323,11 @@ func (x *singleReader) ReadByte() (byte, error) {
if r.last {
return 0, io.EOF
}
- if x.err = r.nextChunk(false, false); x.err != nil {
+ x.err = r.nextChunk(false)
+ if x.err != nil {
+ if x.err == errSkip {
+ x.err = io.ErrUnexpectedEOF
+ }
return 0, x.err
}
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal_test.go
index 5e1193ae2..0fcf22599 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal_test.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal_test.go
@@ -12,6 +12,7 @@ package journal
import (
"bytes"
+ "encoding/binary"
"fmt"
"io"
"io/ioutil"
@@ -326,3 +327,492 @@ func TestStaleWriter(t *testing.T) {
t.Fatalf("stale write #1: unexpected error: %v", err)
}
}
+
+func TestCorrupt_MissingLastBlock(t *testing.T) {
+ buf := new(bytes.Buffer)
+
+ w := NewWriter(buf)
+
+ // First record.
+ ww, err := w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), blockSize-1024)); err != nil {
+ t.Fatalf("write #0: unexpected error: %v", err)
+ }
+
+ // Second record.
+ ww, err = w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), blockSize-headerSize)); err != nil {
+ t.Fatalf("write #1: unexpected error: %v", err)
+ }
+
+ if err := w.Close(); err != nil {
+ t.Fatal(err)
+ }
+
+ // Cut the last block.
+ b := buf.Bytes()[:blockSize]
+ r := NewReader(bytes.NewReader(b), dropper{t}, false, true)
+
+ // First read.
+ rr, err := r.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, err := io.Copy(ioutil.Discard, rr)
+ if err != nil {
+ t.Fatalf("read #0: %v", err)
+ }
+ if n != blockSize-1024 {
+ t.Fatalf("read #0: got %d bytes want %d", n, blockSize-1024)
+ }
+
+ // Second read.
+ rr, err = r.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, err = io.Copy(ioutil.Discard, rr)
+ if err != io.ErrUnexpectedEOF {
+ t.Fatalf("read #1: unexpected error: %v", err)
+ }
+
+ if _, err := r.Next(); err != io.EOF {
+ t.Fatalf("last next: unexpected error: %v", err)
+ }
+}
+
+func TestCorrupt_CorruptedFirstBlock(t *testing.T) {
+ buf := new(bytes.Buffer)
+
+ w := NewWriter(buf)
+
+ // First record.
+ ww, err := w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), blockSize/2)); err != nil {
+ t.Fatalf("write #0: unexpected error: %v", err)
+ }
+
+ // Second record.
+ ww, err = w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), blockSize-headerSize)); err != nil {
+ t.Fatalf("write #1: unexpected error: %v", err)
+ }
+
+ // Third record.
+ ww, err = w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), (blockSize-headerSize)+1)); err != nil {
+ t.Fatalf("write #2: unexpected error: %v", err)
+ }
+
+ // Fourth record.
+ ww, err = w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), (blockSize-headerSize)+2)); err != nil {
+ t.Fatalf("write #3: unexpected error: %v", err)
+ }
+
+ if err := w.Close(); err != nil {
+ t.Fatal(err)
+ }
+
+ b := buf.Bytes()
+ // Corrupting block #0.
+ for i := 0; i < 1024; i++ {
+ b[i] = '1'
+ }
+
+ r := NewReader(bytes.NewReader(b), dropper{t}, false, true)
+
+ // First read (third record).
+ rr, err := r.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, err := io.Copy(ioutil.Discard, rr)
+ if err != nil {
+ t.Fatalf("read #0: %v", err)
+ }
+ if want := int64(blockSize-headerSize) + 1; n != want {
+ t.Fatalf("read #0: got %d bytes want %d", n, want)
+ }
+
+ // Second read (fourth record).
+ rr, err = r.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, err = io.Copy(ioutil.Discard, rr)
+ if err != nil {
+ t.Fatalf("read #1: %v", err)
+ }
+ if want := int64(blockSize-headerSize) + 2; n != want {
+ t.Fatalf("read #1: got %d bytes want %d", n, want)
+ }
+
+ if _, err := r.Next(); err != io.EOF {
+ t.Fatalf("last next: unexpected error: %v", err)
+ }
+}
+
+func TestCorrupt_CorruptedMiddleBlock(t *testing.T) {
+ buf := new(bytes.Buffer)
+
+ w := NewWriter(buf)
+
+ // First record.
+ ww, err := w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), blockSize/2)); err != nil {
+ t.Fatalf("write #0: unexpected error: %v", err)
+ }
+
+ // Second record.
+ ww, err = w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), blockSize-headerSize)); err != nil {
+ t.Fatalf("write #1: unexpected error: %v", err)
+ }
+
+ // Third record.
+ ww, err = w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), (blockSize-headerSize)+1)); err != nil {
+ t.Fatalf("write #2: unexpected error: %v", err)
+ }
+
+ // Fourth record.
+ ww, err = w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), (blockSize-headerSize)+2)); err != nil {
+ t.Fatalf("write #3: unexpected error: %v", err)
+ }
+
+ if err := w.Close(); err != nil {
+ t.Fatal(err)
+ }
+
+ b := buf.Bytes()
+ // Corrupting block #1.
+ for i := 0; i < 1024; i++ {
+ b[blockSize+i] = '1'
+ }
+
+ r := NewReader(bytes.NewReader(b), dropper{t}, false, true)
+
+ // First read (first record).
+ rr, err := r.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, err := io.Copy(ioutil.Discard, rr)
+ if err != nil {
+ t.Fatalf("read #0: %v", err)
+ }
+ if want := int64(blockSize / 2); n != want {
+ t.Fatalf("read #0: got %d bytes want %d", n, want)
+ }
+
+ // Second read (second record).
+ rr, err = r.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, err = io.Copy(ioutil.Discard, rr)
+ if err != io.ErrUnexpectedEOF {
+ t.Fatalf("read #1: unexpected error: %v", err)
+ }
+
+ // Third read (fourth record).
+ rr, err = r.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, err = io.Copy(ioutil.Discard, rr)
+ if err != nil {
+ t.Fatalf("read #2: %v", err)
+ }
+ if want := int64(blockSize-headerSize) + 2; n != want {
+ t.Fatalf("read #2: got %d bytes want %d", n, want)
+ }
+
+ if _, err := r.Next(); err != io.EOF {
+ t.Fatalf("last next: unexpected error: %v", err)
+ }
+}
+
+func TestCorrupt_CorruptedLastBlock(t *testing.T) {
+ buf := new(bytes.Buffer)
+
+ w := NewWriter(buf)
+
+ // First record.
+ ww, err := w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), blockSize/2)); err != nil {
+ t.Fatalf("write #0: unexpected error: %v", err)
+ }
+
+ // Second record.
+ ww, err = w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), blockSize-headerSize)); err != nil {
+ t.Fatalf("write #1: unexpected error: %v", err)
+ }
+
+ // Third record.
+ ww, err = w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), (blockSize-headerSize)+1)); err != nil {
+ t.Fatalf("write #2: unexpected error: %v", err)
+ }
+
+ // Fourth record.
+ ww, err = w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), (blockSize-headerSize)+2)); err != nil {
+ t.Fatalf("write #3: unexpected error: %v", err)
+ }
+
+ if err := w.Close(); err != nil {
+ t.Fatal(err)
+ }
+
+ b := buf.Bytes()
+ // Corrupting block #3.
+ for i := len(b) - 1; i > len(b)-1024; i-- {
+ b[i] = '1'
+ }
+
+ r := NewReader(bytes.NewReader(b), dropper{t}, false, true)
+
+ // First read (first record).
+ rr, err := r.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, err := io.Copy(ioutil.Discard, rr)
+ if err != nil {
+ t.Fatalf("read #0: %v", err)
+ }
+ if want := int64(blockSize / 2); n != want {
+ t.Fatalf("read #0: got %d bytes want %d", n, want)
+ }
+
+ // Second read (second record).
+ rr, err = r.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, err = io.Copy(ioutil.Discard, rr)
+ if err != nil {
+ t.Fatalf("read #1: %v", err)
+ }
+ if want := int64(blockSize - headerSize); n != want {
+ t.Fatalf("read #1: got %d bytes want %d", n, want)
+ }
+
+ // Third read (third record).
+ rr, err = r.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, err = io.Copy(ioutil.Discard, rr)
+ if err != nil {
+ t.Fatalf("read #2: %v", err)
+ }
+ if want := int64(blockSize-headerSize) + 1; n != want {
+ t.Fatalf("read #2: got %d bytes want %d", n, want)
+ }
+
+ // Fourth read (fourth record).
+ rr, err = r.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, err = io.Copy(ioutil.Discard, rr)
+ if err != io.ErrUnexpectedEOF {
+ t.Fatalf("read #3: unexpected error: %v", err)
+ }
+
+ if _, err := r.Next(); err != io.EOF {
+ t.Fatalf("last next: unexpected error: %v", err)
+ }
+}
+
+func TestCorrupt_FirstChuckLengthOverflow(t *testing.T) {
+ buf := new(bytes.Buffer)
+
+ w := NewWriter(buf)
+
+ // First record.
+ ww, err := w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), blockSize/2)); err != nil {
+ t.Fatalf("write #0: unexpected error: %v", err)
+ }
+
+ // Second record.
+ ww, err = w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), blockSize-headerSize)); err != nil {
+ t.Fatalf("write #1: unexpected error: %v", err)
+ }
+
+ // Third record.
+ ww, err = w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), (blockSize-headerSize)+1)); err != nil {
+ t.Fatalf("write #2: unexpected error: %v", err)
+ }
+
+ if err := w.Close(); err != nil {
+ t.Fatal(err)
+ }
+
+ b := buf.Bytes()
+ // Corrupting record #1.
+ x := blockSize
+ binary.LittleEndian.PutUint16(b[x+4:], 0xffff)
+
+ r := NewReader(bytes.NewReader(b), dropper{t}, false, true)
+
+ // First read (first record).
+ rr, err := r.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, err := io.Copy(ioutil.Discard, rr)
+ if err != nil {
+ t.Fatalf("read #0: %v", err)
+ }
+ if want := int64(blockSize / 2); n != want {
+ t.Fatalf("read #0: got %d bytes want %d", n, want)
+ }
+
+ // Second read (second record).
+ rr, err = r.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, err = io.Copy(ioutil.Discard, rr)
+ if err != io.ErrUnexpectedEOF {
+ t.Fatalf("read #1: unexpected error: %v", err)
+ }
+
+ if _, err := r.Next(); err != io.EOF {
+ t.Fatalf("last next: unexpected error: %v", err)
+ }
+}
+
+func TestCorrupt_MiddleChuckLengthOverflow(t *testing.T) {
+ buf := new(bytes.Buffer)
+
+ w := NewWriter(buf)
+
+ // First record.
+ ww, err := w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), blockSize/2)); err != nil {
+ t.Fatalf("write #0: unexpected error: %v", err)
+ }
+
+ // Second record.
+ ww, err = w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), blockSize-headerSize)); err != nil {
+ t.Fatalf("write #1: unexpected error: %v", err)
+ }
+
+ // Third record.
+ ww, err = w.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if _, err := ww.Write(bytes.Repeat([]byte("0"), (blockSize-headerSize)+1)); err != nil {
+ t.Fatalf("write #2: unexpected error: %v", err)
+ }
+
+ if err := w.Close(); err != nil {
+ t.Fatal(err)
+ }
+
+ b := buf.Bytes()
+ // Corrupting record #1.
+ x := blockSize/2 + headerSize
+ binary.LittleEndian.PutUint16(b[x+4:], 0xffff)
+
+ r := NewReader(bytes.NewReader(b), dropper{t}, false, true)
+
+ // First read (first record).
+ rr, err := r.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, err := io.Copy(ioutil.Discard, rr)
+ if err != nil {
+ t.Fatalf("read #0: %v", err)
+ }
+ if want := int64(blockSize / 2); n != want {
+ t.Fatalf("read #0: got %d bytes want %d", n, want)
+ }
+
+ // Second read (third record).
+ rr, err = r.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, err = io.Copy(ioutil.Discard, rr)
+ if err != nil {
+ t.Fatalf("read #1: %v", err)
+ }
+ if want := int64(blockSize-headerSize) + 1; n != want {
+ t.Fatalf("read #1: got %d bytes want %d", n, want)
+ }
+
+ if _, err := r.Next(); err != io.EOF {
+ t.Fatalf("last next: unexpected error: %v", err)
+ }
+}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key.go
index b9acf932d..572ae8150 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key.go
@@ -9,15 +9,30 @@ package leveldb
import (
"encoding/binary"
"fmt"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
)
-type vType int
+type ErrIkeyCorrupted struct {
+ Ikey []byte
+ Reason string
+}
+
+func (e *ErrIkeyCorrupted) Error() string {
+ return fmt.Sprintf("leveldb: iKey %q corrupted: %s", e.Ikey, e.Reason)
+}
+
+func newErrIkeyCorrupted(ikey []byte, reason string) error {
+ return errors.NewErrCorrupted(nil, &ErrIkeyCorrupted{append([]byte{}, ikey...), reason})
+}
+
+type kType int
-func (t vType) String() string {
- switch t {
- case tDel:
+func (kt kType) String() string {
+ switch kt {
+ case ktDel:
return "d"
- case tVal:
+ case ktVal:
return "v"
}
return "x"
@@ -26,16 +41,16 @@ func (t vType) String() string {
// Value types encoded as the last component of internal keys.
// Don't modify; this value are saved to disk.
const (
- tDel vType = iota
- tVal
+ ktDel kType = iota
+ ktVal
)
-// tSeek defines the vType that should be passed when constructing an
+// ktSeek defines the kType that should be passed when constructing an
// internal key for seeking to a particular sequence number (since we
// sort sequence numbers in decreasing order and the value type is
// embedded as the low 8 bits in the sequence number in internal keys,
// we need to use the highest-numbered ValueType, not the lowest).
-const tSeek = tVal
+const ktSeek = ktVal
const (
// Maximum value possible for sequence number; the 8-bits are
@@ -43,7 +58,7 @@ const (
// 64-bit integer.
kMaxSeq uint64 = (uint64(1) << 56) - 1
// Maximum value possible for packed sequence number and type.
- kMaxNum uint64 = (kMaxSeq << 8) | uint64(tSeek)
+ kMaxNum uint64 = (kMaxSeq << 8) | uint64(ktSeek)
)
// Maximum number encoded in bytes.
@@ -55,85 +70,73 @@ func init() {
type iKey []byte
-func newIKey(ukey []byte, seq uint64, t vType) iKey {
- if seq > kMaxSeq || t > tVal {
- panic("invalid seq number or value type")
+func newIkey(ukey []byte, seq uint64, kt kType) iKey {
+ if seq > kMaxSeq {
+ panic("leveldb: invalid sequence number")
+ } else if kt > ktVal {
+ panic("leveldb: invalid type")
}
- b := make(iKey, len(ukey)+8)
- copy(b, ukey)
- binary.LittleEndian.PutUint64(b[len(ukey):], (seq<<8)|uint64(t))
- return b
+ ik := make(iKey, len(ukey)+8)
+ copy(ik, ukey)
+ binary.LittleEndian.PutUint64(ik[len(ukey):], (seq<<8)|uint64(kt))
+ return ik
}
-func parseIkey(p []byte) (ukey []byte, seq uint64, t vType, ok bool) {
- if len(p) < 8 {
- return
+func parseIkey(ik []byte) (ukey []byte, seq uint64, kt kType, err error) {
+ if len(ik) < 8 {
+ return nil, 0, 0, newErrIkeyCorrupted(ik, "invalid length")
}
- num := binary.LittleEndian.Uint64(p[len(p)-8:])
- seq, t = uint64(num>>8), vType(num&0xff)
- if t > tVal {
- return
+ num := binary.LittleEndian.Uint64(ik[len(ik)-8:])
+ seq, kt = uint64(num>>8), kType(num&0xff)
+ if kt > ktVal {
+ return nil, 0, 0, newErrIkeyCorrupted(ik, "invalid type")
}
- ukey = p[:len(p)-8]
- ok = true
+ ukey = ik[:len(ik)-8]
return
}
-func validIkey(p []byte) bool {
- _, _, _, ok := parseIkey(p)
- return ok
+func validIkey(ik []byte) bool {
+ _, _, _, err := parseIkey(ik)
+ return err == nil
}
-func (p iKey) assert() {
- if p == nil {
- panic("nil iKey")
+func (ik iKey) assert() {
+ if ik == nil {
+ panic("leveldb: nil iKey")
}
- if len(p) < 8 {
- panic(fmt.Sprintf("invalid iKey %q, len=%d", []byte(p), len(p)))
+ if len(ik) < 8 {
+ panic(fmt.Sprintf("leveldb: iKey %q, len=%d: invalid length", []byte(ik), len(ik)))
}
}
-func (p iKey) ok() bool {
- if len(p) < 8 {
- return false
- }
- _, _, ok := p.parseNum()
- return ok
-}
-
-func (p iKey) ukey() []byte {
- p.assert()
- return p[:len(p)-8]
+func (ik iKey) ukey() []byte {
+ ik.assert()
+ return ik[:len(ik)-8]
}
-func (p iKey) num() uint64 {
- p.assert()
- return binary.LittleEndian.Uint64(p[len(p)-8:])
+func (ik iKey) num() uint64 {
+ ik.assert()
+ return binary.LittleEndian.Uint64(ik[len(ik)-8:])
}
-func (p iKey) parseNum() (seq uint64, t vType, ok bool) {
- if p == nil {
- panic("nil iKey")
+func (ik iKey) parseNum() (seq uint64, kt kType) {
+ num := ik.num()
+ seq, kt = uint64(num>>8), kType(num&0xff)
+ if kt > ktVal {
+ panic(fmt.Sprintf("leveldb: iKey %q, len=%d: invalid type %#x", []byte(ik), len(ik), kt))
}
- if len(p) < 8 {
- return
- }
- num := p.num()
- seq, t = uint64(num>>8), vType(num&0xff)
- if t > tVal {
- return 0, 0, false
- }
- ok = true
return
}
-func (p iKey) String() string {
- if len(p) == 0 {
+func (ik iKey) String() string {
+ if ik == nil {
return "<nil>"
}
- if seq, t, ok := p.parseNum(); ok {
- return fmt.Sprintf("%s,%s%d", shorten(string(p.ukey())), t, seq)
+
+ if ukey, seq, kt, err := parseIkey(ik); err == nil {
+ return fmt.Sprintf("%s,%s%d", shorten(string(ukey)), kt, seq)
+ } else {
+ return "<invalid>"
}
- return "<invalid>"
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key_test.go
index e307cfc1d..30eadf784 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key_test.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key_test.go
@@ -15,8 +15,8 @@ import (
var defaultIComparer = &iComparer{comparer.DefaultComparer}
-func ikey(key string, seq uint64, t vType) iKey {
- return newIKey([]byte(key), uint64(seq), t)
+func ikey(key string, seq uint64, kt kType) iKey {
+ return newIkey([]byte(key), uint64(seq), kt)
}
func shortSep(a, b []byte) []byte {
@@ -37,27 +37,37 @@ func shortSuccessor(b []byte) []byte {
return dst
}
-func testSingleKey(t *testing.T, key string, seq uint64, vt vType) {
- ik := ikey(key, seq, vt)
+// testSingleKey builds an internal key from key, seq and kt, then checks that
+// ukey, parseNum and parseIkey each recover the values that were encoded.
+func testSingleKey(t *testing.T, key string, seq uint64, kt kType) {
+ ik := ikey(key, seq, kt)
if !bytes.Equal(ik.ukey(), []byte(key)) {
t.Errorf("user key does not equal, got %v, want %v", string(ik.ukey()), key)
}
- if rseq, rt, ok := ik.parseNum(); ok {
+ rseq, rt := ik.parseNum()
+ if rseq != seq {
+ t.Errorf("seq number does not equal, got %v, want %v", rseq, seq)
+ }
+ if rt != kt {
+ t.Errorf("type does not equal, got %v, want %v", rt, kt)
+ }
+
+ if rukey, rseq, rt, kerr := parseIkey(ik); kerr == nil {
+ if !bytes.Equal(rukey, []byte(key)) {
+ // Report the key parseIkey returned, not ik.ukey(), so the message shows the value that was compared.
+ t.Errorf("user key does not equal, got %v, want %v", string(rukey), key)
+ }
if rseq != seq {
t.Errorf("seq number does not equal, got %v, want %v", rseq, seq)
}
-
- if rt != vt {
- t.Errorf("type does not equal, got %v, want %v", rt, vt)
+ if rt != kt {
+ t.Errorf("type does not equal, got %v, want %v", rt, kt)
}
} else {
- t.Error("cannot parse seq and type")
+ t.Errorf("key error: %v", kerr)
}
}
-func TestIKey_EncodeDecode(t *testing.T) {
+func TestIkey_EncodeDecode(t *testing.T) {
keys := []string{"", "k", "hello", "longggggggggggggggggggggg"}
seqs := []uint64{
1, 2, 3,
@@ -67,8 +77,8 @@ func TestIKey_EncodeDecode(t *testing.T) {
}
for _, key := range keys {
for _, seq := range seqs {
- testSingleKey(t, key, seq, tVal)
- testSingleKey(t, "hello", 1, tDel)
+ testSingleKey(t, key, seq, ktVal)
+ testSingleKey(t, "hello", 1, ktDel)
}
}
}
@@ -79,45 +89,45 @@ func assertBytes(t *testing.T, want, got []byte) {
}
}
-func TestIKeyShortSeparator(t *testing.T) {
+func TestIkeyShortSeparator(t *testing.T) {
// When user keys are same
- assertBytes(t, ikey("foo", 100, tVal),
- shortSep(ikey("foo", 100, tVal),
- ikey("foo", 99, tVal)))
- assertBytes(t, ikey("foo", 100, tVal),
- shortSep(ikey("foo", 100, tVal),
- ikey("foo", 101, tVal)))
- assertBytes(t, ikey("foo", 100, tVal),
- shortSep(ikey("foo", 100, tVal),
- ikey("foo", 100, tVal)))
- assertBytes(t, ikey("foo", 100, tVal),
- shortSep(ikey("foo", 100, tVal),
- ikey("foo", 100, tDel)))
+ assertBytes(t, ikey("foo", 100, ktVal),
+ shortSep(ikey("foo", 100, ktVal),
+ ikey("foo", 99, ktVal)))
+ assertBytes(t, ikey("foo", 100, ktVal),
+ shortSep(ikey("foo", 100, ktVal),
+ ikey("foo", 101, ktVal)))
+ assertBytes(t, ikey("foo", 100, ktVal),
+ shortSep(ikey("foo", 100, ktVal),
+ ikey("foo", 100, ktVal)))
+ assertBytes(t, ikey("foo", 100, ktVal),
+ shortSep(ikey("foo", 100, ktVal),
+ ikey("foo", 100, ktDel)))
// When user keys are misordered
- assertBytes(t, ikey("foo", 100, tVal),
- shortSep(ikey("foo", 100, tVal),
- ikey("bar", 99, tVal)))
+ assertBytes(t, ikey("foo", 100, ktVal),
+ shortSep(ikey("foo", 100, ktVal),
+ ikey("bar", 99, ktVal)))
// When user keys are different, but correctly ordered
- assertBytes(t, ikey("g", uint64(kMaxSeq), tSeek),
- shortSep(ikey("foo", 100, tVal),
- ikey("hello", 200, tVal)))
+ assertBytes(t, ikey("g", uint64(kMaxSeq), ktSeek),
+ shortSep(ikey("foo", 100, ktVal),
+ ikey("hello", 200, ktVal)))
// When start user key is prefix of limit user key
- assertBytes(t, ikey("foo", 100, tVal),
- shortSep(ikey("foo", 100, tVal),
- ikey("foobar", 200, tVal)))
+ assertBytes(t, ikey("foo", 100, ktVal),
+ shortSep(ikey("foo", 100, ktVal),
+ ikey("foobar", 200, ktVal)))
// When limit user key is prefix of start user key
- assertBytes(t, ikey("foobar", 100, tVal),
- shortSep(ikey("foobar", 100, tVal),
- ikey("foo", 200, tVal)))
+ assertBytes(t, ikey("foobar", 100, ktVal),
+ shortSep(ikey("foobar", 100, ktVal),
+ ikey("foo", 200, ktVal)))
}
-func TestIKeyShortestSuccessor(t *testing.T) {
- assertBytes(t, ikey("g", uint64(kMaxSeq), tSeek),
- shortSuccessor(ikey("foo", 100, tVal)))
- assertBytes(t, ikey("\xff\xff", 100, tVal),
- shortSuccessor(ikey("\xff\xff", 100, tVal)))
+func TestIkeyShortestSuccessor(t *testing.T) {
+ assertBytes(t, ikey("g", uint64(kMaxSeq), ktSeek),
+ shortSuccessor(ikey("foo", 100, ktVal)))
+ assertBytes(t, ikey("\xff\xff", 100, ktVal),
+ shortSuccessor(ikey("\xff\xff", 100, ktVal)))
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/leveldb_suite_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/leveldb_suite_test.go
index 245b1fd4d..fefa007a7 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/leveldb_suite_test.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/leveldb_suite_test.go
@@ -3,18 +3,9 @@ package leveldb
import (
"testing"
- . "github.com/onsi/ginkgo"
- . "github.com/onsi/gomega"
-
"github.com/syndtr/goleveldb/leveldb/testutil"
)
-func TestLeveldb(t *testing.T) {
- testutil.RunDefer()
-
- RegisterFailHandler(Fail)
- RunSpecs(t, "Leveldb Suite")
-
- RegisterTestingT(t)
- testutil.RunDefer("teardown")
+func TestLevelDB(t *testing.T) {
+ testutil.RunSuite(t, "LevelDB Suite")
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go
index 7bcae992a..e5398873b 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go
@@ -12,12 +12,14 @@ import (
"sync"
"github.com/syndtr/goleveldb/leveldb/comparer"
+ "github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/util"
)
var (
- ErrNotFound = util.ErrNotFound
+ ErrNotFound = errors.ErrNotFound
+ ErrIterReleased = errors.New("leveldb/memdb: iterator released")
)
const tMaxHeight = 12
@@ -29,6 +31,7 @@ type dbIter struct {
node int
forward bool
key, value []byte
+ err error
}
func (i *dbIter) fill(checkStart, checkLimit bool) bool {
@@ -59,6 +62,11 @@ func (i *dbIter) Valid() bool {
}
func (i *dbIter) First() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
i.forward = true
i.p.mu.RLock()
defer i.p.mu.RUnlock()
@@ -71,9 +79,11 @@ func (i *dbIter) First() bool {
}
func (i *dbIter) Last() bool {
- if i.p == nil {
+ if i.Released() {
+ i.err = ErrIterReleased
return false
}
+
i.forward = false
i.p.mu.RLock()
defer i.p.mu.RUnlock()
@@ -86,9 +96,11 @@ func (i *dbIter) Last() bool {
}
func (i *dbIter) Seek(key []byte) bool {
- if i.p == nil {
+ if i.Released() {
+ i.err = ErrIterReleased
return false
}
+
i.forward = true
i.p.mu.RLock()
defer i.p.mu.RUnlock()
@@ -100,9 +112,11 @@ func (i *dbIter) Seek(key []byte) bool {
}
func (i *dbIter) Next() bool {
- if i.p == nil {
+ if i.Released() {
+ i.err = ErrIterReleased
return false
}
+
if i.node == 0 {
if !i.forward {
return i.First()
@@ -117,9 +131,11 @@ func (i *dbIter) Next() bool {
}
func (i *dbIter) Prev() bool {
- if i.p == nil {
+ if i.Released() {
+ i.err = ErrIterReleased
return false
}
+
if i.node == 0 {
if i.forward {
return i.Last()
@@ -141,10 +157,10 @@ func (i *dbIter) Value() []byte {
return i.value
}
-func (i *dbIter) Error() error { return nil }
+func (i *dbIter) Error() error { return i.err }
func (i *dbIter) Release() {
- if i.p != nil {
+ if !i.Released() {
i.p = nil
i.node = 0
i.key = nil
@@ -437,6 +453,8 @@ func (p *DB) Reset() {
// New creates a new initalized in-memory key/value DB. The capacity
// is the initial key/value buffer capacity. The capacity is advisory,
// not enforced.
+//
+// The returned DB instance is goroutine-safe.
func New(cmp comparer.BasicComparer, capacity int) *DB {
p := &DB{
cmp: cmp,
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_suite_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_suite_test.go
index 788539a87..18c304b7f 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_suite_test.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_suite_test.go
@@ -3,15 +3,9 @@ package memdb
import (
"testing"
- . "github.com/onsi/ginkgo"
- . "github.com/onsi/gomega"
-
"github.com/syndtr/goleveldb/leveldb/testutil"
)
-func TestMemdb(t *testing.T) {
- testutil.RunDefer()
-
- RegisterFailHandler(Fail)
- RunSpecs(t, "Memdb Suite")
+func TestMemDB(t *testing.T) {
+ testutil.RunSuite(t, "MemDB Suite")
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_test.go
index f96a9d1ea..5dd6dbc7b 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_test.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_test.go
@@ -129,7 +129,7 @@ var _ = testutil.Defer(func() {
}
return db
- })
+ }, nil, nil)
})
})
})
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt/options.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt/options.go
index b940ce427..61f0eadf9 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt/options.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt/options.go
@@ -11,6 +11,7 @@ import (
"github.com/syndtr/goleveldb/leveldb/cache"
"github.com/syndtr/goleveldb/leveldb/comparer"
"github.com/syndtr/goleveldb/leveldb/filter"
+ "math"
)
const (
@@ -19,25 +20,57 @@ const (
GiB = MiB * 1024
)
-const (
- DefaultBlockCacheSize = 8 * MiB
- DefaultBlockRestartInterval = 16
- DefaultBlockSize = 4 * KiB
- DefaultCompressionType = SnappyCompression
- DefaultMaxOpenFiles = 1000
- DefaultWriteBuffer = 4 * MiB
+var (
+ DefaultBlockCacher = LRUCacher
+ DefaultBlockCacheCapacity = 8 * MiB
+ DefaultBlockRestartInterval = 16
+ DefaultBlockSize = 4 * KiB
+ DefaultCompactionExpandLimitFactor = 25
+ DefaultCompactionGPOverlapsFactor = 10
+ DefaultCompactionL0Trigger = 4
+ DefaultCompactionSourceLimitFactor = 1
+ DefaultCompactionTableSize = 2 * MiB
+ DefaultCompactionTableSizeMultiplier = 1.0
+ DefaultCompactionTotalSize = 10 * MiB
+ DefaultCompactionTotalSizeMultiplier = 10.0
+ DefaultCompressionType = SnappyCompression
+ DefaultIteratorSamplingRate = 1 * MiB
+ DefaultMaxMemCompationLevel = 2
+ DefaultNumLevel = 7
+ DefaultOpenFilesCacher = LRUCacher
+ DefaultOpenFilesCacheCapacity = 500
+ DefaultWriteBuffer = 4 * MiB
+ DefaultWriteL0PauseTrigger = 12
+ DefaultWriteL0SlowdownTrigger = 8
)
-type noCache struct{}
+// Cacher is a caching algorithm.
+type Cacher interface {
+ New(capacity int) cache.Cacher
+}
+
+type CacherFunc struct {
+ NewFunc func(capacity int) cache.Cacher
+}
+
+func (f *CacherFunc) New(capacity int) cache.Cacher {
+ if f.NewFunc != nil {
+ return f.NewFunc(capacity)
+ }
+ return nil
+}
-func (noCache) SetCapacity(capacity int) {}
-func (noCache) GetNamespace(id uint64) cache.Namespace { return nil }
-func (noCache) Purge(fin cache.PurgeFin) {}
-func (noCache) Zap(closed bool) {}
+func noCacher(int) cache.Cacher { return nil }
-var NoCache cache.Cache = noCache{}
+var (
+ // LRUCacher is the LRU-cache algorithm.
+ LRUCacher = &CacherFunc{cache.NewLRU}
-// Compression is the per-block compression algorithm to use.
+ // NoCacher is the value to disable caching algorithm.
+ NoCacher = &CacherFunc{}
+)
+
+// Compression is the 'sorted table' block compression algorithm to use.
type Compression uint
func (c Compression) String() string {
@@ -59,34 +92,47 @@ const (
nCompression
)
-// Strict is the DB strict level.
+// Strict is the DB 'strict level'.
type Strict uint
const (
// If present then a corrupted or invalid chunk or block in manifest
- // journal will cause an error istead of being dropped.
+ // journal will cause an error instead of being dropped.
+ // This will prevent a database with a corrupted manifest from being opened.
StrictManifest Strict = 1 << iota
- // If present then a corrupted or invalid chunk or block in journal
- // will cause an error istead of being dropped.
- StrictJournal
-
// If present then journal chunk checksum will be verified.
StrictJournalChecksum
- // If present then an invalid key/value pair will cause an error
- // instead of being skipped.
- StrictIterator
+ // If present then a corrupted or invalid chunk or block in journal
+ // will cause an error instead of being dropped.
+ // This will prevent a database with a corrupted journal from being opened.
+ StrictJournal
// If present then 'sorted table' block checksum will be verified.
+ // This has effect on both 'read operation' and compaction.
StrictBlockChecksum
+ // If present then a corrupted 'sorted table' will fail compaction.
+ // The database will enter read-only mode.
+ StrictCompaction
+
+ // If present then a corrupted 'sorted table' will halt 'read operation'.
+ StrictReader
+
+ // If present then leveldb.Recover will drop corrupted 'sorted table'.
+ StrictRecovery
+
+ // This is only applicable to ReadOptions; if present then this ReadOptions
+ // 'strict level' will override the global one.
+ StrictOverride
+
// StrictAll enables all strict flags.
- StrictAll = StrictManifest | StrictJournal | StrictJournalChecksum | StrictIterator | StrictBlockChecksum
+ StrictAll = StrictManifest | StrictJournalChecksum | StrictJournal | StrictBlockChecksum | StrictCompaction | StrictReader | StrictRecovery
// DefaultStrict is the default strict flags. Specify any strict flags
// will override default strict flags as whole (i.e. not OR'ed).
- DefaultStrict = StrictJournalChecksum | StrictBlockChecksum
+ DefaultStrict = StrictJournalChecksum | StrictBlockChecksum | StrictCompaction | StrictReader
// NoStrict disables all strict flags. Override default strict flags.
NoStrict = ^StrictAll
@@ -101,11 +147,17 @@ type Options struct {
// The default value is nil
AltFilters []filter.Filter
- // BlockCache provides per-block caching for LevelDB. Specify NoCache to
- // disable block caching.
+ // BlockCacher provides cache algorithm for LevelDB 'sorted table' block caching.
+ // Specify NoCacher to disable caching algorithm.
//
- // By default LevelDB will create LRU-cache with capacity of 8MiB.
- BlockCache cache.Cache
+ // The default value is LRUCacher.
+ BlockCacher Cacher
+
+ // BlockCacheCapacity defines the capacity of the 'sorted table' block caching.
+ // Use -1 for zero, this has same effect as specifying NoCacher to BlockCacher.
+ //
+ // The default value is 8MiB.
+ BlockCacheCapacity int
// BlockRestartInterval is the number of keys between restart points for
// delta encoding of keys.
@@ -119,6 +171,73 @@ type Options struct {
// The default value is 4KiB.
BlockSize int
+ // CompactionExpandLimitFactor limits compaction size after expanded.
+ // This will be multiplied by table size limit at compaction target level.
+ //
+ // The default value is 25.
+ CompactionExpandLimitFactor int
+
+ // CompactionGPOverlapsFactor limits overlaps in grandparent (Level + 2) that a
+ // single 'sorted table' generates.
+ // This will be multiplied by table size limit at grandparent level.
+ //
+ // The default value is 10.
+ CompactionGPOverlapsFactor int
+
+ // CompactionL0Trigger defines number of 'sorted table' at level-0 that will
+ // trigger compaction.
+ //
+ // The default value is 4.
+ CompactionL0Trigger int
+
+ // CompactionSourceLimitFactor limits compaction source size. This doesn't apply to
+ // level-0.
+ // This will be multiplied by table size limit at compaction target level.
+ //
+ // The default value is 1.
+ CompactionSourceLimitFactor int
+
+ // CompactionTableSize limits size of 'sorted table' that compaction generates.
+ // The limits for each level will be calculated as:
+ // CompactionTableSize * (CompactionTableSizeMultiplier ^ Level)
+ // The multiplier for each level can also be fine-tuned using CompactionTableSizeMultiplierPerLevel.
+ //
+ // The default value is 2MiB.
+ CompactionTableSize int
+
+ // CompactionTableSizeMultiplier defines multiplier for CompactionTableSize.
+ //
+ // The default value is 1.
+ CompactionTableSizeMultiplier float64
+
+ // CompactionTableSizeMultiplierPerLevel defines per-level multiplier for
+ // CompactionTableSize.
+ // Use zero to skip a level.
+ //
+ // The default value is nil.
+ CompactionTableSizeMultiplierPerLevel []float64
+
+ // CompactionTotalSize limits total size of 'sorted table' for each level.
+ // The limits for each level will be calculated as:
+ // CompactionTotalSize * (CompactionTotalSizeMultiplier ^ Level)
+ // The multiplier for each level can also be fine-tuned using
+ // CompactionTotalSizeMultiplierPerLevel.
+ //
+ // The default value is 10MiB.
+ CompactionTotalSize int
+
+ // CompactionTotalSizeMultiplier defines multiplier for CompactionTotalSize.
+ //
+ // The default value is 10.
+ CompactionTotalSizeMultiplier float64
+
+ // CompactionTotalSizeMultiplierPerLevel defines per-level multiplier for
+ // CompactionTotalSize.
+ // Use zero to skip a level.
+ //
+ // The default value is nil.
+ CompactionTotalSizeMultiplierPerLevel []float64
+
// Comparer defines a total ordering over the space of []byte keys: a 'less
// than' relationship. The same comparison algorithm must be used for reads
// and writes over the lifetime of the DB.
@@ -126,11 +245,22 @@ type Options struct {
// The default value uses the same ordering as bytes.Compare.
Comparer comparer.Comparer
- // Compression defines the per-block compression to use.
+ // Compression defines the 'sorted table' block compression to use.
//
// The default value (DefaultCompression) uses snappy compression.
Compression Compression
+ // DisableBlockCache allows disabling the use of cache.Cache functionality on
+ // 'sorted table' block.
+ //
+ // The default value is false.
+ DisableBlockCache bool
+
+ // DisableCompactionBackoff allows disabling compaction retry backoff.
+ //
+ // The default value is false.
+ DisableCompactionBackoff bool
+
// ErrorIfExist defines whether an error should returned if the DB already
// exist.
//
@@ -159,12 +289,37 @@ type Options struct {
// The default value is nil.
Filter filter.Filter
- // MaxOpenFiles defines maximum number of open files to kept around
- // (cached). This is not an hard limit, actual open files may exceed
- // the defined value.
+ // IteratorSamplingRate defines approximate gap (in bytes) between read
+ // sampling of an iterator. The samples will be used to determine when
+ // compaction should be triggered.
+ //
+ // The default is 1MiB.
+ IteratorSamplingRate int
+
+ // MaxMemCompationLevel defines the maximum level a newly compacted 'memdb'
+ // will be pushed into if it doesn't create overlap. This should be less than
+ // NumLevel. Use -1 for level-0.
//
- // The default value is 1000.
- MaxOpenFiles int
+ // The default is 2.
+ MaxMemCompationLevel int
+
+ // NumLevel defines the number of database levels. It shouldn't be changed
+ // between opens, or the database will panic.
+ //
+ // The default is 7.
+ NumLevel int
+
+ // OpenFilesCacher provides cache algorithm for open files caching.
+ // Specify NoCacher to disable caching algorithm.
+ //
+ // The default value is LRUCacher.
+ OpenFilesCacher Cacher
+
+ // OpenFilesCacheCapacity defines the capacity of the open files caching.
+ // Use -1 for zero, this has same effect as specifying NoCacher to OpenFilesCacher.
+ //
+ // The default value is 500.
+ OpenFilesCacheCapacity int
// Strict defines the DB strict level.
Strict Strict
@@ -177,6 +332,18 @@ type Options struct {
//
// The default value is 4MiB.
WriteBuffer int
+
+ // WriteL0PauseTrigger defines number of 'sorted table' at level-0 that will
+ // pause write.
+ //
+ // The default value is 12.
+ WriteL0PauseTrigger int
+
+ // WriteL0SlowdownTrigger defines number of 'sorted table' at level-0 that
+ // will trigger write slowdown.
+ //
+ // The default value is 8.
+ WriteL0SlowdownTrigger int
}
func (o *Options) GetAltFilters() []filter.Filter {
@@ -186,11 +353,22 @@ func (o *Options) GetAltFilters() []filter.Filter {
return o.AltFilters
}
-func (o *Options) GetBlockCache() cache.Cache {
- if o == nil {
+func (o *Options) GetBlockCacher() Cacher {
+ if o == nil || o.BlockCacher == nil {
+ return DefaultBlockCacher
+ } else if o.BlockCacher == NoCacher {
return nil
}
- return o.BlockCache
+ return o.BlockCacher
+}
+
+func (o *Options) GetBlockCacheCapacity() int {
+ if o == nil || o.BlockCacheCapacity == 0 {
+ return DefaultBlockCacheCapacity
+ } else if o.BlockCacheCapacity < 0 {
+ return 0
+ }
+ return o.BlockCacheCapacity
}
func (o *Options) GetBlockRestartInterval() int {
@@ -207,6 +385,79 @@ func (o *Options) GetBlockSize() int {
return o.BlockSize
}
+func (o *Options) GetCompactionExpandLimit(level int) int {
+ factor := DefaultCompactionExpandLimitFactor
+ if o != nil && o.CompactionExpandLimitFactor > 0 {
+ factor = o.CompactionExpandLimitFactor
+ }
+ return o.GetCompactionTableSize(level+1) * factor
+}
+
+func (o *Options) GetCompactionGPOverlaps(level int) int {
+ factor := DefaultCompactionGPOverlapsFactor
+ if o != nil && o.CompactionGPOverlapsFactor > 0 {
+ factor = o.CompactionGPOverlapsFactor
+ }
+ return o.GetCompactionTableSize(level+2) * factor
+}
+
+func (o *Options) GetCompactionL0Trigger() int {
+ if o == nil || o.CompactionL0Trigger == 0 {
+ return DefaultCompactionL0Trigger
+ }
+ return o.CompactionL0Trigger
+}
+
+func (o *Options) GetCompactionSourceLimit(level int) int {
+ factor := DefaultCompactionSourceLimitFactor
+ if o != nil && o.CompactionSourceLimitFactor > 0 {
+ factor = o.CompactionSourceLimitFactor
+ }
+ return o.GetCompactionTableSize(level+1) * factor
+}
+
+func (o *Options) GetCompactionTableSize(level int) int {
+ var (
+ base = DefaultCompactionTableSize
+ mult float64
+ )
+ if o != nil {
+ if o.CompactionTableSize > 0 {
+ base = o.CompactionTableSize
+ }
+ if len(o.CompactionTableSizeMultiplierPerLevel) > level && o.CompactionTableSizeMultiplierPerLevel[level] > 0 {
+ mult = o.CompactionTableSizeMultiplierPerLevel[level]
+ } else if o.CompactionTableSizeMultiplier > 0 {
+ mult = math.Pow(o.CompactionTableSizeMultiplier, float64(level))
+ }
+ }
+ if mult == 0 {
+ mult = math.Pow(DefaultCompactionTableSizeMultiplier, float64(level))
+ }
+ return int(float64(base) * mult)
+}
+
+func (o *Options) GetCompactionTotalSize(level int) int64 {
+ var (
+ base = DefaultCompactionTotalSize
+ mult float64
+ )
+ if o != nil {
+ if o.CompactionTotalSize > 0 {
+ base = o.CompactionTotalSize
+ }
+ if len(o.CompactionTotalSizeMultiplierPerLevel) > level && o.CompactionTotalSizeMultiplierPerLevel[level] > 0 {
+ mult = o.CompactionTotalSizeMultiplierPerLevel[level]
+ } else if o.CompactionTotalSizeMultiplier > 0 {
+ mult = math.Pow(o.CompactionTotalSizeMultiplier, float64(level))
+ }
+ }
+ if mult == 0 {
+ mult = math.Pow(DefaultCompactionTotalSizeMultiplier, float64(level))
+ }
+ return int64(float64(base) * mult)
+}
+
func (o *Options) GetComparer() comparer.Comparer {
if o == nil || o.Comparer == nil {
return comparer.DefaultComparer
@@ -221,6 +472,13 @@ func (o *Options) GetCompression() Compression {
return o.Compression
}
+// GetDisableCompactionBackoff returns DisableCompactionBackoff, or false when
+// the receiver is nil.
+func (o *Options) GetDisableCompactionBackoff() bool {
+ if o == nil {
+ return false
+ }
+ return o.DisableCompactionBackoff
+}
+
func (o *Options) GetErrorIfExist() bool {
if o == nil {
return false
@@ -242,11 +500,52 @@ func (o *Options) GetFilter() filter.Filter {
return o.Filter
}
-func (o *Options) GetMaxOpenFiles() int {
- if o == nil || o.MaxOpenFiles <= 0 {
- return DefaultMaxOpenFiles
+// GetIteratorSamplingRate returns IteratorSamplingRate, or
+// DefaultIteratorSamplingRate when the receiver is nil or the field is zero
+// or negative.
+func (o *Options) GetIteratorSamplingRate() int {
+ if o == nil || o.IteratorSamplingRate <= 0 {
+ return DefaultIteratorSamplingRate
+ }
+ return o.IteratorSamplingRate
+}
+
+// GetMaxMemCompationLevel returns the effective MaxMemCompationLevel.
+// A positive field value is used as is, a negative value selects level 0,
+// and zero (or a nil receiver) selects DefaultMaxMemCompationLevel. The
+// result is capped at GetNumLevel()-1.
+func (o *Options) GetMaxMemCompationLevel() int {
+ level := DefaultMaxMemCompationLevel
+ if o != nil {
+ if o.MaxMemCompationLevel > 0 {
+ level = o.MaxMemCompationLevel
+ } else if o.MaxMemCompationLevel < 0 {
+ level = 0
+ }
+ }
+ // Never report a level at or beyond the configured number of levels.
+ if level >= o.GetNumLevel() {
+ return o.GetNumLevel() - 1
+ }
+ return level
+}
+
+// GetNumLevel returns NumLevel, or DefaultNumLevel when the receiver is nil
+// or the field is zero or negative.
+func (o *Options) GetNumLevel() int {
+ if o == nil || o.NumLevel <= 0 {
+ return DefaultNumLevel
+ }
+ return o.NumLevel
+}
+
+// GetOpenFilesCacher returns the configured OpenFilesCacher. It returns
+// DefaultOpenFilesCacher when the receiver or the field is nil, and nil when
+// the field is set to NoCacher.
+func (o *Options) GetOpenFilesCacher() Cacher {
+ if o == nil || o.OpenFilesCacher == nil {
+ return DefaultOpenFilesCacher
}
- return o.MaxOpenFiles
+ if o.OpenFilesCacher == NoCacher {
+ return nil
+ }
+ return o.OpenFilesCacher
+}
+
+// GetOpenFilesCacheCapacity returns OpenFilesCacheCapacity. It returns
+// DefaultOpenFilesCacheCapacity when the receiver is nil or the field is
+// zero, and 0 when the field is negative.
+func (o *Options) GetOpenFilesCacheCapacity() int {
+ if o == nil || o.OpenFilesCacheCapacity == 0 {
+ return DefaultOpenFilesCacheCapacity
+ } else if o.OpenFilesCacheCapacity < 0 {
+ return 0
+ }
+ return o.OpenFilesCacheCapacity
}
func (o *Options) GetStrict(strict Strict) bool {
@@ -263,6 +562,20 @@ func (o *Options) GetWriteBuffer() int {
return o.WriteBuffer
}
+// GetWriteL0PauseTrigger returns WriteL0PauseTrigger, or
+// DefaultWriteL0PauseTrigger when the receiver is nil or the field is zero.
+// Only zero selects the default; any other value is returned unchanged.
+func (o *Options) GetWriteL0PauseTrigger() int {
+ if o == nil || o.WriteL0PauseTrigger == 0 {
+ return DefaultWriteL0PauseTrigger
+ }
+ return o.WriteL0PauseTrigger
+}
+
+// GetWriteL0SlowdownTrigger returns WriteL0SlowdownTrigger, or
+// DefaultWriteL0SlowdownTrigger when the receiver is nil or the field is
+// zero. Only zero selects the default; any other value is returned unchanged.
+func (o *Options) GetWriteL0SlowdownTrigger() int {
+ if o == nil || o.WriteL0SlowdownTrigger == 0 {
+ return DefaultWriteL0SlowdownTrigger
+ }
+ return o.WriteL0SlowdownTrigger
+}
+
// ReadOptions holds the optional parameters for 'read operation'. The
// 'read operation' includes Get, Find and NewIterator.
type ReadOptions struct {
@@ -273,8 +586,8 @@ type ReadOptions struct {
// The default value is false.
DontFillCache bool
- // Strict overrides global DB strict level. Only StrictIterator and
- // StrictBlockChecksum that does have effects here.
+ // Strict will be OR'ed with the global DB 'strict level' unless
+ // StrictOverride is present. Currently only StrictReader has an effect here.
Strict Strict
}
@@ -316,3 +629,11 @@ func (wo *WriteOptions) GetSync() bool {
}
return wo.Sync
}
+
+// GetStrict reports whether the given strict flag is in effect for a read
+// operation. When ro has StrictOverride set, only ro is consulted; otherwise
+// the flag is in effect if either o or ro has it set.
+func GetStrict(o *Options, ro *ReadOptions, strict Strict) bool {
+	if ro.GetStrict(StrictOverride) {
+		return ro.GetStrict(strict)
+	}
+	return o.GetStrict(strict) || ro.GetStrict(strict)
+}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/options.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/options.go
index fc6a96965..a3d84ef60 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/options.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/options.go
@@ -7,35 +7,86 @@
package leveldb
import (
- "github.com/syndtr/goleveldb/leveldb/cache"
"github.com/syndtr/goleveldb/leveldb/filter"
"github.com/syndtr/goleveldb/leveldb/opt"
)
-func (s *session) setOptions(o *opt.Options) {
- s.o = &opt.Options{}
+func dupOptions(o *opt.Options) *opt.Options {
+ newo := &opt.Options{}
if o != nil {
- *s.o = *o
+ *newo = *o
+ }
+ if newo.Strict == 0 {
+ newo.Strict = opt.DefaultStrict
}
+ return newo
+}
+
+func (s *session) setOptions(o *opt.Options) {
+ no := dupOptions(o)
// Alternative filters.
if filters := o.GetAltFilters(); len(filters) > 0 {
- s.o.AltFilters = make([]filter.Filter, len(filters))
+ no.AltFilters = make([]filter.Filter, len(filters))
for i, filter := range filters {
- s.o.AltFilters[i] = &iFilter{filter}
+ no.AltFilters[i] = &iFilter{filter}
}
}
- // Block cache.
- switch o.GetBlockCache() {
- case nil:
- s.o.BlockCache = cache.NewLRUCache(opt.DefaultBlockCacheSize)
- case opt.NoCache:
- s.o.BlockCache = nil
- }
// Comparer.
s.icmp = &iComparer{o.GetComparer()}
- s.o.Comparer = s.icmp
+ no.Comparer = s.icmp
// Filter.
if filter := o.GetFilter(); filter != nil {
- s.o.Filter = &iFilter{filter}
+ no.Filter = &iFilter{filter}
}
+
+ s.o = &cachedOptions{Options: no}
+ s.o.cache()
+}
+
+// cachedOptions embeds *opt.Options and holds per-level compaction values
+// that are filled in by cache(). Each slice is indexed by level.
+type cachedOptions struct {
+ *opt.Options
+
+ compactionExpandLimit []int
+ compactionGPOverlaps []int
+ compactionSourceLimit []int
+ compactionTableSize []int
+ compactionTotalSize []int64
+}
+
+// cache precomputes the per-level compaction values for every level in
+// [0, GetNumLevel()) and stores them in the cachedOptions slices.
+func (co *cachedOptions) cache() {
+ numLevel := co.Options.GetNumLevel()
+
+ co.compactionExpandLimit = make([]int, numLevel)
+ co.compactionGPOverlaps = make([]int, numLevel)
+ co.compactionSourceLimit = make([]int, numLevel)
+ co.compactionTableSize = make([]int, numLevel)
+ co.compactionTotalSize = make([]int64, numLevel)
+
+ // The getters are called through co.Options explicitly so that the
+ // embedded Options computes the value, rather than the cachedOptions
+ // methods of the same name, which read from these slices.
+ for level := 0; level < numLevel; level++ {
+ co.compactionExpandLimit[level] = co.Options.GetCompactionExpandLimit(level)
+ co.compactionGPOverlaps[level] = co.Options.GetCompactionGPOverlaps(level)
+ co.compactionSourceLimit[level] = co.Options.GetCompactionSourceLimit(level)
+ co.compactionTableSize[level] = co.Options.GetCompactionTableSize(level)
+ co.compactionTotalSize[level] = co.Options.GetCompactionTotalSize(level)
+ }
+}
+
+// GetCompactionExpandLimit returns the cached expand limit for level. A level
+// at or past the end of the cache falls back to the value computed by the
+// embedded Options instead of panicking with an index-out-of-range error.
+func (co *cachedOptions) GetCompactionExpandLimit(level int) int {
+	if level < len(co.compactionExpandLimit) {
+		return co.compactionExpandLimit[level]
+	}
+	return co.Options.GetCompactionExpandLimit(level)
+}
+
+// GetCompactionGPOverlaps returns the cached grandparent-overlaps value for
+// level. A level at or past the end of the cache falls back to the value
+// computed by the embedded Options instead of panicking with an
+// index-out-of-range error.
+func (co *cachedOptions) GetCompactionGPOverlaps(level int) int {
+	if level < len(co.compactionGPOverlaps) {
+		return co.compactionGPOverlaps[level]
+	}
+	return co.Options.GetCompactionGPOverlaps(level)
+}
+
+// GetCompactionSourceLimit returns the cached source limit for level. A level
+// at or past the end of the cache falls back to the value computed by the
+// embedded Options instead of panicking with an index-out-of-range error.
+func (co *cachedOptions) GetCompactionSourceLimit(level int) int {
+	if level < len(co.compactionSourceLimit) {
+		return co.compactionSourceLimit[level]
+	}
+	return co.Options.GetCompactionSourceLimit(level)
+}
+
+// GetCompactionTableSize returns the cached table size for level. A level at
+// or past the end of the cache falls back to the value computed by the
+// embedded Options instead of panicking with an index-out-of-range error.
+func (co *cachedOptions) GetCompactionTableSize(level int) int {
+	if level < len(co.compactionTableSize) {
+		return co.compactionTableSize[level]
+	}
+	return co.Options.GetCompactionTableSize(level)
+}
+
+// GetCompactionTotalSize returns the cached total size for level. A level at
+// or past the end of the cache falls back to the value computed by the
+// embedded Options instead of panicking with an index-out-of-range error.
+func (co *cachedOptions) GetCompactionTotalSize(level int) int64 {
+	if level < len(co.compactionTotalSize) {
+		return co.compactionTotalSize[level]
+	}
+	return co.Options.GetCompactionTotalSize(level)
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session.go
index 6b2a61683..b3906f7fc 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session.go
@@ -7,12 +7,13 @@
package leveldb
import (
- "errors"
+ "fmt"
"io"
"os"
"sync"
"sync/atomic"
+ "github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/journal"
"github.com/syndtr/goleveldb/leveldb/opt"
@@ -20,18 +21,31 @@ import (
"github.com/syndtr/goleveldb/leveldb/util"
)
+// ErrManifestCorrupted is the error describing a corrupted manifest. Field
+// names the manifest field involved and Reason says what is wrong with it.
+type ErrManifestCorrupted struct {
+ Field string
+ Reason string
+}
+
+// Error implements the error interface, formatting the field name and reason
+// into a single message.
+func (e *ErrManifestCorrupted) Error() string {
+ return fmt.Sprintf("leveldb: manifest corrupted (field '%s'): %s", e.Field, e.Reason)
+}
+
+// newErrManifestCorrupted builds an ErrManifestCorrupted from field and reason
+// and passes it, together with file f, to errors.NewErrCorrupted.
+func newErrManifestCorrupted(f storage.File, field, reason string) error {
+ return errors.NewErrCorrupted(f, &ErrManifestCorrupted{field, reason})
+}
+
// session represent a persistent database session.
type session struct {
// Need 64-bit alignment.
- stFileNum uint64 // current unused file number
+ stNextFileNum uint64 // current unused file number
stJournalNum uint64 // current journal file number; need external synchronization
stPrevJournalNum uint64 // prev journal file number; no longer used; for compatibility with older version of leveldb
- stSeq uint64 // last mem compacted seq; need external synchronization
+ stSeqNum uint64 // last mem compacted seq; need external synchronization
stTempFileNum uint64
stor storage.Storage
storLock util.Releaser
- o *opt.Options
+ o *cachedOptions
icmp *iComparer
tops *tOps
@@ -39,11 +53,12 @@ type session struct {
manifestWriter storage.Writer
manifestFile storage.File
- stCPtrs [kNumLevels]iKey // compact pointers; need external synchronization
- stVersion *version // current version
- vmu sync.Mutex
+ stCompPtrs []iKey // compaction pointers; need external synchronization
+ stVersion *version // current version
+ vmu sync.Mutex
}
+// Creates new initialized session instance.
func newSession(stor storage.Storage, o *opt.Options) (s *session, err error) {
if stor == nil {
return nil, os.ErrInvalid
@@ -53,22 +68,20 @@ func newSession(stor storage.Storage, o *opt.Options) (s *session, err error) {
return
}
s = &session{
- stor: stor,
- storLock: storLock,
+ stor: stor,
+ storLock: storLock,
+ stCompPtrs: make([]iKey, o.GetNumLevel()),
}
s.setOptions(o)
- s.tops = newTableOps(s, s.o.GetMaxOpenFiles())
- s.setVersion(&version{s: s})
- s.log("log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock D·DeletedEntry L·Level Q·SeqNum T·TimeElapsed")
+ s.tops = newTableOps(s)
+ s.setVersion(newVersion(s))
+ s.log("log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed")
return
}
// Close session.
func (s *session) close() {
s.tops.close()
- if bc := s.o.GetBlockCache(); bc != nil {
- bc.Purge(nil)
- }
if s.manifest != nil {
s.manifest.Close()
}
@@ -81,6 +94,7 @@ func (s *session) close() {
s.stVersion = nil
}
+// Release session lock.
func (s *session) release() {
s.storLock.Release()
}
@@ -98,26 +112,26 @@ func (s *session) recover() (err error) {
// Don't return os.ErrNotExist if the underlying storage contains
// other files that belong to LevelDB. So the DB won't get trashed.
if files, _ := s.stor.GetFiles(storage.TypeAll); len(files) > 0 {
- err = ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest file missing")}
+ err = &errors.ErrCorrupted{File: &storage.FileInfo{Type: storage.TypeManifest}, Err: &errors.ErrMissingFiles{}}
}
}
}()
- file, err := s.stor.GetManifest()
+ m, err := s.stor.GetManifest()
if err != nil {
return
}
- reader, err := file.Open()
+ reader, err := m.Open()
if err != nil {
return
}
defer reader.Close()
strict := s.o.GetStrict(opt.StrictManifest)
- jr := journal.NewReader(reader, dropper{s, file}, strict, true)
+ jr := journal.NewReader(reader, dropper{s, m}, strict, true)
- staging := s.version_NB().newStaging()
- rec := &sessionRecord{}
+ staging := s.stVersion.newStaging()
+ rec := &sessionRecord{numLevel: s.o.GetNumLevel()}
for {
var r io.Reader
r, err = jr.Next()
@@ -126,51 +140,57 @@ func (s *session) recover() (err error) {
err = nil
break
}
- return
+ return errors.SetFile(err, m)
}
err = rec.decode(r)
if err == nil {
// save compact pointers
- for _, rp := range rec.compactionPointers {
- s.stCPtrs[rp.level] = iKey(rp.key)
+ for _, r := range rec.compPtrs {
+ s.stCompPtrs[r.level] = iKey(r.ikey)
}
// commit record to version staging
staging.commit(rec)
- } else if strict {
- return ErrCorrupted{Type: CorruptedManifest, Err: err}
} else {
- s.logf("manifest error: %v (skipped)", err)
+ err = errors.SetFile(err, m)
+ if strict || !errors.IsCorrupted(err) {
+ return
+ } else {
+ s.logf("manifest error: %v (skipped)", errors.SetFile(err, m))
+ }
}
- rec.resetCompactionPointers()
+ rec.resetCompPtrs()
rec.resetAddedTables()
rec.resetDeletedTables()
}
switch {
case !rec.has(recComparer):
- return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing comparer name")}
+ return newErrManifestCorrupted(m, "comparer", "missing")
case rec.comparer != s.icmp.uName():
- return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: comparer mismatch, " + "want '" + s.icmp.uName() + "', " + "got '" + rec.comparer + "'")}
- case !rec.has(recNextNum):
- return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing next file number")}
+ return newErrManifestCorrupted(m, "comparer", fmt.Sprintf("mismatch: want '%s', got '%s'", s.icmp.uName(), rec.comparer))
+ case !rec.has(recNextFileNum):
+ return newErrManifestCorrupted(m, "next-file-num", "missing")
case !rec.has(recJournalNum):
- return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing journal file number")}
- case !rec.has(recSeq):
- return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing seq number")}
+ return newErrManifestCorrupted(m, "journal-file-num", "missing")
+ case !rec.has(recSeqNum):
+ return newErrManifestCorrupted(m, "seq-num", "missing")
}
- s.manifestFile = file
+ s.manifestFile = m
s.setVersion(staging.finish())
- s.setFileNum(rec.nextNum)
+ s.setNextFileNum(rec.nextFileNum)
s.recordCommited(rec)
return nil
}
// Commit session; need external synchronization.
func (s *session) commit(r *sessionRecord) (err error) {
+ v := s.version()
+ defer v.release()
+
// spawn new version based on current version
- nv := s.version_NB().spawn(r)
+ nv := v.spawn(r)
if s.manifest == nil {
// manifest journal writer not yet created, create one
@@ -189,22 +209,22 @@ func (s *session) commit(r *sessionRecord) (err error) {
// Pick a compaction based on current state; need external synchronization.
func (s *session) pickCompaction() *compaction {
- v := s.version_NB()
+ v := s.version()
var level int
var t0 tFiles
if v.cScore >= 1 {
level = v.cLevel
- cp := s.stCPtrs[level]
- tt := v.tables[level]
- for _, t := range tt {
- if cp == nil || s.icmp.Compare(t.max, cp) > 0 {
+ cptr := s.stCompPtrs[level]
+ tables := v.tables[level]
+ for _, t := range tables {
+ if cptr == nil || s.icmp.Compare(t.imax, cptr) > 0 {
t0 = append(t0, t)
break
}
}
if len(t0) == 0 {
- t0 = append(t0, tt[0])
+ t0 = append(t0, tables[0])
}
} else {
if p := atomic.LoadPointer(&v.cSeek); p != nil {
@@ -212,29 +232,21 @@ func (s *session) pickCompaction() *compaction {
level = ts.level
t0 = append(t0, ts.table)
} else {
+ v.release()
return nil
}
}
- c := &compaction{s: s, version: v, level: level}
- if level == 0 {
- min, max := t0.getRange(s.icmp)
- t0 = nil
- v.tables[0].getOverlaps(min.ukey(), max.ukey(), &t0, false, s.icmp.ucmp)
- }
-
- c.tables[0] = t0
- c.expand()
- return c
+ return newCompaction(s, v, level, t0)
}
// Create compaction from given level and range; need external synchronization.
-func (s *session) getCompactionRange(level int, min, max []byte) *compaction {
- v := s.version_NB()
+func (s *session) getCompactionRange(level int, umin, umax []byte) *compaction {
+ v := s.version()
- var t0 tFiles
- v.tables[level].getOverlaps(min, max, &t0, level != 0, s.icmp.ucmp)
+ t0 := v.tables[level].getOverlaps(nil, s.icmp, umin, umax, level == 0)
if len(t0) == 0 {
+ v.release()
return nil
}
@@ -243,7 +255,7 @@ func (s *session) getCompactionRange(level int, min, max []byte) *compaction {
// and we must not pick one file and drop another older file if the
// two files overlap.
if level > 0 {
- limit := uint64(kMaxTableSize)
+ limit := uint64(v.s.o.GetCompactionSourceLimit(level))
total := uint64(0)
for i, t := range t0 {
total += t.size
@@ -255,90 +267,124 @@ func (s *session) getCompactionRange(level int, min, max []byte) *compaction {
}
}
- c := &compaction{s: s, version: v, level: level}
- c.tables[0] = t0
+ return newCompaction(s, v, level, t0)
+}
+
+// newCompaction creates a compaction of the given level on version v with t0
+// as the initial level input tables. It takes maxGPOverlaps from the session
+// options, allocates tPtrs with one entry per level, then expands the inputs
+// and saves the initial state snapshot.
+func newCompaction(s *session, v *version, level int, t0 tFiles) *compaction {
+ c := &compaction{
+ s: s,
+ v: v,
+ level: level,
+ tables: [2]tFiles{t0, nil},
+ maxGPOverlaps: uint64(s.o.GetCompactionGPOverlaps(level)),
+ tPtrs: make([]int, s.o.GetNumLevel()),
+ }
+ c.expand()
+ c.save()
+ return c
+}
-// compaction represent a compaction state
+// compaction represent a compaction state.
type compaction struct {
- s *session
- version *version
+ s *session
+ v *version
+
+ level int
+ tables [2]tFiles
+ maxGPOverlaps uint64
+
+ gp tFiles
+ gpi int
+ seenKey bool
+ gpOverlappedBytes uint64
+ imin, imax iKey
+ tPtrs []int
+ released bool
+
+ snapGPI int
+ snapSeenKey bool
+ snapGPOverlappedBytes uint64
+ snapTPtrs []int
+}
- level int
- tables [2]tFiles
+// save snapshots the iteration state (gpi, seenKey, gpOverlappedBytes and a
+// copy of tPtrs) into the snap* fields so that restore can bring it back.
+func (c *compaction) save() {
+ c.snapGPI = c.gpi
+ c.snapSeenKey = c.seenKey
+ c.snapGPOverlappedBytes = c.gpOverlappedBytes
+ // Copy the elements rather than aliasing tPtrs.
+ c.snapTPtrs = append(c.snapTPtrs[:0], c.tPtrs...)
+}
- gp tFiles
- gpidx int
- seenKey bool
- overlappedBytes uint64
- min, max iKey
+// restore resets the iteration state (gpi, seenKey, gpOverlappedBytes and
+// tPtrs) to the values recorded by the last call to save.
+func (c *compaction) restore() {
+ c.gpi = c.snapGPI
+ c.seenKey = c.snapSeenKey
+ c.gpOverlappedBytes = c.snapGPOverlappedBytes
+ // Copy the elements rather than aliasing snapTPtrs.
+ c.tPtrs = append(c.tPtrs[:0], c.snapTPtrs...)
+}
- tPtrs [kNumLevels]int
+// release releases the version held by the compaction. Only the first call
+// has an effect; the released flag makes later calls no-ops.
+func (c *compaction) release() {
+ if !c.released {
+ c.released = true
+ c.v.release()
+ }
}
// Expand compacted tables; need external synchronization.
func (c *compaction) expand() {
- s := c.s
- v := c.version
-
- level := c.level
- vt0, vt1 := v.tables[level], v.tables[level+1]
+ limit := uint64(c.s.o.GetCompactionExpandLimit(c.level))
+ vt0, vt1 := c.v.tables[c.level], c.v.tables[c.level+1]
t0, t1 := c.tables[0], c.tables[1]
- min, max := t0.getRange(s.icmp)
- vt1.getOverlaps(min.ukey(), max.ukey(), &t1, true, s.icmp.ucmp)
-
- // Get entire range covered by compaction
- amin, amax := append(t0, t1...).getRange(s.icmp)
+ imin, imax := t0.getRange(c.s.icmp)
+ // We expand t0 here just incase ukey hop across tables.
+ t0 = vt0.getOverlaps(t0, c.s.icmp, imin.ukey(), imax.ukey(), c.level == 0)
+ if len(t0) != len(c.tables[0]) {
+ imin, imax = t0.getRange(c.s.icmp)
+ }
+ t1 = vt1.getOverlaps(t1, c.s.icmp, imin.ukey(), imax.ukey(), false)
+ // Get entire range covered by compaction.
+ amin, amax := append(t0, t1...).getRange(c.s.icmp)
// See if we can grow the number of inputs in "level" without
// changing the number of "level+1" files we pick up.
if len(t1) > 0 {
- var exp0 tFiles
- vt0.getOverlaps(amin.ukey(), amax.ukey(), &exp0, level != 0, s.icmp.ucmp)
- if len(exp0) > len(t0) && t1.size()+exp0.size() < kExpCompactionMaxBytes {
- var exp1 tFiles
- xmin, xmax := exp0.getRange(s.icmp)
- vt1.getOverlaps(xmin.ukey(), xmax.ukey(), &exp1, true, s.icmp.ucmp)
+ exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), c.level == 0)
+ if len(exp0) > len(t0) && t1.size()+exp0.size() < limit {
+ xmin, xmax := exp0.getRange(c.s.icmp)
+ exp1 := vt1.getOverlaps(nil, c.s.icmp, xmin.ukey(), xmax.ukey(), false)
if len(exp1) == len(t1) {
- s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)",
- level, level+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())),
+ c.s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)",
+ c.level, c.level+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())),
len(exp0), shortenb(int(exp0.size())), len(exp1), shortenb(int(exp1.size())))
- min, max = xmin, xmax
+ imin, imax = xmin, xmax
t0, t1 = exp0, exp1
- amin, amax = append(t0, t1...).getRange(s.icmp)
+ amin, amax = append(t0, t1...).getRange(c.s.icmp)
}
}
}
// Compute the set of grandparent files that overlap this compaction
// (parent == level+1; grandparent == level+2)
- if level+2 < kNumLevels {
- v.tables[level+2].getOverlaps(amin.ukey(), amax.ukey(), &c.gp, true, s.icmp.ucmp)
+ if c.level+2 < c.s.o.GetNumLevel() {
+ c.gp = c.v.tables[c.level+2].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false)
}
c.tables[0], c.tables[1] = t0, t1
- c.min, c.max = min, max
+ c.imin, c.imax = imin, imax
}
// Check whether compaction is trivial.
func (c *compaction) trivial() bool {
- return len(c.tables[0]) == 1 && len(c.tables[1]) == 0 && c.gp.size() <= kMaxGrandParentOverlapBytes
+ return len(c.tables[0]) == 1 && len(c.tables[1]) == 0 && c.gp.size() <= c.maxGPOverlaps
}
-func (c *compaction) isBaseLevelForKey(key []byte) bool {
- s := c.s
- v := c.version
-
- for level, tt := range v.tables[c.level+2:] {
- for c.tPtrs[level] < len(tt) {
- t := tt[c.tPtrs[level]]
- if s.icmp.uCompare(key, t.max.ukey()) <= 0 {
- // We've advanced far enough
- if s.icmp.uCompare(key, t.min.ukey()) >= 0 {
- // Key falls in this file's range, so definitely not base level
+func (c *compaction) baseLevelForKey(ukey []byte) bool {
+ for level, tables := range c.v.tables[c.level+2:] {
+ for c.tPtrs[level] < len(tables) {
+ t := tables[c.tPtrs[level]]
+ if c.s.icmp.uCompare(ukey, t.imax.ukey()) <= 0 {
+ // We've advanced far enough.
+ if c.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 {
+ // Key falls in this file's range, so definitely not base level.
return false
}
break
@@ -349,55 +395,61 @@ func (c *compaction) isBaseLevelForKey(key []byte) bool {
return true
}
-func (c *compaction) shouldStopBefore(key iKey) bool {
- for ; c.gpidx < len(c.gp); c.gpidx++ {
- gp := c.gp[c.gpidx]
- if c.s.icmp.Compare(key, gp.max) <= 0 {
+func (c *compaction) shouldStopBefore(ikey iKey) bool {
+ for ; c.gpi < len(c.gp); c.gpi++ {
+ gp := c.gp[c.gpi]
+ if c.s.icmp.Compare(ikey, gp.imax) <= 0 {
break
}
if c.seenKey {
- c.overlappedBytes += gp.size
+ c.gpOverlappedBytes += gp.size
}
}
c.seenKey = true
- if c.overlappedBytes > kMaxGrandParentOverlapBytes {
- // Too much overlap for current output; start new output
- c.overlappedBytes = 0
+ if c.gpOverlappedBytes > c.maxGPOverlaps {
+ // Too much overlap for current output; start new output.
+ c.gpOverlappedBytes = 0
return true
}
return false
}
+// Creates an iterator.
func (c *compaction) newIterator() iterator.Iterator {
- s := c.s
-
- level := c.level
- icap := 2
+ // Creates iterator slice.
+ icap := len(c.tables)
if c.level == 0 {
+ // Special case for level-0
icap = len(c.tables[0]) + 1
}
its := make([]iterator.Iterator, 0, icap)
+ // Options.
ro := &opt.ReadOptions{
DontFillCache: true,
+ Strict: opt.StrictOverride,
+ }
+ strict := c.s.o.GetStrict(opt.StrictCompaction)
+ if strict {
+ ro.Strict |= opt.StrictReader
}
- strict := s.o.GetStrict(opt.StrictIterator)
- for i, tt := range c.tables {
- if len(tt) == 0 {
+ for i, tables := range c.tables {
+ if len(tables) == 0 {
continue
}
- if level+i == 0 {
- for _, t := range tt {
- its = append(its, s.tops.newIterator(t, nil, ro))
+ // Level-0 is not sorted and may overlaps each other.
+ if c.level+i == 0 {
+ for _, t := range tables {
+ its = append(its, c.s.tops.newIterator(t, nil, ro))
}
} else {
- it := iterator.NewIndexedIterator(tt.newIndexIterator(s.tops, s.icmp, nil, ro), strict, true)
+ it := iterator.NewIndexedIterator(tables.newIndexIterator(c.s.tops, c.s.icmp, nil, ro), strict)
its = append(its, it)
}
}
- return iterator.NewMergedIterator(its, s.icmp, true)
+ return iterator.NewMergedIterator(its, c.s.icmp, strict)
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record.go
index c50fda737..1bdcc68f5 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record.go
@@ -9,11 +9,11 @@ package leveldb
import (
"bufio"
"encoding/binary"
- "errors"
"io"
-)
+ "strings"
-var errCorruptManifest = errors.New("leveldb: corrupt manifest")
+ "github.com/syndtr/goleveldb/leveldb/errors"
+)
type byteReader interface {
io.Reader
@@ -22,32 +22,28 @@ type byteReader interface {
// These numbers are written to disk and should not be changed.
const (
- recComparer = 1
- recJournalNum = 2
- recNextNum = 3
- recSeq = 4
- recCompactionPointer = 5
- recDeletedTable = 6
- recNewTable = 7
+ recComparer = 1
+ recJournalNum = 2
+ recNextFileNum = 3
+ recSeqNum = 4
+ recCompPtr = 5
+ recDelTable = 6
+ recAddTable = 7
// 8 was used for large value refs
recPrevJournalNum = 9
)
type cpRecord struct {
level int
- key iKey
+ ikey iKey
}
-type ntRecord struct {
+type atRecord struct {
level int
num uint64
size uint64
- min iKey
- max iKey
-}
-
-func (r ntRecord) makeFile(s *session) *tFile {
- return newTFile(s.getTableFile(r.num), r.size, r.min, r.max)
+ imin iKey
+ imax iKey
}
type dtRecord struct {
@@ -56,17 +52,20 @@ type dtRecord struct {
}
type sessionRecord struct {
- hasRec int
- comparer string
- journalNum uint64
- prevJournalNum uint64
- nextNum uint64
- seq uint64
- compactionPointers []cpRecord
- addedTables []ntRecord
- deletedTables []dtRecord
- scratch [binary.MaxVarintLen64]byte
- err error
+ numLevel int
+
+ hasRec int
+ comparer string
+ journalNum uint64
+ prevJournalNum uint64
+ nextFileNum uint64
+ seqNum uint64
+ compPtrs []cpRecord
+ addedTables []atRecord
+ deletedTables []dtRecord
+
+ scratch [binary.MaxVarintLen64]byte
+ err error
}
func (p *sessionRecord) has(rec int) bool {
@@ -88,47 +87,47 @@ func (p *sessionRecord) setPrevJournalNum(num uint64) {
p.prevJournalNum = num
}
-func (p *sessionRecord) setNextNum(num uint64) {
- p.hasRec |= 1 << recNextNum
- p.nextNum = num
+func (p *sessionRecord) setNextFileNum(num uint64) {
+ p.hasRec |= 1 << recNextFileNum
+ p.nextFileNum = num
}
-func (p *sessionRecord) setSeq(seq uint64) {
- p.hasRec |= 1 << recSeq
- p.seq = seq
+func (p *sessionRecord) setSeqNum(num uint64) {
+ p.hasRec |= 1 << recSeqNum
+ p.seqNum = num
}
-func (p *sessionRecord) addCompactionPointer(level int, key iKey) {
- p.hasRec |= 1 << recCompactionPointer
- p.compactionPointers = append(p.compactionPointers, cpRecord{level, key})
+func (p *sessionRecord) addCompPtr(level int, ikey iKey) {
+ p.hasRec |= 1 << recCompPtr
+ p.compPtrs = append(p.compPtrs, cpRecord{level, ikey})
}
-func (p *sessionRecord) resetCompactionPointers() {
- p.hasRec &= ^(1 << recCompactionPointer)
- p.compactionPointers = p.compactionPointers[:0]
+func (p *sessionRecord) resetCompPtrs() {
+ p.hasRec &= ^(1 << recCompPtr)
+ p.compPtrs = p.compPtrs[:0]
}
-func (p *sessionRecord) addTable(level int, num, size uint64, min, max iKey) {
- p.hasRec |= 1 << recNewTable
- p.addedTables = append(p.addedTables, ntRecord{level, num, size, min, max})
+func (p *sessionRecord) addTable(level int, num, size uint64, imin, imax iKey) {
+ p.hasRec |= 1 << recAddTable
+ p.addedTables = append(p.addedTables, atRecord{level, num, size, imin, imax})
}
func (p *sessionRecord) addTableFile(level int, t *tFile) {
- p.addTable(level, t.file.Num(), t.size, t.min, t.max)
+ p.addTable(level, t.file.Num(), t.size, t.imin, t.imax)
}
func (p *sessionRecord) resetAddedTables() {
- p.hasRec &= ^(1 << recNewTable)
+ p.hasRec &= ^(1 << recAddTable)
p.addedTables = p.addedTables[:0]
}
-func (p *sessionRecord) deleteTable(level int, num uint64) {
- p.hasRec |= 1 << recDeletedTable
+func (p *sessionRecord) delTable(level int, num uint64) {
+ p.hasRec |= 1 << recDelTable
p.deletedTables = append(p.deletedTables, dtRecord{level, num})
}
func (p *sessionRecord) resetDeletedTables() {
- p.hasRec &= ^(1 << recDeletedTable)
+ p.hasRec &= ^(1 << recDelTable)
p.deletedTables = p.deletedTables[:0]
}
@@ -161,43 +160,45 @@ func (p *sessionRecord) encode(w io.Writer) error {
p.putUvarint(w, recJournalNum)
p.putUvarint(w, p.journalNum)
}
- if p.has(recNextNum) {
- p.putUvarint(w, recNextNum)
- p.putUvarint(w, p.nextNum)
+ if p.has(recNextFileNum) {
+ p.putUvarint(w, recNextFileNum)
+ p.putUvarint(w, p.nextFileNum)
}
- if p.has(recSeq) {
- p.putUvarint(w, recSeq)
- p.putUvarint(w, p.seq)
+ if p.has(recSeqNum) {
+ p.putUvarint(w, recSeqNum)
+ p.putUvarint(w, p.seqNum)
}
- for _, cp := range p.compactionPointers {
- p.putUvarint(w, recCompactionPointer)
- p.putUvarint(w, uint64(cp.level))
- p.putBytes(w, cp.key)
+ for _, r := range p.compPtrs {
+ p.putUvarint(w, recCompPtr)
+ p.putUvarint(w, uint64(r.level))
+ p.putBytes(w, r.ikey)
}
- for _, t := range p.deletedTables {
- p.putUvarint(w, recDeletedTable)
- p.putUvarint(w, uint64(t.level))
- p.putUvarint(w, t.num)
+ for _, r := range p.deletedTables {
+ p.putUvarint(w, recDelTable)
+ p.putUvarint(w, uint64(r.level))
+ p.putUvarint(w, r.num)
}
- for _, t := range p.addedTables {
- p.putUvarint(w, recNewTable)
- p.putUvarint(w, uint64(t.level))
- p.putUvarint(w, t.num)
- p.putUvarint(w, t.size)
- p.putBytes(w, t.min)
- p.putBytes(w, t.max)
+ for _, r := range p.addedTables {
+ p.putUvarint(w, recAddTable)
+ p.putUvarint(w, uint64(r.level))
+ p.putUvarint(w, r.num)
+ p.putUvarint(w, r.size)
+ p.putBytes(w, r.imin)
+ p.putBytes(w, r.imax)
}
return p.err
}
-func (p *sessionRecord) readUvarint(r io.ByteReader) uint64 {
+func (p *sessionRecord) readUvarintMayEOF(field string, r io.ByteReader, mayEOF bool) uint64 {
if p.err != nil {
return 0
}
x, err := binary.ReadUvarint(r)
if err != nil {
- if err == io.EOF {
- p.err = errCorruptManifest
+ if err == io.ErrUnexpectedEOF || (mayEOF == false && err == io.EOF) {
+ p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, "short read"})
+ } else if strings.HasPrefix(err.Error(), "binary:") {
+ p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, err.Error()})
} else {
p.err = err
}
@@ -206,35 +207,39 @@ func (p *sessionRecord) readUvarint(r io.ByteReader) uint64 {
return x
}
-func (p *sessionRecord) readBytes(r byteReader) []byte {
+// readUvarint reads a uvarint for the named field by calling
+// readUvarintMayEOF with mayEOF set to false.
+func (p *sessionRecord) readUvarint(field string, r io.ByteReader) uint64 {
+ return p.readUvarintMayEOF(field, r, false)
+}
+
+func (p *sessionRecord) readBytes(field string, r byteReader) []byte {
if p.err != nil {
return nil
}
- n := p.readUvarint(r)
+ n := p.readUvarint(field, r)
if p.err != nil {
return nil
}
x := make([]byte, n)
_, p.err = io.ReadFull(r, x)
if p.err != nil {
- if p.err == io.EOF {
- p.err = errCorruptManifest
+ if p.err == io.ErrUnexpectedEOF {
+ p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, "short read"})
}
return nil
}
return x
}
-func (p *sessionRecord) readLevel(r io.ByteReader) int {
+func (p *sessionRecord) readLevel(field string, r io.ByteReader) int {
if p.err != nil {
return 0
}
- x := p.readUvarint(r)
+ x := p.readUvarint(field, r)
if p.err != nil {
return 0
}
- if x >= kNumLevels {
- p.err = errCorruptManifest
+ if x >= uint64(p.numLevel) {
+ p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, "invalid level number"})
return 0
}
return int(x)
@@ -247,59 +252,59 @@ func (p *sessionRecord) decode(r io.Reader) error {
}
p.err = nil
for p.err == nil {
- rec, err := binary.ReadUvarint(br)
- if err != nil {
- if err == io.EOF {
- err = nil
+ rec := p.readUvarintMayEOF("field-header", br, true)
+ if p.err != nil {
+ if p.err == io.EOF {
+ return nil
}
- return err
+ return p.err
}
switch rec {
case recComparer:
- x := p.readBytes(br)
+ x := p.readBytes("comparer", br)
if p.err == nil {
p.setComparer(string(x))
}
case recJournalNum:
- x := p.readUvarint(br)
+ x := p.readUvarint("journal-num", br)
if p.err == nil {
p.setJournalNum(x)
}
case recPrevJournalNum:
- x := p.readUvarint(br)
+ x := p.readUvarint("prev-journal-num", br)
if p.err == nil {
p.setPrevJournalNum(x)
}
- case recNextNum:
- x := p.readUvarint(br)
+ case recNextFileNum:
+ x := p.readUvarint("next-file-num", br)
if p.err == nil {
- p.setNextNum(x)
+ p.setNextFileNum(x)
}
- case recSeq:
- x := p.readUvarint(br)
+ case recSeqNum:
+ x := p.readUvarint("seq-num", br)
if p.err == nil {
- p.setSeq(x)
+ p.setSeqNum(x)
}
- case recCompactionPointer:
- level := p.readLevel(br)
- key := p.readBytes(br)
+ case recCompPtr:
+ level := p.readLevel("comp-ptr.level", br)
+ ikey := p.readBytes("comp-ptr.ikey", br)
if p.err == nil {
- p.addCompactionPointer(level, iKey(key))
+ p.addCompPtr(level, iKey(ikey))
}
- case recNewTable:
- level := p.readLevel(br)
- num := p.readUvarint(br)
- size := p.readUvarint(br)
- min := p.readBytes(br)
- max := p.readBytes(br)
+ case recAddTable:
+ level := p.readLevel("add-table.level", br)
+ num := p.readUvarint("add-table.num", br)
+ size := p.readUvarint("add-table.size", br)
+ imin := p.readBytes("add-table.imin", br)
+ imax := p.readBytes("add-table.imax", br)
if p.err == nil {
- p.addTable(level, num, size, min, max)
+ p.addTable(level, num, size, imin, imax)
}
- case recDeletedTable:
- level := p.readLevel(br)
- num := p.readUvarint(br)
+ case recDelTable:
+ level := p.readLevel("del-table.level", br)
+ num := p.readUvarint("del-table.num", br)
if p.err == nil {
- p.deleteTable(level, num)
+ p.delTable(level, num)
}
}
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record_test.go
index 029fabfe6..c0c035ae3 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record_test.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record_test.go
@@ -9,6 +9,8 @@ package leveldb
import (
"bytes"
"testing"
+
+ "github.com/syndtr/goleveldb/leveldb/opt"
)
func decodeEncode(v *sessionRecord) (res bool, err error) {
@@ -17,7 +19,7 @@ func decodeEncode(v *sessionRecord) (res bool, err error) {
if err != nil {
return
}
- v2 := new(sessionRecord)
+ v2 := &sessionRecord{numLevel: opt.DefaultNumLevel}
err = v.decode(b)
if err != nil {
return
@@ -32,7 +34,7 @@ func decodeEncode(v *sessionRecord) (res bool, err error) {
func TestSessionRecord_EncodeDecode(t *testing.T) {
big := uint64(1) << 50
- v := new(sessionRecord)
+ v := &sessionRecord{numLevel: opt.DefaultNumLevel}
i := uint64(0)
test := func() {
res, err := decodeEncode(v)
@@ -47,16 +49,16 @@ func TestSessionRecord_EncodeDecode(t *testing.T) {
for ; i < 4; i++ {
test()
v.addTable(3, big+300+i, big+400+i,
- newIKey([]byte("foo"), big+500+1, tVal),
- newIKey([]byte("zoo"), big+600+1, tDel))
- v.deleteTable(4, big+700+i)
- v.addCompactionPointer(int(i), newIKey([]byte("x"), big+900+1, tVal))
+ newIkey([]byte("foo"), big+500+1, ktVal),
+ newIkey([]byte("zoo"), big+600+1, ktDel))
+ v.delTable(4, big+700+i)
+ v.addCompPtr(int(i), newIkey([]byte("x"), big+900+1, ktVal))
}
v.setComparer("foo")
v.setJournalNum(big + 100)
v.setPrevJournalNum(big + 99)
- v.setNextNum(big + 200)
- v.setSeq(big + 1000)
+ v.setNextFileNum(big + 200)
+ v.setSeqNum(big + 1000)
test()
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_util.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_util.go
index bf412b030..007c02cde 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_util.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_util.go
@@ -14,7 +14,7 @@ import (
"github.com/syndtr/goleveldb/leveldb/storage"
)
-// logging
+// Logging.
type dropper struct {
s *session
@@ -22,22 +22,17 @@ type dropper struct {
}
func (d dropper) Drop(err error) {
- if e, ok := err.(journal.DroppedError); ok {
+ if e, ok := err.(*journal.ErrCorrupted); ok {
d.s.logf("journal@drop %s-%d S·%s %q", d.file.Type(), d.file.Num(), shortenb(e.Size), e.Reason)
} else {
d.s.logf("journal@drop %s-%d %q", d.file.Type(), d.file.Num(), err)
}
}
-func (s *session) log(v ...interface{}) {
- s.stor.Log(fmt.Sprint(v...))
-}
-
-func (s *session) logf(format string, v ...interface{}) {
- s.stor.Log(fmt.Sprintf(format, v...))
-}
+func (s *session) log(v ...interface{}) { s.stor.Log(fmt.Sprint(v...)) }
+func (s *session) logf(format string, v ...interface{}) { s.stor.Log(fmt.Sprintf(format, v...)) }
-// file utils
+// File utils.
func (s *session) getJournalFile(num uint64) storage.File {
return s.stor.GetFile(num, storage.TypeJournal)
@@ -56,9 +51,14 @@ func (s *session) newTemp() storage.File {
return s.stor.GetFile(num, storage.TypeTemp)
}
-// session state
+func (s *session) tableFileFromRecord(r atRecord) *tFile {
+ return newTableFile(s.getTableFile(r.num), r.size, r.imin, r.imax)
+}
+
+// Session state.
-// Get current version.
+// Get current version. This will incr version ref, must call
+// version.release (exactly once) after use.
func (s *session) version() *version {
s.vmu.Lock()
defer s.vmu.Unlock()
@@ -66,85 +66,80 @@ func (s *session) version() *version {
return s.stVersion
}
-// Get current version; no barrier.
-func (s *session) version_NB() *version {
- return s.stVersion
-}
-
// Set current version to v.
func (s *session) setVersion(v *version) {
s.vmu.Lock()
- v.ref = 1
+ v.ref = 1 // Holds by session.
if old := s.stVersion; old != nil {
- v.ref++
+ v.ref++ // Holds by old version.
old.next = v
- old.release_NB()
+ old.releaseNB()
}
s.stVersion = v
s.vmu.Unlock()
}
// Get current unused file number.
-func (s *session) fileNum() uint64 {
- return atomic.LoadUint64(&s.stFileNum)
+func (s *session) nextFileNum() uint64 {
+ return atomic.LoadUint64(&s.stNextFileNum)
}
-// Get current unused file number to num.
-func (s *session) setFileNum(num uint64) {
- atomic.StoreUint64(&s.stFileNum, num)
+// Set current unused file number to num.
+func (s *session) setNextFileNum(num uint64) {
+ atomic.StoreUint64(&s.stNextFileNum, num)
}
// Mark file number as used.
func (s *session) markFileNum(num uint64) {
- num += 1
+ nextFileNum := num + 1
for {
- old, x := s.stFileNum, num
+ old, x := s.stNextFileNum, nextFileNum
if old > x {
x = old
}
- if atomic.CompareAndSwapUint64(&s.stFileNum, old, x) {
+ if atomic.CompareAndSwapUint64(&s.stNextFileNum, old, x) {
break
}
}
}
// Allocate a file number.
-func (s *session) allocFileNum() (num uint64) {
- return atomic.AddUint64(&s.stFileNum, 1) - 1
+func (s *session) allocFileNum() uint64 {
+ return atomic.AddUint64(&s.stNextFileNum, 1) - 1
}
// Reuse given file number.
func (s *session) reuseFileNum(num uint64) {
for {
- old, x := s.stFileNum, num
+ old, x := s.stNextFileNum, num
if old != x+1 {
x = old
}
- if atomic.CompareAndSwapUint64(&s.stFileNum, old, x) {
+ if atomic.CompareAndSwapUint64(&s.stNextFileNum, old, x) {
break
}
}
}
-// manifest related utils
+// Manifest related utils.
// Fill given session record obj with current states; need external
// synchronization.
func (s *session) fillRecord(r *sessionRecord, snapshot bool) {
- r.setNextNum(s.fileNum())
+ r.setNextFileNum(s.nextFileNum())
if snapshot {
if !r.has(recJournalNum) {
r.setJournalNum(s.stJournalNum)
}
- if !r.has(recSeq) {
- r.setSeq(s.stSeq)
+ if !r.has(recSeqNum) {
+ r.setSeqNum(s.stSeqNum)
}
- for level, ik := range s.stCPtrs {
+ for level, ik := range s.stCompPtrs {
if ik != nil {
- r.addCompactionPointer(level, ik)
+ r.addCompPtr(level, ik)
}
}
@@ -152,7 +147,7 @@ func (s *session) fillRecord(r *sessionRecord, snapshot bool) {
}
}
-// Mark if record has been commited, this will update session state;
+// Mark if record has been committed, this will update session state;
// need external synchronization.
func (s *session) recordCommited(r *sessionRecord) {
if r.has(recJournalNum) {
@@ -163,12 +158,12 @@ func (s *session) recordCommited(r *sessionRecord) {
s.stPrevJournalNum = r.prevJournalNum
}
- if r.has(recSeq) {
- s.stSeq = r.seq
+ if r.has(recSeqNum) {
+ s.stSeqNum = r.seqNum
}
- for _, p := range r.compactionPointers {
- s.stCPtrs[p.level] = iKey(p.key)
+ for _, p := range r.compPtrs {
+ s.stCompPtrs[p.level] = iKey(p.ikey)
}
}
@@ -183,10 +178,11 @@ func (s *session) newManifest(rec *sessionRecord, v *version) (err error) {
jw := journal.NewWriter(writer)
if v == nil {
- v = s.version_NB()
+ v = s.version()
+ defer v.release()
}
if rec == nil {
- rec = new(sessionRecord)
+ rec = &sessionRecord{numLevel: s.o.GetNumLevel()}
}
s.fillRecord(rec, true)
v.fillRecord(rec)
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go
index 75439f6db..46cc9d070 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go
@@ -221,7 +221,7 @@ func (fs *fileStorage) GetManifest() (f File, err error) {
fs.log(fmt.Sprintf("skipping %s: invalid file name", fn))
continue
}
- if _, e1 := strconv.ParseUint(fn[7:], 10, 0); e1 != nil {
+ if _, e1 := strconv.ParseUint(fn[8:], 10, 0); e1 != nil {
fs.log(fmt.Sprintf("skipping %s: invalid file num: %v", fn, e1))
continue
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go
new file mode 100644
index 000000000..102031bfd
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go
@@ -0,0 +1,68 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// +build solaris
+
+package storage
+
+import (
+ "os"
+ "syscall"
+)
+
+type unixFileLock struct {
+ f *os.File
+}
+
+func (fl *unixFileLock) release() error {
+ if err := setFileLock(fl.f, false); err != nil {
+ return err
+ }
+ return fl.f.Close()
+}
+
+func newFileLock(path string) (fl fileLock, err error) {
+ f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0644)
+ if err != nil {
+ return
+ }
+ err = setFileLock(f, true)
+ if err != nil {
+ f.Close()
+ return
+ }
+ fl = &unixFileLock{f: f}
+ return
+}
+
+func setFileLock(f *os.File, lock bool) error {
+ flock := syscall.Flock_t{
+ Type: syscall.F_UNLCK,
+ Start: 0,
+ Len: 0,
+ Whence: 1,
+ }
+ if lock {
+ flock.Type = syscall.F_WRLCK
+ }
+ return syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &flock)
+}
+
+func rename(oldpath, newpath string) error {
+ return os.Rename(oldpath, newpath)
+}
+
+func syncDir(name string) error {
+ f, err := os.Open(name)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+ if err := f.Sync(); err != nil {
+ return err
+ }
+ return nil
+}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go
index 73499afc8..d0a604b7a 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go
@@ -4,7 +4,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-// +build darwin freebsd linux netbsd openbsd
+// +build darwin dragonfly freebsd linux netbsd openbsd
package storage
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/storage.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/storage.go
index de5694888..85dd70b06 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/storage.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/storage.go
@@ -67,7 +67,7 @@ type Writer interface {
Syncer
}
-// File is the file.
+// File is the file. A file instance must be goroutine-safe.
type File interface {
// Open opens the file for read. Returns os.ErrNotExist error
// if the file does not exist.
@@ -94,7 +94,7 @@ type File interface {
Remove() error
}
-// Storage is the storage.
+// Storage is the storage. A storage instance must be goroutine-safe.
type Storage interface {
// Lock locks the storage. Any subsequent attempt to call Lock will fail
// until the last lock released.
@@ -125,3 +125,33 @@ type Storage interface {
// Other methods should not be called after the storage has been closed.
Close() error
}
+
+// FileInfo wraps basic file info.
+type FileInfo struct {
+ Type FileType
+ Num uint64
+}
+
+func (fi FileInfo) String() string {
+ switch fi.Type {
+ case TypeManifest:
+ return fmt.Sprintf("MANIFEST-%06d", fi.Num)
+ case TypeJournal:
+ return fmt.Sprintf("%06d.log", fi.Num)
+ case TypeTable:
+ return fmt.Sprintf("%06d.ldb", fi.Num)
+ case TypeTemp:
+ return fmt.Sprintf("%06d.tmp", fi.Num)
+ default:
+ return fmt.Sprintf("%#x-%d", fi.Type, fi.Num)
+ }
+}
+
+// NewFileInfo creates a new FileInfo from the given File. It returns nil
+// if File is nil.
+func NewFileInfo(f File) *FileInfo {
+ if f == nil {
+ return nil
+ }
+ return &FileInfo{f.Type(), f.Num()}
+}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage_test.go
index 27e76d707..dc1f1fb54 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage_test.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage_test.go
@@ -11,6 +11,7 @@ import (
"fmt"
"io"
"io/ioutil"
+ "math/rand"
"os"
"path/filepath"
"sync"
@@ -28,11 +29,25 @@ var (
)
var (
- tsFSEnv = os.Getenv("GOLEVELDB_USEFS")
- tsKeepFS = tsFSEnv == "2"
- tsFS = tsKeepFS || tsFSEnv == "" || tsFSEnv == "1"
- tsMU = &sync.Mutex{}
- tsNum = 0
+ tsFSEnv = os.Getenv("GOLEVELDB_USEFS")
+ tsTempdir = os.Getenv("GOLEVELDB_TEMPDIR")
+ tsKeepFS = tsFSEnv == "2"
+ tsFS = tsKeepFS || tsFSEnv == "" || tsFSEnv == "1"
+ tsMU = &sync.Mutex{}
+ tsNum = 0
+)
+
+type tsOp uint
+
+const (
+ tsOpOpen tsOp = iota
+ tsOpCreate
+ tsOpRead
+ tsOpReadAt
+ tsOpWrite
+ tsOpSync
+
+ tsOpNum
)
type tsLock struct {
@@ -53,6 +68,9 @@ type tsReader struct {
func (tr tsReader) Read(b []byte) (n int, err error) {
ts := tr.tf.ts
ts.countRead(tr.tf.Type())
+ if tr.tf.shouldErrLocked(tsOpRead) {
+ return 0, errors.New("leveldb.testStorage: emulated read error")
+ }
n, err = tr.Reader.Read(b)
if err != nil && err != io.EOF {
ts.t.Errorf("E: read error, num=%d type=%v n=%d: %v", tr.tf.Num(), tr.tf.Type(), n, err)
@@ -63,6 +81,9 @@ func (tr tsReader) Read(b []byte) (n int, err error) {
func (tr tsReader) ReadAt(b []byte, off int64) (n int, err error) {
ts := tr.tf.ts
ts.countRead(tr.tf.Type())
+ if tr.tf.shouldErrLocked(tsOpReadAt) {
+ return 0, errors.New("leveldb.testStorage: emulated readAt error")
+ }
n, err = tr.Reader.ReadAt(b, off)
if err != nil && err != io.EOF {
ts.t.Errorf("E: readAt error, num=%d type=%v off=%d n=%d: %v", tr.tf.Num(), tr.tf.Type(), off, n, err)
@@ -82,15 +103,12 @@ type tsWriter struct {
}
func (tw tsWriter) Write(b []byte) (n int, err error) {
- ts := tw.tf.ts
- ts.mu.Lock()
- defer ts.mu.Unlock()
- if ts.emuWriteErr&tw.tf.Type() != 0 {
+ if tw.tf.shouldErrLocked(tsOpWrite) {
return 0, errors.New("leveldb.testStorage: emulated write error")
}
n, err = tw.Writer.Write(b)
if err != nil {
- ts.t.Errorf("E: write error, num=%d type=%v n=%d: %v", tw.tf.Num(), tw.tf.Type(), n, err)
+ tw.tf.ts.t.Errorf("E: write error, num=%d type=%v n=%d: %v", tw.tf.Num(), tw.tf.Type(), n, err)
}
return
}
@@ -98,23 +116,23 @@ func (tw tsWriter) Write(b []byte) (n int, err error) {
func (tw tsWriter) Sync() (err error) {
ts := tw.tf.ts
ts.mu.Lock()
- defer ts.mu.Unlock()
for ts.emuDelaySync&tw.tf.Type() != 0 {
ts.cond.Wait()
}
- if ts.emuSyncErr&tw.tf.Type() != 0 {
+ ts.mu.Unlock()
+ if tw.tf.shouldErrLocked(tsOpSync) {
return errors.New("leveldb.testStorage: emulated sync error")
}
err = tw.Writer.Sync()
if err != nil {
- ts.t.Errorf("E: sync error, num=%d type=%v: %v", tw.tf.Num(), tw.tf.Type(), err)
+ tw.tf.ts.t.Errorf("E: sync error, num=%d type=%v: %v", tw.tf.Num(), tw.tf.Type(), err)
}
return
}
func (tw tsWriter) Close() (err error) {
err = tw.Writer.Close()
- tw.tf.close("reader", err)
+ tw.tf.close("writer", err)
return
}
@@ -127,6 +145,16 @@ func (tf tsFile) x() uint64 {
return tf.Num()<<typeShift | uint64(tf.Type())
}
+func (tf tsFile) shouldErr(op tsOp) bool {
+ return tf.ts.shouldErr(tf, op)
+}
+
+func (tf tsFile) shouldErrLocked(op tsOp) bool {
+ tf.ts.mu.Lock()
+ defer tf.ts.mu.Unlock()
+ return tf.shouldErr(op)
+}
+
func (tf tsFile) checkOpen(m string) error {
ts := tf.ts
if writer, ok := ts.opens[tf.x()]; ok {
@@ -163,7 +191,7 @@ func (tf tsFile) Open() (r storage.Reader, err error) {
if err != nil {
return
}
- if ts.emuOpenErr&tf.Type() != 0 {
+ if tf.shouldErr(tsOpOpen) {
err = errors.New("leveldb.testStorage: emulated open error")
return
}
@@ -190,7 +218,7 @@ func (tf tsFile) Create() (w storage.Writer, err error) {
if err != nil {
return
}
- if ts.emuCreateErr&tf.Type() != 0 {
+ if tf.shouldErr(tsOpCreate) {
err = errors.New("leveldb.testStorage: emulated create error")
return
}
@@ -205,6 +233,23 @@ func (tf tsFile) Create() (w storage.Writer, err error) {
return
}
+func (tf tsFile) Replace(newfile storage.File) (err error) {
+ ts := tf.ts
+ ts.mu.Lock()
+ defer ts.mu.Unlock()
+ err = tf.checkOpen("replace")
+ if err != nil {
+ return
+ }
+ err = tf.File.Replace(newfile.(tsFile).File)
+ if err != nil {
+ ts.t.Errorf("E: cannot replace file, num=%d type=%v: %v", tf.Num(), tf.Type(), err)
+ } else {
+ ts.t.Logf("I: file replace, num=%d type=%v", tf.Num(), tf.Type())
+ }
+ return
+}
+
func (tf tsFile) Remove() (err error) {
ts := tf.ts
ts.mu.Lock()
@@ -231,51 +276,75 @@ type testStorage struct {
cond sync.Cond
// Open files, true=writer, false=reader
opens map[uint64]bool
- emuOpenErr storage.FileType
- emuCreateErr storage.FileType
emuDelaySync storage.FileType
- emuWriteErr storage.FileType
- emuSyncErr storage.FileType
ignoreOpenErr storage.FileType
readCnt uint64
readCntEn storage.FileType
+
+ emuErr [tsOpNum]storage.FileType
+ emuErrOnce [tsOpNum]storage.FileType
+ emuRandErr [tsOpNum]storage.FileType
+ emuRandErrProb int
+ emuErrOnceMap map[uint64]uint
+ emuRandRand *rand.Rand
+}
+
+func (ts *testStorage) shouldErr(tf tsFile, op tsOp) bool {
+ if ts.emuErr[op]&tf.Type() != 0 {
+ return true
+ } else if ts.emuRandErr[op]&tf.Type() != 0 || ts.emuErrOnce[op]&tf.Type() != 0 {
+ sop := uint(1) << op
+ eop := ts.emuErrOnceMap[tf.x()]
+ if eop&sop == 0 && (ts.emuRandRand.Int()%ts.emuRandErrProb == 0 || ts.emuErrOnce[op]&tf.Type() != 0) {
+ ts.emuErrOnceMap[tf.x()] = eop | sop
+ ts.t.Logf("I: emulated error: file=%d type=%v op=%v", tf.Num(), tf.Type(), op)
+ return true
+ }
+ }
+ return false
}
-func (ts *testStorage) SetOpenErr(t storage.FileType) {
+func (ts *testStorage) SetEmuErr(t storage.FileType, ops ...tsOp) {
ts.mu.Lock()
- ts.emuOpenErr = t
+ for _, op := range ops {
+ ts.emuErr[op] = t
+ }
ts.mu.Unlock()
}
-func (ts *testStorage) SetCreateErr(t storage.FileType) {
+func (ts *testStorage) SetEmuErrOnce(t storage.FileType, ops ...tsOp) {
ts.mu.Lock()
- ts.emuCreateErr = t
+ for _, op := range ops {
+ ts.emuErrOnce[op] = t
+ }
ts.mu.Unlock()
}
-func (ts *testStorage) DelaySync(t storage.FileType) {
+func (ts *testStorage) SetEmuRandErr(t storage.FileType, ops ...tsOp) {
ts.mu.Lock()
- ts.emuDelaySync |= t
- ts.cond.Broadcast()
+ for _, op := range ops {
+ ts.emuRandErr[op] = t
+ }
ts.mu.Unlock()
}
-func (ts *testStorage) ReleaseSync(t storage.FileType) {
+func (ts *testStorage) SetEmuRandErrProb(prob int) {
ts.mu.Lock()
- ts.emuDelaySync &= ^t
- ts.cond.Broadcast()
+ ts.emuRandErrProb = prob
ts.mu.Unlock()
}
-func (ts *testStorage) SetWriteErr(t storage.FileType) {
+func (ts *testStorage) DelaySync(t storage.FileType) {
ts.mu.Lock()
- ts.emuWriteErr = t
+ ts.emuDelaySync |= t
+ ts.cond.Broadcast()
ts.mu.Unlock()
}
-func (ts *testStorage) SetSyncErr(t storage.FileType) {
+func (ts *testStorage) ReleaseSync(t storage.FileType) {
ts.mu.Lock()
- ts.emuSyncErr = t
+ ts.emuDelaySync &= ^t
+ ts.cond.Broadcast()
ts.mu.Unlock()
}
@@ -413,7 +482,11 @@ func newTestStorage(t *testing.T) *testStorage {
num := tsNum
tsNum++
tsMU.Unlock()
- path := filepath.Join(os.TempDir(), fmt.Sprintf("goleveldb-test%d0%d0%d", os.Getuid(), os.Getpid(), num))
+ tempdir := tsTempdir
+ if tempdir == "" {
+ tempdir = os.TempDir()
+ }
+ path := filepath.Join(tempdir, fmt.Sprintf("goleveldb-test%d0%d0%d", os.Getuid(), os.Getpid(), num))
if _, err := os.Stat(path); err != nil {
stor, err = storage.OpenFile(path)
if err != nil {
@@ -436,6 +509,10 @@ func newTestStorage(t *testing.T) *testStorage {
}
f.Close()
}
+ if t.Failed() {
+ t.Logf("testing failed, test DB preserved at %s", path)
+ return nil
+ }
if tsKeepFS {
return nil
}
@@ -449,10 +526,13 @@ func newTestStorage(t *testing.T) *testStorage {
stor = storage.NewMemStorage()
}
ts := &testStorage{
- t: t,
- Storage: stor,
- closeFn: closeFn,
- opens: make(map[uint64]bool),
+ t: t,
+ Storage: stor,
+ closeFn: closeFn,
+ opens: make(map[uint64]bool),
+ emuErrOnceMap: make(map[uint64]uint),
+ emuRandErrProb: 0x999,
+ emuRandRand: rand.New(rand.NewSource(0xfacedead)),
}
ts.cond.L = &ts.mu
return ts
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table.go
index fdd5d2bcf..3e8df6af5 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table.go
@@ -7,11 +7,11 @@
package leveldb
import (
+ "fmt"
"sort"
"sync/atomic"
"github.com/syndtr/goleveldb/leveldb/cache"
- "github.com/syndtr/goleveldb/leveldb/comparer"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/storage"
@@ -19,34 +19,41 @@ import (
"github.com/syndtr/goleveldb/leveldb/util"
)
-// table file
+// tFile holds basic information about a table.
type tFile struct {
- file storage.File
- seekLeft int32
- size uint64
- min, max iKey
+ file storage.File
+ seekLeft int32
+ size uint64
+ imin, imax iKey
}
-// test if key is after t
-func (t *tFile) isAfter(key []byte, ucmp comparer.BasicComparer) bool {
- return key != nil && ucmp.Compare(key, t.max.ukey()) > 0
+// Returns true if given key is after largest key of this table.
+func (t *tFile) after(icmp *iComparer, ukey []byte) bool {
+ return ukey != nil && icmp.uCompare(ukey, t.imax.ukey()) > 0
}
-// test if key is before t
-func (t *tFile) isBefore(key []byte, ucmp comparer.BasicComparer) bool {
- return key != nil && ucmp.Compare(key, t.min.ukey()) < 0
+// Returns true if given key is before smallest key of this table.
+func (t *tFile) before(icmp *iComparer, ukey []byte) bool {
+ return ukey != nil && icmp.uCompare(ukey, t.imin.ukey()) < 0
}
-func (t *tFile) incrSeek() int32 {
+// Returns true if given key range overlaps with this table key range.
+func (t *tFile) overlaps(icmp *iComparer, umin, umax []byte) bool {
+ return !t.after(icmp, umin) && !t.before(icmp, umax)
+}
+
+// Consumes one seek and returns the number of seeks left.
+func (t *tFile) consumeSeek() int32 {
return atomic.AddInt32(&t.seekLeft, -1)
}
-func newTFile(file storage.File, size uint64, min, max iKey) *tFile {
+// Creates new tFile.
+func newTableFile(file storage.File, size uint64, imin, imax iKey) *tFile {
f := &tFile{
file: file,
size: size,
- min: min,
- max: max,
+ imin: imin,
+ imax: imax,
}
// We arrange to automatically compact this file after
@@ -70,33 +77,52 @@ func newTFile(file storage.File, size uint64, min, max iKey) *tFile {
return f
}
-// table files
+// tFiles holds multiple tFile.
type tFiles []*tFile
func (tf tFiles) Len() int { return len(tf) }
func (tf tFiles) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] }
+func (tf tFiles) nums() string {
+ x := "[ "
+ for i, f := range tf {
+ if i != 0 {
+ x += ", "
+ }
+ x += fmt.Sprint(f.file.Num())
+ }
+ x += " ]"
+ return x
+}
+
+// Returns true if the smallest key of table i is less than that of table j.
+// This is used to sort by key in ascending order.
func (tf tFiles) lessByKey(icmp *iComparer, i, j int) bool {
a, b := tf[i], tf[j]
- n := icmp.Compare(a.min, b.min)
+ n := icmp.Compare(a.imin, b.imin)
if n == 0 {
return a.file.Num() < b.file.Num()
}
return n < 0
}
+// Returns true if the file number of table i is greater than that of table j.
+// This is used to sort by file number in descending order.
func (tf tFiles) lessByNum(i, j int) bool {
return tf[i].file.Num() > tf[j].file.Num()
}
+// Sorts tables by key in ascending order.
func (tf tFiles) sortByKey(icmp *iComparer) {
sort.Sort(&tFilesSortByKey{tFiles: tf, icmp: icmp})
}
+// Sorts tables by file number in descending order.
func (tf tFiles) sortByNum() {
sort.Sort(&tFilesSortByNum{tFiles: tf})
}
+// Returns sum of all tables size.
func (tf tFiles) size() (sum uint64) {
for _, t := range tf {
sum += t.size
@@ -104,94 +130,107 @@ func (tf tFiles) size() (sum uint64) {
return sum
}
-func (tf tFiles) searchMin(key iKey, icmp *iComparer) int {
+// Searches for the smallest index of the table whose smallest
+// key is after or equal to the given key.
+func (tf tFiles) searchMin(icmp *iComparer, ikey iKey) int {
return sort.Search(len(tf), func(i int) bool {
- return icmp.Compare(tf[i].min, key) >= 0
+ return icmp.Compare(tf[i].imin, ikey) >= 0
})
}
-func (tf tFiles) searchMax(key iKey, icmp *iComparer) int {
+// Searches for the smallest index of the table whose largest
+// key is after or equal to the given key.
+func (tf tFiles) searchMax(icmp *iComparer, ikey iKey) int {
return sort.Search(len(tf), func(i int) bool {
- return icmp.Compare(tf[i].max, key) >= 0
+ return icmp.Compare(tf[i].imax, ikey) >= 0
})
}
-func (tf tFiles) isOverlaps(min, max []byte, disjSorted bool, icmp *iComparer) bool {
- if !disjSorted {
- // Need to check against all files
+// Returns true if given key range overlaps with one or more
+// tables key range. If unsorted is true then binary search will not be used.
+func (tf tFiles) overlaps(icmp *iComparer, umin, umax []byte, unsorted bool) bool {
+ if unsorted {
+ // Check against all files.
for _, t := range tf {
- if !t.isAfter(min, icmp.ucmp) && !t.isBefore(max, icmp.ucmp) {
+ if t.overlaps(icmp, umin, umax) {
return true
}
}
return false
}
- var idx int
- if len(min) > 0 {
- // Find the earliest possible internal key for min
- idx = tf.searchMax(newIKey(min, kMaxSeq, tSeek), icmp)
+ i := 0
+ if len(umin) > 0 {
+ // Find the earliest possible internal key for min.
+ i = tf.searchMax(icmp, newIkey(umin, kMaxSeq, ktSeek))
}
-
- if idx >= len(tf) {
- // beginning of range is after all files, so no overlap
+ if i >= len(tf) {
+ // Beginning of range is after all files, so no overlap.
return false
}
- return !tf[idx].isBefore(max, icmp.ucmp)
+ return !tf[i].before(icmp, umax)
}
-func (tf tFiles) getOverlaps(min, max []byte, r *tFiles, disjSorted bool, ucmp comparer.BasicComparer) {
+// Returns tables whose key range overlaps with the given key range.
+// The range will be expanded if a ukey is found to hop across tables.
+// If overlapped is true then the search will be restarted if umax
+// expanded.
+// The dst content will be overwritten.
+func (tf tFiles) getOverlaps(dst tFiles, icmp *iComparer, umin, umax []byte, overlapped bool) tFiles {
+ dst = dst[:0]
for i := 0; i < len(tf); {
t := tf[i]
- i++
- if t.isAfter(min, ucmp) || t.isBefore(max, ucmp) {
- continue
- }
-
- *r = append(*r, t)
- if !disjSorted {
- // Level-0 files may overlap each other. So check if the newly
- // added file has expanded the range. If so, restart search.
- if min != nil && ucmp.Compare(t.min.ukey(), min) < 0 {
- min = t.min.ukey()
- *r = nil
- i = 0
- } else if max != nil && ucmp.Compare(t.max.ukey(), max) > 0 {
- max = t.max.ukey()
- *r = nil
+ if t.overlaps(icmp, umin, umax) {
+ if umin != nil && icmp.uCompare(t.imin.ukey(), umin) < 0 {
+ umin = t.imin.ukey()
+ dst = dst[:0]
i = 0
+ continue
+ } else if umax != nil && icmp.uCompare(t.imax.ukey(), umax) > 0 {
+ umax = t.imax.ukey()
+ // Restart search if it is overlapped.
+ if overlapped {
+ dst = dst[:0]
+ i = 0
+ continue
+ }
}
+
+ dst = append(dst, t)
}
+ i++
}
- return
+ return dst
}
-func (tf tFiles) getRange(icmp *iComparer) (min, max iKey) {
+// Returns tables key range.
+func (tf tFiles) getRange(icmp *iComparer) (imin, imax iKey) {
for i, t := range tf {
if i == 0 {
- min, max = t.min, t.max
+ imin, imax = t.imin, t.imax
continue
}
- if icmp.Compare(t.min, min) < 0 {
- min = t.min
+ if icmp.Compare(t.imin, imin) < 0 {
+ imin = t.imin
}
- if icmp.Compare(t.max, max) > 0 {
- max = t.max
+ if icmp.Compare(t.imax, imax) > 0 {
+ imax = t.imax
}
}
return
}
+// Creates iterator index from tables.
func (tf tFiles) newIndexIterator(tops *tOps, icmp *iComparer, slice *util.Range, ro *opt.ReadOptions) iterator.IteratorIndexer {
if slice != nil {
var start, limit int
if slice.Start != nil {
- start = tf.searchMax(iKey(slice.Start), icmp)
+ start = tf.searchMax(icmp, iKey(slice.Start))
}
if slice.Limit != nil {
- limit = tf.searchMin(iKey(slice.Limit), icmp)
+ limit = tf.searchMin(icmp, iKey(slice.Limit))
} else {
limit = tf.Len()
}
@@ -206,6 +245,7 @@ func (tf tFiles) newIndexIterator(tops *tOps, icmp *iComparer, slice *util.Range
})
}
+// Tables iterator index.
type tFilesArrayIndexer struct {
tFiles
tops *tOps
@@ -215,7 +255,7 @@ type tFilesArrayIndexer struct {
}
func (a *tFilesArrayIndexer) Search(key []byte) int {
- return a.searchMax(iKey(key), a.icmp)
+ return a.searchMax(a.icmp, iKey(key))
}
func (a *tFilesArrayIndexer) Get(i int) iterator.Iterator {
@@ -225,6 +265,7 @@ func (a *tFilesArrayIndexer) Get(i int) iterator.Iterator {
return a.tops.newIterator(a.tFiles[i], nil, a.ro)
}
+// Helper type for sortByKey.
type tFilesSortByKey struct {
tFiles
icmp *iComparer
@@ -234,6 +275,7 @@ func (x *tFilesSortByKey) Less(i, j int) bool {
return x.lessByKey(x.icmp, i, j)
}
+// Helper type for sortByNum.
type tFilesSortByNum struct {
tFiles
}
@@ -242,19 +284,15 @@ func (x *tFilesSortByNum) Less(i, j int) bool {
return x.lessByNum(i, j)
}
-// table operations
+// Table operations.
type tOps struct {
- s *session
- cache cache.Cache
- cacheNS cache.Namespace
-}
-
-func newTableOps(s *session, cacheCap int) *tOps {
- c := cache.NewLRUCache(cacheCap)
- ns := c.GetNamespace(0)
- return &tOps{s, c, ns}
+ s *session
+ cache *cache.Cache
+ bcache *cache.Cache
+ bpool *util.BufferPool
}
+// Creates an empty table and returns table writer.
func (t *tOps) create() (*tWriter, error) {
file := t.s.getTableFile(t.s.allocFileNum())
fw, err := file.Create()
@@ -265,14 +303,15 @@ func (t *tOps) create() (*tWriter, error) {
t: t,
file: file,
w: fw,
- tw: table.NewWriter(fw, t.s.o),
+ tw: table.NewWriter(fw, t.s.o.Options),
}, nil
}
+// Builds table from src iterator.
func (t *tOps) createFrom(src iterator.Iterator) (f *tFile, n int, err error) {
w, err := t.create()
if err != nil {
- return f, n, err
+ return
}
defer func() {
@@ -282,7 +321,7 @@ func (t *tOps) createFrom(src iterator.Iterator) (f *tFile, n int, err error) {
}()
for src.Next() {
- err = w.add(src.Key(), src.Value())
+ err = w.append(src.Key(), src.Value())
if err != nil {
return
}
@@ -297,84 +336,132 @@ func (t *tOps) createFrom(src iterator.Iterator) (f *tFile, n int, err error) {
return
}
-func (t *tOps) lookup(f *tFile) (c cache.Object, err error) {
+// Opens table. It returns a cache handle, which should
+// be released after use.
+func (t *tOps) open(f *tFile) (ch *cache.Handle, err error) {
num := f.file.Num()
- c, ok := t.cacheNS.Get(num, func() (ok bool, value interface{}, charge int, fin cache.SetFin) {
+ ch = t.cache.Get(0, num, func() (size int, value cache.Value) {
var r storage.Reader
r, err = f.file.Open()
if err != nil {
- return
+ return 0, nil
}
- o := t.s.o
-
- var cacheNS cache.Namespace
- if bc := o.GetBlockCache(); bc != nil {
- cacheNS = bc.GetNamespace(num)
+ var bcache *cache.CacheGetter
+ if t.bcache != nil {
+ bcache = &cache.CacheGetter{Cache: t.bcache, NS: num}
}
- ok = true
- value = table.NewReader(r, int64(f.size), cacheNS, o)
- charge = 1
- fin = func() {
+ var tr *table.Reader
+ tr, err = table.NewReader(r, int64(f.size), storage.NewFileInfo(f.file), bcache, t.bpool, t.s.o.Options)
+ if err != nil {
r.Close()
+ return 0, nil
}
- return
+ return 1, tr
+
})
- if !ok && err == nil {
+ if ch == nil && err == nil {
err = ErrClosed
}
return
}
-func (t *tOps) get(f *tFile, key []byte, ro *opt.ReadOptions) (rkey, rvalue []byte, err error) {
- c, err := t.lookup(f)
+// Finds key/value pair whose key is greater than or equal to the
+// given key.
+func (t *tOps) find(f *tFile, key []byte, ro *opt.ReadOptions) (rkey, rvalue []byte, err error) {
+ ch, err := t.open(f)
if err != nil {
return nil, nil, err
}
- defer c.Release()
- return c.Value().(*table.Reader).Find(key, ro)
+ defer ch.Release()
+ return ch.Value().(*table.Reader).Find(key, true, ro)
+}
+
+// Finds key that is greater than or equal to the given key.
+func (t *tOps) findKey(f *tFile, key []byte, ro *opt.ReadOptions) (rkey []byte, err error) {
+ ch, err := t.open(f)
+ if err != nil {
+ return nil, err
+ }
+ defer ch.Release()
+ return ch.Value().(*table.Reader).FindKey(key, true, ro)
}
+// Returns approximate offset of the given key.
func (t *tOps) offsetOf(f *tFile, key []byte) (offset uint64, err error) {
- c, err := t.lookup(f)
+ ch, err := t.open(f)
if err != nil {
return
}
- _offset, err := c.Value().(*table.Reader).OffsetOf(key)
- offset = uint64(_offset)
- c.Release()
- return
+ defer ch.Release()
+ offset_, err := ch.Value().(*table.Reader).OffsetOf(key)
+ return uint64(offset_), err
}
+// Creates an iterator from the given table.
func (t *tOps) newIterator(f *tFile, slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
- c, err := t.lookup(f)
+ ch, err := t.open(f)
if err != nil {
return iterator.NewEmptyIterator(err)
}
- iter := c.Value().(*table.Reader).NewIterator(slice, ro)
- iter.SetReleaser(c)
+ iter := ch.Value().(*table.Reader).NewIterator(slice, ro)
+ iter.SetReleaser(ch)
return iter
}
+// Removes table from persistent storage. It waits until
+// no one is using the table.
func (t *tOps) remove(f *tFile) {
num := f.file.Num()
- t.cacheNS.Delete(num, func(exist bool) {
+ t.cache.Delete(0, num, func() {
if err := f.file.Remove(); err != nil {
t.s.logf("table@remove removing @%d %q", num, err)
} else {
t.s.logf("table@remove removed @%d", num)
}
- if bc := t.s.o.GetBlockCache(); bc != nil {
- bc.GetNamespace(num).Zap(false)
+ if t.bcache != nil {
+ t.bcache.EvictNS(num)
}
})
}
+// Closes the table ops instance. It will close all tables,
+// regardless of whether they are still in use.
func (t *tOps) close() {
- t.cache.Zap(true)
+ t.bpool.Close()
+ t.cache.Close()
+ if t.bcache != nil {
+ t.bcache.Close()
+ }
+}
+
+// Creates new initialized table ops instance.
+func newTableOps(s *session) *tOps {
+ var (
+ cacher cache.Cacher
+ bcache *cache.Cache
+ )
+ if s.o.GetOpenFilesCacheCapacity() > 0 {
+ cacher = cache.NewLRU(s.o.GetOpenFilesCacheCapacity())
+ }
+ if !s.o.DisableBlockCache {
+ var bcacher cache.Cacher
+ if s.o.GetBlockCacheCapacity() > 0 {
+ bcacher = cache.NewLRU(s.o.GetBlockCacheCapacity())
+ }
+ bcache = cache.NewCache(bcacher)
+ }
+ return &tOps{
+ s: s,
+ cache: cache.NewCache(cacher),
+ bcache: bcache,
+ bpool: util.NewBufferPool(s.o.GetBlockSize() + 5),
+ }
}
+// tWriter wraps the table writer. It keeps track of the file descriptor
+// and the added key range.
type tWriter struct {
t *tOps
@@ -385,7 +472,8 @@ type tWriter struct {
first, last []byte
}
-func (w *tWriter) add(key, value []byte) error {
+// Append key/value pair to the table.
+func (w *tWriter) append(key, value []byte) error {
if w.first == nil {
w.first = append([]byte{}, key...)
}
@@ -393,30 +481,39 @@ func (w *tWriter) add(key, value []byte) error {
return w.tw.Append(key, value)
}
+// Returns true if the table is empty.
func (w *tWriter) empty() bool {
return w.first == nil
}
+// Closes the storage.Writer.
+func (w *tWriter) close() {
+ if w.w != nil {
+ w.w.Close()
+ w.w = nil
+ }
+}
+
+// Finalizes the table and returns table file.
func (w *tWriter) finish() (f *tFile, err error) {
+ defer w.close()
err = w.tw.Close()
if err != nil {
return
}
err = w.w.Sync()
if err != nil {
- w.w.Close()
return
}
- w.w.Close()
- f = newTFile(w.file, uint64(w.tw.BytesLen()), iKey(w.first), iKey(w.last))
+ f = newTableFile(w.file, uint64(w.tw.BytesLen()), iKey(w.first), iKey(w.last))
return
}
+// Drops the table.
func (w *tWriter) drop() {
- w.w.Close()
+ w.close()
w.file.Remove()
w.t.s.reuseFileNum(w.file.Num())
- w.w = nil
w.file = nil
w.tw = nil
w.first = nil
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/block_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/block_test.go
index ca598f4f5..00e6f9eea 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/block_test.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/block_test.go
@@ -19,13 +19,18 @@ import (
"github.com/syndtr/goleveldb/leveldb/util"
)
-func (b *block) TestNewIterator(slice *util.Range) iterator.Iterator {
- return b.newIterator(slice, false, nil)
+type blockTesting struct {
+ tr *Reader
+ b *block
+}
+
+func (t *blockTesting) TestNewIterator(slice *util.Range) iterator.Iterator {
+ return t.tr.newBlockIter(t.b, nil, slice, false)
}
var _ = testutil.Defer(func() {
Describe("Block", func() {
- Build := func(kv *testutil.KeyValue, restartInterval int) *block {
+ Build := func(kv *testutil.KeyValue, restartInterval int) *blockTesting {
// Building the block.
bw := &blockWriter{
restartInterval: restartInterval,
@@ -39,11 +44,13 @@ var _ = testutil.Defer(func() {
// Opening the block.
data := bw.buf.Bytes()
restartsLen := int(binary.LittleEndian.Uint32(data[len(data)-4:]))
- return &block{
- cmp: comparer.DefaultComparer,
- data: data,
- restartsLen: restartsLen,
- restartsOffset: len(data) - (restartsLen+1)*4,
+ return &blockTesting{
+ tr: &Reader{cmp: comparer.DefaultComparer},
+ b: &block{
+ data: data,
+ restartsLen: restartsLen,
+ restartsOffset: len(data) - (restartsLen+1)*4,
+ },
}
}
@@ -59,7 +66,7 @@ var _ = testutil.Defer(func() {
// Make block.
br := Build(kv, restartInterval)
// Do testing.
- testutil.KeyValueTesting(nil, br, kv.Clone())
+ testutil.KeyValueTesting(nil, kv.Clone(), br, nil, nil)
}
Describe(Text(), Test)
@@ -102,11 +109,11 @@ var _ = testutil.Defer(func() {
for restartInterval := 1; restartInterval <= 5; restartInterval++ {
Describe(fmt.Sprintf("with restart interval of %d", restartInterval), func() {
// Make block.
- br := Build(kv, restartInterval)
+ bt := Build(kv, restartInterval)
Test := func(r *util.Range) func(done Done) {
return func(done Done) {
- iter := br.newIterator(r, false, nil)
+ iter := bt.TestNewIterator(r)
Expect(iter.Error()).ShouldNot(HaveOccurred())
t := testutil.IteratorTesting{
@@ -115,6 +122,7 @@ var _ = testutil.Defer(func() {
}
testutil.DoIteratorTesting(&t)
+ iter.Release()
done <- true
}
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/reader.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/reader.go
index 8acb9f720..6f38e84b3 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/reader.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/reader.go
@@ -8,27 +8,41 @@ package table
import (
"encoding/binary"
- "errors"
"fmt"
"io"
"sort"
"strings"
+ "sync"
- "code.google.com/p/snappy-go/snappy"
+ "github.com/syndtr/gosnappy/snappy"
"github.com/syndtr/goleveldb/leveldb/cache"
"github.com/syndtr/goleveldb/leveldb/comparer"
+ "github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/filter"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
"github.com/syndtr/goleveldb/leveldb/util"
)
var (
- ErrNotFound = util.ErrNotFound
- ErrIterReleased = errors.New("leveldb/table: iterator released")
+ ErrNotFound = errors.ErrNotFound
+ ErrReaderReleased = errors.New("leveldb/table: reader released")
+ ErrIterReleased = errors.New("leveldb/table: iterator released")
)
+type ErrCorrupted struct {
+ Pos int64
+ Size int64
+ Kind string
+ Reason string
+}
+
+func (e *ErrCorrupted) Error() string {
+ return fmt.Sprintf("leveldb/table: corruption on %s (pos=%d): %s", e.Kind, e.Pos, e.Reason)
+}
+
func max(x, y int) int {
if x > y {
return x
@@ -37,40 +51,33 @@ func max(x, y int) int {
}
type block struct {
- cmp comparer.BasicComparer
+ bpool *util.BufferPool
+ bh blockHandle
data []byte
restartsLen int
restartsOffset int
- // Whether checksum is verified and valid.
- checksum bool
}
-func (b *block) seek(rstart, rlimit int, key []byte) (index, offset int, err error) {
- n := b.restartsOffset
- data := b.data
- cmp := b.cmp
-
+func (b *block) seek(cmp comparer.Comparer, rstart, rlimit int, key []byte) (index, offset int, err error) {
index = sort.Search(b.restartsLen-rstart-(b.restartsLen-rlimit), func(i int) bool {
- offset := int(binary.LittleEndian.Uint32(data[n+4*(rstart+i):]))
- offset += 1 // shared always zero, since this is a restart point
- v1, n1 := binary.Uvarint(data[offset:]) // key length
- _, n2 := binary.Uvarint(data[offset+n1:]) // value length
+ offset := int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*(rstart+i):]))
+ offset += 1 // shared always zero, since this is a restart point
+ v1, n1 := binary.Uvarint(b.data[offset:]) // key length
+ _, n2 := binary.Uvarint(b.data[offset+n1:]) // value length
m := offset + n1 + n2
- return cmp.Compare(data[m:m+int(v1)], key) > 0
+ return cmp.Compare(b.data[m:m+int(v1)], key) > 0
}) + rstart - 1
if index < rstart {
// The smallest key is greater-than key sought.
index = rstart
}
- offset = int(binary.LittleEndian.Uint32(data[n+4*index:]))
+ offset = int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*index:]))
return
}
func (b *block) restartIndex(rstart, rlimit, offset int) int {
- n := b.restartsOffset
- data := b.data
return sort.Search(b.restartsLen-rstart-(b.restartsLen-rlimit), func(i int) bool {
- return int(binary.LittleEndian.Uint32(data[n+4*(rstart+i):])) > offset
+ return int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*(rstart+i):])) > offset
}) + rstart - 1
}
@@ -81,7 +88,7 @@ func (b *block) restartOffset(index int) int {
func (b *block) entry(offset int) (key, value []byte, nShared, n int, err error) {
if offset >= b.restartsOffset {
if offset != b.restartsOffset {
- err = errors.New("leveldb/table: Reader: BlockEntry: invalid block (block entries offset not aligned)")
+ err = &ErrCorrupted{Reason: "entries offset not aligned"}
}
return
}
@@ -91,7 +98,7 @@ func (b *block) entry(offset int) (key, value []byte, nShared, n int, err error)
m := n0 + n1 + n2
n = m + int(v1) + int(v2)
if n0 <= 0 || n1 <= 0 || n2 <= 0 || offset+n > b.restartsOffset {
- err = errors.New("leveldb/table: Reader: invalid block (block entries corrupted)")
+ err = &ErrCorrupted{Reason: "entries corrupted"}
return
}
key = b.data[offset+m : offset+m+int(v1)]
@@ -100,43 +107,10 @@ func (b *block) entry(offset int) (key, value []byte, nShared, n int, err error)
return
}
-func (b *block) newIterator(slice *util.Range, inclLimit bool, cache util.Releaser) *blockIter {
- bi := &blockIter{
- block: b,
- cache: cache,
- // Valid key should never be nil.
- key: make([]byte, 0),
- dir: dirSOI,
- riStart: 0,
- riLimit: b.restartsLen,
- offsetStart: 0,
- offsetRealStart: 0,
- offsetLimit: b.restartsOffset,
- }
- if slice != nil {
- if slice.Start != nil {
- if bi.Seek(slice.Start) {
- bi.riStart = b.restartIndex(bi.restartIndex, b.restartsLen, bi.prevOffset)
- bi.offsetStart = b.restartOffset(bi.riStart)
- bi.offsetRealStart = bi.prevOffset
- } else {
- bi.riStart = b.restartsLen
- bi.offsetStart = b.restartsOffset
- bi.offsetRealStart = b.restartsOffset
- }
- }
- if slice.Limit != nil {
- if bi.Seek(slice.Limit) && (!inclLimit || bi.Next()) {
- bi.offsetLimit = bi.prevOffset
- bi.riLimit = bi.restartIndex + 1
- }
- }
- bi.reset()
- if bi.offsetStart > bi.offsetLimit {
- bi.sErr(errors.New("leveldb/table: Reader: invalid slice range"))
- }
- }
- return bi
+func (b *block) Release() {
+ b.bpool.Put(b.data)
+ b.bpool = nil
+ b.data = nil
}
type dir int
@@ -150,10 +124,12 @@ const (
)
type blockIter struct {
- block *block
- cache, releaser util.Releaser
- key, value []byte
- offset int
+ tr *Reader
+ block *block
+ blockReleaser util.Releaser
+ releaser util.Releaser
+ key, value []byte
+ offset int
// Previous offset, only filled by Next.
prevOffset int
prevNode []int
@@ -250,7 +226,7 @@ func (i *blockIter) Seek(key []byte) bool {
return false
}
- ri, offset, err := i.block.seek(i.riStart, i.riLimit, key)
+ ri, offset, err := i.block.seek(i.tr.cmp, i.riStart, i.riLimit, key)
if err != nil {
i.sErr(err)
return false
@@ -261,7 +237,7 @@ func (i *blockIter) Seek(key []byte) bool {
i.dir = dirForward
}
for i.Next() {
- if i.block.cmp.Compare(i.key, key) >= 0 {
+ if i.tr.cmp.Compare(i.key, key) >= 0 {
return true
}
}
@@ -286,7 +262,7 @@ func (i *blockIter) Next() bool {
for i.offset < i.offsetRealStart {
key, value, nShared, n, err := i.block.entry(i.offset)
if err != nil {
- i.sErr(err)
+ i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err))
return false
}
if n == 0 {
@@ -300,13 +276,13 @@ func (i *blockIter) Next() bool {
if i.offset >= i.offsetLimit {
i.dir = dirEOI
if i.offset != i.offsetLimit {
- i.sErr(errors.New("leveldb/table: Reader: Next: invalid block (block entries offset not aligned)"))
+ i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned"))
}
return false
}
key, value, nShared, n, err := i.block.entry(i.offset)
if err != nil {
- i.sErr(err)
+ i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err))
return false
}
if n == 0 {
@@ -391,7 +367,7 @@ func (i *blockIter) Prev() bool {
for {
key, value, nShared, n, err := i.block.entry(offset)
if err != nil {
- i.sErr(err)
+ i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err))
return false
}
if offset >= i.offsetRealStart {
@@ -410,7 +386,7 @@ func (i *blockIter) Prev() bool {
// Stop if target offset reached.
if offset >= i.offset {
if offset != i.offset {
- i.sErr(errors.New("leveldb/table: Reader: Prev: invalid block (block entries offset not aligned)"))
+ i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned"))
return false
}
@@ -437,25 +413,33 @@ func (i *blockIter) Value() []byte {
}
func (i *blockIter) Release() {
- i.prevNode = nil
- i.prevKeys = nil
- i.key = nil
- i.value = nil
- i.dir = dirReleased
- if i.cache != nil {
- i.cache.Release()
- i.cache = nil
- }
- if i.releaser != nil {
- i.releaser.Release()
- i.releaser = nil
+ if i.dir != dirReleased {
+ i.tr = nil
+ i.block = nil
+ i.prevNode = nil
+ i.prevKeys = nil
+ i.key = nil
+ i.value = nil
+ i.dir = dirReleased
+ if i.blockReleaser != nil {
+ i.blockReleaser.Release()
+ i.blockReleaser = nil
+ }
+ if i.releaser != nil {
+ i.releaser.Release()
+ i.releaser = nil
+ }
}
}
func (i *blockIter) SetReleaser(releaser util.Releaser) {
- if i.dir > dirReleased {
- i.releaser = releaser
+ if i.dir == dirReleased {
+ panic(util.ErrReleased)
}
+ if i.releaser != nil && releaser != nil {
+ panic(util.ErrHasReleaser)
+ }
+ i.releaser = releaser
}
func (i *blockIter) Valid() bool {
@@ -467,21 +451,21 @@ func (i *blockIter) Error() error {
}
type filterBlock struct {
- filter filter.Filter
+ bpool *util.BufferPool
data []byte
oOffset int
baseLg uint
filtersNum int
}
-func (b *filterBlock) contains(offset uint64, key []byte) bool {
+func (b *filterBlock) contains(filter filter.Filter, offset uint64, key []byte) bool {
i := int(offset >> b.baseLg)
if i < b.filtersNum {
o := b.data[b.oOffset+i*4:]
n := int(binary.LittleEndian.Uint32(o))
m := int(binary.LittleEndian.Uint32(o[4:]))
if n < m && m <= b.oOffset {
- return b.filter.Contains(b.data[n:m], key)
+ return filter.Contains(b.data[n:m], key)
} else if n == m {
return false
}
@@ -489,12 +473,17 @@ func (b *filterBlock) contains(offset uint64, key []byte) bool {
return true
}
+func (b *filterBlock) Release() {
+ b.bpool.Put(b.data)
+ b.bpool = nil
+ b.data = nil
+}
+
type indexIter struct {
- blockIter
- tableReader *Reader
- slice *util.Range
+ *blockIter
+ tr *Reader
+ slice *util.Range
// Options
- checksum bool
fillCache bool
}
@@ -505,95 +494,173 @@ func (i *indexIter) Get() iterator.Iterator {
}
dataBH, n := decodeBlockHandle(value)
if n == 0 {
- return iterator.NewEmptyIterator(errors.New("leveldb/table: Reader: invalid table (bad data block handle)"))
+ return iterator.NewEmptyIterator(i.tr.newErrCorruptedBH(i.tr.indexBH, "bad data block handle"))
}
+
var slice *util.Range
if i.slice != nil && (i.blockIter.isFirst() || i.blockIter.isLast()) {
slice = i.slice
}
- return i.tableReader.getDataIter(dataBH, slice, i.checksum, i.fillCache)
+ return i.tr.getDataIterErr(dataBH, slice, i.tr.verifyChecksum, i.fillCache)
}
// Reader is a table reader.
type Reader struct {
+ mu sync.RWMutex
+ fi *storage.FileInfo
reader io.ReaderAt
- cache cache.Namespace
+ cache *cache.CacheGetter
err error
+ bpool *util.BufferPool
// Options
- cmp comparer.Comparer
- filter filter.Filter
- checksum bool
- strictIter bool
+ o *opt.Options
+ cmp comparer.Comparer
+ filter filter.Filter
+ verifyChecksum bool
- dataEnd int64
- indexBlock *block
- filterBlock *filterBlock
+ dataEnd int64
+ metaBH, indexBH, filterBH blockHandle
+ indexBlock *block
+ filterBlock *filterBlock
}
-func verifyChecksum(data []byte) bool {
- n := len(data) - 4
- checksum0 := binary.LittleEndian.Uint32(data[n:])
- checksum1 := util.NewCRC(data[:n]).Value()
- return checksum0 == checksum1
+func (r *Reader) blockKind(bh blockHandle) string {
+ switch bh.offset {
+ case r.metaBH.offset:
+ return "meta-block"
+ case r.indexBH.offset:
+ return "index-block"
+ case r.filterBH.offset:
+ if r.filterBH.length > 0 {
+ return "filter-block"
+ }
+ }
+ return "data-block"
}
-func (r *Reader) readRawBlock(bh blockHandle, checksum bool) ([]byte, error) {
- data := make([]byte, bh.length+blockTrailerLen)
+func (r *Reader) newErrCorrupted(pos, size int64, kind, reason string) error {
+ return &errors.ErrCorrupted{File: r.fi, Err: &ErrCorrupted{Pos: pos, Size: size, Kind: kind, Reason: reason}}
+}
+
+func (r *Reader) newErrCorruptedBH(bh blockHandle, reason string) error {
+ return r.newErrCorrupted(int64(bh.offset), int64(bh.length), r.blockKind(bh), reason)
+}
+
+func (r *Reader) fixErrCorruptedBH(bh blockHandle, err error) error {
+ if cerr, ok := err.(*ErrCorrupted); ok {
+ cerr.Pos = int64(bh.offset)
+ cerr.Size = int64(bh.length)
+ cerr.Kind = r.blockKind(bh)
+ return &errors.ErrCorrupted{File: r.fi, Err: cerr}
+ }
+ return err
+}
+
+func (r *Reader) readRawBlock(bh blockHandle, verifyChecksum bool) ([]byte, error) {
+ data := r.bpool.Get(int(bh.length + blockTrailerLen))
if _, err := r.reader.ReadAt(data, int64(bh.offset)); err != nil && err != io.EOF {
return nil, err
}
- if checksum || r.checksum {
- if !verifyChecksum(data) {
- return nil, errors.New("leveldb/table: Reader: invalid block (checksum mismatch)")
+
+ if verifyChecksum {
+ n := bh.length + 1
+ checksum0 := binary.LittleEndian.Uint32(data[n:])
+ checksum1 := util.NewCRC(data[:n]).Value()
+ if checksum0 != checksum1 {
+ r.bpool.Put(data)
+ return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("checksum mismatch, want=%#x got=%#x", checksum0, checksum1))
}
}
+
switch data[bh.length] {
case blockTypeNoCompression:
data = data[:bh.length]
case blockTypeSnappyCompression:
- var err error
- data, err = snappy.Decode(nil, data[:bh.length])
+ decLen, err := snappy.DecodedLen(data[:bh.length])
if err != nil {
- return nil, err
+ return nil, r.newErrCorruptedBH(bh, err.Error())
+ }
+ decData := r.bpool.Get(decLen)
+ decData, err = snappy.Decode(decData, data[:bh.length])
+ r.bpool.Put(data)
+ if err != nil {
+ r.bpool.Put(decData)
+ return nil, r.newErrCorruptedBH(bh, err.Error())
}
+ data = decData
default:
- return nil, fmt.Errorf("leveldb/table: Reader: unknown block compression type: %d", data[bh.length])
+ r.bpool.Put(data)
+ return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("unknown compression type %#x", data[bh.length]))
}
return data, nil
}
-func (r *Reader) readBlock(bh blockHandle, checksum bool) (*block, error) {
- data, err := r.readRawBlock(bh, checksum)
+func (r *Reader) readBlock(bh blockHandle, verifyChecksum bool) (*block, error) {
+ data, err := r.readRawBlock(bh, verifyChecksum)
if err != nil {
return nil, err
}
restartsLen := int(binary.LittleEndian.Uint32(data[len(data)-4:]))
b := &block{
- cmp: r.cmp,
+ bpool: r.bpool,
+ bh: bh,
data: data,
restartsLen: restartsLen,
restartsOffset: len(data) - (restartsLen+1)*4,
- checksum: checksum || r.checksum,
}
return b, nil
}
-func (r *Reader) readFilterBlock(bh blockHandle, filter filter.Filter) (*filterBlock, error) {
+func (r *Reader) readBlockCached(bh blockHandle, verifyChecksum, fillCache bool) (*block, util.Releaser, error) {
+ if r.cache != nil {
+ var (
+ err error
+ ch *cache.Handle
+ )
+ if fillCache {
+ ch = r.cache.Get(bh.offset, func() (size int, value cache.Value) {
+ var b *block
+ b, err = r.readBlock(bh, verifyChecksum)
+ if err != nil {
+ return 0, nil
+ }
+ return cap(b.data), b
+ })
+ } else {
+ ch = r.cache.Get(bh.offset, nil)
+ }
+ if ch != nil {
+ b, ok := ch.Value().(*block)
+ if !ok {
+ ch.Release()
+ return nil, nil, errors.New("leveldb/table: inconsistent block type")
+ }
+ return b, ch, err
+ } else if err != nil {
+ return nil, nil, err
+ }
+ }
+
+ b, err := r.readBlock(bh, verifyChecksum)
+ return b, b, err
+}
+
+func (r *Reader) readFilterBlock(bh blockHandle) (*filterBlock, error) {
data, err := r.readRawBlock(bh, true)
if err != nil {
return nil, err
}
n := len(data)
if n < 5 {
- return nil, errors.New("leveldb/table: Reader: invalid filter block (too short)")
+ return nil, r.newErrCorruptedBH(bh, "too short")
}
m := n - 5
oOffset := int(binary.LittleEndian.Uint32(data[m:]))
if oOffset > m {
- return nil, errors.New("leveldb/table: Reader: invalid filter block (invalid offset)")
+ return nil, r.newErrCorruptedBH(bh, "invalid data-offsets offset")
}
b := &filterBlock{
- filter: filter,
+ bpool: r.bpool,
data: data,
oOffset: oOffset,
baseLg: uint(data[n-1]),
@@ -602,44 +669,111 @@ func (r *Reader) readFilterBlock(bh blockHandle, filter filter.Filter) (*filterB
return b, nil
}
-func (r *Reader) getDataIter(dataBH blockHandle, slice *util.Range, checksum, fillCache bool) iterator.Iterator {
+func (r *Reader) readFilterBlockCached(bh blockHandle, fillCache bool) (*filterBlock, util.Releaser, error) {
if r.cache != nil {
- // Get/set block cache.
- var err error
- cache, ok := r.cache.Get(dataBH.offset, func() (ok bool, value interface{}, charge int, fin cache.SetFin) {
- if !fillCache {
- return
+ var (
+ err error
+ ch *cache.Handle
+ )
+ if fillCache {
+ ch = r.cache.Get(bh.offset, func() (size int, value cache.Value) {
+ var b *filterBlock
+ b, err = r.readFilterBlock(bh)
+ if err != nil {
+ return 0, nil
+ }
+ return cap(b.data), b
+ })
+ } else {
+ ch = r.cache.Get(bh.offset, nil)
+ }
+ if ch != nil {
+ b, ok := ch.Value().(*filterBlock)
+ if !ok {
+ ch.Release()
+ return nil, nil, errors.New("leveldb/table: inconsistent block type")
}
- var dataBlock *block
- dataBlock, err = r.readBlock(dataBH, checksum)
- if err == nil {
- ok = true
- value = dataBlock
- charge = int(dataBH.length)
+ return b, ch, err
+ } else if err != nil {
+ return nil, nil, err
+ }
+ }
+
+ b, err := r.readFilterBlock(bh)
+ return b, b, err
+}
+
+func (r *Reader) getIndexBlock(fillCache bool) (b *block, rel util.Releaser, err error) {
+ if r.indexBlock == nil {
+ return r.readBlockCached(r.indexBH, true, fillCache)
+ }
+ return r.indexBlock, util.NoopReleaser{}, nil
+}
+
+func (r *Reader) getFilterBlock(fillCache bool) (*filterBlock, util.Releaser, error) {
+ if r.filterBlock == nil {
+ return r.readFilterBlockCached(r.filterBH, fillCache)
+ }
+ return r.filterBlock, util.NoopReleaser{}, nil
+}
+
+func (r *Reader) newBlockIter(b *block, bReleaser util.Releaser, slice *util.Range, inclLimit bool) *blockIter {
+ bi := &blockIter{
+ tr: r,
+ block: b,
+ blockReleaser: bReleaser,
+ // Valid key should never be nil.
+ key: make([]byte, 0),
+ dir: dirSOI,
+ riStart: 0,
+ riLimit: b.restartsLen,
+ offsetStart: 0,
+ offsetRealStart: 0,
+ offsetLimit: b.restartsOffset,
+ }
+ if slice != nil {
+ if slice.Start != nil {
+ if bi.Seek(slice.Start) {
+ bi.riStart = b.restartIndex(bi.restartIndex, b.restartsLen, bi.prevOffset)
+ bi.offsetStart = b.restartOffset(bi.riStart)
+ bi.offsetRealStart = bi.prevOffset
+ } else {
+ bi.riStart = b.restartsLen
+ bi.offsetStart = b.restartsOffset
+ bi.offsetRealStart = b.restartsOffset
}
- return
- })
- if err != nil {
- return iterator.NewEmptyIterator(err)
}
- if ok {
- dataBlock := cache.Value().(*block)
- if !dataBlock.checksum && (r.checksum || checksum) {
- if !verifyChecksum(dataBlock.data) {
- return iterator.NewEmptyIterator(errors.New("leveldb/table: Reader: invalid block (checksum mismatch)"))
- }
- dataBlock.checksum = true
+ if slice.Limit != nil {
+ if bi.Seek(slice.Limit) && (!inclLimit || bi.Next()) {
+ bi.offsetLimit = bi.prevOffset
+ bi.riLimit = bi.restartIndex + 1
}
- iter := dataBlock.newIterator(slice, false, cache)
- return iter
+ }
+ bi.reset()
+ if bi.offsetStart > bi.offsetLimit {
+ bi.sErr(errors.New("leveldb/table: invalid slice range"))
}
}
- dataBlock, err := r.readBlock(dataBH, checksum)
+ return bi
+}
+
+func (r *Reader) getDataIter(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator {
+ b, rel, err := r.readBlockCached(dataBH, verifyChecksum, fillCache)
if err != nil {
return iterator.NewEmptyIterator(err)
}
- iter := dataBlock.newIterator(slice, false, nil)
- return iter
+ return r.newBlockIter(b, rel, slice, false)
+}
+
+func (r *Reader) getDataIterErr(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+
+ if r.err != nil {
+ return iterator.NewEmptyIterator(r.err)
+ }
+
+ return r.getDataIter(dataBH, slice, verifyChecksum, fillCache)
}
// NewIterator creates an iterator from the table.
@@ -653,35 +787,44 @@ func (r *Reader) getDataIter(dataBH blockHandle, slice *util.Range, checksum, fi
// when not used.
//
// Also read Iterator documentation of the leveldb/iterator package.
-
func (r *Reader) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+
if r.err != nil {
return iterator.NewEmptyIterator(r.err)
}
+ fillCache := !ro.GetDontFillCache()
+ indexBlock, rel, err := r.getIndexBlock(fillCache)
+ if err != nil {
+ return iterator.NewEmptyIterator(err)
+ }
index := &indexIter{
- blockIter: *r.indexBlock.newIterator(slice, true, nil),
- tableReader: r,
- slice: slice,
- checksum: ro.GetStrict(opt.StrictBlockChecksum),
- fillCache: !ro.GetDontFillCache(),
+ blockIter: r.newBlockIter(indexBlock, rel, slice, true),
+ tr: r,
+ slice: slice,
+ fillCache: !ro.GetDontFillCache(),
}
- return iterator.NewIndexedIterator(index, r.strictIter || ro.GetStrict(opt.StrictIterator), false)
+ return iterator.NewIndexedIterator(index, opt.GetStrict(r.o, ro, opt.StrictReader))
}
-// Find finds key/value pair whose key is greater than or equal to the
-// given key. It returns ErrNotFound if the table doesn't contain
-// such pair.
-//
-// The caller should not modify the contents of the returned slice, but
-// it is safe to modify the contents of the argument after Find returns.
-func (r *Reader) Find(key []byte, ro *opt.ReadOptions) (rkey, value []byte, err error) {
+func (r *Reader) find(key []byte, filtered bool, ro *opt.ReadOptions, noValue bool) (rkey, value []byte, err error) {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+
if r.err != nil {
err = r.err
return
}
- index := r.indexBlock.newIterator(nil, true, nil)
+ indexBlock, rel, err := r.getIndexBlock(true)
+ if err != nil {
+ return
+ }
+ defer rel.Release()
+
+ index := r.newBlockIter(indexBlock, nil, nil, true)
defer index.Release()
if !index.Seek(key) {
err = index.Error()
@@ -692,14 +835,23 @@ func (r *Reader) Find(key []byte, ro *opt.ReadOptions) (rkey, value []byte, err
}
dataBH, n := decodeBlockHandle(index.Value())
if n == 0 {
- err = errors.New("leveldb/table: Reader: invalid table (bad data block handle)")
+ r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle")
return
}
- if r.filterBlock != nil && !r.filterBlock.contains(dataBH.offset, key) {
- err = ErrNotFound
- return
+ if filtered && r.filter != nil {
+ filterBlock, frel, ferr := r.getFilterBlock(true)
+ if ferr == nil {
+ if !filterBlock.contains(r.filter, dataBH.offset, key) {
+ frel.Release()
+ return nil, nil, ErrNotFound
+ }
+ frel.Release()
+ } else if !errors.IsCorrupted(ferr) {
+ err = ferr
+ return
+ }
}
- data := r.getDataIter(dataBH, nil, ro.GetStrict(opt.StrictBlockChecksum), !ro.GetDontFillCache())
+ data := r.getDataIter(dataBH, nil, r.verifyChecksum, !ro.GetDontFillCache())
defer data.Release()
if !data.Seek(key) {
err = data.Error()
@@ -708,23 +860,64 @@ func (r *Reader) Find(key []byte, ro *opt.ReadOptions) (rkey, value []byte, err
}
return
}
+ // Key doesn't use block buffer, no need to copy the buffer.
rkey = data.Key()
- value = data.Value()
+ if !noValue {
+ if r.bpool == nil {
+ value = data.Value()
+ } else {
+ // The value uses the block buffer; since that buffer will be recycled,
+ // the value needs to be copied.
+ value = append([]byte{}, data.Value()...)
+ }
+ }
+ return
+}
+
+// Find finds key/value pair whose key is greater than or equal to the
+// given key. It returns ErrNotFound if the table doesn't contain
+// such pair.
+// If filtered is true then the nearest 'block' will be checked against
+// 'filter data' (if present) and will immediately return ErrNotFound if
+// 'filter data' indicates that such pair doesn't exist.
+//
+// The caller may modify the contents of the returned slice as it is its
+// own copy.
+// It is safe to modify the contents of the argument after Find returns.
+func (r *Reader) Find(key []byte, filtered bool, ro *opt.ReadOptions) (rkey, value []byte, err error) {
+ return r.find(key, filtered, ro, false)
+}
+
+// FindKey finds key that is greater than or equal to the given key.
+// It returns ErrNotFound if the table doesn't contain such key.
+// If filtered is true then the nearest 'block' will be checked against
+// 'filter data' (if present) and will immediately return ErrNotFound if
+// 'filter data' indicates that such key doesn't exist.
+//
+// The caller may modify the contents of the returned slice as it is its
+// own copy.
+// It is safe to modify the contents of the argument after FindKey returns.
+func (r *Reader) FindKey(key []byte, filtered bool, ro *opt.ReadOptions) (rkey []byte, err error) {
+ rkey, _, err = r.find(key, filtered, ro, true)
return
}
// Get gets the value for the given key. It returns errors.ErrNotFound
// if the table does not contain the key.
//
-// The caller should not modify the contents of the returned slice, but
-// it is safe to modify the contents of the argument after Get returns.
+// The caller may modify the contents of the returned slice as it is its
+// own copy.
+// It is safe to modify the contents of the argument after Get returns.
func (r *Reader) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+
if r.err != nil {
err = r.err
return
}
- rkey, value, err := r.Find(key, ro)
+ rkey, value, err := r.find(key, false, ro, false)
if err == nil && r.cmp.Compare(rkey, key) != 0 {
value = nil
err = ErrNotFound
@@ -736,17 +929,26 @@ func (r *Reader) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error)
//
// It is safe to modify the contents of the argument after Get returns.
func (r *Reader) OffsetOf(key []byte) (offset int64, err error) {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+
if r.err != nil {
err = r.err
return
}
- index := r.indexBlock.newIterator(nil, true, nil)
+ indexBlock, rel, err := r.readBlockCached(r.indexBH, true, true)
+ if err != nil {
+ return
+ }
+ defer rel.Release()
+
+ index := r.newBlockIter(indexBlock, nil, nil, true)
defer index.Release()
if index.Seek(key) {
dataBH, n := decodeBlockHandle(index.Value())
if n == 0 {
- err = errors.New("leveldb/table: Reader: invalid table (bad data block handle)")
+ r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle")
return
}
offset = int64(dataBH.offset)
@@ -759,90 +961,147 @@ func (r *Reader) OffsetOf(key []byte) (offset int64, err error) {
return
}
-// NewReader creates a new initialized table reader for the file.
-// The cache is optional and can be nil.
-func NewReader(f io.ReaderAt, size int64, cache cache.Namespace, o *opt.Options) *Reader {
- r := &Reader{
- reader: f,
- cache: cache,
- cmp: o.GetComparer(),
- checksum: o.GetStrict(opt.StrictBlockChecksum),
- strictIter: o.GetStrict(opt.StrictIterator),
+// Release implements util.Releaser.
+// It also closes the file if it is an io.Closer.
+func (r *Reader) Release() {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+
+ if closer, ok := r.reader.(io.Closer); ok {
+ closer.Close()
+ }
+ if r.indexBlock != nil {
+ r.indexBlock.Release()
+ r.indexBlock = nil
}
+ if r.filterBlock != nil {
+ r.filterBlock.Release()
+ r.filterBlock = nil
+ }
+ r.reader = nil
+ r.cache = nil
+ r.bpool = nil
+ r.err = ErrReaderReleased
+}
+
+// NewReader creates a new initialized table reader for the file.
+// The fi, cache and bpool are optional and can be nil.
+//
+// The returned table reader instance is goroutine-safe.
+func NewReader(f io.ReaderAt, size int64, fi *storage.FileInfo, cache *cache.CacheGetter, bpool *util.BufferPool, o *opt.Options) (*Reader, error) {
if f == nil {
- r.err = errors.New("leveldb/table: Reader: nil file")
- return r
+ return nil, errors.New("leveldb/table: nil file")
}
+
+ r := &Reader{
+ fi: fi,
+ reader: f,
+ cache: cache,
+ bpool: bpool,
+ o: o,
+ cmp: o.GetComparer(),
+ verifyChecksum: o.GetStrict(opt.StrictBlockChecksum),
+ }
+
if size < footerLen {
- r.err = errors.New("leveldb/table: Reader: invalid table (file size is too small)")
- return r
+ r.err = r.newErrCorrupted(0, size, "table", "too small")
+ return r, nil
}
+
+ footerPos := size - footerLen
var footer [footerLen]byte
- if _, err := r.reader.ReadAt(footer[:], size-footerLen); err != nil && err != io.EOF {
- r.err = fmt.Errorf("leveldb/table: Reader: invalid table (could not read footer): %v", err)
+ if _, err := r.reader.ReadAt(footer[:], footerPos); err != nil && err != io.EOF {
+ return nil, err
}
if string(footer[footerLen-len(magic):footerLen]) != magic {
- r.err = errors.New("leveldb/table: Reader: invalid table (bad magic number)")
- return r
+ r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad magic number")
+ return r, nil
}
+
+ var n int
// Decode the metaindex block handle.
- metaBH, n := decodeBlockHandle(footer[:])
+ r.metaBH, n = decodeBlockHandle(footer[:])
if n == 0 {
- r.err = errors.New("leveldb/table: Reader: invalid table (bad metaindex block handle)")
- return r
+ r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad metaindex block handle")
+ return r, nil
}
+
// Decode the index block handle.
- indexBH, n := decodeBlockHandle(footer[n:])
+ r.indexBH, n = decodeBlockHandle(footer[n:])
if n == 0 {
- r.err = errors.New("leveldb/table: Reader: invalid table (bad index block handle)")
- return r
- }
- // Read index block.
- r.indexBlock, r.err = r.readBlock(indexBH, true)
- if r.err != nil {
- return r
+ r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad index block handle")
+ return r, nil
}
+
// Read metaindex block.
- metaBlock, err := r.readBlock(metaBH, true)
+ metaBlock, err := r.readBlock(r.metaBH, true)
if err != nil {
- r.err = err
- return r
+ if errors.IsCorrupted(err) {
+ r.err = err
+ return r, nil
+ } else {
+ return nil, err
+ }
}
+
// Set data end.
- r.dataEnd = int64(metaBH.offset)
- metaIter := metaBlock.newIterator(nil, false, nil)
+ r.dataEnd = int64(r.metaBH.offset)
+
+ // Read metaindex.
+ metaIter := r.newBlockIter(metaBlock, nil, nil, true)
for metaIter.Next() {
key := string(metaIter.Key())
if !strings.HasPrefix(key, "filter.") {
continue
}
fn := key[7:]
- var filter filter.Filter
if f0 := o.GetFilter(); f0 != nil && f0.Name() == fn {
- filter = f0
+ r.filter = f0
} else {
for _, f0 := range o.GetAltFilters() {
if f0.Name() == fn {
- filter = f0
+ r.filter = f0
break
}
}
}
- if filter != nil {
+ if r.filter != nil {
filterBH, n := decodeBlockHandle(metaIter.Value())
if n == 0 {
continue
}
+ r.filterBH = filterBH
// Update data end.
r.dataEnd = int64(filterBH.offset)
- filterBlock, err := r.readFilterBlock(filterBH, filter)
- if err != nil {
- continue
- }
- r.filterBlock = filterBlock
break
}
}
metaIter.Release()
- return r
+ metaBlock.Release()
+
+ // Cache index and filter block locally, since we don't have global cache.
+ if cache == nil {
+ r.indexBlock, err = r.readBlock(r.indexBH, true)
+ if err != nil {
+ if errors.IsCorrupted(err) {
+ r.err = err
+ return r, nil
+ } else {
+ return nil, err
+ }
+ }
+ if r.filter != nil {
+ r.filterBlock, err = r.readFilterBlock(r.filterBH)
+ if err != nil {
+ if !errors.IsCorrupted(err) {
+ return nil, err
+ }
+
+ // Don't use filter then.
+ r.filter = nil
+ }
+ }
+ }
+
+ return r, nil
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table.go
index c0ac70d9e..beacdc1f0 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table.go
@@ -133,9 +133,9 @@ Filter block trailer:
+- 4-bytes -+
/ \
- +---------------+---------------+---------------+-------------------------+------------------+
- | offset 1 | .... | offset n | filter offset (4-bytes) | base Lg (1-byte) |
- +-------------- +---------------+---------------+-------------------------+------------------+
+ +---------------+---------------+---------------+-------------------------------+------------------+
+ | data 1 offset | .... | data n offset | data-offsets offset (4-bytes) | base Lg (1-byte) |
+ +-------------- +---------------+---------------+-------------------------------+------------------+
NOTE: All fixed-length integer are little-endian.
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_suite_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_suite_test.go
index bc9eb83cc..6465da6e3 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_suite_test.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_suite_test.go
@@ -3,15 +3,9 @@ package table
import (
"testing"
- . "github.com/onsi/ginkgo"
- . "github.com/onsi/gomega"
-
"github.com/syndtr/goleveldb/leveldb/testutil"
)
func TestTable(t *testing.T) {
- testutil.RunDefer()
-
- RegisterFailHandler(Fail)
- RunSpecs(t, "Table Suite")
+ testutil.RunSuite(t, "Table Suite")
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_test.go
index d7d3b2a4b..4b59b31f5 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_test.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_test.go
@@ -23,7 +23,7 @@ type tableWrapper struct {
}
func (t tableWrapper) TestFind(key []byte) (rkey, rvalue []byte, err error) {
- return t.Reader.Find(key, nil)
+ return t.Reader.Find(key, false, nil)
}
func (t tableWrapper) TestGet(key []byte) (value []byte, err error) {
@@ -59,7 +59,8 @@ var _ = testutil.Defer(func() {
It("Should be able to approximate offset of a key correctly", func() {
Expect(err).ShouldNot(HaveOccurred())
- tr := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()), nil, o)
+ tr, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()), nil, nil, nil, o)
+ Expect(err).ShouldNot(HaveOccurred())
CheckOffset := func(key string, expect, threshold int) {
offset, err := tr.OffsetOf([]byte(key))
Expect(err).ShouldNot(HaveOccurred())
@@ -95,7 +96,7 @@ var _ = testutil.Defer(func() {
tw.Close()
// Opening the table.
- tr := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()), nil, o)
+ tr, _ := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()), nil, nil, nil, o)
return tableWrapper{tr}
}
Test := func(kv *testutil.KeyValue, body func(r *Reader)) func() {
@@ -104,14 +105,16 @@ var _ = testutil.Defer(func() {
if body != nil {
body(db.(tableWrapper).Reader)
}
- testutil.KeyValueTesting(nil, db, *kv)
+ testutil.KeyValueTesting(nil, *kv, db, nil, nil)
}
}
- testutil.AllKeyValueTesting(nil, Build)
+ testutil.AllKeyValueTesting(nil, Build, nil, nil)
Describe("with one key per block", Test(testutil.KeyValue_Generate(nil, 9, 1, 10, 512, 512), func(r *Reader) {
It("should have correct blocks number", func() {
- Expect(r.indexBlock.restartsLen).Should(Equal(9))
+ indexBlock, err := r.readBlock(r.indexBH, true)
+ Expect(err).To(BeNil())
+ Expect(indexBlock.restartsLen).Should(Equal(9))
})
}))
})
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/writer.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/writer.go
index 4e19e93a9..274c95fad 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/writer.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/writer.go
@@ -12,7 +12,7 @@ import (
"fmt"
"io"
- "code.google.com/p/snappy-go/snappy"
+ "github.com/syndtr/gosnappy/snappy"
"github.com/syndtr/goleveldb/leveldb/comparer"
"github.com/syndtr/goleveldb/leveldb/filter"
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/db.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/db.go
index 4b87b5ef6..ec3f177a1 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/db.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/db.go
@@ -12,6 +12,7 @@ import (
. "github.com/onsi/gomega"
+ "github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/util"
)
@@ -34,6 +35,10 @@ type Get interface {
TestGet(key []byte) (value []byte, err error)
}
+// Has is the optional test-target interface for checking key presence
+// through TestHas.
+type Has interface {
+ TestHas(key []byte) (ret bool, err error)
+}
+
type NewIterator interface {
TestNewIterator(slice *util.Range) iterator.Iterator
}
@@ -110,7 +115,7 @@ func (t *DBTesting) TestAllPresent() {
func (t *DBTesting) TestDeletedKey(key []byte) {
_, err := t.DB.TestGet(key)
- Expect(err).Should(Equal(util.ErrNotFound), "Get on deleted key %q, %s", key, t.text())
+ Expect(err).Should(Equal(errors.ErrNotFound), "Get on deleted key %q, %s", key, t.text())
}
func (t *DBTesting) TestAllDeleted() {
@@ -212,5 +217,6 @@ func DoDBTesting(t *DBTesting) {
}
DoIteratorTesting(&it)
+ iter.Release()
}
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/ginkgo.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/ginkgo.go
new file mode 100644
index 000000000..82f3d0e81
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/ginkgo.go
@@ -0,0 +1,21 @@
+package testutil
+
+import (
+ . "github.com/onsi/ginkgo"
+ . "github.com/onsi/gomega"
+)
+
+// RunSuite runs the Ginkgo suite called name against t.
+//
+// It calls RunDefer() with no arguments first, registers a
+// SynchronizedBeforeSuite hook that calls RunDefer("setup") and a
+// SynchronizedAfterSuite hook that calls RunDefer("teardown"), installs
+// Fail as the fail handler and finally calls RunSpecs.
+// NOTE(review): RunDefer presumably runs the functions registered through
+// Defer for the given group - confirm against its definition.
+func RunSuite(t GinkgoTestingT, name string) {
+ RunDefer()
+
+ SynchronizedBeforeSuite(func() []byte {
+ RunDefer("setup")
+ return nil
+ }, func(data []byte) {})
+ SynchronizedAfterSuite(func() {
+ RunDefer("teardown")
+ }, func() {})
+
+ RegisterFailHandler(Fail)
+ RunSpecs(t, name)
+}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/kvtest.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/kvtest.go
index 4fc75b6f2..a0b58f0e7 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/kvtest.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/kvtest.go
@@ -13,16 +13,28 @@ import (
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
+ "github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/util"
)
-func KeyValueTesting(rnd *rand.Rand, p DB, kv KeyValue) {
+func KeyValueTesting(rnd *rand.Rand, kv KeyValue, p DB, setup func(KeyValue) DB, teardown func(DB)) {
if rnd == nil {
rnd = NewRand()
}
- if db, ok := p.(Find); ok {
- It("Should find all keys with Find", func() {
+ if p == nil {
+ BeforeEach(func() {
+ p = setup(kv)
+ })
+ if teardown != nil {
+ AfterEach(func() {
+ teardown(p)
+ })
+ }
+ }
+
+ It("Should find all keys with Find", func() {
+ if db, ok := p.(Find); ok {
ShuffledIndex(nil, kv.Len(), 1, func(i int) {
key_, key, value := kv.IndexInexact(i)
@@ -38,9 +50,11 @@ func KeyValueTesting(rnd *rand.Rand, p DB, kv KeyValue) {
Expect(rkey).Should(Equal(key))
Expect(rvalue).Should(Equal(value), "Value for key %q (%q)", key_, key)
})
- })
+ }
+ })
- It("Should return error if the key is not present", func() {
+ It("Should return error if the key is not present", func() {
+ if db, ok := p.(Find); ok {
var key []byte
if kv.Len() > 0 {
key_, _ := kv.Index(kv.Len() - 1)
@@ -48,12 +62,12 @@ func KeyValueTesting(rnd *rand.Rand, p DB, kv KeyValue) {
}
rkey, _, err := db.TestFind(key)
Expect(err).Should(HaveOccurred(), "Find for key %q yield key %q", key, rkey)
- Expect(err).Should(Equal(util.ErrNotFound))
- })
- }
+ Expect(err).Should(Equal(errors.ErrNotFound))
+ }
+ })
- if db, ok := p.(Get); ok {
- It("Should only find exact key with Get", func() {
+ It("Should only find exact key with Get", func() {
+ if db, ok := p.(Get); ok {
ShuffledIndex(nil, kv.Len(), 1, func(i int) {
key_, key, value := kv.IndexInexact(i)
@@ -66,14 +80,34 @@ func KeyValueTesting(rnd *rand.Rand, p DB, kv KeyValue) {
if len(key_) > 0 {
_, err = db.TestGet(key_)
Expect(err).Should(HaveOccurred(), "Error for key %q", key_)
- Expect(err).Should(Equal(util.ErrNotFound))
+ Expect(err).Should(Equal(errors.ErrNotFound))
}
})
- })
- }
+ }
+ })
+
+ It("Should only find present key with Has", func() {
+ if db, ok := p.(Has); ok {
+ ShuffledIndex(nil, kv.Len(), 1, func(i int) {
+ key_, key, _ := kv.IndexInexact(i)
+
+ // Using exact key.
+ ret, err := db.TestHas(key)
+ Expect(err).ShouldNot(HaveOccurred(), "Error for key %q", key)
+ Expect(ret).Should(BeTrue(), "False for key %q", key)
- if db, ok := p.(NewIterator); ok {
- TestIter := func(r *util.Range, _kv KeyValue) {
+ // Using inexact key.
+ if len(key_) > 0 {
+ ret, err = db.TestHas(key_)
+ Expect(err).ShouldNot(HaveOccurred(), "Error for key %q", key_)
+ Expect(ret).ShouldNot(BeTrue(), "True for key %q", key)
+ }
+ })
+ }
+ })
+
+ TestIter := func(r *util.Range, _kv KeyValue) {
+ if db, ok := p.(NewIterator); ok {
iter := db.TestNewIterator(r)
Expect(iter.Error()).ShouldNot(HaveOccurred())
@@ -83,46 +117,62 @@ func KeyValueTesting(rnd *rand.Rand, p DB, kv KeyValue) {
}
DoIteratorTesting(&t)
+ iter.Release()
}
+ }
- It("Should iterates and seeks correctly", func(done Done) {
- TestIter(nil, kv.Clone())
- done <- true
- }, 3.0)
-
- RandomIndex(rnd, kv.Len(), kv.Len(), func(i int) {
- type slice struct {
- r *util.Range
- start, limit int
- }
+ It("Should iterates and seeks correctly", func(done Done) {
+ TestIter(nil, kv.Clone())
+ done <- true
+ }, 3.0)
- key_, _, _ := kv.IndexInexact(i)
- for _, x := range []slice{
- {&util.Range{Start: key_, Limit: nil}, i, kv.Len()},
- {&util.Range{Start: nil, Limit: key_}, 0, i},
- } {
- It(fmt.Sprintf("Should iterates and seeks correctly of a slice %d .. %d", x.start, x.limit), func(done Done) {
- TestIter(x.r, kv.Slice(x.start, x.limit))
- done <- true
- }, 3.0)
- }
- })
+ RandomIndex(rnd, kv.Len(), Min(kv.Len(), 50), func(i int) {
+ type slice struct {
+ r *util.Range
+ start, limit int
+ }
- RandomRange(rnd, kv.Len(), kv.Len(), func(start, limit int) {
- It(fmt.Sprintf("Should iterates and seeks correctly of a slice %d .. %d", start, limit), func(done Done) {
- r := kv.Range(start, limit)
- TestIter(&r, kv.Slice(start, limit))
+ key_, _, _ := kv.IndexInexact(i)
+ for _, x := range []slice{
+ {&util.Range{Start: key_, Limit: nil}, i, kv.Len()},
+ {&util.Range{Start: nil, Limit: key_}, 0, i},
+ } {
+ It(fmt.Sprintf("Should iterates and seeks correctly of a slice %d .. %d", x.start, x.limit), func(done Done) {
+ TestIter(x.r, kv.Slice(x.start, x.limit))
done <- true
}, 3.0)
- })
- }
+ }
+ })
+
+ RandomRange(rnd, kv.Len(), Min(kv.Len(), 50), func(start, limit int) {
+ It(fmt.Sprintf("Should iterates and seeks correctly of a slice %d .. %d", start, limit), func(done Done) {
+ r := kv.Range(start, limit)
+ TestIter(&r, kv.Slice(start, limit))
+ done <- true
+ }, 3.0)
+ })
}
-func AllKeyValueTesting(rnd *rand.Rand, body func(kv KeyValue) DB) {
+func AllKeyValueTesting(rnd *rand.Rand, body, setup func(KeyValue) DB, teardown func(DB)) {
Test := func(kv *KeyValue) func() {
return func() {
- db := body(*kv)
- KeyValueTesting(rnd, db, *kv)
+ var p DB
+ if setup != nil {
+ Defer("setup", func() {
+ p = setup(*kv)
+ })
+ }
+ if teardown != nil {
+ Defer("teardown", func() {
+ teardown(p)
+ })
+ }
+ if body != nil {
+ p = body(*kv)
+ }
+ KeyValueTesting(rnd, *kv, p, func(KeyValue) DB {
+ return p
+ }, nil)
}
}
@@ -133,4 +183,5 @@ func AllKeyValueTesting(rnd *rand.Rand, body func(kv KeyValue) DB) {
Describe("with big value", Test(KeyValue_BigValue()))
Describe("with special key", Test(KeyValue_SpecialKey()))
Describe("with multiple key/value", Test(KeyValue_MultipleKeyValue()))
+ Describe("with generated key/value", Test(KeyValue_Generate(nil, 120, 1, 50, 10, 120)))
}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/storage.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/storage.go
index 0f8d77a73..59c496d54 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/storage.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/storage.go
@@ -397,6 +397,7 @@ func (s *Storage) logI(format string, args ...interface{}) {
func (s *Storage) Log(str string) {
s.log(1, "Log: "+str)
+ s.Storage.Log(str)
}
func (s *Storage) Lock() (r util.Releaser, err error) {
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/util.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/util.go
index 38fe25d52..97c5294b1 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/util.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/util.go
@@ -155,3 +155,17 @@ func RandomRange(rnd *rand.Rand, n, round int, fn func(start, limit int)) {
}
return
}
+
+// Max returns the larger of x and y.
+func Max(x, y int) int {
+ if x > y {
+ return x
+ }
+ return y
+}
+
+// Min returns the smaller of x and y.
+func Min(x, y int) int {
+ if x < y {
+ return x
+ }
+ return y
+}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil_test.go
index c1402fda3..25bf2b29f 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil_test.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil_test.go
@@ -34,6 +34,10 @@ func (t *testingDB) TestGet(key []byte) (value []byte, err error) {
return t.Get(key, t.ro)
}
+// TestHas reports whether key is present by calling t.Has with the
+// wrapper's read options (t.ro).
+func (t *testingDB) TestHas(key []byte) (ret bool, err error) {
+ return t.Has(key, t.ro)
+}
+
func (t *testingDB) TestNewIterator(slice *util.Range) iterator.Iterator {
return t.NewIterator(slice, t.ro)
}
@@ -48,6 +52,7 @@ func (t *testingDB) TestClose() {
func newTestingDB(o *opt.Options, ro *opt.ReadOptions, wo *opt.WriteOptions) *testingDB {
stor := testutil.NewStorage()
db, err := Open(stor, o)
+ // FIXME: This may be called from outside It, which may cause panic.
Expect(err).NotTo(HaveOccurred())
return &testingDB{
DB: db,
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util.go
index a43d2e460..1a5bf71a3 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util.go
@@ -14,10 +14,10 @@ import (
)
func shorten(str string) string {
- if len(str) <= 4 {
+ if len(str) <= 8 {
return str
}
- return str[:1] + ".." + str[len(str)-1:]
+ return str[:3] + ".." + str[len(str)-3:]
}
var bunits = [...]string{"", "Ki", "Mi", "Gi"}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go
new file mode 100644
index 000000000..2b8453d75
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go
@@ -0,0 +1,238 @@
+// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package util
+
+import (
+ "fmt"
+ "sync"
+ "sync/atomic"
+ "time"
+)
+
+// buffer pairs a byte slice with a miss counter.
+// NOTE(review): no use of this type is visible in this file - confirm
+// whether it is still needed.
+type buffer struct {
+ b []byte
+ miss int
+}
+
+// BufferPool is a pool of reusable byte slices. Buffers are queued in six
+// size classes chosen by poolNum, relative to the baseline size given to
+// NewBufferPool.
+type BufferPool struct {
+ pool [6]chan []byte // queued buffers, one channel per size class
+ size [5]uint32 // capacity used by Get when allocating for classes 1..5 (index is class-1)
+ sizeMiss [5]uint32 // per class: Gets whose n exceeded size; at 20 size is set to n and this resets
+ sizeHalf [5]uint32 // per class: Gets that found a buffer of at least 2n; at 20 size is set to cap/2 and this resets
+ baseline [4]int // upper bounds of classes 1..4
+ baseline0 int // baseline size; class 0 holds baseline0/2 < n <= baseline0
+
+ mu sync.RWMutex // Get and Put hold the read lock, Close holds the write lock
+ closed bool // set by Close; afterwards Get only allocates and Put is a no-op
+ closeC chan struct{} // Close sends here to make drain shut down
+
+ // Statistics, updated with atomic adds by Get and Put.
+ get uint32 // Get calls on an open pool
+ put uint32 // Put calls on an open pool
+ half uint32 // Gets that found a buffer with capacity of at least 2n
+ less uint32 // Gets that reused a buffer with n < capacity < 2n
+ equal uint32 // Gets that reused a buffer with capacity == n
+ greater uint32 // Gets that found a buffer with capacity < n
+ miss uint32 // Gets that found the class queue empty
+}
+
+// poolNum maps a size n to an index into p.pool. Class 0 is checked first
+// and covers baseline0/2 < n <= baseline0; otherwise the result is one plus
+// the index of the first p.baseline bound that is >= n, or
+// len(p.baseline)+1 when n exceeds every bound.
+func (p *BufferPool) poolNum(n int) int {
+ if n <= p.baseline0 && n > p.baseline0/2 {
+ return 0
+ }
+ for i, x := range p.baseline {
+ if n <= x {
+ return i + 1
+ }
+ }
+ return len(p.baseline) + 1
+}
+
+// Get returns a buffer with length of n.
+//
+// A nil or closed pool always allocates a fresh buffer. Otherwise one
+// buffer is taken, without blocking, from the size class chosen by
+// poolNum(n). It is reused when its capacity equals n, or exceeds n by
+// less than n; in every other case a new buffer is allocated.
+func (p *BufferPool) Get(n int) []byte {
+ if p == nil {
+ return make([]byte, n)
+ }
+
+ p.mu.RLock()
+ defer p.mu.RUnlock()
+
+ if p.closed {
+ return make([]byte, n)
+ }
+
+ atomic.AddUint32(&p.get, 1)
+
+ poolNum := p.poolNum(n)
+ pool := p.pool[poolNum]
+ if poolNum == 0 {
+ // Fast path.
+ select {
+ case b := <-pool:
+ switch {
+ case cap(b) > n:
+ if cap(b)-n >= n {
+ // b has at least twice the needed capacity: requeue it if
+ // there is room and allocate an exact-size buffer instead.
+ atomic.AddUint32(&p.half, 1)
+ select {
+ case pool <- b:
+ default:
+ }
+ return make([]byte, n)
+ } else {
+ atomic.AddUint32(&p.less, 1)
+ return b[:n]
+ }
+ case cap(b) == n:
+ atomic.AddUint32(&p.equal, 1)
+ return b[:n]
+ default:
+ // b is smaller than n; it is not requeued.
+ atomic.AddUint32(&p.greater, 1)
+ }
+ default:
+ atomic.AddUint32(&p.miss, 1)
+ }
+
+ // Class 0 allocations always get the baseline capacity.
+ return make([]byte, n, p.baseline0)
+ } else {
+ sizePtr := &p.size[poolNum-1]
+
+ select {
+ case b := <-pool:
+ switch {
+ case cap(b) > n:
+ if cap(b)-n >= n {
+ atomic.AddUint32(&p.half, 1)
+ sizeHalfPtr := &p.sizeHalf[poolNum-1]
+ // On the 20th such hit, set the class size to half of this
+ // buffer's capacity and drop the buffer; otherwise requeue
+ // it if there is room.
+ if atomic.AddUint32(sizeHalfPtr, 1) == 20 {
+ atomic.StoreUint32(sizePtr, uint32(cap(b)/2))
+ atomic.StoreUint32(sizeHalfPtr, 0)
+ } else {
+ select {
+ case pool <- b:
+ default:
+ }
+ }
+ return make([]byte, n)
+ } else {
+ atomic.AddUint32(&p.less, 1)
+ return b[:n]
+ }
+ case cap(b) == n:
+ atomic.AddUint32(&p.equal, 1)
+ return b[:n]
+ default:
+ atomic.AddUint32(&p.greater, 1)
+ // b is smaller than n: requeue it only if its capacity still
+ // meets the current class size.
+ if uint32(cap(b)) >= atomic.LoadUint32(sizePtr) {
+ select {
+ case pool <- b:
+ default:
+ }
+ }
+ }
+ default:
+ atomic.AddUint32(&p.miss, 1)
+ }
+
+ // Allocate. A request above the class size sets the size directly
+ // when it is still unset, or after every 20th such request.
+ if size := atomic.LoadUint32(sizePtr); uint32(n) > size {
+ if size == 0 {
+ atomic.CompareAndSwapUint32(sizePtr, 0, uint32(n))
+ } else {
+ sizeMissPtr := &p.sizeMiss[poolNum-1]
+ if atomic.AddUint32(sizeMissPtr, 1) == 20 {
+ atomic.StoreUint32(sizePtr, uint32(n))
+ atomic.StoreUint32(sizeMissPtr, 0)
+ }
+ }
+ return make([]byte, n)
+ } else {
+ return make([]byte, n, size)
+ }
+ }
+}
+
+// Put adds given buffer to the pool.
+//
+// The buffer is queued in the size class selected by its capacity. It is
+// silently dropped when that class is full, and Put does nothing on a nil
+// or closed pool.
+func (p *BufferPool) Put(b []byte) {
+ if p == nil {
+ return
+ }
+
+ p.mu.RLock()
+ defer p.mu.RUnlock()
+
+ if p.closed {
+ return
+ }
+
+ atomic.AddUint32(&p.put, 1)
+
+ pool := p.pool[p.poolNum(cap(b))]
+ select {
+ case pool <- b:
+ default:
+ }
+
+}
+
+// Close marks the pool as closed and signals the drain goroutine through
+// closeC. Only the first call has an effect; calling Close on a nil pool
+// is a no-op.
+func (p *BufferPool) Close() {
+ if p == nil {
+ return
+ }
+
+ p.mu.Lock()
+ if !p.closed {
+ p.closed = true
+ p.closeC <- struct{}{}
+ }
+ p.mu.Unlock()
+}
+
+// String returns a one-line summary of the pool state, or "<nil>" for a
+// nil pool. The fields are, in order: B baseline0, Z size, Zm sizeMiss,
+// Zh sizeHalf, G get, P put, H half, < less, = equal, > greater, M miss.
+// The values are read as plain fields, without the lock or atomic loads.
+func (p *BufferPool) String() string {
+ if p == nil {
+ return "<nil>"
+ }
+
+ return fmt.Sprintf("BufferPool{B·%d Z·%v Zm·%v Zh·%v G·%d P·%d H·%d <·%d =·%d >·%d M·%d}",
+ p.baseline0, p.size, p.sizeMiss, p.sizeHalf, p.get, p.put, p.half, p.less, p.equal, p.greater, p.miss)
+}
+
+// drain is the pool's background loop, started as a goroutine by
+// NewBufferPool. Every two seconds it discards at most one queued buffer
+// from each size class. When Close signals closeC, it closes closeC and
+// every pool channel and returns.
+func (p *BufferPool) drain() {
+	ticker := time.NewTicker(2 * time.Second)
+	// Stop the ticker on exit; otherwise it keeps firing after the pool has
+	// been closed and its resources are never released.
+	defer ticker.Stop()
+	for {
+		select {
+		case <-ticker.C:
+			for _, ch := range p.pool {
+				// Non-blocking receive: drop one buffer if the class has any.
+				select {
+				case <-ch:
+				default:
+				}
+			}
+		case <-p.closeC:
+			close(p.closeC)
+			for _, ch := range p.pool {
+				close(ch)
+			}
+			return
+		}
+	}
+}
+
+// NewBufferPool creates a new initialized 'buffer pool'.
+//
+// baseline must be positive, otherwise NewBufferPool panics. The size
+// class bounds are baseline/4, baseline/2, baseline*2 and baseline*4, and
+// the six class queues hold 2, 2, 4, 4, 2 and 1 buffers respectively.
+// The drain goroutine is started before returning.
+func NewBufferPool(baseline int) *BufferPool {
+ if baseline <= 0 {
+ panic("baseline can't be <= 0")
+ }
+ p := &BufferPool{
+ baseline0: baseline,
+ baseline: [...]int{baseline / 4, baseline / 2, baseline * 2, baseline * 4},
+ closeC: make(chan struct{}, 1),
+ }
+ for i, cap := range []int{2, 2, 4, 4, 2, 1} {
+ p.pool[i] = make(chan []byte, cap)
+ }
+ go p.drain()
+ return p
+}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/pool.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/pool.go
new file mode 100644
index 000000000..1f7fdd41f
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/pool.go
@@ -0,0 +1,21 @@
+// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// +build go1.3
+
+package util
+
+import (
+ "sync"
+)
+
+// Pool is the go1.3+ build of the object pool; it embeds sync.Pool and so
+// exposes its Get and Put methods.
+type Pool struct {
+ sync.Pool
+}
+
+// NewPool returns an empty Pool. The cap argument is ignored in this
+// build.
+func NewPool(cap int) *Pool {
+ return &Pool{}
+}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/pool_legacy.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/pool_legacy.go
new file mode 100644
index 000000000..27b8d03be
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/pool_legacy.go
@@ -0,0 +1,33 @@
+// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// +build !go1.3
+
+package util
+
+// Pool is the pre-go1.3 build of the object pool, backed by a buffered
+// channel.
+type Pool struct {
+ pool chan interface{}
+}
+
+// Get returns a pooled value without blocking, or nil when the pool is
+// empty.
+func (p *Pool) Get() interface{} {
+ select {
+ case x := <-p.pool:
+ return x
+ default:
+ return nil
+ }
+}
+
+// Put stores x in the pool without blocking; x is dropped when the pool
+// is full.
+func (p *Pool) Put(x interface{}) {
+ select {
+ case p.pool <- x:
+ default:
+ }
+}
+
+// NewPool returns a Pool that can hold up to cap values.
+func NewPool(cap int) *Pool {
+ return &Pool{pool: make(chan interface{}, cap)}
+}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/range.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/range.go
index da0583123..85159583d 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/range.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/range.go
@@ -14,3 +14,19 @@ type Range struct {
// Limit of the key range, not include in the range.
Limit []byte
}
+
+// BytesPrefix returns a key range that satisfies the given prefix.
+// This is only applicable for the standard 'bytes comparer'.
+//
+// The range starts at prefix itself. The limit is a copy of prefix cut
+// after its last byte that is below 0xff, with that byte incremented. It
+// stays nil when prefix is empty or consists only of 0xff bytes.
+func BytesPrefix(prefix []byte) *Range {
+ var limit []byte
+ for i := len(prefix) - 1; i >= 0; i-- {
+ c := prefix[i]
+ if c < 0xff {
+ limit = make([]byte, i+1)
+ copy(limit, prefix)
+ limit[i] = c + 1
+ break
+ }
+ }
+ return &Range{prefix, limit}
+}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/util.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/util.go
index 229c7d41f..f35976865 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/util.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/util.go
@@ -12,7 +12,8 @@ import (
)
var (
- ErrNotFound = errors.New("leveldb: not found")
+ ErrReleased = errors.New("leveldb: resource already relesed")
+ ErrHasReleaser = errors.New("leveldb: releaser already defined")
)
// Releaser is the interface that wraps the basic Release method.
@@ -27,23 +28,46 @@ type ReleaseSetter interface {
// SetReleaser associates the given releaser to the resources. The
// releaser will be called once coresponding resources released.
// Calling SetReleaser with nil will clear the releaser.
+ //
+ // This will panic if a releaser is already present or the corresponding
+ // resource is already released. The releaser should be cleared first
+ // before a new one is assigned.
SetReleaser(releaser Releaser)
}
// BasicReleaser provides basic implementation of Releaser and ReleaseSetter.
type BasicReleaser struct {
releaser Releaser
+ released bool
+}
+
+// Released returns whether Release method already called.
+func (r *BasicReleaser) Released() bool {
+ return r.released
}
// Release implements Releaser.Release.
func (r *BasicReleaser) Release() {
- if r.releaser != nil {
- r.releaser.Release()
- r.releaser = nil
+ if !r.released {
+ if r.releaser != nil {
+ r.releaser.Release()
+ r.releaser = nil
+ }
+ r.released = true
}
}
// SetReleaser implements ReleaseSetter.SetReleaser.
func (r *BasicReleaser) SetReleaser(releaser Releaser) {
+ if r.released {
+ panic(ErrReleased)
+ }
+ if r.releaser != nil && releaser != nil {
+ panic(ErrHasReleaser)
+ }
r.releaser = releaser
}
+
+// NoopReleaser is a releaser whose Release method does nothing.
+type NoopReleaser struct{}
+
+// Release does nothing.
+func (NoopReleaser) Release() {}
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/version.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/version.go
index 4c54d6480..88a52f53e 100644
--- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/version.go
+++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/version.go
@@ -7,7 +7,6 @@
package leveldb
import (
- "errors"
"sync/atomic"
"unsafe"
@@ -16,19 +15,6 @@ import (
"github.com/syndtr/goleveldb/leveldb/util"
)
-var levelMaxSize [kNumLevels]float64
-
-func init() {
- // Precompute max size of each level
- for level := range levelMaxSize {
- res := float64(10 * 1048576)
- for n := level; n > 1; n-- {
- res *= 10
- }
- levelMaxSize[level] = res
- }
-}
-
type tSet struct {
level int
table *tFile
@@ -37,21 +23,26 @@ type tSet struct {
type version struct {
s *session
- tables [kNumLevels]tFiles
+ tables []tFiles
// Level that should be compacted next and its compaction score.
- // Score < 1 means compaction is not strictly needed. These fields
- // are initialized by ComputeCompaction()
+ // Score < 1 means compaction is not strictly needed. These fields
+ // are initialized by computeCompaction()
cLevel int
cScore float64
cSeek unsafe.Pointer
- ref int
+ ref int
+ // Succeeding version.
next *version
}
-func (v *version) release_NB() {
+// newVersion returns a version bound to session s, with one empty table
+// list per level as reported by s.o.GetNumLevel().
+func newVersion(s *session) *version {
+ return &version{s: s, tables: make([]tFiles, s.o.GetNumLevel())}
+}
+
+func (v *version) releaseNB() {
v.ref--
if v.ref > 0 {
return
@@ -60,8 +51,6 @@ func (v *version) release_NB() {
panic("negative version ref")
}
- s := v.s
-
tables := make(map[uint64]bool)
for _, tt := range v.next.tables {
for _, t := range tt {
@@ -74,145 +63,184 @@ func (v *version) release_NB() {
for _, t := range tt {
num := t.file.Num()
if _, ok := tables[num]; !ok {
- s.tops.remove(t)
+ v.s.tops.remove(t)
}
}
}
- v.next.release_NB()
+ v.next.releaseNB()
v.next = nil
}
func (v *version) release() {
v.s.vmu.Lock()
- v.release_NB()
+ v.releaseNB()
v.s.vmu.Unlock()
}
-func (v *version) get(key iKey, ro *opt.ReadOptions) (value []byte, cstate bool, err error) {
- s := v.s
-
- ukey := key.ukey()
+func (v *version) walkOverlapping(ikey iKey, f func(level int, t *tFile) bool, lf func(level int) bool) {
+ ukey := ikey.ukey()
- var tset *tSet
- tseek := true
-
- // We can search level-by-level since entries never hop across
- // levels. Therefore we are guaranteed that if we find data
- // in an smaller level, later levels are irrelevant.
- for level, ts := range v.tables {
- if len(ts) == 0 {
+ // Walk tables level-by-level.
+ for level, tables := range v.tables {
+ if len(tables) == 0 {
continue
}
if level == 0 {
// Level-0 files may overlap each other. Find all files that
- // overlap user_key and process them in order from newest to
- var tmp tFiles
- for _, t := range ts {
- if s.icmp.uCompare(ukey, t.min.ukey()) >= 0 &&
- s.icmp.uCompare(ukey, t.max.ukey()) <= 0 {
- tmp = append(tmp, t)
+ // overlap ukey.
+ for _, t := range tables {
+ if t.overlaps(v.s.icmp, ukey, ukey) {
+ if !f(level, t) {
+ return
+ }
}
}
-
- if len(tmp) == 0 {
- continue
- }
-
- tmp.sortByNum()
- ts = tmp
} else {
- i := ts.searchMax(key, s.icmp)
- if i >= len(ts) || s.icmp.uCompare(ukey, ts[i].min.ukey()) < 0 {
- continue
+ if i := tables.searchMax(v.s.icmp, ikey); i < len(tables) {
+ t := tables[i]
+ if v.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 {
+ if !f(level, t) {
+ return
+ }
+ }
}
+ }
- ts = ts[i : i+1]
+ if lf != nil && !lf(level) {
+ return
}
+ }
+}
- var l0found bool
- var l0seq uint64
- var l0type vType
- var l0value []byte
- for _, t := range ts {
- if tseek {
- if tset == nil {
- tset = &tSet{level, t}
- } else if tset.table.incrSeek() <= 0 {
- cstate = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset))
- tseek = false
- }
- }
+func (v *version) get(ikey iKey, ro *opt.ReadOptions, noValue bool) (value []byte, tcomp bool, err error) {
+ ukey := ikey.ukey()
- var _rkey, rval []byte
- _rkey, rval, err = s.tops.get(t, key, ro)
- if err == ErrNotFound {
- continue
- } else if err != nil {
- return
+ var (
+ tset *tSet
+ tseek bool
+
+ // Level-0.
+ zfound bool
+ zseq uint64
+ zkt kType
+ zval []byte
+ )
+
+ err = ErrNotFound
+
+ // Since entries never hop across levels, finding the key/value
+ // in a smaller level makes later levels irrelevant.
+ v.walkOverlapping(ikey, func(level int, t *tFile) bool {
+ if !tseek {
+ if tset == nil {
+ tset = &tSet{level, t}
+ } else {
+ tseek = true
}
+ }
- rkey := iKey(_rkey)
- if seq, t, ok := rkey.parseNum(); ok {
- if s.icmp.uCompare(ukey, rkey.ukey()) == 0 {
- if level == 0 {
- if seq >= l0seq {
- l0found = true
- l0seq = seq
- l0type = t
- l0value = rval
- }
- } else {
- switch t {
- case tVal:
- value = rval
- case tDel:
- err = ErrNotFound
- default:
- panic("invalid type")
- }
- return
+ var (
+ fikey, fval []byte
+ ferr error
+ )
+ if noValue {
+ fikey, ferr = v.s.tops.findKey(t, ikey, ro)
+ } else {
+ fikey, fval, ferr = v.s.tops.find(t, ikey, ro)
+ }
+ switch ferr {
+ case nil:
+ case ErrNotFound:
+ return true
+ default:
+ err = ferr
+ return false
+ }
+
+ if fukey, fseq, fkt, fkerr := parseIkey(fikey); fkerr == nil {
+ if v.s.icmp.uCompare(ukey, fukey) == 0 {
+ if level == 0 {
+ if fseq >= zseq {
+ zfound = true
+ zseq = fseq
+ zkt = fkt
+ zval = fval
}
+ } else {
+ switch fkt {
+ case ktVal:
+ value = fval
+ err = nil
+ case ktDel:
+ default:
+ panic("leveldb: invalid iKey type")
+ }
+ return false
}
- } else {
- err = errors.New("leveldb: internal key corrupted")
- return
}
+ } else {
+ err = fkerr
+ return false
}
- if level == 0 && l0found {
- switch l0type {
- case tVal:
- value = l0value
- case tDel:
- err = ErrNotFound
+
+ return true
+ }, func(level int) bool {
+ if zfound {
+ switch zkt {
+ case ktVal:
+ value = zval
+ err = nil
+ case ktDel:
default:
- panic("invalid type")
+ panic("leveldb: invalid iKey type")
}
- return
+ return false
}
+
+ return true
+ })
+
+ if tseek && tset.table.consumeSeek() <= 0 {
+ tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset))
}
- err = ErrNotFound
return
}
-func (v *version) getIterators(slice *util.Range, ro *opt.ReadOptions) (its []iterator.Iterator) {
- s := v.s
+// sampleSeek walks the tables overlapping ikey and remembers the first one
+// visited. If a second overlapping table is visited, it calls consumeSeek
+// on the first table and, when the result is <= 0, tries to install that
+// table in v.cSeek with a compare-and-swap from nil; the walk then stops.
+// tcomp reports whether the compare-and-swap succeeded.
+// NOTE(review): consumeSeek presumably decrements the table's remaining
+// seek budget - confirm against tFile.
+func (v *version) sampleSeek(ikey iKey) (tcomp bool) {
+ var tset *tSet
+
+ v.walkOverlapping(ikey, func(level int, t *tFile) bool {
+ if tset == nil {
+ tset = &tSet{level, t}
+ return true
+ } else {
+ if tset.table.consumeSeek() <= 0 {
+ tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset))
+ }
+ return false
+ }
+ }, nil)
+
+ return
+}
+
+func (v *version) getIterators(slice *util.Range, ro *opt.ReadOptions) (its []iterator.Iterator) {
// Merge all level zero files together since they may overlap
for _, t := range v.tables[0] {
- it := s.tops.newIterator(t, slice, ro)
+ it := v.s.tops.newIterator(t, slice, ro)
its = append(its, it)
}
- strict := s.o.GetStrict(opt.StrictIterator) || ro.GetStrict(opt.StrictIterator)
- for _, tt := range v.tables[1:] {
- if len(tt) == 0 {
+ strict := opt.GetStrict(v.s.o.Options, ro, opt.StrictReader)
+ for _, tables := range v.tables[1:] {
+ if len(tables) == 0 {
continue
}
- it := iterator.NewIndexedIterator(tt.newIndexIterator(s.tops, s.icmp, slice, ro), strict, true)
+ it := iterator.NewIndexedIterator(tables.newIndexIterator(v.s.tops, v.s.icmp, slice, ro), strict)
its = append(its, it)
}
@@ -220,7 +248,7 @@ func (v *version) getIterators(slice *util.Range, ro *opt.ReadOptions) (its []it
}
func (v *version) newStaging() *versionStaging {
- return &versionStaging{base: v}
+ return &versionStaging{base: v, tables: make([]tablesScratch, v.s.o.GetNumLevel())}
}
// Spawn a new version based on this version.
@@ -242,25 +270,25 @@ func (v *version) tLen(level int) int {
return len(v.tables[level])
}
-func (v *version) offsetOf(key iKey) (n uint64, err error) {
- for level, tt := range v.tables {
- for _, t := range tt {
- if v.s.icmp.Compare(t.max, key) <= 0 {
- // Entire file is before "key", so just add the file size
+func (v *version) offsetOf(ikey iKey) (n uint64, err error) {
+ for level, tables := range v.tables {
+ for _, t := range tables {
+ if v.s.icmp.Compare(t.imax, ikey) <= 0 {
+ // Entire file is before "ikey", so just add the file size
n += t.size
- } else if v.s.icmp.Compare(t.min, key) > 0 {
- // Entire file is after "key", so ignore
+ } else if v.s.icmp.Compare(t.imin, ikey) > 0 {
+ // Entire file is after "ikey", so ignore
if level > 0 {
// Files other than level 0 are sorted by meta->min, so
// no further files in this level will contain data for
- // "key".
+ // "ikey".
break
}
} else {
- // "key" falls in the range for this table. Add the
- // approximate offset of "key" within the table.
+ // "ikey" falls in the range for this table. Add the
+ // approximate offset of "ikey" within the table.
var nn uint64
- nn, err = v.s.tops.offsetOf(t, key)
+ nn, err = v.s.tops.offsetOf(t, ikey)
if err != nil {
return 0, err
}
@@ -272,15 +300,16 @@ func (v *version) offsetOf(key iKey) (n uint64, err error) {
return
}
-func (v *version) pickLevel(min, max []byte) (level int) {
- if !v.tables[0].isOverlaps(min, max, false, v.s.icmp) {
- var r tFiles
- for ; level < kMaxMemCompactLevel; level++ {
- if v.tables[level+1].isOverlaps(min, max, true, v.s.icmp) {
+func (v *version) pickLevel(umin, umax []byte) (level int) {
+ if !v.tables[0].overlaps(v.s.icmp, umin, umax, true) {
+ var overlaps tFiles
+ maxLevel := v.s.o.GetMaxMemCompationLevel()
+ for ; level < maxLevel; level++ {
+ if v.tables[level+1].overlaps(v.s.icmp, umin, umax, false) {
break
}
- v.tables[level+2].getOverlaps(min, max, &r, true, v.s.icmp.ucmp)
- if r.size() > kMaxGrandParentOverlapBytes {
+ overlaps = v.tables[level+2].getOverlaps(overlaps, v.s.icmp, umin, umax, false)
+ if overlaps.size() > uint64(v.s.o.GetCompactionGPOverlaps(level)) {
break
}
}
@@ -294,7 +323,7 @@ func (v *version) computeCompaction() {
var bestLevel int = -1
var bestScore float64 = -1
- for level, ff := range v.tables {
+ for level, tables := range v.tables {
var score float64
if level == 0 {
// We treat level-0 specially by bounding the number of files
@@ -308,9 +337,9 @@ func (v *version) computeCompaction() {
// file size is small (perhaps because of a small write-buffer
// setting, or very high compression ratios, or lots of
// overwrites/deletions).
- score = float64(len(ff)) / kL0_CompactionTrigger
+ score = float64(len(tables)) / float64(v.s.o.GetCompactionL0Trigger())
} else {
- score = float64(ff.size()) / levelMaxSize[level]
+ score = float64(tables.size()) / float64(v.s.o.GetCompactionTotalSize(level))
}
if score > bestScore {
@@ -327,66 +356,62 @@ func (v *version) needCompaction() bool {
return v.cScore >= 1 || atomic.LoadPointer(&v.cSeek) != nil
}
+// tablesScratch holds the table changes staged for a single level
+// (versionStaging.tables is indexed by level). commit fills both maps keyed
+// by the record's num field.
+type tablesScratch struct {
+ // added maps num to the record describing a table to add.
+ added map[uint64]atRecord
+ // deleted is the set of nums scheduled for removal.
+ deleted map[uint64]struct{}
+}
+
type versionStaging struct {
base *version
- tables [kNumLevels]struct {
- added map[uint64]ntRecord
- deleted map[uint64]struct{}
- }
+ tables []tablesScratch
}
func (p *versionStaging) commit(r *sessionRecord) {
- btt := p.base.tables
-
- // deleted tables
- for _, tr := range r.deletedTables {
- tm := &(p.tables[tr.level])
+ // Deleted tables.
+ for _, r := range r.deletedTables {
+ tm := &(p.tables[r.level])
- bt := btt[tr.level]
- if len(bt) > 0 {
+ if len(p.base.tables[r.level]) > 0 {
if tm.deleted == nil {
tm.deleted = make(map[uint64]struct{})
}
- tm.deleted[tr.num] = struct{}{}
+ tm.deleted[r.num] = struct{}{}
}
if tm.added != nil {
- delete(tm.added, tr.num)
+ delete(tm.added, r.num)
}
}
- // new tables
- for _, tr := range r.addedTables {
- tm := &(p.tables[tr.level])
+ // New tables.
+ for _, r := range r.addedTables {
+ tm := &(p.tables[r.level])
if tm.added == nil {
- tm.added = make(map[uint64]ntRecord)
+ tm.added = make(map[uint64]atRecord)
}
- tm.added[tr.num] = tr
+ tm.added[r.num] = r
if tm.deleted != nil {
- delete(tm.deleted, tr.num)
+ delete(tm.deleted, r.num)
}
}
}
func (p *versionStaging) finish() *version {
- s := p.base.s
- btt := p.base.tables
-
- // build new version
- nv := &version{s: s}
+ // Build new version.
+ nv := newVersion(p.base.s)
for level, tm := range p.tables {
- bt := btt[level]
+ btables := p.base.tables[level]
- n := len(bt) + len(tm.added) - len(tm.deleted)
+ n := len(btables) + len(tm.added) - len(tm.deleted)
if n < 0 {
n = 0
}
nt := make(tFiles, 0, n)
- // base tables
- for _, t := range bt {
+ // Base tables.
+ for _, t := range btables {
if _, ok := tm.deleted[t.file.Num()]; ok {
continue
}
@@ -396,17 +421,21 @@ func (p *versionStaging) finish() *version {
nt = append(nt, t)
}
- // new tables
- for _, tr := range tm.added {
- nt = append(nt, tr.makeFile(s))
+ // New tables.
+ for _, r := range tm.added {
+ nt = append(nt, p.base.s.tableFileFromRecord(r))
}
- // sort tables
- nt.sortByKey(s.icmp)
+ // Sort tables.
+ if level == 0 {
+ nt.sortByNum()
+ } else {
+ nt.sortByKey(p.base.s.icmp)
+ }
nv.tables[level] = nt
}
- // compute compaction score for new version
+ // Compute compaction score for new version.
nv.computeCompaction()
return nv
@@ -421,7 +450,7 @@ func (vr *versionReleaser) Release() {
v := vr.v
v.s.vmu.Lock()
if !vr.once {
- v.release_NB()
+ v.releaseNB()
vr.once = true
}
v.s.vmu.Unlock()
diff --git a/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/decode.go b/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/decode.go
new file mode 100644
index 000000000..552a17bfb
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/decode.go
@@ -0,0 +1,292 @@
+// Copyright 2011 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package snappy
+
+import (
+ "encoding/binary"
+ "errors"
+ "io"
+)
+
+var (
+ // ErrCorrupt reports that the input is invalid.
+ ErrCorrupt = errors.New("snappy: corrupt input")
+ // ErrUnsupported reports that the input isn't supported.
+ ErrUnsupported = errors.New("snappy: unsupported input")
+)
+
+// DecodedLen reports the decoded length announced by the length header at the
+// start of the encoded block src, or the error produced while parsing it.
+func DecodedLen(src []byte) (int, error) {
+	blockLen, _, err := decodedLen(src)
+	return blockLen, err
+}
+
+// decodedLen returns the length of the decoded block and the number of bytes
+// that the length header occupied.
+//
+// It returns ErrCorrupt when src does not start with a well-formed uvarint:
+// binary.Uvarint reports n == 0 when src is too short and n < 0 when the
+// value overflows 64 bits. A length that cannot be represented as a
+// non-negative int is rejected with a "too large" error.
+func decodedLen(src []byte) (blockLen, headerLen int, err error) {
+	v, n := binary.Uvarint(src)
+	if n <= 0 {
+		// A negative n must never escape as headerLen: Decode uses it as its
+		// starting index into src and would panic on src[s].
+		return 0, 0, ErrCorrupt
+	}
+	if int(v) < 0 || uint64(int(v)) != v {
+		// Catches truncation on 32-bit ints and wrap-around to a negative
+		// length on 64-bit ints.
+		return 0, 0, errors.New("snappy: decoded block is too large")
+	}
+	return int(v), n, nil
+}
+
+// Decode returns the decoded form of src. The returned slice may be a sub-
+// slice of dst if dst was large enough to hold the entire decoded block.
+// Otherwise, a newly allocated slice will be returned.
+// It is valid to pass a nil dst.
+//
+// ErrCorrupt is returned when a tag runs past the end of src, when a literal
+// or copy would overflow the announced length, when a copy refers to data
+// before the start of the output or uses a zero offset, or when the number of
+// bytes produced differs from the announced length. COPY_4 tags are not
+// supported and yield a dedicated error.
+func Decode(dst, src []byte) ([]byte, error) {
+	dLen, s, err := decodedLen(src)
+	if err != nil {
+		return nil, err
+	}
+	if len(dst) < dLen {
+		dst = make([]byte, dLen)
+	}
+
+	// d is the write position in dst, s the read position in src.
+	var d, offset, length int
+	for s < len(src) {
+		switch src[s] & 0x03 {
+		case tagLiteral:
+			// The upper six bits hold length-1 directly (< 60) or select how
+			// many little-endian bytes that follow hold it (60..63 => 1..4).
+			x := uint(src[s] >> 2)
+			switch {
+			case x < 60:
+				s += 1
+			case x == 60:
+				s += 2
+				if s > len(src) {
+					return nil, ErrCorrupt
+				}
+				x = uint(src[s-1])
+			case x == 61:
+				s += 3
+				if s > len(src) {
+					return nil, ErrCorrupt
+				}
+				x = uint(src[s-2]) | uint(src[s-1])<<8
+			case x == 62:
+				s += 4
+				if s > len(src) {
+					return nil, ErrCorrupt
+				}
+				x = uint(src[s-3]) | uint(src[s-2])<<8 | uint(src[s-1])<<16
+			case x == 63:
+				s += 5
+				if s > len(src) {
+					return nil, ErrCorrupt
+				}
+				x = uint(src[s-4]) | uint(src[s-3])<<8 | uint(src[s-2])<<16 | uint(src[s-1])<<24
+			}
+			length = int(x + 1)
+			if length <= 0 {
+				// x+1 does not fit in a positive int.
+				return nil, errors.New("snappy: unsupported literal length")
+			}
+			if length > len(dst)-d || length > len(src)-s {
+				return nil, ErrCorrupt
+			}
+			copy(dst[d:], src[s:s+length])
+			d += length
+			s += length
+			continue
+
+		case tagCopy1:
+			s += 2
+			if s > len(src) {
+				return nil, ErrCorrupt
+			}
+			length = 4 + int(src[s-2])>>2&0x7
+			offset = int(src[s-2])&0xe0<<3 | int(src[s-1])
+
+		case tagCopy2:
+			s += 3
+			if s > len(src) {
+				return nil, ErrCorrupt
+			}
+			length = 1 + int(src[s-3])>>2
+			offset = int(src[s-2]) | int(src[s-1])<<8
+
+		case tagCopy4:
+			return nil, errors.New("snappy: unsupported COPY_4 tag")
+		}
+
+		end := d + length
+		// A zero offset would copy every byte onto itself, silently exposing
+		// whatever dst already contained, so it is rejected like any other
+		// out-of-range back-reference.
+		if offset <= 0 || offset > d || end > len(dst) {
+			return nil, ErrCorrupt
+		}
+		// Byte-by-byte on purpose: source and destination may overlap when
+		// offset < length.
+		for ; d < end; d++ {
+			dst[d] = dst[d-offset]
+		}
+	}
+	if d != dLen {
+		return nil, ErrCorrupt
+	}
+	return dst[:d], nil
+}
+
+// NewReader returns a new Reader that decompresses from r, using the framing
+// format described at
+// https://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
+//
+// The Reader's decode buffer is sized for one maximum-length uncompressed
+// chunk and its input buffer for the largest encoded chunk plus its checksum.
+func NewReader(r io.Reader) *Reader {
+	rd := new(Reader)
+	rd.r = r
+	rd.decoded = make([]byte, maxUncompressedChunkLen)
+	rd.buf = make([]byte, MaxEncodedLen(maxUncompressedChunkLen)+checksumSize)
+	return rd
+}
+
+// Reader is an io.Reader that can read Snappy-compressed bytes.
+type Reader struct {
+ // r is the underlying source of framed, compressed data.
+ r io.Reader
+ // err is sticky: once set, Read keeps returning it until Reset.
+ err error
+ // decoded holds the payload of the most recently decoded chunk.
+ decoded []byte
+ // buf is scratch space for chunk headers and raw chunk bodies.
+ buf []byte
+ // decoded[i:j] contains decoded bytes that have not yet been passed on.
+ i, j int
+ // readHeader records that the first chunk (the stream identifier) was seen.
+ readHeader bool
+}
+
+// Reset makes the Reader read from reader, dropping any decoded bytes not yet
+// handed out, clearing a sticky error and requiring a fresh stream header.
+// This permits reusing a Reader rather than allocating a new one.
+func (r *Reader) Reset(reader io.Reader) {
+	r.r, r.err = reader, nil
+	r.i, r.j = 0, 0
+	r.readHeader = false
+}
+
+// readFull fills p completely from the underlying reader. On failure it
+// leaves the error in r.err (io.ErrUnexpectedEOF is mapped to ErrCorrupt) and
+// returns false.
+func (r *Reader) readFull(p []byte) (ok bool) {
+	_, r.err = io.ReadFull(r.r, p)
+	switch r.err {
+	case nil:
+		return true
+	case io.ErrUnexpectedEOF:
+		r.err = ErrCorrupt
+	}
+	return false
+}
+
+// Read satisfies the io.Reader interface.
+//
+// It first hands out decoded bytes that are still pending; otherwise it reads
+// chunks from the underlying reader until one yields data. The first chunk
+// of a stream must be the stream identifier. Any failure is stored in r.err
+// and returned by this and every later call until Reset.
+func (r *Reader) Read(p []byte) (int, error) {
+	if r.err != nil {
+		return 0, r.err
+	}
+	for {
+		if r.i < r.j {
+			n := copy(p, r.decoded[r.i:r.j])
+			r.i += n
+			return n, nil
+		}
+		// Chunk header: one type byte followed by a 24-bit little-endian
+		// body length.
+		if !r.readFull(r.buf[:4]) {
+			return 0, r.err
+		}
+		chunkType := r.buf[0]
+		if !r.readHeader {
+			if chunkType != chunkTypeStreamIdentifier {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			r.readHeader = true
+		}
+		chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
+		if chunkLen > len(r.buf) {
+			r.err = ErrUnsupported
+			return 0, r.err
+		}
+
+		// The chunk types are specified at
+		// https://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
+		switch chunkType {
+		case chunkTypeCompressedData:
+			// Section 4.2. Compressed data (chunk type 0x00).
+			if chunkLen < checksumSize {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			buf := r.buf[:chunkLen]
+			if !r.readFull(buf) {
+				return 0, r.err
+			}
+			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
+			buf = buf[checksumSize:]
+
+			n, err := DecodedLen(buf)
+			if err != nil {
+				r.err = err
+				return 0, r.err
+			}
+			if n > len(r.decoded) {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			if _, err := Decode(r.decoded, buf); err != nil {
+				r.err = err
+				return 0, r.err
+			}
+			if crc(r.decoded[:n]) != checksum {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			r.i, r.j = 0, n
+			continue
+
+		case chunkTypeUncompressedData:
+			// Section 4.3. Uncompressed data (chunk type 0x01).
+			if chunkLen < checksumSize {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			buf := r.buf[:checksumSize]
+			if !r.readFull(buf) {
+				return 0, r.err
+			}
+			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
+			// Read directly into r.decoded instead of via r.buf.
+			n := chunkLen - checksumSize
+			// chunkLen is only bounded by len(r.buf), which is larger than
+			// len(r.decoded); without this check an oversized chunk would
+			// make the slice expression below panic.
+			if n > len(r.decoded) {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			if !r.readFull(r.decoded[:n]) {
+				return 0, r.err
+			}
+			if crc(r.decoded[:n]) != checksum {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			r.i, r.j = 0, n
+			continue
+
+		case chunkTypeStreamIdentifier:
+			// Section 4.1. Stream identifier (chunk type 0xff).
+			if chunkLen != len(magicBody) {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			if !r.readFull(r.buf[:len(magicBody)]) {
+				return 0, r.err
+			}
+			for i := 0; i < len(magicBody); i++ {
+				if r.buf[i] != magicBody[i] {
+					r.err = ErrCorrupt
+					return 0, r.err
+				}
+			}
+			continue
+		}
+
+		if chunkType <= 0x7f {
+			// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
+			r.err = ErrUnsupported
+			return 0, r.err
+		}
+		// Section 4.4 Padding (chunk type 0xfe).
+		// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
+		if !r.readFull(r.buf[:chunkLen]) {
+			return 0, r.err
+		}
+	}
+}
diff --git a/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/encode.go b/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/encode.go
new file mode 100644
index 000000000..dda372422
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/encode.go
@@ -0,0 +1,258 @@
+// Copyright 2011 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package snappy
+
+import (
+ "encoding/binary"
+ "io"
+)
+
+// We limit how far copy back-references can go, the same as the C++ code.
+const maxOffset = 1 << 15
+
+// emitLiteral writes lit to dst as a literal chunk: a tag byte, zero to four
+// little-endian bytes holding len(lit)-1, then the literal bytes themselves.
+// It returns the number of bytes written and panics if lit is too long to
+// encode or dst cannot hold the literal bytes.
+func emitLiteral(dst, lit []byte) int {
+	n := uint(len(lit) - 1)
+
+	// extra is the number of length bytes that follow the tag byte.
+	var extra int
+	switch {
+	case n < 60:
+		extra = 0
+	case n < 1<<8:
+		extra = 1
+	case n < 1<<16:
+		extra = 2
+	case n < 1<<24:
+		extra = 3
+	case int64(n) < 1<<32:
+		extra = 4
+	default:
+		panic("snappy: source buffer is too long")
+	}
+
+	if extra == 0 {
+		// Short literals carry their length inside the tag byte.
+		dst[0] = uint8(n)<<2 | tagLiteral
+	} else {
+		// Tag values 60..63 announce 1..4 trailing length bytes.
+		dst[0] = uint8(59+extra)<<2 | tagLiteral
+		for k := 0; k < extra; k++ {
+			dst[1+k] = uint8(n >> (8 * uint(k)))
+		}
+	}
+
+	header := 1 + extra
+	if copy(dst[header:], lit) != len(lit) {
+		panic("snappy: destination buffer is too short")
+	}
+	return header + len(lit)
+}
+
+// emitCopy writes one or more copy chunks that together copy length bytes
+// from offset bytes back, and returns the number of bytes written to dst.
+// A remainder of 4..11 bytes with an offset below 1<<11 is written as a
+// two-byte tagCopy1 chunk; everything else goes out as three-byte tagCopy2
+// chunks of at most 64 bytes each.
+func emitCopy(dst []byte, offset, length int) int {
+	written := 0
+	for remaining := length; remaining > 0; {
+		if short := remaining - 4; short >= 0 && short < 1<<3 && offset < 1<<11 {
+			dst[written] = uint8(offset>>8)&0x07<<5 | uint8(short)<<2 | tagCopy1
+			dst[written+1] = uint8(offset)
+			return written + 2
+		}
+
+		chunk := remaining
+		if chunk > 1<<6 {
+			chunk = 1 << 6
+		}
+		dst[written] = uint8(chunk-1)<<2 | tagCopy2
+		dst[written+1] = uint8(offset)
+		dst[written+2] = uint8(offset >> 8)
+		written += 3
+		remaining -= chunk
+	}
+	return written
+}
+
+// Encode returns the encoded form of src. The returned slice may be a sub-
+// slice of dst if dst was large enough to hold the entire encoded block.
+// Otherwise, a newly allocated slice will be returned.
+// It is valid to pass a nil dst.
+//
+// The block is the uvarint length of src followed by literal and copy chunks.
+// Matches are found by hashing each 4-byte window of src into a table that
+// remembers the last position seen for that hash. The returned error is
+// always nil in this implementation.
+func Encode(dst, src []byte) ([]byte, error) {
+ if n := MaxEncodedLen(len(src)); len(dst) < n {
+ dst = make([]byte, n)
+ }
+
+ // The block starts with the varint-encoded length of the decompressed bytes.
+ d := binary.PutUvarint(dst, uint64(len(src)))
+
+ // Return early if src is short.
+ if len(src) <= 4 {
+ if len(src) != 0 {
+ d += emitLiteral(dst[d:], src)
+ }
+ return dst[:d], nil
+ }
+
+ // Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
+ // shift is chosen so that the top bits of the 32-bit hash index the table.
+ const maxTableSize = 1 << 14
+ shift, tableSize := uint(32-8), 1<<8
+ for tableSize < maxTableSize && tableSize < len(src) {
+ shift--
+ tableSize *= 2
+ }
+ var table [maxTableSize]int
+
+ // Iterate over the source bytes.
+ var (
+ s int // The iterator position.
+ t int // The last position with the same hash as s.
+ lit int // The start position of any pending literal bytes.
+ )
+ for s+3 < len(src) {
+ // Update the hash table.
+ b0, b1, b2, b3 := src[s], src[s+1], src[s+2], src[s+3]
+ h := uint32(b0) | uint32(b1)<<8 | uint32(b2)<<16 | uint32(b3)<<24
+ p := &table[(h*0x1e35a7bd)>>shift]
+ // We need to store values in [-1, inf) in table. To save
+ // some initialization time, (re)use the table's zero value
+ // and shift the values against this zero: add 1 on writes,
+ // subtract 1 on reads.
+ t, *p = *p-1, s+1
+ // If t is invalid or src[s:s+4] differs from src[t:t+4], accumulate a literal byte.
+ if t < 0 || s-t >= maxOffset || b0 != src[t] || b1 != src[t+1] || b2 != src[t+2] || b3 != src[t+3] {
+ s++
+ continue
+ }
+ // Otherwise, we have a match. First, emit any pending literal bytes.
+ if lit != s {
+ d += emitLiteral(dst[d:], src[lit:s])
+ }
+ // Extend the match to be as long as possible.
+ s0 := s
+ s, t = s+4, t+4
+ for s < len(src) && src[s] == src[t] {
+ s++
+ t++
+ }
+ // Emit the copied bytes. s-t is the back-reference offset and s-s0 the
+ // match length.
+ d += emitCopy(dst[d:], s-t, s-s0)
+ lit = s
+ }
+
+ // Emit any final pending literal bytes and return.
+ if lit != len(src) {
+ d += emitLiteral(dst[d:], src[lit:])
+ }
+ return dst[:d], nil
+}
+
+// MaxEncodedLen returns an upper bound on the size of a snappy block holding
+// srcLen bytes of uncompressed data.
+func MaxEncodedLen(srcLen int) int {
+	// Compressed data has the shape
+	//    compressed := item* literal*
+	//    item       := literal* copy
+	//
+	// A trailing run of literals grows by at most 62/60: a 60-byte literal
+	// costs one tag byte plus one extra length byte.
+	//
+	// For items, a 4-byte copy is only produced when the offset is < 65536,
+	// so it costs 3 bytes and stays within the literal bound above. A 5-byte
+	// copy with a large offset may cost 5 bytes, making the worst case a
+	// one-byte literal followed by a five-byte copy: 6 input bytes become 7
+	// output bytes. That ratio dominates, hence srcLen/6 of growth on top of
+	// a fixed allowance.
+	const fixedAllowance = 32
+	worstCaseGrowth := srcLen / 6
+	return fixedAllowance + srcLen + worstCaseGrowth
+}
+
+// NewWriter returns a new Writer that compresses to w, using the framing
+// format described at
+// https://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
+//
+// The Writer's encode buffer is sized for the largest possible encoding of
+// one maximum-length uncompressed chunk.
+func NewWriter(w io.Writer) *Writer {
+	wr := new(Writer)
+	wr.w = w
+	wr.enc = make([]byte, MaxEncodedLen(maxUncompressedChunkLen))
+	return wr
+}
+
+// Writer is an io.Writer that can write Snappy-compressed bytes.
+type Writer struct {
+ // w is the destination of the framed, compressed stream.
+ w io.Writer
+ // err is sticky: once set, Write keeps returning it until Reset.
+ err error
+ // enc is scratch space for the stream header and for encoded chunk bodies.
+ enc []byte
+ // buf holds one chunk header followed by the chunk's checksum.
+ buf [checksumSize + chunkHeaderSize]byte
+ // wroteHeader records that the stream identifier has been written.
+ wroteHeader bool
+}
+
+// Reset points the Writer at writer, clears any sticky error and arranges for
+// the stream identifier to be written again on the next Write. This permits
+// reusing a Writer rather than allocating a new one.
+func (w *Writer) Reset(writer io.Writer) {
+	w.w, w.err = writer, nil
+	w.wroteHeader = false
+}
+
+// Write satisfies the io.Writer interface.
+//
+// The stream identifier is written before the first chunk. p is then cut
+// into pieces of at most maxUncompressedChunkLen bytes; each piece is written
+// as a compressed chunk, or as an uncompressed one when compression saves
+// less than 12.5%. n counts the bytes of p whose chunks were fully written.
+// The first failure is remembered and returned by every later call until
+// Reset.
+func (w *Writer) Write(p []byte) (n int, errRet error) {
+	if w.err != nil {
+		return 0, w.err
+	}
+	if !w.wroteHeader {
+		copy(w.enc, magicChunk)
+		if _, err := w.w.Write(w.enc[:len(magicChunk)]); err != nil {
+			w.err = err
+			return n, err
+		}
+		w.wroteHeader = true
+	}
+	for len(p) > 0 {
+		// Peel off the next piece of input.
+		take := len(p)
+		if take > maxUncompressedChunkLen {
+			take = maxUncompressedChunkLen
+		}
+		raw := p[:take]
+		p = p[take:]
+		sum := crc(raw)
+
+		// Compress the piece, falling back to the raw bytes when the
+		// improvement isn't at least 12.5%.
+		body, err := Encode(w.enc, raw)
+		if err != nil {
+			w.err = err
+			return n, err
+		}
+		kind := uint8(chunkTypeCompressedData)
+		if threshold := len(raw) - len(raw)/8; len(body) >= threshold {
+			kind, body = chunkTypeUncompressedData, raw
+		}
+
+		// Header: type, 24-bit little-endian length (checksum included),
+		// then the 32-bit little-endian checksum of the uncompressed bytes.
+		size := 4 + len(body)
+		hdr := w.buf[:]
+		hdr[0] = kind
+		hdr[1], hdr[2], hdr[3] = uint8(size>>0), uint8(size>>8), uint8(size>>16)
+		hdr[4], hdr[5], hdr[6], hdr[7] = uint8(sum>>0), uint8(sum>>8), uint8(sum>>16), uint8(sum>>24)
+		if _, err = w.w.Write(hdr); err != nil {
+			w.err = err
+			return n, err
+		}
+		if _, err = w.w.Write(body); err != nil {
+			w.err = err
+			return n, err
+		}
+		n += len(raw)
+	}
+	return n, nil
+}
diff --git a/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/snappy.go b/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/snappy.go
new file mode 100644
index 000000000..043bf3d81
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/snappy.go
@@ -0,0 +1,68 @@
+// Copyright 2011 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package snappy implements the snappy block-based compression format.
+// It aims for very high speeds and reasonable compression.
+//
+// The C++ snappy implementation is at http://code.google.com/p/snappy/
+package snappy
+
+import (
+ "hash/crc32"
+)
+
+/*
+Each encoded block begins with the varint-encoded length of the decoded data,
+followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
+first byte of each chunk is broken into its 2 least and 6 most significant bits
+called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
+Zero means a literal tag. All other values mean a copy tag.
+
+For literal tags:
+ - If m < 60, the next 1 + m bytes are literal bytes.
+ - Otherwise, let n be the little-endian unsigned integer denoted by the next
+ m - 59 bytes. The next 1 + n bytes after that are literal bytes.
+
+For copy tags, length bytes are copied from offset bytes ago, in the style of
+Lempel-Ziv compression algorithms. In particular:
+ - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
+ The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
+ of the offset. The next byte is bits 0-7 of the offset.
+ - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
+ The length is 1 + m. The offset is the little-endian unsigned integer
+ denoted by the next 2 bytes.
+ - For l == 3, this tag is a legacy format that is no longer supported.
+*/
+// Chunk tags: the two least significant bits (l) of a chunk's first byte.
+const (
+ tagLiteral = 0x00 // literal bytes follow
+ tagCopy1 = 0x01 // copy with an 11-bit offset and a length in [4, 12)
+ tagCopy2 = 0x02 // copy with a 16-bit offset and a length in [1, 65)
+ tagCopy4 = 0x03 // legacy copy format, rejected by Decode
+)
+
+// Framing-format sizes and the stream identifier.
+const (
+ // checksumSize is the number of checksum bytes at the start of a data chunk.
+ checksumSize = 4
+ // chunkHeaderSize covers the chunk type byte plus the 3-byte chunk length.
+ chunkHeaderSize = 4
+ // magicChunk is the complete stream identifier chunk: header plus magicBody.
+ magicChunk = "\xff\x06\x00\x00" + magicBody
+ magicBody = "sNaPpY"
+ // https://code.google.com/p/snappy/source/browse/trunk/framing_format.txt says
+ // that "the uncompressed data in a chunk must be no longer than 65536 bytes".
+ maxUncompressedChunkLen = 65536
+)
+
+// Chunk types of the framing format, as dispatched on by Reader.Read.
+const (
+ chunkTypeCompressedData = 0x00 // section 4.2
+ chunkTypeUncompressedData = 0x01 // section 4.3
+ chunkTypePadding = 0xfe // section 4.4
+ chunkTypeStreamIdentifier = 0xff // section 4.1
+)
+
+// crcTable is the Castagnoli polynomial table used by crc.
+var crcTable = crc32.MakeTable(crc32.Castagnoli)
+
+// crc implements the checksum specified in section 3 of
+// https://code.google.com/p/snappy/source/browse/trunk/framing_format.txt:
+// the Castagnoli CRC-32 of b, rotated right by 15 bits, plus 0xa282ead8.
+func crc(b []byte) uint32 {
+	sum := crc32.Checksum(b, crcTable)
+	rotated := sum>>15 | sum<<17
+	return rotated + 0xa282ead8
+}
diff --git a/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/snappy_test.go b/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/snappy_test.go
new file mode 100644
index 000000000..0623385b7
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/snappy_test.go
@@ -0,0 +1,364 @@
+// Copyright 2011 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package snappy
+
+import (
+ "bytes"
+ "flag"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "math/rand"
+ "net/http"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+)
+
+var (
+ download = flag.Bool("download", false, "If true, download any missing files before running benchmarks")
+ testdata = flag.String("testdata", "testdata", "Directory containing the test data")
+)
+
+// roundtrip encodes b into ebuf, decodes the result into dbuf and returns an
+// error if either step fails or the decoded bytes differ from b.
+func roundtrip(b, ebuf, dbuf []byte) error {
+	encoded, encErr := Encode(ebuf, b)
+	if encErr != nil {
+		return fmt.Errorf("encoding error: %v", encErr)
+	}
+
+	decoded, decErr := Decode(dbuf, encoded)
+	if decErr != nil {
+		return fmt.Errorf("decoding error: %v", decErr)
+	}
+
+	if bytes.Equal(b, decoded) {
+		return nil
+	}
+	return fmt.Errorf("roundtrip mismatch:\n\twant %v\n\tgot %v", b, decoded)
+}
+
+// TestEmpty checks that a nil input round-trips with nil scratch buffers.
+func TestEmpty(t *testing.T) {
+	err := roundtrip(nil, nil, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestSmallCopy round-trips short inputs containing a repeated "aaaa" run
+// separated by 0..31 filler bytes, for every combination of nil, 20-byte and
+// 64-byte encode and decode buffers.
+func TestSmallCopy(t *testing.T) {
+	scratch := func() [][]byte {
+		return [][]byte{nil, make([]byte, 20), make([]byte, 64)}
+	}
+	for _, ebuf := range scratch() {
+		for _, dbuf := range scratch() {
+			for i := 0; i < 32; i++ {
+				input := []byte("aaaa" + strings.Repeat("b", i) + "aaaabbbb")
+				err := roundtrip(input, ebuf, dbuf)
+				if err == nil {
+					continue
+				}
+				t.Errorf("len(ebuf)=%d, len(dbuf)=%d, i=%d: %v", len(ebuf), len(dbuf), i, err)
+			}
+		}
+	}
+}
+
+// TestSmallRand round-trips pseudo-random inputs of length 1, 24, 47, ...
+// (below 20000) drawn from a fixed-seed generator.
+func TestSmallRand(t *testing.T) {
+	rng := rand.New(rand.NewSource(27354294))
+	for size := 1; size < 20000; size += 23 {
+		input := make([]byte, size)
+		for k := 0; k < size; k++ {
+			input[k] = uint8(rng.Uint32())
+		}
+		err := roundtrip(input, nil, nil)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
+// TestSmallRegular round-trips inputs of length 1, 24, 47, ... (below 20000)
+// filled with the repeating pattern 'a'..'j'.
+func TestSmallRegular(t *testing.T) {
+	for size := 1; size < 20000; size += 23 {
+		input := make([]byte, size)
+		for k := 0; k < size; k++ {
+			input[k] = uint8(k%10 + 'a')
+		}
+		err := roundtrip(input, nil, nil)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
+// cmp returns nil when a (the bytes obtained) equals b (the bytes wanted);
+// otherwise the error names the length mismatch or the first differing byte.
+func cmp(a, b []byte) error {
+	if gotLen, wantLen := len(a), len(b); gotLen != wantLen {
+		return fmt.Errorf("got %d bytes, want %d", gotLen, wantLen)
+	}
+	for i, got := range a {
+		if want := b[i]; got != want {
+			return fmt.Errorf("byte #%d: got 0x%02x, want 0x%02x", i, got, want)
+		}
+	}
+	return nil
+}
+
+// TestFramingFormat writes one megabyte through a Writer and reads it back
+// through a Reader, expecting the original bytes.
+func TestFramingFormat(t *testing.T) {
+	// The input alternates between runs of random (incompressible) bytes and
+	// runs of a single repeated (compressible) byte. Each run is 1e5 bytes,
+	// which is larger than maxUncompressedChunkLen (64k).
+	const (
+		runLen = 100000
+		runs   = 10
+	)
+	src := make([]byte, runs*runLen)
+	rng := rand.New(rand.NewSource(1))
+	for i := 0; i < runs; i++ {
+		run := src[i*runLen : (i+1)*runLen]
+		for j := range run {
+			if i%2 == 0 {
+				run[j] = uint8(rng.Intn(256))
+			} else {
+				run[j] = uint8(i)
+			}
+		}
+	}
+
+	var stream bytes.Buffer
+	if _, err := NewWriter(&stream).Write(src); err != nil {
+		t.Fatalf("Write: encoding: %v", err)
+	}
+	dst, err := ioutil.ReadAll(NewReader(&stream))
+	if err != nil {
+		t.Fatalf("ReadAll: decoding: %v", err)
+	}
+	if err := cmp(dst, src); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestReaderReset reuses a single Reader across a sequence of streams and
+// checks that Reset leaves no state behind: valid streams must decode to
+// gold, the invalid stream must fail, and a partially consumed stream must
+// not disturb whatever follows it.
+func TestReaderReset(t *testing.T) {
+ gold := bytes.Repeat([]byte("All that is gold does not glitter,\n"), 10000)
+ buf := new(bytes.Buffer)
+ if _, err := NewWriter(buf).Write(gold); err != nil {
+ t.Fatalf("Write: %v", err)
+ }
+ // "invalid" is fed to the Reader as-is; "partial" is only a marker that
+ // selects the partial-read step below.
+ encoded, invalid, partial := buf.String(), "invalid", "partial"
+ r := NewReader(nil)
+ for i, s := range []string{encoded, invalid, partial, encoded, partial, invalid, encoded, encoded} {
+ if s == partial {
+ // Read only the first 101 bytes of a valid stream so the Reader is
+ // left holding undelivered decoded data when it is reset next.
+ r.Reset(strings.NewReader(encoded))
+ if _, err := r.Read(make([]byte, 101)); err != nil {
+ t.Errorf("#%d: %v", i, err)
+ continue
+ }
+ continue
+ }
+ r.Reset(strings.NewReader(s))
+ got, err := ioutil.ReadAll(r)
+ switch s {
+ case encoded:
+ if err != nil {
+ t.Errorf("#%d: %v", i, err)
+ continue
+ }
+ if err := cmp(got, gold); err != nil {
+ t.Errorf("#%d: %v", i, err)
+ continue
+ }
+ case invalid:
+ if err == nil {
+ t.Errorf("#%d: got nil error, want non-nil", i)
+ continue
+ }
+ }
+ }
+}
+
+// TestWriterReset reuses a single Writer for n+1 outputs of growing length
+// (from empty up to all of gold) and checks that each stream decodes to
+// exactly the bytes that were written to it.
+func TestWriterReset(t *testing.T) {
+ gold := bytes.Repeat([]byte("Not all those who wander are lost;\n"), 10000)
+ var gots, wants [][]byte
+ const n = 20
+ w, failed := NewWriter(nil), false
+ for i := 0; i <= n; i++ {
+ buf := new(bytes.Buffer)
+ w.Reset(buf)
+ // The i-th input is the first i/n of gold.
+ want := gold[:len(gold)*i/n]
+ if _, err := w.Write(want); err != nil {
+ t.Errorf("#%d: Write: %v", i, err)
+ failed = true
+ continue
+ }
+ got, err := ioutil.ReadAll(NewReader(buf))
+ if err != nil {
+ t.Errorf("#%d: ReadAll: %v", i, err)
+ failed = true
+ continue
+ }
+ gots = append(gots, got)
+ wants = append(wants, want)
+ }
+ // After a failure gots/wants no longer line up with the iteration index,
+ // so the comparison below is skipped.
+ if failed {
+ return
+ }
+ for i := range gots {
+ if err := cmp(gots[i], wants[i]); err != nil {
+ t.Errorf("#%d: %v", i, err)
+ }
+ }
+}
+
+// benchDecode encodes src once outside the timed region and then measures
+// b.N calls of Decode on the result, using src itself as the destination
+// buffer.
+func benchDecode(b *testing.B, src []byte) {
+	encoded, err := Encode(nil, src)
+	if err != nil {
+		b.Fatal(err)
+	}
+	// Bandwidth is in amount of uncompressed data.
+	b.SetBytes(int64(len(src)))
+	b.ResetTimer()
+	for left := b.N; left > 0; left-- {
+		Decode(src, encoded)
+	}
+}
+
+// benchEncode measures b.N calls of Encode on src into a destination buffer
+// that is allocated once, outside the timed region.
+func benchEncode(b *testing.B, src []byte) {
+	// Bandwidth is in amount of uncompressed data.
+	b.SetBytes(int64(len(src)))
+	out := make([]byte, MaxEncodedLen(len(src)))
+	b.ResetTimer()
+	for left := b.N; left > 0; left-- {
+		Encode(out, src)
+	}
+}
+
+// readFile returns the contents of filename, failing b when the file cannot
+// be read or is empty.
+func readFile(b testing.TB, filename string) []byte {
+	data, err := ioutil.ReadFile(filename)
+	switch {
+	case err != nil:
+		b.Fatalf("failed reading %s: %s", filename, err)
+	case len(data) == 0:
+		b.Fatalf("%s has zero length", filename)
+	}
+	return data
+}
+
+// expand returns a slice of length n containing repeated copies of src; the
+// last copy is truncated when n is not a multiple of len(src). If src is
+// empty there is nothing to repeat and the result is n zero bytes.
+func expand(src []byte, n int) []byte {
+	dst := make([]byte, n)
+	if len(src) == 0 {
+		// copy would return 0 on every pass and the loop below would never
+		// make progress.
+		return dst
+	}
+	for rest := dst; len(rest) > 0; {
+		rest = rest[copy(rest, src):]
+	}
+	return dst
+}
+
+// benchWords benchmarks decoding (decode == true) or encoding of n bytes
+// built by repeating the contents of /usr/share/dict/words.
+func benchWords(b *testing.B, n int, decode bool) {
+	// Note: the file is OS-language dependent so the resulting values are not
+	// directly comparable for non-US-English OS installations.
+	words := readFile(b, "/usr/share/dict/words")
+	data := expand(words, n)
+
+	run := benchEncode
+	if decode {
+		run = benchDecode
+	}
+	run(b, data)
+}
+
+func BenchmarkWordsDecode1e3(b *testing.B) { benchWords(b, 1e3, true) }
+func BenchmarkWordsDecode1e4(b *testing.B) { benchWords(b, 1e4, true) }
+func BenchmarkWordsDecode1e5(b *testing.B) { benchWords(b, 1e5, true) }
+func BenchmarkWordsDecode1e6(b *testing.B) { benchWords(b, 1e6, true) }
+func BenchmarkWordsEncode1e3(b *testing.B) { benchWords(b, 1e3, false) }
+func BenchmarkWordsEncode1e4(b *testing.B) { benchWords(b, 1e4, false) }
+func BenchmarkWordsEncode1e5(b *testing.B) { benchWords(b, 1e5, false) }
+func BenchmarkWordsEncode1e6(b *testing.B) { benchWords(b, 1e6, false) }
+
+// testFiles' values are copied directly from
+// https://raw.githubusercontent.com/google/snappy/master/snappy_unittest.cc
+// The label field is unused in snappy-go.
+var testFiles = []struct {
+ label string
+ filename string
+}{
+ {"html", "html"},
+ {"urls", "urls.10K"},
+ {"jpg", "fireworks.jpeg"},
+ {"jpg_200", "fireworks.jpeg"},
+ {"pdf", "paper-100k.pdf"},
+ {"html4", "html_x_4"},
+ {"txt1", "alice29.txt"},
+ {"txt2", "asyoulik.txt"},
+ {"txt3", "lcet10.txt"},
+ {"txt4", "plrabn12.txt"},
+ {"pb", "geo.protodata"},
+ {"gaviota", "kppkn.gtb"},
+}
+
+// The test data files are present at this canonical URL.
+const baseURL = "https://raw.githubusercontent.com/google/snappy/master/testdata/"
+
+// downloadTestdata makes sure the test data file basename exists and is
+// non-empty in the -testdata directory. A missing file is fetched from
+// baseURL, but only when the -download flag is set; otherwise an error is
+// returned. A file left behind by a failed download is removed again.
+func downloadTestdata(basename string) (errRet error) {
+	filename := filepath.Join(*testdata, basename)
+	if stat, err := os.Stat(filename); err == nil && stat.Size() != 0 {
+		return nil
+	}
+
+	if !*download {
+		return fmt.Errorf("test data not found; skipping benchmark without the -download flag")
+	}
+	// Download the official snappy C++ implementation reference test data
+	// files for benchmarking.
+	if err := os.Mkdir(*testdata, 0777); err != nil && !os.IsExist(err) {
+		return fmt.Errorf("failed to create testdata: %s", err)
+	}
+
+	f, err := os.Create(filename)
+	if err != nil {
+		return fmt.Errorf("failed to create %s: %s", filename, err)
+	}
+	// One deferred cleanup so the order is explicit: close the file first
+	// (removing an open file fails on some platforms, and a failed Close
+	// means the data may be incomplete), then delete it on any failure.
+	defer func() {
+		if cerr := f.Close(); cerr != nil && errRet == nil {
+			errRet = fmt.Errorf("failed to close %s: %s", filename, cerr)
+		}
+		if errRet != nil {
+			// Best effort: the error already being returned matters more.
+			os.Remove(filename)
+		}
+	}()
+
+	url := baseURL + basename
+	resp, err := http.Get(url)
+	if err != nil {
+		return fmt.Errorf("failed to download %s: %s", url, err)
+	}
+	defer resp.Body.Close()
+	if s := resp.StatusCode; s != http.StatusOK {
+		return fmt.Errorf("downloading %s: HTTP status code %d (%s)", url, s, http.StatusText(s))
+	}
+	_, err = io.Copy(f, resp.Body)
+	if err != nil {
+		return fmt.Errorf("failed to download %s to %s: %s", url, filename, err)
+	}
+	return nil
+}
+
+ // benchFile runs an encode or decode benchmark over the n'th entry of
+ // testFiles.
+ //
+ // The entry's file is obtained through downloadTestdata; if that returns an
+ // error the benchmark is aborted with b.Fatalf. The file is then read from
+ // the *testdata directory and passed to benchDecode when decode is true, or
+ // to benchEncode otherwise.
+ func benchFile(b *testing.B, n int, decode bool) {
+ 	name := testFiles[n].filename
+ 	if err := downloadTestdata(name); err != nil {
+ 		b.Fatalf("failed to download testdata: %s", err)
+ 	}
+ 	path := filepath.Join(*testdata, name)
+ 	contents := readFile(b, path)
+ 	if !decode {
+ 		benchEncode(b, contents)
+ 		return
+ 	}
+ 	benchDecode(b, contents)
+ }
+
+ // Naming convention is kept similar to what snappy's C++ implementation uses. The numeric suffix of each benchmark is the index into testFiles that is passed to benchFile.
+ func Benchmark_UFlat0(b *testing.B) { benchFile(b, 0, true) } // UFlat variants pass decode == true, so benchFile calls benchDecode.
+ func Benchmark_UFlat1(b *testing.B) { benchFile(b, 1, true) }
+ func Benchmark_UFlat2(b *testing.B) { benchFile(b, 2, true) }
+ func Benchmark_UFlat3(b *testing.B) { benchFile(b, 3, true) }
+ func Benchmark_UFlat4(b *testing.B) { benchFile(b, 4, true) }
+ func Benchmark_UFlat5(b *testing.B) { benchFile(b, 5, true) }
+ func Benchmark_UFlat6(b *testing.B) { benchFile(b, 6, true) }
+ func Benchmark_UFlat7(b *testing.B) { benchFile(b, 7, true) }
+ func Benchmark_UFlat8(b *testing.B) { benchFile(b, 8, true) }
+ func Benchmark_UFlat9(b *testing.B) { benchFile(b, 9, true) }
+ func Benchmark_UFlat10(b *testing.B) { benchFile(b, 10, true) }
+ func Benchmark_UFlat11(b *testing.B) { benchFile(b, 11, true) }
+ func Benchmark_ZFlat0(b *testing.B) { benchFile(b, 0, false) } // ZFlat variants pass decode == false, so benchFile calls benchEncode.
+ func Benchmark_ZFlat1(b *testing.B) { benchFile(b, 1, false) }
+ func Benchmark_ZFlat2(b *testing.B) { benchFile(b, 2, false) }
+ func Benchmark_ZFlat3(b *testing.B) { benchFile(b, 3, false) }
+ func Benchmark_ZFlat4(b *testing.B) { benchFile(b, 4, false) }
+ func Benchmark_ZFlat5(b *testing.B) { benchFile(b, 5, false) }
+ func Benchmark_ZFlat6(b *testing.B) { benchFile(b, 6, false) }
+ func Benchmark_ZFlat7(b *testing.B) { benchFile(b, 7, false) }
+ func Benchmark_ZFlat8(b *testing.B) { benchFile(b, 8, false) }
+ func Benchmark_ZFlat9(b *testing.B) { benchFile(b, 9, false) }
+ func Benchmark_ZFlat10(b *testing.B) { benchFile(b, 10, false) }
+ func Benchmark_ZFlat11(b *testing.B) { benchFile(b, 11, false) }