aboutsummaryrefslogblamecommitdiffstats
path: root/vendor/github.com/syndtr/goleveldb/leveldb/version.go
blob: c60f12c20b2b03e99cf3ef666f632255850a4331 (plain) (tree)
1
2
3
4
5
6
7
8
9








                                                                         
             







                                                      







                     
                       

                                                                        

                                                                          




                            

                    
                              


                     
                                      
                             


                               







                                             

                                          
                                      

                                              


                 
                                     
                                      

                                                          
                                                  



                         
                          




                             
                     


                        
                                                                                                                            
                           
 














                                                             
                                      
                                             
                                     




                                                                                    





                                                                     

                                 
                        






                                                                                   
                         
                 
 

                                            
                 

         
 
                                                                                                                              



                                            
                           
 






                             
                              




                             
                                                                  
                                                         

                                                                     



                                                      
                         
                 
 








                                                                      
 








                                    
                                                                                     
                                                                

                                                                      




                                                             
                                         

                                                    
                                                        

                                                            
                                                        
                                                
                                                                                          

                                                    
                                 
                         


                                    
                 




                                    
                                        

                                            
                                        
                                
                                                                          
                         
                                    
                 





                                                                                         

         


              
                                                             
                      
 
                                                                     


                                              
                 



                                                                                                 





                                                                                                  
                                                                    







                                                                                                                                      
                 
         



                                                
                                       









                                                    

                                             





                                                



                                           

 
                                                                   
                                             


                                                                                          
                                           

                                                                         


                                                                                               
                                                  


                                             

                                                                                    


                                                                                     

                                                     






                         


















                                                                                                                       


                         




                                                     

                                
 





                                                  
                                 
                                     




                                                                                       
                                                   





                                                                                        
                                                                                              
                        
                                                                                            





                                         




                                                             



                            

                                                                                                                  





                                                                   
                           

                                  

 

                            









                                                               


                                                   

                                           



                                                                                    
                         
                                                           
                 

                                                    


                 

                                         


                                                                
                 


                                                      




                                            

                                  








                                                         
                 
 






                                                                                                    
                         









                                                                           
                         
 



                                                                       
 









                                                                   
                        
                                                     
                 

         





                                                   
                                                    













                                      
                             



                              
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

package leveldb

import (
    "fmt"
    "sync/atomic"
    "unsafe"

    "github.com/syndtr/goleveldb/leveldb/iterator"
    "github.com/syndtr/goleveldb/leveldb/opt"
    "github.com/syndtr/goleveldb/leveldb/util"
)

type tSet struct {
    level int
    table *tFile
}

type version struct {
    s *session

    levels []tFiles

    // Level that should be compacted next and its compaction score.
    // Score < 1 means compaction is not strictly needed. These fields
    // are initialized by computeCompaction()
    cLevel int
    cScore float64

    cSeek unsafe.Pointer

    closing bool
    ref     int
    // Succeeding version.
    next *version
}

func newVersion(s *session) *version {
    return &version{s: s}
}

func (v *version) releaseNB() {
    v.ref--
    if v.ref > 0 {
        return
    }
    if v.ref < 0 {
        panic("negative version ref")
    }

    nextTables := make(map[int64]bool)
    for _, tt := range v.next.levels {
        for _, t := range tt {
            num := t.fd.Num
            nextTables[num] = true
        }
    }

    for _, tt := range v.levels {
        for _, t := range tt {
            num := t.fd.Num
            if _, ok := nextTables[num]; !ok {
                v.s.tops.remove(t)
            }
        }
    }

    v.next.releaseNB()
    v.next = nil
}

func (v *version) release() {
    v.s.vmu.Lock()
    v.releaseNB()
    v.s.vmu.Unlock()
}

func (v *version) walkOverlapping(aux tFiles, ikey internalKey, f func(level int, t *tFile) bool, lf func(level int) bool) {
    ukey := ikey.ukey()

    // Aux level.
    if aux != nil {
        for _, t := range aux {
            if t.overlaps(v.s.icmp, ukey, ukey) {
                if !f(-1, t) {
                    return
                }
            }
        }

        if lf != nil && !lf(-1) {
            return
        }
    }

    // Walk tables level-by-level.
    for level, tables := range v.levels {
        if len(tables) == 0 {
            continue
        }

        if level == 0 {
            // Level-0 files may overlap each other. Find all files that
            // overlap ukey.
            for _, t := range tables {
                if t.overlaps(v.s.icmp, ukey, ukey) {
                    if !f(level, t) {
                        return
                    }
                }
            }
        } else {
            if i := tables.searchMax(v.s.icmp, ikey); i < len(tables) {
                t := tables[i]
                if v.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 {
                    if !f(level, t) {
                        return
                    }
                }
            }
        }

        if lf != nil && !lf(level) {
            return
        }
    }
}

func (v *version) get(aux tFiles, ikey internalKey, ro *opt.ReadOptions, noValue bool) (value []byte, tcomp bool, err error) {
    if v.closing {
        return nil, false, ErrClosed
    }

    ukey := ikey.ukey()

    var (
        tset  *tSet
        tseek bool

        // Level-0.
        zfound bool
        zseq   uint64
        zkt    keyType
        zval   []byte
    )

    err = ErrNotFound

    // Since entries never hop across level, finding key/value
    // in smaller level make later levels irrelevant.
    v.walkOverlapping(aux, ikey, func(level int, t *tFile) bool {
        if level >= 0 && !tseek {
            if tset == nil {
                tset = &tSet{level, t}
            } else {
                tseek = true
            }
        }

        var (
            fikey, fval []byte
            ferr        error
        )
        if noValue {
            fikey, ferr = v.s.tops.findKey(t, ikey, ro)
        } else {
            fikey, fval, ferr = v.s.tops.find(t, ikey, ro)
        }

        switch ferr {
        case nil:
        case ErrNotFound:
            return true
        default:
            err = ferr
            return false
        }

        if fukey, fseq, fkt, fkerr := parseInternalKey(fikey); fkerr == nil {
            if v.s.icmp.uCompare(ukey, fukey) == 0 {
                // Level <= 0 may overlaps each-other.
                if level <= 0 {
                    if fseq >= zseq {
                        zfound = true
                        zseq = fseq
                        zkt = fkt
                        zval = fval
                    }
                } else {
                    switch fkt {
                    case keyTypeVal:
                        value = fval
                        err = nil
                    case keyTypeDel:
                    default:
                        panic("leveldb: invalid internalKey type")
                    }
                    return false
                }
            }
        } else {
            err = fkerr
            return false
        }

        return true
    }, func(level int) bool {
        if zfound {
            switch zkt {
            case keyTypeVal:
                value = zval
                err = nil
            case keyTypeDel:
            default:
                panic("leveldb: invalid internalKey type")
            }
            return false
        }

        return true
    })

    if tseek && tset.table.consumeSeek() <= 0 {
        tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset))
    }

    return
}

func (v *version) sampleSeek(ikey internalKey) (tcomp bool) {
    var tset *tSet

    v.walkOverlapping(nil, ikey, func(level int, t *tFile) bool {
        if tset == nil {
            tset = &tSet{level, t}
            return true
        }
        if tset.table.consumeSeek() <= 0 {
            tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset))
        }
        return false
    }, nil)

    return
}

func (v *version) getIterators(slice *util.Range, ro *opt.ReadOptions) (its []iterator.Iterator) {
    strict := opt.GetStrict(v.s.o.Options, ro, opt.StrictReader)
    for level, tables := range v.levels {
        if level == 0 {
            // Merge all level zero files together since they may overlap.
            for _, t := range tables {
                its = append(its, v.s.tops.newIterator(t, slice, ro))
            }
        } else if len(tables) != 0 {
            its = append(its, iterator.NewIndexedIterator(tables.newIndexIterator(v.s.tops, v.s.icmp, slice, ro), strict))
        }
    }
    return
}

func (v *version) newStaging() *versionStaging {
    return &versionStaging{base: v}
}

// Spawn a new version based on this version.
func (v *version) spawn(r *sessionRecord) *version {
    staging := v.newStaging()
    staging.commit(r)
    return staging.finish()
}

func (v *version) fillRecord(r *sessionRecord) {
    for level, tables := range v.levels {
        for _, t := range tables {
            r.addTableFile(level, t)
        }
    }
}

func (v *version) tLen(level int) int {
    if level < len(v.levels) {
        return len(v.levels[level])
    }
    return 0
}

func (v *version) offsetOf(ikey internalKey) (n int64, err error) {
    for level, tables := range v.levels {
        for _, t := range tables {
            if v.s.icmp.Compare(t.imax, ikey) <= 0 {
                // Entire file is before "ikey", so just add the file size
                n += t.size
            } else if v.s.icmp.Compare(t.imin, ikey) > 0 {
                // Entire file is after "ikey", so ignore
                if level > 0 {
                    // Files other than level 0 are sorted by meta->min, so
                    // no further files in this level will contain data for
                    // "ikey".
                    break
                }
            } else {
                // "ikey" falls in the range for this table. Add the
                // approximate offset of "ikey" within the table.
                if m, err := v.s.tops.offsetOf(t, ikey); err == nil {
                    n += m
                } else {
                    return 0, err
                }
            }
        }
    }

    return
}

func (v *version) pickMemdbLevel(umin, umax []byte, maxLevel int) (level int) {
    if maxLevel > 0 {
        if len(v.levels) == 0 {
            return maxLevel
        }
        if !v.levels[0].overlaps(v.s.icmp, umin, umax, true) {
            var overlaps tFiles
            for ; level < maxLevel; level++ {
                if pLevel := level + 1; pLevel >= len(v.levels) {
                    return maxLevel
                } else if v.levels[pLevel].overlaps(v.s.icmp, umin, umax, false) {
                    break
                }
                if gpLevel := level + 2; gpLevel < len(v.levels) {
                    overlaps = v.levels[gpLevel].getOverlaps(overlaps, v.s.icmp, umin, umax, false)
                    if overlaps.size() > int64(v.s.o.GetCompactionGPOverlaps(level)) {
                        break
                    }
                }
            }
        }
    }
    return
}

func (v *version) computeCompaction() {
    // Precomputed best level for next compaction
    bestLevel := int(-1)
    bestScore := float64(-1)

    statFiles := make([]int, len(v.levels))
    statSizes := make([]string, len(v.levels))
    statScore := make([]string, len(v.levels))
    statTotSize := int64(0)

    for level, tables := range v.levels {
        var score float64
        size := tables.size()
        if level == 0 {
            // We treat level-0 specially by bounding the number of files
            // instead of number of bytes for two reasons:
            //
            // (1) With larger write-buffer sizes, it is nice not to do too
            // many level-0 compaction.
            //
            // (2) The files in level-0 are merged on every read and
            // therefore we wish to avoid too many files when the individual
            // file size is small (perhaps because of a small write-buffer
            // setting, or very high compression ratios, or lots of
            // overwrites/deletions).
            score = float64(len(tables)) / float64(v.s.o.GetCompactionL0Trigger())
        } else {
            score = float64(size) / float64(v.s.o.GetCompactionTotalSize(level))
        }

        if score > bestScore {
            bestLevel = level
            bestScore = score
        }

        statFiles[level] = len(tables)
        statSizes[level] = shortenb(int(size))
        statScore[level] = fmt.Sprintf("%.2f", score)
        statTotSize += size
    }

    v.cLevel = bestLevel
    v.cScore = bestScore

    v.s.logf("version@stat F·%v %s%v Sc·%v", statFiles, shortenb(int(statTotSize)), statSizes, statScore)
}

func (v *version) needCompaction() bool {
    return v.cScore >= 1 || atomic.LoadPointer(&v.cSeek) != nil
}

type tablesScratch struct {
    added   map[int64]atRecord
    deleted map[int64]struct{}
}

type versionStaging struct {
    base   *version
    levels []tablesScratch
}

func (p *versionStaging) getScratch(level int) *tablesScratch {
    if level >= len(p.levels) {
        newLevels := make([]tablesScratch, level+1)
        copy(newLevels, p.levels)
        p.levels = newLevels
    }
    return &(p.levels[level])
}

func (p *versionStaging) commit(r *sessionRecord) {
    // Deleted tables.
    for _, r := range r.deletedTables {
        scratch := p.getScratch(r.level)
        if r.level < len(p.base.levels) && len(p.base.levels[r.level]) > 0 {
            if scratch.deleted == nil {
                scratch.deleted = make(map[int64]struct{})
            }
            scratch.deleted[r.num] = struct{}{}
        }
        if scratch.added != nil {
            delete(scratch.added, r.num)
        }
    }

    // New tables.
    for _, r := range r.addedTables {
        scratch := p.getScratch(r.level)
        if scratch.added == nil {
            scratch.added = make(map[int64]atRecord)
        }
        scratch.added[r.num] = r
        if scratch.deleted != nil {
            delete(scratch.deleted, r.num)
        }
    }
}

func (p *versionStaging) finish() *version {
    // Build new version.
    nv := newVersion(p.base.s)
    numLevel := len(p.levels)
    if len(p.base.levels) > numLevel {
        numLevel = len(p.base.levels)
    }
    nv.levels = make([]tFiles, numLevel)
    for level := 0; level < numLevel; level++ {
        var baseTabels tFiles
        if level < len(p.base.levels) {
            baseTabels = p.base.levels[level]
        }

        if level < len(p.levels) {
            scratch := p.levels[level]

            var nt tFiles
            // Prealloc list if possible.
            if n := len(baseTabels) + len(scratch.added) - len(scratch.deleted); n > 0 {
                nt = make(tFiles, 0, n)
            }

            // Base tables.
            for _, t := range baseTabels {
                if _, ok := scratch.deleted[t.fd.Num]; ok {
                    continue
                }
                if _, ok := scratch.added[t.fd.Num]; ok {
                    continue
                }
                nt = append(nt, t)
            }

            // New tables.
            for _, r := range scratch.added {
                nt = append(nt, tableFileFromRecord(r))
            }

            if len(nt) != 0 {
                // Sort tables.
                if level == 0 {
                    nt.sortByNum()
                } else {
                    nt.sortByKey(p.base.s.icmp)
                }

                nv.levels[level] = nt
            }
        } else {
            nv.levels[level] = baseTabels
        }
    }

    // Trim levels.
    n := len(nv.levels)
    for ; n > 0 && nv.levels[n-1] == nil; n-- {
    }
    nv.levels = nv.levels[:n]

    // Compute compaction score for new version.
    nv.computeCompaction()

    return nv
}

type versionReleaser struct {
    v    *version
    once bool
}

func (vr *versionReleaser) Release() {
    v := vr.v
    v.s.vmu.Lock()
    if !vr.once {
        v.releaseNB()
        vr.once = true
    }
    v.s.vmu.Unlock()
}