mirror of
https://github.com/kopia/kopia.git
synced 2026-01-26 07:18:02 -05:00
added hashcache database, based on BoltDB which caches results of computing ObjectID for any given os.FileInfo.
This commit is contained in:
158
internal/hashcache/hashcache_database.go
Normal file
158
internal/hashcache/hashcache_database.go
Normal file
@@ -0,0 +1,158 @@
|
||||
// Package hashcache implements streaming cache of file hashes.
|
||||
package hashcache
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"crypto/sha1"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
|
||||
"github.com/boltdb/bolt"
|
||||
"github.com/kopia/kopia/repo"
|
||||
)
|
||||
|
||||
const batchSize = 10
|
||||
|
||||
var hashcacheBucket = []byte("hashcache")
|
||||
|
||||
// DB is a databsase caching ObjectID computations for local files.
|
||||
type DB struct {
|
||||
db *bolt.DB
|
||||
}
|
||||
|
||||
// DirectoryInfo manages a set of hashes for all directory entries.
|
||||
type DirectoryInfo struct {
|
||||
parent *DB
|
||||
dirty bool
|
||||
dirName string
|
||||
|
||||
previousFiles map[string]string
|
||||
currentFiles map[string]string
|
||||
}
|
||||
|
||||
// NewHashCacheDB returns new hash cache database stored in a given file.
|
||||
func NewHashCacheDB(filename string) (*DB, error) {
|
||||
db, err := bolt.Open(filename, 0600, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &DB{db: db}, nil
|
||||
}
|
||||
|
||||
func (h *DB) keyOf(dirName string) []byte {
|
||||
o := sha1.New()
|
||||
fmt.Fprintf(o, "%v", dirName)
|
||||
return o.Sum(nil)
|
||||
}
|
||||
|
||||
// OpenDir returns DirectoryInfo for a single local directory.
|
||||
// The caller is expected to call Lookup()/Set() for all entries in the directory before calling Save().
|
||||
func (h *DB) OpenDir(dirName string) DirectoryInfo {
|
||||
var dhi DirectoryInfo
|
||||
|
||||
h.db.View(func(t *bolt.Tx) error {
|
||||
b := t.Bucket(hashcacheBucket)
|
||||
if b == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
v := b.Get(h.keyOf(dirName))
|
||||
if v == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
gz, err := gzip.NewReader(bytes.NewReader(v))
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var files map[string]string
|
||||
|
||||
if err := json.NewDecoder(gz).Decode(&files); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
dhi.previousFiles = files
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
dhi.parent = h
|
||||
dhi.dirName = dirName
|
||||
dhi.currentFiles = make(map[string]string)
|
||||
|
||||
return dhi
|
||||
}
|
||||
|
||||
func (hi *DirectoryInfo) keyOf(fi os.FileInfo) string {
|
||||
h := sha1.New()
|
||||
fmt.Fprintf(h, "%v/%v/%v @ %v", fi.ModTime().UnixNano(), int(fi.Mode()), fi.Size(), fi.Name())
|
||||
return hex.EncodeToString(h.Sum(nil))
|
||||
}
|
||||
|
||||
// Lookup fetches the ObjectID corresponding to the given FileInfo in a directory, if present.
|
||||
func (hi *DirectoryInfo) Lookup(fi os.FileInfo) (repo.ObjectID, bool) {
|
||||
k := hi.keyOf(fi)
|
||||
s, ok := hi.previousFiles[k]
|
||||
if !ok {
|
||||
return repo.NullObjectID, false
|
||||
}
|
||||
|
||||
oid, err := repo.ParseObjectID(s)
|
||||
if err != nil {
|
||||
return repo.NullObjectID, false
|
||||
}
|
||||
|
||||
hi.currentFiles[k] = s
|
||||
return oid, true
|
||||
}
|
||||
|
||||
// Set associates ObjectID with a FileInfo that will be persisted on Save().
|
||||
func (hi *DirectoryInfo) Set(fi os.FileInfo, oid repo.ObjectID) {
|
||||
k := hi.keyOf(fi)
|
||||
new := oid.String()
|
||||
hi.currentFiles[k] = new
|
||||
hi.dirty = true
|
||||
}
|
||||
|
||||
// Save writes the current associations of FileInfo->ObjectID for the directory to the databsase.
|
||||
// Any entries for which neither Lookup() nor Set() has been called are assumed to be removed.
|
||||
func (hi *DirectoryInfo) Save() error {
|
||||
if len(hi.previousFiles) == len(hi.currentFiles) && !hi.dirty {
|
||||
return nil
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
gz := gzip.NewWriter(&buf)
|
||||
|
||||
if err := json.NewEncoder(gz).Encode(hi.currentFiles); err != nil {
|
||||
return err
|
||||
}
|
||||
gz.Flush()
|
||||
|
||||
value := buf.Bytes()
|
||||
|
||||
err := hi.parent.db.Update(func(t *bolt.Tx) error {
|
||||
b := t.Bucket(hashcacheBucket)
|
||||
if b == nil {
|
||||
var err error
|
||||
b, err = t.CreateBucket(hashcacheBucket)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return b.Put(hi.parent.keyOf(hi.dirName), value)
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
log.Printf("warning: failed to update hash cache: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user