From 72f28ed2b2926fae1cf0e713cb5f7fe043f35164 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Mon, 11 Apr 2016 20:17:12 -0700 Subject: [PATCH] hash cache w.i.p. --- cmd/playground/main.go | 31 +++-- fs/dir.go | 19 +-- fs/dir_json.go | 286 ++++++++++++++++++++++++----------------- fs/dir_test.go | 82 ++++++++---- fs/entry.go | 60 ++++++++- fs/hashcache.go | 51 ++++++++ fs/lister.go | 2 +- fs/lister_test.go | 10 +- fs/upload.go | 98 ++++++++------ 9 files changed, 417 insertions(+), 222 deletions(-) create mode 100644 fs/hashcache.go diff --git a/cmd/playground/main.go b/cmd/playground/main.go index 4a2183388..a2036ed24 100644 --- a/cmd/playground/main.go +++ b/cmd/playground/main.go @@ -1,8 +1,8 @@ package main import ( - "compress/gzip" "io" + "io/ioutil" "log" "sync" "sync/atomic" @@ -190,10 +190,12 @@ func readCached(omgr cas.ObjectManager, manifestOID cas.ObjectID) { log.Fatalf("Error: %v", err) } - r, err = gzip.NewReader(r) + //r, err = gzip.NewReader(r) t0 := time.Now() - fs.ReadDirectory(r, "") + //fs.ReadDirectory(r, "") + v, _ := ioutil.ReadAll(r) + log.Printf("%v", string(v)) dt := time.Since(t0) log.Printf("parsed in %v", dt) } @@ -206,11 +208,11 @@ func main() { data := map[string][]byte{} st := blob.NewMapStorage(data) - st = &highLatencyStorage{ - Storage: st, - writeDelay: 1 * time.Millisecond, - readDelay: 10 * time.Millisecond, - } + // st = &highLatencyStorage{ + // Storage: st, + // writeDelay: 0 * time.Millisecond, + // readDelay: 0 * time.Millisecond, + // } format := cas.Format{ Version: "1", Hash: "md5", @@ -224,12 +226,13 @@ func main() { time.Sleep(1 * time.Second) - _, manifestOID := uploadAndTime(omgr, "/Users/jarek/Projects/Kopia", "") - omgr.ResetStats() - readCached(omgr, manifestOID) - log.Printf("stats: %#v", omgr.Stats()) - // // uploadAndTime(omgr, "/Users/jarek/Projects/Kopia", "") - // // uploadAndTime(omgr, "/Users/jarek/Projects/Kopia", oid) + path := "/Users/jarek/Projects/Kopia/src/github.com/kopia/" + + _, manifestOID := 
uploadAndTime(omgr, path, "") + log.Printf("second time") + uploadAndTime(omgr, path, manifestOID) + log.Printf("finished second time") + //readCached(omgr, manifestOID) // time.Sleep(1 * time.Second) // for i := 0; i < 1; i++ { // t0 := time.Now() diff --git a/fs/dir.go b/fs/dir.go index c4f0f068a..c0bd75b3a 100644 --- a/fs/dir.go +++ b/fs/dir.go @@ -5,30 +5,21 @@ // Directory represents contents of a directory. type Directory []*Entry -type sortedDirectory []*Entry - -func (d sortedDirectory) Len() int { return len(d) } -func (d sortedDirectory) Swap(i, j int) { d[i], d[j] = d[j], d[i] } -func (d sortedDirectory) Less(i, j int) bool { - if d[i].IsDir() != d[j].IsDir() { - return d[i].IsDir() - } - +func (d Directory) Len() int { return len(d) } +func (d Directory) Swap(i, j int) { d[i], d[j] = d[j], d[i] } +func (d Directory) Less(i, j int) bool { return d[i].Name < d[j].Name } // FindByName returns Entry with a given name or nil if not found -func (d Directory) FindByName(isDir bool, n string) *Entry { +func (d Directory) FindByName(n string) *Entry { i := sort.Search( len(d), func(i int) bool { - if d[i].IsDir() != isDir { - return !d[i].IsDir() - } return d[i].Name >= n }, ) - if i < len(d) && d[i].Name == n && d[i].IsDir() == isDir { + if i < len(d) && d[i].Name == n { return d[i] } diff --git a/fs/dir_json.go b/fs/dir_json.go index ecacb88f5..6933d5423 100644 --- a/fs/dir_json.go +++ b/fs/dir_json.go @@ -3,95 +3,150 @@ import ( "encoding/binary" "encoding/json" - "errors" "fmt" "io" "os" "strconv" + "strings" "time" "github.com/kopia/kopia/cas" ) -const ( - maxDirectoryEntrySize = 65000 -) - -var ( - invalidDirectoryDataError = errors.New("invalid directory data") -) +const modeChars = "dalTLDpSugct" type jsonDirectoryEntry struct { - FileName string `json:"f,omitempty"` - DirName string `json:"d,omitempty"` - Permissions string `json:"p,omitempty"` - Size string `json:"s,omitempty"` - Time time.Time `json:"t"` - Owner string `json:"o,omitempty"` - ObjectID 
string `json:"oid,omitempty"` + Name string `json:"name"` + Mode string `json:"mode"` + Size string `json:"size,omitempty"` + Time time.Time `json:"modTime"` + Owner string `json:"owner,omitempty"` + ObjectID string `json:"oid,omitempty"` } func (de *Entry) fromJSON(jde *jsonDirectoryEntry) error { - var mode uint32 + de.Name = jde.Name - switch { - case jde.DirName != "": - de.Name = jde.DirName - mode = uint32(os.ModeDir) - - case jde.FileName != "": - de.Name = jde.FileName - mode = 0 + if mode, err := parseFileModeAndPermissions(jde.Mode); err == nil { + de.FileMode = mode + } else { + return fmt.Errorf("invalid mode: %v", err) } - if jde.Permissions != "" { - s, err := strconv.ParseUint(jde.Permissions, 8, 32) - if err != nil { - return err - } - mode |= uint32(s) - } - - de.FileMode = os.FileMode(mode) de.ModTime = jde.Time + if jde.Owner != "" { - fmt.Sscanf(jde.Owner, "%d:%d", &de.UserID, &de.GroupID) + if c, err := fmt.Sscanf(jde.Owner, "%d:%d", &de.UserID, &de.GroupID); err != nil || c != 2 { + return fmt.Errorf("invalid owner: %v", err) + } } de.ObjectID = cas.ObjectID(jde.ObjectID) if jde.Size != "" { - s, err := strconv.ParseInt(jde.Size, 10, 64) - if err != nil { - return err + if s, err := strconv.ParseInt(jde.Size, 10, 64); err == nil { + de.FileSize = s + } else { + return fmt.Errorf("invalid size: %v", err) } - de.FileSize = s } return nil } +// parseFileModeAndPermissions converts file mode string to os.FileMode +func parseFileModeAndPermissions(s string) (os.FileMode, error) { + colon := strings.IndexByte(s, ':') + if colon < 0 { + return parseFilePermissions(s) + } + + var mode os.FileMode + + if m, err := parseFileMode(s[0:colon]); err == nil { + mode |= m + } else { + return 0, err + } + + if m, err := parseFilePermissions(s[colon+1:]); err == nil { + mode |= m + } else { + return 0, err + } + + return mode, nil +} + +func parseFileMode(s string) (os.FileMode, error) { + var mode os.FileMode + for _, c := range s { + switch c { + case 'd': + 
mode |= os.ModeDir + case 'a': + mode |= os.ModeAppend + case 'l': + mode |= os.ModeExclusive + case 'T': + mode |= os.ModeTemporary + case 'L': + mode |= os.ModeSymlink + case 'D': + mode |= os.ModeDevice + case 'p': + mode |= os.ModeNamedPipe + case 'S': + mode |= os.ModeSocket + case 'u': + mode |= os.ModeSetuid + case 'g': + mode |= os.ModeSetgid + case 'c': + mode |= os.ModeCharDevice + case 't': + mode |= os.ModeSticky + default: + return 0, fmt.Errorf("unsupported mode: '%v'", c) + } + } + return mode, nil +} + +func parseFilePermissions(perm string) (os.FileMode, error) { + s, err := strconv.ParseUint(perm, 8, 32) + if err != nil { + return 0, err + } + return os.FileMode(s), nil +} + type directoryWriter struct { io.Closer writer io.Writer buf []byte separator []byte + + lastNameWritten string } func (dw *directoryWriter) WriteEntry(e *Entry) error { - var jde jsonDirectoryEntry - - switch e.FileMode & os.ModeType { - case os.ModeDir: - jde.DirName = e.Name - default: - jde.FileName = e.Name - jde.Size = strconv.FormatInt(e.FileSize, 10) + if dw.lastNameWritten != "" { + if isLessOrEqual(e.Name, dw.lastNameWritten) { + return fmt.Errorf("out-of-order directory entry, previous '%v' current '%v'", dw.lastNameWritten, e.Name) + } + dw.lastNameWritten = e.Name + } + jde := jsonDirectoryEntry{ + Name: e.Name, + Mode: formatModeAndPermissions(e.FileMode), + Time: e.ModTime.UTC(), + Owner: fmt.Sprintf("%d:%d", e.UserID, e.GroupID), + ObjectID: string(e.ObjectID), } - jde.Permissions = strconv.FormatInt(int64(e.FileMode&os.ModePerm), 8) - jde.Time = e.ModTime - jde.Owner = fmt.Sprintf("%d:%d", e.UserID, e.GroupID) - jde.ObjectID = string(e.ObjectID) + if e.FileMode.IsRegular() { + jde.Size = strconv.FormatInt(e.FileSize, 10) + } v, _ := json.Marshal(&jde) @@ -102,6 +157,24 @@ func (dw *directoryWriter) WriteEntry(e *Entry) error { return nil } +func formatModeAndPermissions(m os.FileMode) string { + const str = "dalTLDpSugct" + var buf [32]byte + w := 0 + for i, c 
:= range str { + if m&(1<<uint(32-1-i)) != 0 { + buf[w] = byte(c) + w++ + } + } + if w > 0 { + buf[w] = ':' + w++ + } + + return string(buf[:w]) + strconv.FormatInt(int64(m&os.ModePerm), 8) +} + func (dw *directoryWriter) Close() error { dw.writer.Write([]byte("\n]}\n")) return nil @@ -151,100 +224,81 @@ func (dr *directoryReader) ReadNext() (*Entry, error) { return &de, nil } - // Expect ']' - t, err := dr.decoder.Token() - if err != nil { - return nil, fmt.Errorf("invalid directory data: %v", err) + if err := ensureDelimiter(dr.decoder, json.Delim(']')); err != nil { + return nil, invalidDirectoryError(err) } - if t != json.Delim(']') { - return nil, fmt.Errorf("invalid directory data: expected ']', got %v", t) - } - - // Expect '}' - t, err = dr.decoder.Token() - if err != nil { - return nil, fmt.Errorf("invalid directory data: %v", err) - } - - if t != json.Delim('}') { - return nil, fmt.Errorf("invalid directory data: expected ']', got %v", t) - } - - // Expect end of file - t, err = dr.decoder.Token() - if err != io.EOF { - return nil, fmt.Errorf("invalid directory data: expected EOF, got %v", t) + if err := ensureDelimiter(dr.decoder, json.Delim('}')); err != nil { + return nil, invalidDirectoryError(err) } return nil, io.EOF } +func invalidDirectoryError(cause error) error { + return fmt.Errorf("invalid directory data: %v", cause) +} + type directoryFormat struct { Version int `json:"version"` } +func ensureDelimiter(d *json.Decoder, expected json.Delim) error { + t, err := d.Token() + if err != nil { + return err + } + + if t != expected { + return fmt.Errorf("expected '%v', got %v", expected.String(), t) + } + + return nil +} +func ensureStringToken(d *json.Decoder, expected string) error { + t, err := d.Token() + if err != nil { + return err + } + + if s, ok := t.(string); ok { + if s == expected { + return nil + } + } + + return fmt.Errorf("expected '%v', got '%v'", expected, t) +} + func newDirectoryReader(r io.Reader) (*directoryReader, error) { dr := &directoryReader{ decoder: json.NewDecoder(r), } - var t 
json.Token - var err error - - // Expect opening '{' - t, err = dr.decoder.Token() - if err != nil { - return nil, fmt.Errorf("invalid directory data: %v", err) + if err := ensureDelimiter(dr.decoder, json.Delim('{')); err != nil { + return nil, invalidDirectoryError(err) } - if t != json.Delim('{') { - return nil, fmt.Errorf("invalid directory data: expected '{', got %v", t) - } - - // Expect "format" - t, err = dr.decoder.Token() - if err != nil { - return nil, fmt.Errorf("invalid directory data: %v", err) - } - - if s, ok := t.(string); ok { - if s != "format" { - return nil, fmt.Errorf("invalid directory data: expected 'format', got '%v'", s) - } - } else { - return nil, fmt.Errorf("invalid directory data: expected 'format', got '%v'", t) + if err := ensureStringToken(dr.decoder, "format"); err != nil { + return nil, invalidDirectoryError(err) } // Parse format and trailing comma var format directoryFormat - err = dr.decoder.Decode(&format) - if err != nil { - return nil, fmt.Errorf("invalid directory data: %v", err) + if err := dr.decoder.Decode(&format); err != nil { + return nil, invalidDirectoryError(err) } - // Expect "entries" - t, err = dr.decoder.Token() - if err != nil { - return nil, fmt.Errorf("invalid directory data: %v", err) + if format.Version != 1 { + return nil, invalidDirectoryError(fmt.Errorf("unsupported version: %v", format.Version)) } - if s, ok := t.(string); ok { - if s != "entries" { - return nil, fmt.Errorf("invalid directory data: expected 'entries', got '%v'", s) - } - } else { - return nil, fmt.Errorf("invalid directory data: expected 'entries', got '%v'", t) + if err := ensureStringToken(dr.decoder, "entries"); err != nil { + return nil, invalidDirectoryError(err) } - // Expect opening '[' - t, err = dr.decoder.Token() - if err != nil { - return nil, fmt.Errorf("invalid directory data: %v", err) - } - - if t != json.Delim('[') { - return nil, fmt.Errorf("invalid directory data: expected '[', got %v", t) + if err := 
ensureDelimiter(dr.decoder, json.Delim('[')); err != nil { + return nil, invalidDirectoryError(err) } return dr, nil diff --git a/fs/dir_test.go b/fs/dir_test.go index 3d0dd9e61..c642215b3 100644 --- a/fs/dir_test.go +++ b/fs/dir_test.go @@ -12,16 +12,14 @@ func TestDirectory(t *testing.T) { `{`, `"format":{"version":1},`, `"entries":[`, - ` {"d":"subdir","p":"420","t":"2016-04-06T02:34:10Z","o":"500:100","oid":"C1234"},`, - ` {"f":"config.go","p":"420","s":"937","t":"2016-04-02T02:39:44.123456789Z","o":"500:100","oid":"C4321"},`, - ` {"f":"constants.go","p":"420","s":"13","t":"2016-04-02T02:36:19Z","o":"500:100"},`, - ` {"f":"doc.go","p":"420","s":"112","t":"2016-04-02T02:45:54Z","o":"500:100"},`, - ` {"f":"errors.go","p":"420","s":"506","t":"2016-04-02T02:41:03Z","o":"500:100"}`, + ` {"name":"config.go","mode":"420","size":"937","modTime":"2016-04-02T02:39:44.123456789Z","owner":"500:100","oid":"C4321"},`, + ` {"name":"constants.go","mode":"420","size":"13","modTime":"2016-04-02T02:36:19Z","owner":"500:100"},`, + ` {"name":"doc.go","mode":"420","size":"112","modTime":"2016-04-02T02:45:54Z","owner":"500:100"},`, + ` {"name":"errors.go","mode":"420","size":"506","modTime":"2016-04-02T02:41:03Z","owner":"500:100"},`, + ` {"name":"subdir","mode":"d:420","modTime":"2016-04-06T02:34:10Z","owner":"500:100","oid":"C1234"}`, `]}`, }, "\n") + "\n" - t.Logf("data: %v", data) - d, err := ReadDirectory(strings.NewReader(data), "") if err != nil { t.Errorf("can't read: %v", err) @@ -41,33 +39,67 @@ func TestDirectory(t *testing.T) { } cases := []struct { - isDir bool - name string + name string }{ - {true, "subdir"}, - {false, "config.go"}, - {false, "constants.go"}, - {false, "doc.go"}, - {false, "errors.go"}, + {"subdir"}, + {"config.go"}, + {"constants.go"}, + {"doc.go"}, + {"errors.go"}, } for _, c := range cases { - e := d.FindByName(c.isDir, c.name) + e := d.FindByName(c.name) if e == nil { - t.Errorf("not found, but expected to be found: %v/%v", c.name, c.isDir) + 
t.Errorf("not found, but expected to be found: %v", c.name) } else if e.Name != c.name { - t.Errorf("incorrect name: %v/%v got %v", c.name, c.isDir, e.Name) - } - - if e := d.FindByName(!c.isDir, c.name); e != nil { - t.Errorf("found %v, but expected to be found: %v/%v", e.Name, c.name, c.isDir) + t.Errorf("incorrect name: %v got %v", c.name, e.Name) } } - if e := d.FindByName(true, "nosuchdir"); e != nil { - t.Errorf("found %v, but expected to be found", e.Name) - } - if e := d.FindByName(false, "nosuchfile"); e != nil { + if e := d.FindByName("nosuchdir"); e != nil { t.Errorf("found %v, but expected to be found", e.Name) } } + +func TestDirectoryNameOrder(t *testing.T) { + sortedNames := []string{ + "a/a/a", + "a/a/", + "a/b", + "a/b1", + "a/b2", + "a/", + "bar/a/a", + "bar/a/", + "bar/a.b", + "bar/a.c/", + "bar/a1/a", + "bar/a1/", + "bar/a2", + "bar/a3", + "bar/", + "foo/a/a", + "foo/a/", + "foo/b", + "foo/c/a", + "foo/c/", + "foo/d/", + "foo/e1/", + "foo/e2/", + "foo/", + "goo/a/a", + "goo/a/", + "goo/", + } + + for i, n1 := range sortedNames { + for j, n2 := range sortedNames { + expected := i <= j + actual := isLessOrEqual(n1, n2) + if actual != expected { + t.Errorf("unexpected value for isLessOrEqual('%v','%v'), expected: %v, got: %v", n1, n2, expected, actual) + } + } + } +} diff --git a/fs/entry.go b/fs/entry.go index f19282035..52585ca85 100644 --- a/fs/entry.go +++ b/fs/entry.go @@ -1,12 +1,22 @@ package fs import ( + "log" "os" + "strings" "time" "github.com/kopia/kopia/cas" ) +// import ( +// "os" +// "strings" +// "time" + +// "github.com/kopia/kopia/cas" +// ) + // Entry stores attributes of a single entry in a directory. 
type Entry struct { Name string @@ -18,6 +28,45 @@ type Entry struct { ObjectID cas.ObjectID } +func isLess(name1, name2 string) bool { + if name1 == name2 { + return false + } + + return isLessOrEqual(name1, name2) +} + +func split1(name string) (head, tail string) { + n := strings.IndexByte(name, '/') + if n >= 0 { + return name[0 : n+1], name[n+1:] + } + + return name, "" +} + +func isLessOrEqual(name1, name2 string) bool { + parts1 := strings.Split(name1, "/") + parts2 := strings.Split(name2, "/") + + i := 0 + for i < len(parts1) && i < len(parts2) { + if parts1[i] == parts2[i] { + i++ + continue + } + if parts1[i] == "" { + return false + } + if parts2[i] == "" { + return true + } + return parts1[i] < parts2[i] + } + + return len(parts1) <= len(parts2) +} + func (e *Entry) IsDir() bool { return e.FileMode.IsDir() } @@ -28,26 +77,27 @@ func metadataEquals(e1, e2 *Entry) bool { } if e1.FileMode != e2.FileMode { + log.Printf("a1") return false } - if e1.ModTime != e2.ModTime { + if e1.ModTime.UnixNano() != e2.ModTime.UnixNano() { + log.Printf("a2") return false } if e1.FileSize != e2.FileSize { - return false - } - - if e1.Name != e2.Name { + log.Printf("a3") return false } if e1.UserID != e2.UserID { + log.Printf("a4") return false } if e1.GroupID != e2.GroupID { + log.Printf("a5") return false } diff --git a/fs/hashcache.go b/fs/hashcache.go new file mode 100644 index 000000000..df100f6be --- /dev/null +++ b/fs/hashcache.go @@ -0,0 +1,51 @@ +package fs + +import "io" + +type hashcacheReader struct { + reader *directoryReader + nextEntry *Entry + skippedCount int +} + +func (hcr *hashcacheReader) Open(dr *directoryReader) { + hcr.reader = dr + hcr.nextEntry = nil + hcr.readahead() +} + +func (hcr *hashcacheReader) GetEntry(relativeName string) *Entry { + //log.Printf("looking for %v", relativeName) + for hcr.nextEntry != nil && isLess(hcr.nextEntry.Name, relativeName) { + hcr.skippedCount++ + hcr.readahead() + } + + if hcr.nextEntry != nil && relativeName == 
hcr.nextEntry.Name { + //log.Printf("*** found hashcache entry: %v", relativeName) + e := hcr.nextEntry + hcr.nextEntry = nil + hcr.readahead() + return e + } + + // if hcr.reader != nil { + // log.Printf("*** not found hashcache entry: %v", relativeName) + // } + + return nil +} + +func (hcr *hashcacheReader) SkippedCount() int { + return hcr.skippedCount +} + +func (hcr *hashcacheReader) readahead() { + if hcr.reader != nil { + next, err := hcr.reader.ReadNext() + hcr.nextEntry = next + if err == io.EOF { + hcr.reader = nil + } + } +} diff --git a/fs/lister.go b/fs/lister.go index 373c06721..fb26f0373 100644 --- a/fs/lister.go +++ b/fs/lister.go @@ -53,7 +53,7 @@ func (d *filesystemLister) List(path string) (Directory, error) { return nil, err } - sort.Sort(sortedDirectory(dir)) + sort.Sort(dir) return dir, nil } diff --git a/fs/lister_test.go b/fs/lister_test.go index 8abdec724..a2fe7783d 100644 --- a/fs/lister_test.go +++ b/fs/lister_test.go @@ -57,19 +57,19 @@ func TestLister(t *testing.T) { goodCount := 0 // Directories are first. 
- if dir[0].Name == "y" && dir[0].FileSize == 0 && dir[0].FileMode.IsDir() { + if dir[0].Name == "f1" && dir[0].FileSize == 5 && dir[0].FileMode.IsRegular() { goodCount++ } - if dir[1].Name == "z" && dir[1].FileSize == 0 && dir[1].FileMode.IsDir() { + if dir[1].Name == "f2" && dir[1].FileSize == 4 && dir[1].FileMode.IsRegular() { goodCount++ } - if dir[2].Name == "f1" && dir[2].FileSize == 5 && dir[2].FileMode.IsRegular() { + if dir[2].Name == "f3" && dir[2].FileSize == 3 && dir[2].FileMode.IsRegular() { goodCount++ } - if dir[3].Name == "f2" && dir[3].FileSize == 4 && dir[3].FileMode.IsRegular() { + if dir[3].Name == "y" && dir[3].FileSize == 0 && dir[3].FileMode.IsDir() { goodCount++ } - if dir[4].Name == "f3" && dir[4].FileSize == 3 && dir[4].FileMode.IsRegular() { + if dir[4].Name == "z" && dir[4].FileSize == 0 && dir[4].FileMode.IsDir() { goodCount++ } if goodCount != 5 { diff --git a/fs/upload.go b/fs/upload.go index 843096713..79b3814ce 100644 --- a/fs/upload.go +++ b/fs/upload.go @@ -4,6 +4,7 @@ "errors" "fmt" "io" + "log" "os" "path/filepath" "sync/atomic" @@ -59,11 +60,15 @@ func (u *uploader) UploadFile(path string) (cas.ObjectID, error) { } func (u *uploader) UploadDir(path string, previous cas.ObjectID) (cas.ObjectID, cas.ObjectID, error) { - var cached Directory + //log.Printf("UploadDir", path) + //defer log.Printf("finishing UploadDir", path) + var hcr hashcacheReader if previous != "" { if r, err := u.mgr.Open(previous); err == nil { - cached, _ = ReadDirectory(r, "") + if dr, err := newDirectoryReader(r); err == nil { + hcr.Open(dr) + } } } @@ -72,7 +77,7 @@ func (u *uploader) UploadDir(path string, previous cas.ObjectID) (cas.ObjectID, cas.WithBlockNamePrefix("H"), ) dw := newDirectoryWriter(manifestWriter) - oid, err := u.uploadDirInternal(path, dw, previous, cached) + oid, _, err := u.uploadDirInternal(path, ".", dw, &hcr) if err != nil { dw.Close() return oid, cas.NullObjectID, err @@ -87,14 +92,13 @@ func (u *uploader) UploadDir(path string, 
previous cas.ObjectID) (cas.ObjectID, return oid, manifestOid, nil } -func (u *uploader) uploadDirInternal(path string, manifest *directoryWriter, previous cas.ObjectID, previousDir Directory) (cas.ObjectID, error) { - if u.isCancelled() { - return previous, ErrUploadCancelled - } +func (u *uploader) uploadDirInternal(path string, relativePath string, hcw *directoryWriter, hcr *hashcacheReader) (cas.ObjectID, bool, error) { + //log.Printf("entering %v", path) + //defer log.Printf("exiting %v", path) dir, err := u.lister.List(path) if err != nil { - return cas.NullObjectID, err + return cas.NullObjectID, false, err } writer := u.mgr.NewWriter( @@ -105,60 +109,70 @@ func (u *uploader) uploadDirInternal(path string, manifest *directoryWriter, pre dw := newDirectoryWriter(writer) defer writer.Close() - directoryMatchesCache := len(previousDir) == len(dir) + allCached := true + s0 := hcr.SkippedCount() for _, e := range dir { fullPath := filepath.Join(path, e.Name) - - // See if we had this name during previous pass. - cachedEntry := previousDir.FindByName(e.IsDir(), e.Name) - - // ... and whether file metadata is identical to the previous one. - cachedMetadataMatches := metadataEquals(e, cachedEntry) - - // If not, directoryMatchesCache becomes false. - directoryMatchesCache = directoryMatchesCache && cachedMetadataMatches + entryRelativePath := relativePath + "/" + e.Name var oid cas.ObjectID + var cached bool if e.IsDir() { - var previousSubdirObjectID cas.ObjectID - if cachedEntry != nil { - previousSubdirObjectID = cachedEntry.ObjectID - } - - oid, err = u.uploadDirInternal(fullPath, manifest, previousSubdirObjectID, nil) + oid, cached, err = u.uploadDirInternal(fullPath, entryRelativePath, hcw, hcr) if err != nil { - return cas.NullObjectID, err + return cas.NullObjectID, false, err } - - if cachedEntry != nil && oid != cachedEntry.ObjectID { - directoryMatchesCache = false - } - } else if cachedMetadataMatches { - // Avoid hashing by reusing previous object ID. 
- oid = cachedEntry.ObjectID } else { - oid, err = u.UploadFile(fullPath) - if err != nil { - return cas.NullObjectID, fmt.Errorf("unable to hash file: %s", err) + // See if we had this name during previous pass. + cachedEntry := hcr.GetEntry(entryRelativePath) + // ... and whether file metadata is identical to the previous one. + cached = metadataEquals(e, cachedEntry) + + if cached { + // Avoid hashing by reusing previous object ID. + oid = cachedEntry.ObjectID + } else { + oid, err = u.UploadFile(fullPath) + if err != nil { + return cas.NullObjectID, false, fmt.Errorf("unable to hash file: %s", err) + } } } - + allCached = allCached && cached e.ObjectID = oid - dw.WriteEntry(e) - manifest.WriteEntry(e) + + if err := dw.WriteEntry(e); err != nil { + return cas.NullObjectID, false, err + } + + if e.IsDir() { + e.Name = entryRelativePath + "/" + } else { + e.Name = entryRelativePath + } + if err := hcw.WriteEntry(e); err != nil { + return cas.NullObjectID, false, err + } } + dirEntry := hcr.GetEntry(relativePath + "/") + s1 := hcr.SkippedCount() + if s0 != s1 { + allCached = false + } + + log.Printf("allCached: %v %v", relativePath, allCached) + var oid cas.ObjectID - if directoryMatchesCache && previous != "" { + if allCached && dirEntry != nil { // Avoid hashing directory listingif every entry matched the previous (possibly ignoring ordering). - oid, err = previous, nil + return dirEntry.ObjectID, true, nil } else { oid, err = writer.Result(true) + return oid, false, err } - - return oid, err } func (u *uploader) Cancel() {