From 515d2ed7aea870cdbb083f2bbdd51831d134f198 Mon Sep 17 00:00:00 2001 From: Gilles Chehade Date: Mon, 11 Nov 2024 23:10:52 +0100 Subject: [PATCH] add vfs cardinality --- snapshot/backup.go | 53 ++++++++++++++++++++++++++++++++++++++++++- snapshot/vfs/types.go | 2 ++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/snapshot/backup.go b/snapshot/backup.go index 6b6ca416..3d9d6d41 100644 --- a/snapshot/backup.go +++ b/snapshot/backup.go @@ -1,6 +1,8 @@ package snapshot import ( + "bytes" + "encoding/binary" "fmt" "io" "math" @@ -115,6 +117,18 @@ func (cache *scanCache) RecordChecksum(pathname string, checksum [32]byte) error return cache.db.Put([]byte(fmt.Sprintf("__checksum__:%s", pathname)), checksum[:], nil) } +func (cache *scanCache) RecordCardinality(pathname string, files uint64, dirs uint64) error { + pathname = strings.TrimSuffix(pathname, "/") + if pathname == "" { + pathname = "/" + } + + buffer := bytes.NewBuffer(make([]byte, 0, 16)) + binary.Write(buffer, binary.LittleEndian, files) + binary.Write(buffer, binary.LittleEndian, dirs) + return cache.db.Put([]byte(fmt.Sprintf("__cardinality__:%s", pathname)), buffer.Bytes(), nil) +} + func (cache *scanCache) GetChecksum(pathname string) ([32]byte, error) { data, err := cache.db.Get([]byte(fmt.Sprintf("__checksum__:%s", pathname)), nil) if err != nil { @@ -130,6 +144,24 @@ func (cache *scanCache) GetChecksum(pathname string) ([32]byte, error) { return ret, nil } +func (cache *scanCache) GetCardinality(pathname string) (uint64, uint64, error) { + data, err := cache.db.Get([]byte(fmt.Sprintf("__cardinality__:%s", pathname)), nil) + if err != nil { + return 0, 0, err + } + + if len(data) != 16 { + return 0, 0, fmt.Errorf("invalid cardinality length: %d", len(data)) + } + + buffer := bytes.NewReader(data) + var files uint64 + var dirs uint64 + binary.Read(buffer, binary.LittleEndian, &files) + binary.Read(buffer, binary.LittleEndian, &dirs) + return files, dirs, nil +} + func (cache *scanCache) 
EnumerateKeysWithPrefixReverse(prefix string, isDirectory bool) (<-chan importer.ScanRecord, error) { // Create a channel to return the keys keyChan := make(chan importer.ScanRecord) @@ -534,10 +566,24 @@ func (snap *Snapshot) Backup(scanDir string, options *PushOptions) error { dirEntry := vfs.NewDirectoryEntry(filepath.Dir(record.Pathname), &record) for _, child := range record.Children { - value, err := sc.GetChecksum(filepath.Join(record.Pathname, child.Name())) + childpath := filepath.Join(record.Pathname, child.Name()) + value, err := sc.GetChecksum(childpath) if err != nil { continue } + + if child.IsDir() { + dirEntry.DirCardinality++ + + files, dirs, err := sc.GetCardinality(childpath) + if err != nil { + continue + } + dirEntry.FileCardinality += files + dirEntry.DirCardinality += dirs + } else { + dirEntry.FileCardinality++ + } dirEntry.AddChild(value, child) } @@ -558,6 +604,11 @@ func (snap *Snapshot) Backup(scanDir string, options *PushOptions) error { if err != nil { return err } + err = sc.RecordCardinality(record.Pathname, dirEntry.FileCardinality, dirEntry.DirCardinality) + if err != nil { + return err + } + atomic.AddUint64(&snap.statistics.VFSDirectoriesCount, 1) atomic.AddUint64(&snap.statistics.VFSDirectoriesSize, dirEntrySize) snap.Event(events.DirectoryOKEvent(snap.Header.SnapshotID, record.Pathname)) diff --git a/snapshot/vfs/types.go b/snapshot/vfs/types.go index f5026936..4aa75c2d 100644 --- a/snapshot/vfs/types.go +++ b/snapshot/vfs/types.go @@ -195,6 +195,8 @@ type DirEntry struct { CustomMetadata []CustomMetadata `msgpack:"customMetadata,omitempty"` // Custom key-value metadata defined by the user (optional) Tags []string `msgpack:"tags,omitempty"` // List of tags associated with the directory (optional) ParentPath string `msgpack:"parentPath,omitempty"` // Path to the parent directory (optional) + FileCardinality uint64 `msgpack:"fileCardinality,omitempty"` // Number of files in the directory + DirCardinality uint64 
`msgpack:"dirCardinality,omitempty"` // Number of subdirectories beneath the directory (cumulative: includes the subdirectories of nested directories) } func (*DirEntry) fsEntry() {}