mirror of
https://github.com/navidrome/navidrome.git
synced 2025-12-23 23:18:05 -05:00
* feat: Add selective folder scanning capability Implement targeted scanning of specific library/folder pairs without full recursion. This enables efficient rescanning of individual folders when changes are detected, significantly reducing scan time for large libraries. Key changes: - Add ScanTarget struct and ScanFolders API to Scanner interface - Implement CLI flag --targets for specifying libraryID:folderPath pairs - Add FolderRepository.GetByPaths() for batch folder info retrieval - Create loadSpecificFolders() for non-recursive directory loading - Scope GC operations to affected libraries only (with TODO for full impl) - Add comprehensive tests for selective scanning behavior The selective scan: - Only processes specified folders (no subdirectory recursion) - Maintains library isolation - Runs full maintenance pipeline scoped to affected libraries - Supports both full and quick scan modes Examples: navidrome scan --targets "1:Music/Rock,1:Music/Jazz" navidrome scan --full --targets "2:Classical" * feat(folder): replace GetByPaths with GetFolderUpdateInfo for improved folder updates retrieval Signed-off-by: Deluan <deluan@navidrome.org> * test: update parseTargets test to handle folder names with spaces Signed-off-by: Deluan <deluan@navidrome.org> * refactor(folder): remove unused LibraryPath struct and update GC logging message Signed-off-by: Deluan <deluan@navidrome.org> * refactor(folder): enhance external scanner to support target-specific scanning Signed-off-by: Deluan <deluan@navidrome.org> * refactor(scanner): simplify scanner methods Signed-off-by: Deluan <deluan@navidrome.org> * feat(watcher): implement folder scanning notifications with deduplication Signed-off-by: Deluan <deluan@navidrome.org> * refactor(watcher): add resolveFolderPath function for testability Signed-off-by: Deluan <deluan@navidrome.org> * feat(watcher): implement path ignoring based on .ndignore patterns Signed-off-by: Deluan <deluan@navidrome.org> * refactor(scanner): implement 
IgnoreChecker for managing .ndignore patterns Signed-off-by: Deluan <deluan@navidrome.org> * refactor(ignore_checker): rename scanner to lineScanner for clarity Signed-off-by: Deluan <deluan@navidrome.org> * refactor(scanner): enhance ScanTarget struct with String method for better target representation Signed-off-by: Deluan <deluan@navidrome.org> * fix(scanner): validate library ID to prevent negative values Signed-off-by: Deluan <deluan@navidrome.org> * refactor(scanner): simplify GC method by removing library ID parameter Signed-off-by: Deluan <deluan@navidrome.org> * feat(scanner): update folder scanning to include all descendants of specified folders Signed-off-by: Deluan <deluan@navidrome.org> * feat(subsonic): allow selective scan in the /startScan endpoint Signed-off-by: Deluan <deluan@navidrome.org> * refactor(scanner): update CallScan to handle specific library/folder pairs Signed-off-by: Deluan <deluan@navidrome.org> * refactor(scanner): streamline scanning logic by removing scanAll method Signed-off-by: Deluan <deluan@navidrome.org> * test: enhance mockScanner for thread safety and improve test reliability Signed-off-by: Deluan <deluan@navidrome.org> * refactor(scanner): move scanner.ScanTarget to model.ScanTarget Signed-off-by: Deluan <deluan@navidrome.org> * refactor: move scanner types to model,implement MockScanner Signed-off-by: Deluan <deluan@navidrome.org> * refactor(scanner): update scanner interface and implementations to use model.Scanner Signed-off-by: Deluan <deluan@navidrome.org> * refactor(folder_repository): normalize target path handling by using filepath.Clean Signed-off-by: Deluan <deluan@navidrome.org> * test(folder_repository): add comprehensive tests for folder retrieval and child exclusion Signed-off-by: Deluan <deluan@navidrome.org> * refactor(scanner): simplify selective scan logic using slice.Filter Signed-off-by: Deluan <deluan@navidrome.org> * refactor(scanner): streamline phase folder and album creation by removing 
unnecessary library parameter Signed-off-by: Deluan <deluan@navidrome.org> * refactor(scanner): move initialization logic from phase_1 to the scanner itself Signed-off-by: Deluan <deluan@navidrome.org> * refactor(tests): rename selective scan test file to scanner_selective_test.go Signed-off-by: Deluan <deluan@navidrome.org> * feat(configuration): add DevSelectiveWatcher configuration option Signed-off-by: Deluan <deluan@navidrome.org> * feat(watcher): enhance .ndignore handling for folder deletions and file changes Signed-off-by: Deluan <deluan@navidrome.org> * docs(scanner): comments Signed-off-by: Deluan <deluan@navidrome.org> * refactor(scanner): enhance walkDirTree to support target folder scanning Signed-off-by: Deluan <deluan@navidrome.org> * fix(scanner, watcher): handle errors when pushing ignore patterns for folders Signed-off-by: Deluan <deluan@navidrome.org> * Update scanner/phase_1_folders.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * refactor(scanner): replace parseTargets function with direct call to scanner.ParseTargets Signed-off-by: Deluan <deluan@navidrome.org> * test(scanner): add tests for ScanBegin and ScanEnd functionality Signed-off-by: Deluan <deluan@navidrome.org> * fix(library): update PRAGMA optimize to check table sizes without ANALYZE Signed-off-by: Deluan <deluan@navidrome.org> * test(scanner): refactor tests Signed-off-by: Deluan <deluan@navidrome.org> * feat(ui): add selective scan options and update translations Signed-off-by: Deluan <deluan@navidrome.org> * feat(ui): add quick and full scan options for individual libraries Signed-off-by: Deluan <deluan@navidrome.org> * feat(ui): add Scan buttonsto the LibraryList Signed-off-by: Deluan <deluan@navidrome.org> * feat(scan): update scanning parameters from 'path' to 'target' for selective scans. 
* refactor(scan): move ParseTargets function to model package * test(scan): suppress unused return value from SetUserLibraries in tests * feat(gc): enhance garbage collection to support selective library purging Signed-off-by: Deluan <deluan@navidrome.org> * fix(scanner): prevent race condition when scanning deleted folders When the watcher detects changes in a folder that gets deleted before the scanner runs (due to the 10-second delay), the scanner was prematurely removing these folders from the tracking map, preventing them from being marked as missing. The issue occurred because `newFolderEntry` was calling `popLastUpdate` before verifying the folder actually exists on the filesystem. Changes: - Move fs.Stat check before newFolderEntry creation in loadDir to ensure deleted folders remain in lastUpdates for finalize() to handle - Add early existence check in walkDirTree to skip non-existent target folders with a warning log - Add unit test verifying non-existent folders aren't removed from lastUpdates prematurely - Add integration test for deleted folder scenario with ScanFolders Fixes the issue where deleting entire folders (e.g., /music/AC_DC) wouldn't mark tracks as missing when using selective folder scanning. * refactor(scan): streamline folder entry creation and update handling Signed-off-by: Deluan <deluan@navidrome.org> * feat(scan): add '@Recycle' (QNAP) to ignored directories list Signed-off-by: Deluan <deluan@navidrome.org> * fix(log): improve thread safety in logging level management * test(scan): move unit tests for ParseTargets function Signed-off-by: Deluan <deluan@navidrome.org> * review Signed-off-by: Deluan <deluan@navidrome.org> --------- Signed-off-by: Deluan <deluan@navidrome.org> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: deluan <deluan.quintao@mechanical-orchard.com>
375 lines
12 KiB
Go
375 lines
12 KiB
Go
package scanner
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"maps"
|
|
"slices"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
ppl "github.com/google/go-pipeline/pkg/pipeline"
|
|
"github.com/navidrome/navidrome/conf"
|
|
"github.com/navidrome/navidrome/consts"
|
|
"github.com/navidrome/navidrome/core"
|
|
"github.com/navidrome/navidrome/core/artwork"
|
|
"github.com/navidrome/navidrome/db"
|
|
"github.com/navidrome/navidrome/log"
|
|
"github.com/navidrome/navidrome/model"
|
|
"github.com/navidrome/navidrome/utils/run"
|
|
"github.com/navidrome/navidrome/utils/slice"
|
|
)
|
|
|
|
type scannerImpl struct {
|
|
ds model.DataStore
|
|
cw artwork.CacheWarmer
|
|
pls core.Playlists
|
|
}
|
|
|
|
// scanState holds the state of an in-progress scan, to be passed to the various phases
|
|
type scanState struct {
|
|
progress chan<- *ProgressInfo
|
|
fullScan bool
|
|
changesDetected atomic.Bool
|
|
libraries model.Libraries // Store libraries list for consistency across phases
|
|
targets map[int][]string // Optional: map[libraryID][]folderPaths for selective scans
|
|
}
|
|
|
|
func (s *scanState) sendProgress(info *ProgressInfo) {
|
|
if s.progress != nil {
|
|
s.progress <- info
|
|
}
|
|
}
|
|
|
|
func (s *scanState) isSelectiveScan() bool {
|
|
return len(s.targets) > 0
|
|
}
|
|
|
|
func (s *scanState) sendWarning(msg string) {
|
|
s.sendProgress(&ProgressInfo{Warning: msg})
|
|
}
|
|
|
|
func (s *scanState) sendError(err error) {
|
|
s.sendProgress(&ProgressInfo{Error: err.Error()})
|
|
}
|
|
|
|
func (s *scannerImpl) scanFolders(ctx context.Context, fullScan bool, targets []model.ScanTarget, progress chan<- *ProgressInfo) {
|
|
startTime := time.Now()
|
|
|
|
state := scanState{
|
|
progress: progress,
|
|
fullScan: fullScan,
|
|
changesDetected: atomic.Bool{},
|
|
}
|
|
|
|
// Set changesDetected to true for full scans to ensure all maintenance operations run
|
|
if fullScan {
|
|
state.changesDetected.Store(true)
|
|
}
|
|
|
|
// Get libraries and optionally filter by targets
|
|
allLibs, err := s.ds.Library(ctx).GetAll()
|
|
if err != nil {
|
|
state.sendWarning(fmt.Sprintf("getting libraries: %s", err))
|
|
return
|
|
}
|
|
|
|
if len(targets) > 0 {
|
|
// Selective scan: filter libraries and build targets map
|
|
state.targets = make(map[int][]string)
|
|
|
|
for _, target := range targets {
|
|
folderPath := target.FolderPath
|
|
if folderPath == "" {
|
|
folderPath = "."
|
|
}
|
|
state.targets[target.LibraryID] = append(state.targets[target.LibraryID], folderPath)
|
|
}
|
|
|
|
// Filter libraries to only those in targets
|
|
state.libraries = slice.Filter(allLibs, func(lib model.Library) bool {
|
|
return len(state.targets[lib.ID]) > 0
|
|
})
|
|
|
|
log.Info(ctx, "Scanner: Starting selective scan", "fullScan", state.fullScan, "numLibraries", len(state.libraries), "numTargets", len(targets))
|
|
} else {
|
|
// Full library scan
|
|
state.libraries = allLibs
|
|
log.Info(ctx, "Scanner: Starting scan", "fullScan", state.fullScan, "numLibraries", len(state.libraries))
|
|
}
|
|
|
|
// Store scan type and start time
|
|
scanType := "quick"
|
|
if state.fullScan {
|
|
scanType = "full"
|
|
}
|
|
if state.isSelectiveScan() {
|
|
scanType += "-selective"
|
|
}
|
|
_ = s.ds.Property(ctx).Put(consts.LastScanTypeKey, scanType)
|
|
_ = s.ds.Property(ctx).Put(consts.LastScanStartTimeKey, startTime.Format(time.RFC3339))
|
|
|
|
// if there was a full scan in progress, force a full scan
|
|
if !state.fullScan {
|
|
for _, lib := range state.libraries {
|
|
if lib.FullScanInProgress {
|
|
log.Info(ctx, "Scanner: Interrupted full scan detected", "lib", lib.Name)
|
|
state.fullScan = true
|
|
if state.isSelectiveScan() {
|
|
_ = s.ds.Property(ctx).Put(consts.LastScanTypeKey, "full-selective")
|
|
} else {
|
|
_ = s.ds.Property(ctx).Put(consts.LastScanTypeKey, "full")
|
|
}
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
// Prepare libraries for scanning (initialize LastScanStartedAt if needed)
|
|
err = s.prepareLibrariesForScan(ctx, &state)
|
|
if err != nil {
|
|
log.Error(ctx, "Scanner: Error preparing libraries for scan", err)
|
|
state.sendError(err)
|
|
return
|
|
}
|
|
|
|
err = run.Sequentially(
|
|
// Phase 1: Scan all libraries and import new/updated files
|
|
runPhase[*folderEntry](ctx, 1, createPhaseFolders(ctx, &state, s.ds, s.cw)),
|
|
|
|
// Phase 2: Process missing files, checking for moves
|
|
runPhase[*missingTracks](ctx, 2, createPhaseMissingTracks(ctx, &state, s.ds)),
|
|
|
|
// Phases 3 and 4 can be run in parallel
|
|
run.Parallel(
|
|
// Phase 3: Refresh all new/changed albums and update artists
|
|
runPhase[*model.Album](ctx, 3, createPhaseRefreshAlbums(ctx, &state, s.ds)),
|
|
|
|
// Phase 4: Import/update playlists
|
|
runPhase[*model.Folder](ctx, 4, createPhasePlaylists(ctx, &state, s.ds, s.pls, s.cw)),
|
|
),
|
|
|
|
// Final Steps (cannot be parallelized):
|
|
|
|
// Run GC if there were any changes (Remove dangling tracks, empty albums and artists, and orphan annotations)
|
|
s.runGC(ctx, &state),
|
|
|
|
// Refresh artist and tags stats
|
|
s.runRefreshStats(ctx, &state),
|
|
|
|
// Update last_scan_completed_at for all libraries
|
|
s.runUpdateLibraries(ctx, &state),
|
|
|
|
// Optimize DB
|
|
s.runOptimize(ctx),
|
|
)
|
|
if err != nil {
|
|
log.Error(ctx, "Scanner: Finished with error", "duration", time.Since(startTime), err)
|
|
_ = s.ds.Property(ctx).Put(consts.LastScanErrorKey, err.Error())
|
|
state.sendError(err)
|
|
return
|
|
}
|
|
|
|
_ = s.ds.Property(ctx).Put(consts.LastScanErrorKey, "")
|
|
|
|
if state.changesDetected.Load() {
|
|
state.sendProgress(&ProgressInfo{ChangesDetected: true})
|
|
}
|
|
|
|
if state.isSelectiveScan() {
|
|
log.Info(ctx, "Scanner: Finished scanning selected folders", "duration", time.Since(startTime), "numTargets", len(targets))
|
|
} else {
|
|
log.Info(ctx, "Scanner: Finished scanning all libraries", "duration", time.Since(startTime))
|
|
}
|
|
}
|
|
|
|
// prepareLibrariesForScan initializes the scan for all libraries in the state.
|
|
// It calls ScanBegin for libraries that haven't started scanning yet (LastScanStartedAt is zero),
|
|
// reloads them to get the updated state, and filters out any libraries that fail to initialize.
|
|
func (s *scannerImpl) prepareLibrariesForScan(ctx context.Context, state *scanState) error {
|
|
var successfulLibs []model.Library
|
|
|
|
for _, lib := range state.libraries {
|
|
if lib.LastScanStartedAt.IsZero() {
|
|
// This is a new scan - mark it as started
|
|
err := s.ds.Library(ctx).ScanBegin(lib.ID, state.fullScan)
|
|
if err != nil {
|
|
log.Error(ctx, "Scanner: Error marking scan start", "lib", lib.Name, err)
|
|
state.sendWarning(err.Error())
|
|
continue
|
|
}
|
|
|
|
// Reload library to get updated state (timestamps, etc.)
|
|
reloadedLib, err := s.ds.Library(ctx).Get(lib.ID)
|
|
if err != nil {
|
|
log.Error(ctx, "Scanner: Error reloading library", "lib", lib.Name, err)
|
|
state.sendWarning(err.Error())
|
|
continue
|
|
}
|
|
lib = *reloadedLib
|
|
} else {
|
|
// This is a resumed scan
|
|
log.Debug(ctx, "Scanner: Resuming previous scan", "lib", lib.Name,
|
|
"lastScanStartedAt", lib.LastScanStartedAt, "fullScan", lib.FullScanInProgress)
|
|
}
|
|
|
|
successfulLibs = append(successfulLibs, lib)
|
|
}
|
|
|
|
if len(successfulLibs) == 0 {
|
|
return fmt.Errorf("no libraries available for scanning")
|
|
}
|
|
|
|
// Update state with only successfully initialized libraries
|
|
state.libraries = successfulLibs
|
|
return nil
|
|
}
|
|
|
|
func (s *scannerImpl) runGC(ctx context.Context, state *scanState) func() error {
|
|
return func() error {
|
|
state.sendProgress(&ProgressInfo{ForceUpdate: true})
|
|
return s.ds.WithTx(func(tx model.DataStore) error {
|
|
if state.changesDetected.Load() {
|
|
start := time.Now()
|
|
|
|
// For selective scans, extract library IDs to scope GC operations
|
|
var libraryIDs []int
|
|
if state.isSelectiveScan() {
|
|
libraryIDs = slices.Collect(maps.Keys(state.targets))
|
|
log.Debug(ctx, "Scanner: Running selective GC", "libraryIDs", libraryIDs)
|
|
}
|
|
|
|
err := tx.GC(ctx, libraryIDs...)
|
|
if err != nil {
|
|
log.Error(ctx, "Scanner: Error running GC", err)
|
|
return fmt.Errorf("running GC: %w", err)
|
|
}
|
|
log.Debug(ctx, "Scanner: GC completed", "elapsed", time.Since(start))
|
|
} else {
|
|
log.Debug(ctx, "Scanner: No changes detected, skipping GC")
|
|
}
|
|
return nil
|
|
}, "scanner: GC")
|
|
}
|
|
}
|
|
|
|
func (s *scannerImpl) runRefreshStats(ctx context.Context, state *scanState) func() error {
|
|
return func() error {
|
|
if !state.changesDetected.Load() {
|
|
log.Debug(ctx, "Scanner: No changes detected, skipping refreshing stats")
|
|
return nil
|
|
}
|
|
start := time.Now()
|
|
stats, err := s.ds.Artist(ctx).RefreshStats(state.fullScan)
|
|
if err != nil {
|
|
log.Error(ctx, "Scanner: Error refreshing artists stats", err)
|
|
return fmt.Errorf("refreshing artists stats: %w", err)
|
|
}
|
|
log.Debug(ctx, "Scanner: Refreshed artist stats", "stats", stats, "elapsed", time.Since(start))
|
|
|
|
start = time.Now()
|
|
err = s.ds.Tag(ctx).UpdateCounts()
|
|
if err != nil {
|
|
log.Error(ctx, "Scanner: Error updating tag counts", err)
|
|
return fmt.Errorf("updating tag counts: %w", err)
|
|
}
|
|
log.Debug(ctx, "Scanner: Updated tag counts", "elapsed", time.Since(start))
|
|
return nil
|
|
}
|
|
}
|
|
|
|
func (s *scannerImpl) runOptimize(ctx context.Context) func() error {
|
|
return func() error {
|
|
start := time.Now()
|
|
db.Optimize(ctx)
|
|
log.Debug(ctx, "Scanner: Optimized DB", "elapsed", time.Since(start))
|
|
return nil
|
|
}
|
|
}
|
|
|
|
func (s *scannerImpl) runUpdateLibraries(ctx context.Context, state *scanState) func() error {
|
|
return func() error {
|
|
start := time.Now()
|
|
return s.ds.WithTx(func(tx model.DataStore) error {
|
|
for _, lib := range state.libraries {
|
|
err := tx.Library(ctx).ScanEnd(lib.ID)
|
|
if err != nil {
|
|
log.Error(ctx, "Scanner: Error updating last scan completed", "lib", lib.Name, err)
|
|
return fmt.Errorf("updating last scan completed: %w", err)
|
|
}
|
|
err = tx.Property(ctx).Put(consts.PIDTrackKey, conf.Server.PID.Track)
|
|
if err != nil {
|
|
log.Error(ctx, "Scanner: Error updating track PID conf", err)
|
|
return fmt.Errorf("updating track PID conf: %w", err)
|
|
}
|
|
err = tx.Property(ctx).Put(consts.PIDAlbumKey, conf.Server.PID.Album)
|
|
if err != nil {
|
|
log.Error(ctx, "Scanner: Error updating album PID conf", err)
|
|
return fmt.Errorf("updating album PID conf: %w", err)
|
|
}
|
|
if state.changesDetected.Load() {
|
|
log.Debug(ctx, "Scanner: Refreshing library stats", "lib", lib.Name)
|
|
if err := tx.Library(ctx).RefreshStats(lib.ID); err != nil {
|
|
log.Error(ctx, "Scanner: Error refreshing library stats", "lib", lib.Name, err)
|
|
return fmt.Errorf("refreshing library stats: %w", err)
|
|
}
|
|
} else {
|
|
log.Debug(ctx, "Scanner: No changes detected, skipping library stats refresh", "lib", lib.Name)
|
|
}
|
|
}
|
|
log.Debug(ctx, "Scanner: Updated libraries after scan", "elapsed", time.Since(start), "numLibraries", len(state.libraries))
|
|
return nil
|
|
}, "scanner: update libraries")
|
|
}
|
|
}
|
|
|
|
type phase[T any] interface {
|
|
producer() ppl.Producer[T]
|
|
stages() []ppl.Stage[T]
|
|
finalize(error) error
|
|
description() string
|
|
}
|
|
|
|
func runPhase[T any](ctx context.Context, phaseNum int, phase phase[T]) func() error {
|
|
return func() error {
|
|
log.Debug(ctx, fmt.Sprintf("Scanner: Starting phase %d: %s", phaseNum, phase.description()))
|
|
start := time.Now()
|
|
|
|
producer := phase.producer()
|
|
stages := phase.stages()
|
|
|
|
// Prepend a counter stage to the phase's pipeline
|
|
counter, countStageFn := countTasks[T]()
|
|
stages = append([]ppl.Stage[T]{ppl.NewStage(countStageFn, ppl.Name("count tasks"))}, stages...)
|
|
|
|
var err error
|
|
if log.IsGreaterOrEqualTo(log.LevelDebug) {
|
|
var m *ppl.Metrics
|
|
m, err = ppl.Measure(producer, stages...)
|
|
log.Info(ctx, "Scanner: "+m.String(), err)
|
|
} else {
|
|
err = ppl.Do(producer, stages...)
|
|
}
|
|
|
|
err = phase.finalize(err)
|
|
|
|
if err != nil {
|
|
log.Error(ctx, fmt.Sprintf("Scanner: Error processing libraries in phase %d", phaseNum), "elapsed", time.Since(start), err)
|
|
} else {
|
|
log.Debug(ctx, fmt.Sprintf("Scanner: Finished phase %d", phaseNum), "elapsed", time.Since(start), "totalTasks", counter.Load())
|
|
}
|
|
|
|
return err
|
|
}
|
|
}
|
|
|
|
// countTasks returns a fresh counter together with a pass-through function
// that increments the counter on every call and returns its input unchanged,
// with a nil error.
func countTasks[T any]() (*atomic.Int64, func(T) (T, error)) {
	total := new(atomic.Int64)
	passThrough := func(item T) (T, error) {
		total.Add(1)
		return item, nil
	}
	return total, passThrough
}
|
|
|
|
// Compile-time check that *scannerImpl satisfies the scanner interface.
var (
	_ scanner = (*scannerImpl)(nil)
)
|