Files
navidrome/scanner/scanner.go
Deluan Quintão 28d5299ffc feat(scanner): implement selective folder scanning and file system watcher improvements (#4674)
* feat: Add selective folder scanning capability

Implement targeted scanning of specific library/folder pairs without
full recursion. This enables efficient rescanning of individual folders
when changes are detected, significantly reducing scan time for large
libraries.

Key changes:
- Add ScanTarget struct and ScanFolders API to Scanner interface
- Implement CLI flag --targets for specifying libraryID:folderPath pairs
- Add FolderRepository.GetByPaths() for batch folder info retrieval
- Create loadSpecificFolders() for non-recursive directory loading
- Scope GC operations to affected libraries only (with TODO for full impl)
- Add comprehensive tests for selective scanning behavior

The selective scan:
- Only processes specified folders (no subdirectory recursion)
- Maintains library isolation
- Runs full maintenance pipeline scoped to affected libraries
- Supports both full and quick scan modes

Examples:
  navidrome scan --targets "1:Music/Rock,1:Music/Jazz"
  navidrome scan --full --targets "2:Classical"

* feat(folder): replace GetByPaths with GetFolderUpdateInfo for improved folder updates retrieval

Signed-off-by: Deluan <deluan@navidrome.org>

* test: update parseTargets test to handle folder names with spaces

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(folder): remove unused LibraryPath struct and update GC logging message

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(folder): enhance external scanner to support target-specific scanning

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): simplify scanner methods

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(watcher): implement folder scanning notifications with deduplication

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(watcher): add resolveFolderPath function for testability

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(watcher): implement path ignoring based on .ndignore patterns

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): implement IgnoreChecker for managing .ndignore patterns

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(ignore_checker): rename scanner to lineScanner for clarity

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): enhance ScanTarget struct with String method for better target representation

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(scanner): validate library ID to prevent negative values

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): simplify GC method by removing library ID parameter

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(scanner): update folder scanning to include all descendants of specified folders

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): allow selective scan in the /startScan endpoint

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): update CallScan to handle specific library/folder pairs

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): streamline scanning logic by removing scanAll method

Signed-off-by: Deluan <deluan@navidrome.org>

* test: enhance mockScanner for thread safety and improve test reliability

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): move scanner.ScanTarget to model.ScanTarget

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor: move scanner types to model, implement MockScanner

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): update scanner interface and implementations to use model.Scanner

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(folder_repository): normalize target path handling by using filepath.Clean

Signed-off-by: Deluan <deluan@navidrome.org>

* test(folder_repository): add comprehensive tests for folder retrieval and child exclusion

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): simplify selective scan logic using slice.Filter

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): streamline phase folder and album creation by removing unnecessary library parameter

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): move initialization logic from phase_1 to the scanner itself

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(tests): rename selective scan test file to scanner_selective_test.go

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(configuration): add DevSelectiveWatcher configuration option

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(watcher): enhance .ndignore handling for folder deletions and file changes

Signed-off-by: Deluan <deluan@navidrome.org>

* docs(scanner): comments

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): enhance walkDirTree to support target folder scanning

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(scanner, watcher): handle errors when pushing ignore patterns for folders

Signed-off-by: Deluan <deluan@navidrome.org>

* Update scanner/phase_1_folders.go

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* refactor(scanner): replace parseTargets function with direct call to scanner.ParseTargets

Signed-off-by: Deluan <deluan@navidrome.org>

* test(scanner): add tests for ScanBegin and ScanEnd functionality

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(library): update PRAGMA optimize to check table sizes without ANALYZE

Signed-off-by: Deluan <deluan@navidrome.org>

* test(scanner): refactor tests

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(ui): add selective scan options and update translations

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(ui): add quick and full scan options for individual libraries

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(ui): add Scan buttons to the LibraryList

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(scan): update scanning parameters from 'path' to 'target' for selective scans.

* refactor(scan): move ParseTargets function to model package

* test(scan): suppress unused return value from SetUserLibraries in tests

* feat(gc): enhance garbage collection to support selective library purging

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(scanner): prevent race condition when scanning deleted folders

When the watcher detects changes in a folder that gets deleted before
the scanner runs (due to the 10-second delay), the scanner was
prematurely removing these folders from the tracking map, preventing
them from being marked as missing.

The issue occurred because `newFolderEntry` was calling `popLastUpdate`
before verifying the folder actually exists on the filesystem.

Changes:
- Move fs.Stat check before newFolderEntry creation in loadDir to
  ensure deleted folders remain in lastUpdates for finalize() to handle
- Add early existence check in walkDirTree to skip non-existent target
  folders with a warning log
- Add unit test verifying non-existent folders aren't removed from
  lastUpdates prematurely
- Add integration test for deleted folder scenario with ScanFolders

Fixes the issue where deleting entire folders (e.g., /music/AC_DC)
wouldn't mark tracks as missing when using selective folder scanning.

* refactor(scan): streamline folder entry creation and update handling

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(scan): add '@Recycle' (QNAP) to ignored directories list

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(log): improve thread safety in logging level management

* test(scan): move unit tests for ParseTargets function

Signed-off-by: Deluan <deluan@navidrome.org>

* review

Signed-off-by: Deluan <deluan@navidrome.org>

---------

Signed-off-by: Deluan <deluan@navidrome.org>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: deluan <deluan.quintao@mechanical-orchard.com>
2025-11-14 22:15:43 -05:00

375 lines
12 KiB
Go

package scanner
import (
"context"
"fmt"
"maps"
"slices"
"sync/atomic"
"time"
ppl "github.com/google/go-pipeline/pkg/pipeline"
"github.com/navidrome/navidrome/conf"
"github.com/navidrome/navidrome/consts"
"github.com/navidrome/navidrome/core"
"github.com/navidrome/navidrome/core/artwork"
"github.com/navidrome/navidrome/db"
"github.com/navidrome/navidrome/log"
"github.com/navidrome/navidrome/model"
"github.com/navidrome/navidrome/utils/run"
"github.com/navidrome/navidrome/utils/slice"
)
// scannerImpl is the concrete scanner. It holds the collaborators that
// scanFolders hands to the individual scan phases and maintenance steps.
type scannerImpl struct {
	ds  model.DataStore     // Data store used for library, property, artist and tag access, and for transactions
	cw  artwork.CacheWarmer // Passed to the folders and playlists phases
	pls core.Playlists      // Passed to the playlists phase
}
// scanState holds the state of an in-progress scan, to be passed to the various phases.
// It embeds an atomic.Bool, so it must be shared by pointer and never copied.
type scanState struct {
	progress        chan<- *ProgressInfo // Optional: where progress messages are sent; nil disables reporting
	fullScan        bool                 // True for a requested full scan, or when an interrupted full scan is detected
	changesDetected atomic.Bool          // Gates GC and the stats refreshes that run after the phases
	libraries       model.Libraries      // Store libraries list for consistency across phases
	targets         map[int][]string     // Optional: map[libraryID][]folderPaths for selective scans
}
// sendProgress forwards info to the progress channel. It does nothing when the
// scan was started without a progress channel.
func (s *scanState) sendProgress(info *ProgressInfo) {
	if s.progress == nil {
		return
	}
	s.progress <- info
}
// isSelectiveScan reports whether the scan is restricted to specific targets,
// i.e. whether at least one library has an entry in the targets map.
func (s *scanState) isSelectiveScan() bool {
	return len(s.targets) != 0
}
// sendWarning reports msg as a warning through the progress channel.
func (s *scanState) sendWarning(msg string) {
	warning := ProgressInfo{Warning: msg}
	s.sendProgress(&warning)
}
// sendError reports err through the progress channel, using its message as the
// Error field of the progress info. A nil err is ignored instead of panicking
// on err.Error().
func (s *scanState) sendError(err error) {
	if err == nil {
		return
	}
	s.sendProgress(&ProgressInfo{Error: err.Error()})
}
// scanFolders runs one scan and reports on the progress channel.
//
// When targets is empty, every library returned by the data store is scanned.
// Otherwise only the libraries referenced by targets are kept, and the target
// folder paths are stored in the scan state for the phases to use. A quick scan
// is upgraded to a full scan when any selected library has FullScanInProgress set.
//
// Nothing is returned: failures are logged and sent on the progress channel.
// An error from the phases or final steps is also stored under
// consts.LastScanErrorKey, which is reset to "" after a successful run.
func (s *scannerImpl) scanFolders(ctx context.Context, fullScan bool, targets []model.ScanTarget, progress chan<- *ProgressInfo) {
	startTime := time.Now()
	// changesDetected is left at its zero value (false).
	state := scanState{
		progress: progress,
		fullScan: fullScan,
	}

	// Set changesDetected to true for full scans to ensure all maintenance operations run
	if fullScan {
		state.changesDetected.Store(true)
	}

	// Get libraries and optionally filter by targets
	allLibs, err := s.ds.Library(ctx).GetAll()
	if err != nil {
		// Log the failure like every other abort path does; the progress message is unchanged.
		log.Error(ctx, "Scanner: Error getting libraries", err)
		state.sendWarning(fmt.Sprintf("getting libraries: %s", err))
		return
	}

	if len(targets) > 0 {
		// Selective scan: build the targets map and keep only the libraries it references
		state.targets = groupTargetsByLibrary(targets)
		state.libraries = slice.Filter(allLibs, func(lib model.Library) bool {
			return len(state.targets[lib.ID]) > 0
		})
		log.Info(ctx, "Scanner: Starting selective scan", "fullScan", state.fullScan, "numLibraries", len(state.libraries), "numTargets", len(targets))
	} else {
		// Full library scan
		state.libraries = allLibs
		log.Info(ctx, "Scanner: Starting scan", "fullScan", state.fullScan, "numLibraries", len(state.libraries))
	}

	// Store scan type and start time. These writes are best-effort bookkeeping:
	// their errors are deliberately ignored so they cannot abort the scan.
	_ = s.ds.Property(ctx).Put(consts.LastScanTypeKey, scanTypeLabel(state.fullScan, state.isSelectiveScan()))
	_ = s.ds.Property(ctx).Put(consts.LastScanStartTimeKey, startTime.Format(time.RFC3339))

	// if there was a full scan in progress, force a full scan
	// NOTE(review): changesDetected is not forced to true on this path, unlike an
	// explicitly requested full scan; confirm this is intended.
	if !state.fullScan {
		for _, lib := range state.libraries {
			if lib.FullScanInProgress {
				log.Info(ctx, "Scanner: Interrupted full scan detected", "lib", lib.Name)
				state.fullScan = true
				// Overwrite the scan type recorded above, now that the scan became a full one.
				_ = s.ds.Property(ctx).Put(consts.LastScanTypeKey, scanTypeLabel(true, state.isSelectiveScan()))
				break
			}
		}
	}

	// Prepare libraries for scanning (initialize LastScanStartedAt if needed)
	err = s.prepareLibrariesForScan(ctx, &state)
	if err != nil {
		log.Error(ctx, "Scanner: Error preparing libraries for scan", err)
		state.sendError(err)
		return
	}

	err = run.Sequentially(
		// Phase 1: Scan all libraries and import new/updated files
		runPhase[*folderEntry](ctx, 1, createPhaseFolders(ctx, &state, s.ds, s.cw)),

		// Phase 2: Process missing files, checking for moves
		runPhase[*missingTracks](ctx, 2, createPhaseMissingTracks(ctx, &state, s.ds)),

		// Phases 3 and 4 can be run in parallel
		run.Parallel(
			// Phase 3: Refresh all new/changed albums and update artists
			runPhase[*model.Album](ctx, 3, createPhaseRefreshAlbums(ctx, &state, s.ds)),

			// Phase 4: Import/update playlists
			runPhase[*model.Folder](ctx, 4, createPhasePlaylists(ctx, &state, s.ds, s.pls, s.cw)),
		),

		// Final Steps (cannot be parallelized):

		// Run GC if there were any changes (Remove dangling tracks, empty albums and artists, and orphan annotations)
		s.runGC(ctx, &state),

		// Refresh artist and tags stats
		s.runRefreshStats(ctx, &state),

		// Update last_scan_completed_at for all libraries
		s.runUpdateLibraries(ctx, &state),

		// Optimize DB
		s.runOptimize(ctx),
	)
	if err != nil {
		log.Error(ctx, "Scanner: Finished with error", "duration", time.Since(startTime), err)
		_ = s.ds.Property(ctx).Put(consts.LastScanErrorKey, err.Error())
		state.sendError(err)
		return
	}

	// Clear any error recorded by a previous scan (best-effort, like the writes above).
	_ = s.ds.Property(ctx).Put(consts.LastScanErrorKey, "")

	if state.changesDetected.Load() {
		state.sendProgress(&ProgressInfo{ChangesDetected: true})
	}

	if state.isSelectiveScan() {
		log.Info(ctx, "Scanner: Finished scanning selected folders", "duration", time.Since(startTime), "numTargets", len(targets))
	} else {
		log.Info(ctx, "Scanner: Finished scanning all libraries", "duration", time.Since(startTime))
	}
}

// groupTargetsByLibrary indexes the target folder paths by library ID. An empty
// folder path is recorded as ".".
func groupTargetsByLibrary(targets []model.ScanTarget) map[int][]string {
	grouped := make(map[int][]string)
	for _, target := range targets {
		folderPath := target.FolderPath
		if folderPath == "" {
			folderPath = "."
		}
		grouped[target.LibraryID] = append(grouped[target.LibraryID], folderPath)
	}
	return grouped
}

// scanTypeLabel builds the value stored under consts.LastScanTypeKey: "quick" or
// "full", with a "-selective" suffix for selective scans.
func scanTypeLabel(fullScan, selective bool) string {
	label := "quick"
	if fullScan {
		label = "full"
	}
	if selective {
		label += "-selective"
	}
	return label
}
// prepareLibrariesForScan gets every library in the state ready to be scanned.
// A library whose LastScanStartedAt is zero is marked as started through ScanBegin
// and then re-read, so the state carries its reloaded values. Any other library is
// treated as a resumed scan and kept as is. Libraries that fail either step are
// logged, reported as warnings and dropped from the state. An error is returned
// only when no library is left to scan.
func (s *scannerImpl) prepareLibrariesForScan(ctx context.Context, state *scanState) error {
	var ready []model.Library
	for _, lib := range state.libraries {
		if !lib.LastScanStartedAt.IsZero() {
			// A scan was already started for this library: resume it
			log.Debug(ctx, "Scanner: Resuming previous scan", "lib", lib.Name,
				"lastScanStartedAt", lib.LastScanStartedAt, "fullScan", lib.FullScanInProgress)
			ready = append(ready, lib)
			continue
		}

		// New scan for this library: record that it has started
		if err := s.ds.Library(ctx).ScanBegin(lib.ID, state.fullScan); err != nil {
			log.Error(ctx, "Scanner: Error marking scan start", "lib", lib.Name, err)
			state.sendWarning(err.Error())
			continue
		}

		// Read the library back so the state holds the values written by ScanBegin
		fresh, err := s.ds.Library(ctx).Get(lib.ID)
		if err != nil {
			log.Error(ctx, "Scanner: Error reloading library", "lib", lib.Name, err)
			state.sendWarning(err.Error())
			continue
		}
		ready = append(ready, *fresh)
	}

	if len(ready) == 0 {
		return fmt.Errorf("no libraries available for scanning")
	}

	// From here on the phases only see the libraries that were initialized
	state.libraries = ready
	return nil
}
// runGC returns the step that garbage-collects the database after the phases.
// The step first sends a ForceUpdate progress message, then, inside a
// transaction, runs GC only when changes were detected. For selective scans the
// GC call receives the IDs of the targeted libraries, in ascending order so the
// call and its debug log are deterministic; otherwise it receives no IDs.
func (s *scannerImpl) runGC(ctx context.Context, state *scanState) func() error {
	return func() error {
		state.sendProgress(&ProgressInfo{ForceUpdate: true})
		return s.ds.WithTx(func(tx model.DataStore) error {
			if !state.changesDetected.Load() {
				log.Debug(ctx, "Scanner: No changes detected, skipping GC")
				return nil
			}

			start := time.Now()

			// For selective scans, extract library IDs to scope GC operations.
			// Map iteration order is random, so the keys are sorted.
			var libraryIDs []int
			if state.isSelectiveScan() {
				libraryIDs = slices.Sorted(maps.Keys(state.targets))
				log.Debug(ctx, "Scanner: Running selective GC", "libraryIDs", libraryIDs)
			}

			if err := tx.GC(ctx, libraryIDs...); err != nil {
				log.Error(ctx, "Scanner: Error running GC", err)
				return fmt.Errorf("running GC: %w", err)
			}
			log.Debug(ctx, "Scanner: GC completed", "elapsed", time.Since(start))
			return nil
		}, "scanner: GC")
	}
}
// runRefreshStats returns the step that refreshes the artist stats and then
// updates the tag counts. The step is skipped when no changes were detected,
// and stops at the first failure, returning it wrapped.
func (s *scannerImpl) runRefreshStats(ctx context.Context, state *scanState) func() error {
	return func() error {
		if changed := state.changesDetected.Load(); !changed {
			log.Debug(ctx, "Scanner: No changes detected, skipping refreshing stats")
			return nil
		}

		// Artist stats
		artistsStart := time.Now()
		stats, err := s.ds.Artist(ctx).RefreshStats(state.fullScan)
		if err != nil {
			log.Error(ctx, "Scanner: Error refreshing artists stats", err)
			return fmt.Errorf("refreshing artists stats: %w", err)
		}
		log.Debug(ctx, "Scanner: Refreshed artist stats", "stats", stats, "elapsed", time.Since(artistsStart))

		// Tag counts
		tagsStart := time.Now()
		if err := s.ds.Tag(ctx).UpdateCounts(); err != nil {
			log.Error(ctx, "Scanner: Error updating tag counts", err)
			return fmt.Errorf("updating tag counts: %w", err)
		}
		log.Debug(ctx, "Scanner: Updated tag counts", "elapsed", time.Since(tagsStart))

		return nil
	}
}
// runOptimize returns the step that calls db.Optimize and logs how long it
// took. The step always returns nil.
func (s *scannerImpl) runOptimize(ctx context.Context) func() error {
	optimize := func() error {
		began := time.Now()
		db.Optimize(ctx)
		log.Debug(ctx, "Scanner: Optimized DB", "elapsed", time.Since(began))
		return nil
	}
	return optimize
}
// runUpdateLibraries returns the step that finalizes the scanned libraries.
// Inside one WithTx call it calls ScanEnd for every library in the state,
// refreshes that library's stats when changes were detected, and then stores
// the configured track and album PID settings. The first failure aborts the
// step and is returned wrapped.
func (s *scannerImpl) runUpdateLibraries(ctx context.Context, state *scanState) func() error {
	return func() error {
		start := time.Now()
		return s.ds.WithTx(func(tx model.DataStore) error {
			for _, lib := range state.libraries {
				if err := tx.Library(ctx).ScanEnd(lib.ID); err != nil {
					log.Error(ctx, "Scanner: Error updating last scan completed", "lib", lib.Name, err)
					return fmt.Errorf("updating last scan completed: %w", err)
				}

				if !state.changesDetected.Load() {
					log.Debug(ctx, "Scanner: No changes detected, skipping library stats refresh", "lib", lib.Name)
					continue
				}
				log.Debug(ctx, "Scanner: Refreshing library stats", "lib", lib.Name)
				if err := tx.Library(ctx).RefreshStats(lib.ID); err != nil {
					log.Error(ctx, "Scanner: Error refreshing library stats", "lib", lib.Name, err)
					return fmt.Errorf("refreshing library stats: %w", err)
				}
			}

			// The PID settings do not depend on the library, so they are stored once
			// instead of once per library. The guard keeps the previous behavior of
			// writing nothing when there are no libraries.
			// NOTE(review): assumes WithTx applies these writes atomically with the
			// loop above, so their position relative to it is irrelevant; confirm.
			if len(state.libraries) > 0 {
				if err := tx.Property(ctx).Put(consts.PIDTrackKey, conf.Server.PID.Track); err != nil {
					log.Error(ctx, "Scanner: Error updating track PID conf", err)
					return fmt.Errorf("updating track PID conf: %w", err)
				}
				if err := tx.Property(ctx).Put(consts.PIDAlbumKey, conf.Server.PID.Album); err != nil {
					log.Error(ctx, "Scanner: Error updating album PID conf", err)
					return fmt.Errorf("updating album PID conf: %w", err)
				}
			}

			log.Debug(ctx, "Scanner: Updated libraries after scan", "elapsed", time.Since(start), "numLibraries", len(state.libraries))
			return nil
		}, "scanner: update libraries")
	}
}
// phase is one step of the scan, described as a pipeline over tasks of type T
// that runPhase executes.
type phase[T any] interface {
	// producer returns the pipeline producer for the phase.
	producer() ppl.Producer[T]
	// stages returns the pipeline stages of the phase. runPhase prepends a
	// task-counting stage to them.
	stages() []ppl.Stage[T]
	// finalize is called after the pipeline has run, with the error the pipeline
	// returned (nil on success). Its result becomes the result of the phase.
	finalize(error) error
	// description returns the text used in the log entry written when the phase starts.
	description() string
}
// runPhase wraps a phase into a step function. The step runs the phase's
// producer through its stages, preceded by a stage that counts the tasks, and
// then hands the pipeline error to the phase's finalize. At debug log level the
// pipeline is run through ppl.Measure and its metrics are logged; otherwise
// ppl.Do is used. The step returns whatever finalize returns.
func runPhase[T any](ctx context.Context, phaseNum int, phase phase[T]) func() error {
	return func() error {
		log.Debug(ctx, fmt.Sprintf("Scanner: Starting phase %d: %s", phaseNum, phase.description()))
		began := time.Now()

		source := phase.producer()
		phaseStages := phase.stages()

		// Build the pipeline with the counting stage first, followed by the phase's own stages
		taskCount, countFn := countTasks[T]()
		pipeline := make([]ppl.Stage[T], 0, len(phaseStages)+1)
		pipeline = append(pipeline, ppl.NewStage(countFn, ppl.Name("count tasks")))
		pipeline = append(pipeline, phaseStages...)

		var pipelineErr error
		if !log.IsGreaterOrEqualTo(log.LevelDebug) {
			pipelineErr = ppl.Do(source, pipeline...)
		} else {
			var metrics *ppl.Metrics
			metrics, pipelineErr = ppl.Measure(source, pipeline...)
			log.Info(ctx, "Scanner: "+metrics.String(), pipelineErr)
		}

		result := phase.finalize(pipelineErr)
		if result == nil {
			log.Debug(ctx, fmt.Sprintf("Scanner: Finished phase %d", phaseNum), "elapsed", time.Since(began), "totalTasks", taskCount.Load())
			return nil
		}
		log.Error(ctx, fmt.Sprintf("Scanner: Error processing libraries in phase %d", phaseNum), "elapsed", time.Since(began), result)
		return result
	}
}
// countTasks returns a counter together with a pass-through function that
// increments the counter once per call and returns its input unchanged with a
// nil error.
func countTasks[T any]() (*atomic.Int64, func(T) (T, error)) {
	total := new(atomic.Int64)
	passThrough := func(task T) (T, error) {
		total.Add(1)
		return task, nil
	}
	return total, passThrough
}
// Compile-time check that *scannerImpl implements the scanner interface.
var _ scanner = (*scannerImpl)(nil)