Files
navidrome/scanner/controller.go
Deluan Quintão 28d5299ffc feat(scanner): implement selective folder scanning and file system watcher improvements (#4674)
* feat: Add selective folder scanning capability

Implement targeted scanning of specific library/folder pairs without
full recursion. This enables efficient rescanning of individual folders
when changes are detected, significantly reducing scan time for large
libraries.

Key changes:
- Add ScanTarget struct and ScanFolders API to Scanner interface
- Implement CLI flag --targets for specifying libraryID:folderPath pairs
- Add FolderRepository.GetByPaths() for batch folder info retrieval
- Create loadSpecificFolders() for non-recursive directory loading
- Scope GC operations to affected libraries only (with TODO for full impl)
- Add comprehensive tests for selective scanning behavior

The selective scan:
- Only processes specified folders (no subdirectory recursion)
- Maintains library isolation
- Runs full maintenance pipeline scoped to affected libraries
- Supports both full and quick scan modes

Examples:
  navidrome scan --targets "1:Music/Rock,1:Music/Jazz"
  navidrome scan --full --targets "2:Classical"

* feat(folder): replace GetByPaths with GetFolderUpdateInfo for improved folder updates retrieval

Signed-off-by: Deluan <deluan@navidrome.org>

* test: update parseTargets test to handle folder names with spaces

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(folder): remove unused LibraryPath struct and update GC logging message

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(folder): enhance external scanner to support target-specific scanning

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): simplify scanner methods

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(watcher): implement folder scanning notifications with deduplication

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(watcher): add resolveFolderPath function for testability

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(watcher): implement path ignoring based on .ndignore patterns

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): implement IgnoreChecker for managing .ndignore patterns

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(ignore_checker): rename scanner to lineScanner for clarity

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): enhance ScanTarget struct with String method for better target representation

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(scanner): validate library ID to prevent negative values

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): simplify GC method by removing library ID parameter

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(scanner): update folder scanning to include all descendants of specified folders

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): allow selective scan in the /startScan endpoint

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): update CallScan to handle specific library/folder pairs

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): streamline scanning logic by removing scanAll method

Signed-off-by: Deluan <deluan@navidrome.org>

* test: enhance mockScanner for thread safety and improve test reliability

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): move scanner.ScanTarget to model.ScanTarget

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor: move scanner types to model,implement MockScanner

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): update scanner interface and implementations to use model.Scanner

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(folder_repository): normalize target path handling by using filepath.Clean

Signed-off-by: Deluan <deluan@navidrome.org>

* test(folder_repository): add comprehensive tests for folder retrieval and child exclusion

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): simplify selective scan logic using slice.Filter

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): streamline phase folder and album creation by removing unnecessary library parameter

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): move initialization logic from phase_1 to the scanner itself

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(tests): rename selective scan test file to scanner_selective_test.go

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(configuration): add DevSelectiveWatcher configuration option

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(watcher): enhance .ndignore handling for folder deletions and file changes

Signed-off-by: Deluan <deluan@navidrome.org>

* docs(scanner): comments

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): enhance walkDirTree to support target folder scanning

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(scanner, watcher): handle errors when pushing ignore patterns for folders

Signed-off-by: Deluan <deluan@navidrome.org>

* Update scanner/phase_1_folders.go

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* refactor(scanner): replace parseTargets function with direct call to scanner.ParseTargets

Signed-off-by: Deluan <deluan@navidrome.org>

* test(scanner): add tests for ScanBegin and ScanEnd functionality

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(library): update PRAGMA optimize to check table sizes without ANALYZE

Signed-off-by: Deluan <deluan@navidrome.org>

* test(scanner): refactor tests

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(ui): add selective scan options and update translations

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(ui): add quick and full scan options for individual libraries

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(ui): add Scan buttonsto the LibraryList

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(scan): update scanning parameters from 'path' to 'target' for selective scans.

* refactor(scan): move ParseTargets function to model package

* test(scan): suppress unused return value from SetUserLibraries in tests

* feat(gc): enhance garbage collection to support selective library purging

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(scanner): prevent race condition when scanning deleted folders

When the watcher detects changes in a folder that gets deleted before
the scanner runs (due to the 10-second delay), the scanner was
prematurely removing these folders from the tracking map, preventing
them from being marked as missing.

The issue occurred because `newFolderEntry` was calling `popLastUpdate`
before verifying the folder actually exists on the filesystem.

Changes:
- Move fs.Stat check before newFolderEntry creation in loadDir to
  ensure deleted folders remain in lastUpdates for finalize() to handle
- Add early existence check in walkDirTree to skip non-existent target
  folders with a warning log
- Add unit test verifying non-existent folders aren't removed from
  lastUpdates prematurely
- Add integration test for deleted folder scenario with ScanFolders

Fixes the issue where deleting entire folders (e.g., /music/AC_DC)
wouldn't mark tracks as missing when using selective folder scanning.

* refactor(scan): streamline folder entry creation and update handling

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(scan): add '@Recycle' (QNAP) to ignored directories list

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(log): improve thread safety in logging level management

* test(scan): move unit tests for ParseTargets function

Signed-off-by: Deluan <deluan@navidrome.org>

* review

Signed-off-by: Deluan <deluan@navidrome.org>

---------

Signed-off-by: Deluan <deluan@navidrome.org>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: deluan <deluan.quintao@mechanical-orchard.com>
2025-11-14 22:15:43 -05:00

311 lines
8.7 KiB
Go

package scanner
import (
"context"
"errors"
"fmt"
"sync/atomic"
"time"
"github.com/navidrome/navidrome/conf"
"github.com/navidrome/navidrome/consts"
"github.com/navidrome/navidrome/core"
"github.com/navidrome/navidrome/core/artwork"
"github.com/navidrome/navidrome/core/auth"
"github.com/navidrome/navidrome/core/metrics"
"github.com/navidrome/navidrome/log"
"github.com/navidrome/navidrome/model"
"github.com/navidrome/navidrome/model/request"
"github.com/navidrome/navidrome/server/events"
. "github.com/navidrome/navidrome/utils/gg"
"github.com/navidrome/navidrome/utils/pl"
"golang.org/x/time/rate"
)
var (
ErrAlreadyScanning = errors.New("already scanning")
)
func New(rootCtx context.Context, ds model.DataStore, cw artwork.CacheWarmer, broker events.Broker,
pls core.Playlists, m metrics.Metrics) model.Scanner {
c := &controller{
rootCtx: rootCtx,
ds: ds,
cw: cw,
broker: broker,
pls: pls,
metrics: m,
}
if !conf.Server.DevExternalScanner {
c.limiter = P(rate.Sometimes{Interval: conf.Server.DevActivityPanelUpdateRate})
}
return c
}
func (s *controller) getScanner() scanner {
if conf.Server.DevExternalScanner {
return &scannerExternal{}
}
return &scannerImpl{ds: s.ds, cw: s.cw, pls: s.pls}
}
// CallScan starts an in-process scan of specific library/folder pairs.
// If targets is empty, it scans all libraries.
// This is meant to be called from the command line (see cmd/scan.go).
func CallScan(ctx context.Context, ds model.DataStore, pls core.Playlists, fullScan bool, targets []model.ScanTarget) (<-chan *ProgressInfo, error) {
release, err := lockScan(ctx)
if err != nil {
return nil, err
}
defer release()
ctx = auth.WithAdminUser(ctx, ds)
progress := make(chan *ProgressInfo, 100)
go func() {
defer close(progress)
scanner := &scannerImpl{ds: ds, cw: artwork.NoopCacheWarmer(), pls: pls}
scanner.scanFolders(ctx, fullScan, targets, progress)
}()
return progress, nil
}
func IsScanning() bool {
return running.Load()
}
type ProgressInfo struct {
LibID int
FileCount uint32
Path string
Phase string
ChangesDetected bool
Warning string
Error string
ForceUpdate bool
}
// scanner defines the interface for different scanner implementations.
// This allows for swapping between in-process and external scanners.
type scanner interface {
// scanFolders performs the actual scanning of folders. If targets is nil, it scans all libraries.
scanFolders(ctx context.Context, fullScan bool, targets []model.ScanTarget, progress chan<- *ProgressInfo)
}
type controller struct {
rootCtx context.Context
ds model.DataStore
cw artwork.CacheWarmer
broker events.Broker
metrics metrics.Metrics
pls core.Playlists
limiter *rate.Sometimes
count atomic.Uint32
folderCount atomic.Uint32
changesDetected bool
}
// getLastScanTime returns the most recent scan time across all libraries
func (s *controller) getLastScanTime(ctx context.Context) (time.Time, error) {
libs, err := s.ds.Library(ctx).GetAll(model.QueryOptions{
Sort: "last_scan_at",
Order: "desc",
Max: 1,
})
if err != nil {
return time.Time{}, fmt.Errorf("getting libraries: %w", err)
}
if len(libs) == 0 {
return time.Time{}, nil
}
return libs[0].LastScanAt, nil
}
// getScanInfo retrieves scan status from the database
func (s *controller) getScanInfo(ctx context.Context) (scanType string, elapsed time.Duration, lastErr string) {
lastErr, _ = s.ds.Property(ctx).DefaultGet(consts.LastScanErrorKey, "")
scanType, _ = s.ds.Property(ctx).DefaultGet(consts.LastScanTypeKey, "")
startTimeStr, _ := s.ds.Property(ctx).DefaultGet(consts.LastScanStartTimeKey, "")
if startTimeStr != "" {
startTime, err := time.Parse(time.RFC3339, startTimeStr)
if err == nil {
if running.Load() {
elapsed = time.Since(startTime)
} else {
// If scan is not running, calculate elapsed time using the most recent scan time
lastScanTime, err := s.getLastScanTime(ctx)
if err == nil && !lastScanTime.IsZero() {
elapsed = lastScanTime.Sub(startTime)
}
}
}
}
return scanType, elapsed, lastErr
}
func (s *controller) Status(ctx context.Context) (*model.ScannerStatus, error) {
lastScanTime, err := s.getLastScanTime(ctx)
if err != nil {
return nil, fmt.Errorf("getting last scan time: %w", err)
}
scanType, elapsed, lastErr := s.getScanInfo(ctx)
if running.Load() {
status := &model.ScannerStatus{
Scanning: true,
LastScan: lastScanTime,
Count: s.count.Load(),
FolderCount: s.folderCount.Load(),
LastError: lastErr,
ScanType: scanType,
ElapsedTime: elapsed,
}
return status, nil
}
count, folderCount, err := s.getCounters(ctx)
if err != nil {
return nil, fmt.Errorf("getting library stats: %w", err)
}
return &model.ScannerStatus{
Scanning: false,
LastScan: lastScanTime,
Count: uint32(count),
FolderCount: uint32(folderCount),
LastError: lastErr,
ScanType: scanType,
ElapsedTime: elapsed,
}, nil
}
func (s *controller) getCounters(ctx context.Context) (int64, int64, error) {
libs, err := s.ds.Library(ctx).GetAll()
if err != nil {
return 0, 0, fmt.Errorf("library count: %w", err)
}
var count, folderCount int64
for _, l := range libs {
count += int64(l.TotalSongs)
folderCount += int64(l.TotalFolders)
}
return count, folderCount, nil
}
func (s *controller) ScanAll(requestCtx context.Context, fullScan bool) ([]string, error) {
return s.ScanFolders(requestCtx, fullScan, nil)
}
func (s *controller) ScanFolders(requestCtx context.Context, fullScan bool, targets []model.ScanTarget) ([]string, error) {
release, err := lockScan(requestCtx)
if err != nil {
return nil, err
}
defer release()
// Prepare the context for the scan
ctx := request.AddValues(s.rootCtx, requestCtx)
ctx = auth.WithAdminUser(ctx, s.ds)
// Send the initial scan status event
s.sendMessage(ctx, &events.ScanStatus{Scanning: true, Count: 0, FolderCount: 0})
progress := make(chan *ProgressInfo, 100)
go func() {
defer close(progress)
scanner := s.getScanner()
scanner.scanFolders(ctx, fullScan, targets, progress)
}()
// Wait for the scan to finish, sending progress events to all connected clients
scanWarnings, scanError := s.trackProgress(ctx, progress)
for _, w := range scanWarnings {
log.Warn(ctx, fmt.Sprintf("Scan warning: %s", w))
}
// If changes were detected, send a refresh event to all clients
if s.changesDetected {
log.Debug(ctx, "Library changes imported. Sending refresh event")
s.broker.SendBroadcastMessage(ctx, &events.RefreshResource{})
}
// Send the final scan status event, with totals
if count, folderCount, err := s.getCounters(ctx); err != nil {
s.metrics.WriteAfterScanMetrics(ctx, false)
return scanWarnings, err
} else {
scanType, elapsed, lastErr := s.getScanInfo(ctx)
s.metrics.WriteAfterScanMetrics(ctx, true)
s.sendMessage(ctx, &events.ScanStatus{
Scanning: false,
Count: count,
FolderCount: folderCount,
Error: lastErr,
ScanType: scanType,
ElapsedTime: elapsed,
})
}
return scanWarnings, scanError
}
// This is a global variable that is used to prevent multiple scans from running at the same time.
// "There can be only one" - https://youtu.be/sqcLjcSloXs?si=VlsjEOjTJZ68zIyg
var running atomic.Bool
func lockScan(ctx context.Context) (func(), error) {
if !running.CompareAndSwap(false, true) {
log.Debug(ctx, "Scanner already running, ignoring request")
return func() {}, ErrAlreadyScanning
}
return func() {
running.Store(false)
}, nil
}
func (s *controller) trackProgress(ctx context.Context, progress <-chan *ProgressInfo) ([]string, error) {
s.count.Store(0)
s.folderCount.Store(0)
s.changesDetected = false
var warnings []string
var errs []error
for p := range pl.ReadOrDone(ctx, progress) {
if p.Error != "" {
errs = append(errs, errors.New(p.Error))
continue
}
if p.Warning != "" {
warnings = append(warnings, p.Warning)
continue
}
if p.ChangesDetected {
s.changesDetected = true
continue
}
s.count.Add(p.FileCount)
if p.FileCount > 0 {
s.folderCount.Add(1)
}
scanType, elapsed, lastErr := s.getScanInfo(ctx)
status := &events.ScanStatus{
Scanning: true,
Count: int64(s.count.Load()),
FolderCount: int64(s.folderCount.Load()),
Error: lastErr,
ScanType: scanType,
ElapsedTime: elapsed,
}
if s.limiter != nil && !p.ForceUpdate {
s.limiter.Do(func() { s.sendMessage(ctx, status) })
} else {
s.sendMessage(ctx, status)
}
}
return warnings, errors.Join(errs...)
}
func (s *controller) sendMessage(ctx context.Context, status *events.ScanStatus) {
s.broker.SendBroadcastMessage(ctx, status)
}