feat(scanner): implement file-based target passing for large target lists

Signed-off-by: Deluan <deluan@navidrome.org>
This commit is contained in:
Deluan
2025-12-16 16:08:32 -05:00
parent 8c80be56da
commit 9ed309ac81
4 changed files with 363 additions and 6 deletions

View File

@@ -1,9 +1,12 @@
package cmd package cmd
import ( import (
"bufio"
"context" "context"
"encoding/gob" "encoding/gob"
"fmt"
"os" "os"
"strings"
"github.com/navidrome/navidrome/core" "github.com/navidrome/navidrome/core"
"github.com/navidrome/navidrome/db" "github.com/navidrome/navidrome/db"
@@ -19,12 +22,14 @@ var (
fullScan bool fullScan bool
subprocess bool subprocess bool
targets []string targets []string
targetFile string
) )
func init() { func init() {
scanCmd.Flags().BoolVarP(&fullScan, "full", "f", false, "check all subfolders, ignoring timestamps") scanCmd.Flags().BoolVarP(&fullScan, "full", "f", false, "check all subfolders, ignoring timestamps")
scanCmd.Flags().BoolVarP(&subprocess, "subprocess", "", false, "run as subprocess (internal use)") scanCmd.Flags().BoolVarP(&subprocess, "subprocess", "", false, "run as subprocess (internal use)")
scanCmd.Flags().StringArrayVarP(&targets, "target", "t", []string{}, "list of libraryID:folderPath pairs, can be repeated (e.g., \"-t 1:Music/Rock -t 1:Music/Jazz -t 2:Classical\")") scanCmd.Flags().StringArrayVarP(&targets, "target", "t", []string{}, "list of libraryID:folderPath pairs, can be repeated (e.g., \"-t 1:Music/Rock -t 1:Music/Jazz -t 2:Classical\")")
scanCmd.Flags().StringVar(&targetFile, "target-file", "", "path to file containing targets (one libraryID:folderPath per line)")
rootCmd.AddCommand(scanCmd) rootCmd.AddCommand(scanCmd)
} }
@@ -71,10 +76,17 @@ func runScanner(ctx context.Context) {
ds := persistence.New(sqlDB) ds := persistence.New(sqlDB)
pls := core.NewPlaylists(ds) pls := core.NewPlaylists(ds)
// Parse targets if provided // Parse targets from command line or file
var scanTargets []model.ScanTarget var scanTargets []model.ScanTarget
if len(targets) > 0 { var err error
var err error
if targetFile != "" {
scanTargets, err = readTargetsFromFile(targetFile)
if err != nil {
log.Fatal(ctx, "Failed to read targets from file", err)
}
log.Info(ctx, "Scanning specific folders from file", "numTargets", len(scanTargets))
} else if len(targets) > 0 {
scanTargets, err = model.ParseTargets(targets) scanTargets, err = model.ParseTargets(targets)
if err != nil { if err != nil {
log.Fatal(ctx, "Failed to parse targets", err) log.Fatal(ctx, "Failed to parse targets", err)
@@ -94,3 +106,31 @@ func runScanner(ctx context.Context) {
trackScanInteractively(ctx, progress) trackScanInteractively(ctx, progress)
} }
} }
// readTargetsFromFile reads scan targets from a file, one per line.
// Each line should be in the format "libraryID:folderPath".
// Leading and trailing whitespace is trimmed from every line; empty lines and
// lines starting with # (after trimming) are ignored.
//
// It returns an error if the file cannot be opened or read, or if any remaining
// line is rejected by model.ParseTargets.
func readTargetsFromFile(filePath string) ([]model.ScanTarget, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return nil, fmt.Errorf("failed to open target file: %w", err)
	}
	// The file is only read, so an error from Close carries no useful information.
	defer file.Close()

	var targetStrings []string
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		// Skip empty lines and comments. A valid target starts with a numeric
		// library ID, so a leading '#' can never be part of a real entry.
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		targetStrings = append(targetStrings, line)
	}
	// Scan() returns false on both EOF and failure; Err() tells them apart.
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("failed to read target file: %w", err)
	}

	return model.ParseTargets(targetStrings)
}

89
cmd/scan_test.go Normal file
View File

@@ -0,0 +1,89 @@
package cmd
import (
"os"
"path/filepath"
"github.com/navidrome/navidrome/model"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for readTargetsFromFile: parsing of well-formed files, whitespace and
// blank-line handling, and the error paths for missing files and bad entries.
var _ = Describe("readTargetsFromFile", func() {
	var tempDir string

	// writeTargetFile stores content as targets.txt inside the per-spec temp
	// directory and returns the path of the file it created.
	writeTargetFile := func(content string) string {
		GinkgoHelper()
		filePath := filepath.Join(tempDir, "targets.txt")
		Expect(os.WriteFile(filePath, []byte(content), 0600)).To(Succeed())
		return filePath
	}

	BeforeEach(func() {
		// TempDir is removed automatically when the spec finishes, so no
		// AfterEach (and no silently ignored RemoveAll error) is needed.
		tempDir = GinkgoT().TempDir()
	})

	It("reads valid targets from file", func() {
		filePath := writeTargetFile("1:Music/Rock\n2:Music/Jazz\n3:Classical\n")

		targets, err := readTargetsFromFile(filePath)
		Expect(err).ToNot(HaveOccurred())
		Expect(targets).To(HaveLen(3))
		Expect(targets[0]).To(Equal(model.ScanTarget{LibraryID: 1, FolderPath: "Music/Rock"}))
		Expect(targets[1]).To(Equal(model.ScanTarget{LibraryID: 2, FolderPath: "Music/Jazz"}))
		Expect(targets[2]).To(Equal(model.ScanTarget{LibraryID: 3, FolderPath: "Classical"}))
	})

	It("skips empty lines", func() {
		filePath := writeTargetFile("1:Music/Rock\n\n2:Music/Jazz\n\n")

		targets, err := readTargetsFromFile(filePath)
		Expect(err).ToNot(HaveOccurred())
		Expect(targets).To(HaveLen(2))
	})

	It("trims whitespace", func() {
		filePath := writeTargetFile(" 1:Music/Rock \n\t2:Music/Jazz\t\n")

		targets, err := readTargetsFromFile(filePath)
		Expect(err).ToNot(HaveOccurred())
		Expect(targets).To(HaveLen(2))
		Expect(targets[0].FolderPath).To(Equal("Music/Rock"))
		Expect(targets[1].FolderPath).To(Equal("Music/Jazz"))
	})

	It("returns error for non-existent file", func() {
		_, err := readTargetsFromFile("/nonexistent/file.txt")
		Expect(err).To(HaveOccurred())
		Expect(err.Error()).To(ContainSubstring("failed to open target file"))
	})

	It("returns error for invalid target format", func() {
		filePath := writeTargetFile("invalid-format\n")

		_, err := readTargetsFromFile(filePath)
		Expect(err).To(HaveOccurred())
	})

	It("handles mixed valid and empty lines", func() {
		filePath := writeTargetFile("\n1:Music/Rock\n\n\n2:Music/Jazz\n\n")

		targets, err := readTargetsFromFile(filePath)
		Expect(err).ToNot(HaveOccurred())
		Expect(targets).To(HaveLen(2))
	})
})

View File

@@ -14,6 +14,12 @@ import (
"github.com/navidrome/navidrome/model" "github.com/navidrome/navidrome/model"
) )
const (
	// argLengthThreshold is the threshold for switching from command-line args to file-based target passing.
	// Set conservatively at 24KB to support Windows (~32KB limit) with margin for env vars.
	// The value is in bytes and is compared against the estimated length of the
	// "-t <target>" arguments only; targets are written to a temp file only when
	// that estimate is strictly greater than this threshold.
	argLengthThreshold = 24 * 1024
)
// scannerExternal is a scanner that runs an external process to do the scanning. It is used to avoid // scannerExternal is a scanner that runs an external process to do the scanning. It is used to avoid
// memory leaks or retention in the main process, as the scanner can consume a lot of memory. The // memory leaks or retention in the main process, as the scanner can consume a lot of memory. The
// external process will be spawned with the same executable as the current process, and will run // external process will be spawned with the same executable as the current process, and will run
@@ -45,10 +51,14 @@ func (s *scannerExternal) scan(ctx context.Context, fullScan bool, targets []mod
// Add targets if provided // Add targets if provided
if len(targets) > 0 { if len(targets) > 0 {
for _, target := range targets { targetArgs, cleanup, err := targetArguments(ctx, targets, argLengthThreshold)
args = append(args, "-t", target.String()) if err != nil {
progress <- &ProgressInfo{Error: err.Error()}
return
} }
log.Debug(ctx, "Spawning external scanner process with targets", "fullScan", fullScan, "path", exe, "targets", targets) defer cleanup()
log.Debug(ctx, "Spawning external scanner process with target file", "fullScan", fullScan, "path", exe, "numTargets", len(targets))
args = append(args, targetArgs...)
} else { } else {
log.Debug(ctx, "Spawning external scanner process", "fullScan", fullScan, "path", exe) log.Debug(ctx, "Spawning external scanner process", "fullScan", fullScan, "path", exe)
} }
@@ -98,4 +108,62 @@ func (s *scannerExternal) wait(cmd *exec.Cmd, out *io.PipeWriter) {
_ = out.Close() _ = out.Close()
} }
// targetArguments turns the given scan targets into arguments for the scanner
// subprocess.
//
// When the estimated length of the inline form is at most lengthThreshold, the
// result is one "-t <target>" pair per target and the cleanup function is a no-op.
// Otherwise the targets are written to a temp file, the result is
// "--target-file <path>", and the cleanup function removes that file.
//
// On failure both the arguments and the cleanup function are nil, so callers
// must check the error before deferring the cleanup.
func targetArguments(ctx context.Context, targets []model.ScanTarget, lengthThreshold int) ([]string, func(), error) {
	// Common case: the list is short enough to pass directly on the command line.
	if estimateArgLength(targets) <= lengthThreshold {
		var inline []string
		for _, t := range targets {
			inline = append(inline, "-t", t.String())
		}
		return inline, func() {}, nil
	}

	// Too long for the command line: hand the list over through a temp file.
	path, err := writeTargetsToFile(targets)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to write targets to file: %w", err)
	}
	removeTempFile := func() {
		os.Remove(path) // Best effort; nothing useful can be done if removal fails
	}
	return []string{"--target-file", path}, removeTempFile, nil
}
// estimateArgLength returns an approximation, in bytes, of how much command-line
// space the targets would take if each were passed as "-t <target>".
func estimateArgLength(targets []model.ScanTarget) int {
	// Fixed cost per target: the "-t " prefix plus one separating space.
	const perTargetOverhead = len("-t ") + len(" ")

	total := len(targets) * perTargetOverhead
	for _, t := range targets {
		total += len(t.String())
	}
	return total
}
// writeTargetsToFile writes the targets to a temporary file, one per line.
// Returns the path to the temp file, which the caller should clean up.
//
// On any failure (create, write or close) the partially written file is removed
// and an empty path is returned together with the error.
func writeTargetsToFile(targets []model.ScanTarget) (string, error) {
	tmpFile, err := os.CreateTemp("", "navidrome-scan-targets-*.txt")
	if err != nil {
		return "", fmt.Errorf("failed to create temp file: %w", err)
	}
	name := tmpFile.Name()

	for _, target := range targets {
		if _, err := fmt.Fprintln(tmpFile, target.String()); err != nil {
			// Close before removing: an open file cannot be deleted on Windows.
			_ = tmpFile.Close()
			_ = os.Remove(name)
			return "", fmt.Errorf("failed to write to temp file: %w", err)
		}
	}

	// Close explicitly instead of deferring so that a failure to flush the data
	// to disk is reported rather than handing the subprocess a truncated list.
	if err := tmpFile.Close(); err != nil {
		_ = os.Remove(name)
		return "", fmt.Errorf("failed to close temp file: %w", err)
	}
	return name, nil
}
var _ scanner = (*scannerExternal)(nil) var _ scanner = (*scannerExternal)(nil)

160
scanner/external_test.go Normal file
View File

@@ -0,0 +1,160 @@
package scanner
import (
"context"
"os"
"strings"
"github.com/navidrome/navidrome/model"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for targetArguments: inline "-t" arguments for short lists, the
// --target-file fallback for long lists, temp file cleanup, and the exact
// behavior at the threshold boundary.
var _ = Describe("targetArguments", func() {
	var ctx context.Context

	BeforeEach(func() {
		ctx = GinkgoT().Context()
	})

	Context("with small target list", func() {
		It("returns command-line arguments for single target", func() {
			targets := []model.ScanTarget{
				{LibraryID: 1, FolderPath: "Music/Rock"},
			}

			args, cleanup, err := targetArguments(ctx, targets, argLengthThreshold)
			Expect(err).ToNot(HaveOccurred())
			defer cleanup()
			Expect(args).To(Equal([]string{"-t", "1:Music/Rock"}))
		})

		It("returns command-line arguments for multiple targets", func() {
			targets := []model.ScanTarget{
				{LibraryID: 1, FolderPath: "Music/Rock"},
				{LibraryID: 2, FolderPath: "Music/Jazz"},
				{LibraryID: 3, FolderPath: "Classical"},
			}

			args, cleanup, err := targetArguments(ctx, targets, argLengthThreshold)
			Expect(err).ToNot(HaveOccurred())
			defer cleanup()
			Expect(args).To(Equal([]string{
				"-t", "1:Music/Rock",
				"-t", "2:Music/Jazz",
				"-t", "3:Classical",
			}))
		})

		It("handles targets with special characters", func() {
			targets := []model.ScanTarget{
				{LibraryID: 1, FolderPath: "Music/Rock & Roll"},
				{LibraryID: 2, FolderPath: "Music/Jazz (Modern)"},
			}

			args, cleanup, err := targetArguments(ctx, targets, argLengthThreshold)
			Expect(err).ToNot(HaveOccurred())
			defer cleanup()
			Expect(args).To(Equal([]string{
				"-t", "1:Music/Rock & Roll",
				"-t", "2:Music/Jazz (Modern)",
			}))
		})
	})

	Context("with large target list exceeding threshold", func() {
		It("returns --target-file argument when exceeding threshold", func() {
			// Create enough targets to exceed the threshold
			var targets []model.ScanTarget
			for i := 1; i <= 600; i++ {
				targets = append(targets, model.ScanTarget{
					LibraryID:  1,
					FolderPath: "Music/VeryLongFolderPathToSimulateRealScenario/SubFolder",
				})
			}

			args, cleanup, err := targetArguments(ctx, targets, argLengthThreshold)
			Expect(err).ToNot(HaveOccurred())
			defer cleanup()
			Expect(args).To(HaveLen(2))
			Expect(args[0]).To(Equal("--target-file"))

			// Verify the file exists and has correct format
			filePath := args[1]
			Expect(filePath).To(ContainSubstring("navidrome-scan-targets-"))
			Expect(filePath).To(HaveSuffix(".txt"))

			// Verify file actually exists
			_, err = os.Stat(filePath)
			Expect(err).ToNot(HaveOccurred())
		})

		It("creates temp file with correct format", func() {
			// Use custom threshold to easily exceed it
			targets := []model.ScanTarget{
				{LibraryID: 1, FolderPath: "Music/Rock"},
				{LibraryID: 2, FolderPath: "Music/Jazz"},
				{LibraryID: 3, FolderPath: "Classical"},
			}

			// Set threshold very low to force file usage
			args, cleanup, err := targetArguments(ctx, targets, 10)
			Expect(err).ToNot(HaveOccurred())
			defer cleanup()
			// Guard the index accesses below so a regression fails cleanly
			Expect(args).To(HaveLen(2))
			Expect(args[0]).To(Equal("--target-file"))

			// Verify file exists with correct format
			filePath := args[1]
			Expect(filePath).To(ContainSubstring("navidrome-scan-targets-"))
			Expect(filePath).To(HaveSuffix(".txt"))

			// Verify file content
			content, err := os.ReadFile(filePath)
			Expect(err).ToNot(HaveOccurred())
			lines := strings.Split(strings.TrimSpace(string(content)), "\n")
			Expect(lines).To(HaveLen(3))
			Expect(lines[0]).To(Equal("1:Music/Rock"))
			Expect(lines[1]).To(Equal("2:Music/Jazz"))
			Expect(lines[2]).To(Equal("3:Classical"))
		})

		It("removes the temp file when cleanup is called", func() {
			targets := []model.ScanTarget{
				{LibraryID: 1, FolderPath: "Music/Rock"},
			}

			// Set threshold very low to force file usage
			args, cleanup, err := targetArguments(ctx, targets, 10)
			Expect(err).ToNot(HaveOccurred())
			Expect(args).To(HaveLen(2))

			filePath := args[1]
			Expect(filePath).To(BeAnExistingFile())

			cleanup()
			Expect(filePath).ToNot(BeAnExistingFile())
		})
	})

	Context("edge cases", func() {
		It("handles empty target list", func() {
			var targets []model.ScanTarget

			args, cleanup, err := targetArguments(ctx, targets, argLengthThreshold)
			Expect(err).ToNot(HaveOccurred())
			defer cleanup()
			Expect(args).To(BeEmpty())
		})

		It("uses command-line args when exactly at threshold", func() {
			// Create targets that are exactly at threshold
			targets := []model.ScanTarget{
				{LibraryID: 1, FolderPath: "Music"},
			}

			// Estimate length should be 11 bytes
			estimatedLength := estimateArgLength(targets)

			args, cleanup, err := targetArguments(ctx, targets, estimatedLength)
			Expect(err).ToNot(HaveOccurred())
			defer cleanup()
			Expect(args).To(Equal([]string{"-t", "1:Music"}))
		})

		It("uses file when one byte over threshold", func() {
			targets := []model.ScanTarget{
				{LibraryID: 1, FolderPath: "Music"},
			}

			// Set threshold just below the estimated length
			estimatedLength := estimateArgLength(targets)

			args, cleanup, err := targetArguments(ctx, targets, estimatedLength-1)
			Expect(err).ToNot(HaveOccurred())
			defer cleanup()
			Expect(args).To(HaveLen(2))
			Expect(args[0]).To(Equal("--target-file"))
		})
	})
})