Files
navidrome/core/lyrics/sources_test.go
Deluan Quintão 91fab68578 fix: handle UTF BOM in lyrics and playlist files (#4637)
* fix: handle UTF-8 BOM in lyrics and playlist files

Added UTF-8 BOM (Byte Order Mark) detection and stripping for external lyrics files and playlist files. This ensures that files with BOM markers are correctly parsed and recognized as synced lyrics or valid playlists.

The fix introduces a new ioutils package with UTF8Reader and UTF8ReadFile functions that automatically detect and remove UTF-8, UTF-16 LE, and UTF-16 BE BOMs. These utilities are now used when reading external lyrics and playlist files to ensure consistent parsing regardless of BOM presence.

Added comprehensive tests for BOM handling in both lyrics and playlists, including test fixtures with actual BOM markers to verify correct behavior.

* test: add test for UTF-16 LE encoded LRC files

Signed-off-by: Deluan <deluan@navidrome.org>

---------

Signed-off-by: Deluan <deluan@navidrome.org>
2025-10-31 09:07:23 -04:00

147 lines
4.3 KiB
Go

package lyrics
import (
"context"
"encoding/json"
"github.com/navidrome/navidrome/model"
"github.com/navidrome/navidrome/utils/gg"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("sources", func() {
ctx := context.Background()
Describe("fromEmbedded", func() {
It("should return nothing for a media file with no lyrics", func() {
mf := model.MediaFile{}
lyrics, err := fromEmbedded(ctx, &mf)
Expect(err).To(BeNil())
Expect(lyrics).To(HaveLen(0))
})
It("should return lyrics for a media file with well-formatted lyrics", func() {
const syncedLyrics = "[00:18.80]We're no strangers to love\n[00:22.801]You know the rules and so do I"
const unsyncedLyrics = "We're no strangers to love\nYou know the rules and so do I"
synced, _ := model.ToLyrics("eng", syncedLyrics)
unsynced, _ := model.ToLyrics("xxx", unsyncedLyrics)
expectedList := model.LyricList{*synced, *unsynced}
lyricsJson, err := json.Marshal(expectedList)
Expect(err).ToNot(HaveOccurred())
mf := model.MediaFile{
Lyrics: string(lyricsJson),
}
lyrics, err := fromEmbedded(ctx, &mf)
Expect(err).To(BeNil())
Expect(lyrics).ToNot(BeNil())
Expect(lyrics).To(Equal(expectedList))
})
It("should return an error if somehow the JSON is bad", func() {
mf := model.MediaFile{Lyrics: "["}
lyrics, err := fromEmbedded(ctx, &mf)
Expect(lyrics).To(HaveLen(0))
Expect(err).ToNot(BeNil())
})
})
Describe("fromExternalFile", func() {
It("should return nil for lyrics that don't exist", func() {
mf := model.MediaFile{Path: "tests/fixtures/01 Invisible (RED) Edit Version.mp3"}
lyrics, err := fromExternalFile(ctx, &mf, ".lrc")
Expect(err).To(BeNil())
Expect(lyrics).To(HaveLen(0))
})
It("should return synchronized lyrics from a file", func() {
mf := model.MediaFile{Path: "tests/fixtures/test.mp3"}
lyrics, err := fromExternalFile(ctx, &mf, ".lrc")
Expect(err).To(BeNil())
Expect(lyrics).To(Equal(model.LyricList{
model.Lyrics{
DisplayArtist: "Rick Astley",
DisplayTitle: "That one song",
Lang: "eng",
Line: []model.Line{
{
Start: gg.P(int64(18800)),
Value: "We're no strangers to love",
},
{
Start: gg.P(int64(22801)),
Value: "You know the rules and so do I",
},
},
Offset: gg.P(int64(-100)),
Synced: true,
},
}))
})
It("should return unsynchronized lyrics from a file", func() {
mf := model.MediaFile{Path: "tests/fixtures/test.mp3"}
lyrics, err := fromExternalFile(ctx, &mf, ".txt")
Expect(err).To(BeNil())
Expect(lyrics).To(Equal(model.LyricList{
model.Lyrics{
Lang: "xxx",
Line: []model.Line{
{
Value: "We're no strangers to love",
},
{
Value: "You know the rules and so do I",
},
},
Synced: false,
},
}))
})
It("should handle LRC files with UTF-8 BOM marker (issue #4631)", func() {
// The function looks for <basePath-without-ext><suffix>, so we need to pass
// a MediaFile with .mp3 path and look for .lrc suffix
mf := model.MediaFile{Path: "tests/fixtures/bom-test.mp3"}
lyrics, err := fromExternalFile(ctx, &mf, ".lrc")
Expect(err).To(BeNil())
Expect(lyrics).ToNot(BeNil())
Expect(lyrics).To(HaveLen(1))
// The critical assertion: even with BOM, synced should be true
Expect(lyrics[0].Synced).To(BeTrue(), "Lyrics with BOM marker should be recognized as synced")
Expect(lyrics[0].Line).To(HaveLen(1))
Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(0))))
Expect(lyrics[0].Line[0].Value).To(ContainSubstring("作曲"))
})
It("should handle UTF-16 LE encoded LRC files", func() {
mf := model.MediaFile{Path: "tests/fixtures/bom-utf16-test.mp3"}
lyrics, err := fromExternalFile(ctx, &mf, ".lrc")
Expect(err).To(BeNil())
Expect(lyrics).ToNot(BeNil())
Expect(lyrics).To(HaveLen(1))
// UTF-16 should be properly converted to UTF-8
Expect(lyrics[0].Synced).To(BeTrue(), "UTF-16 encoded lyrics should be recognized as synced")
Expect(lyrics[0].Line).To(HaveLen(2))
Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(18800))))
Expect(lyrics[0].Line[0].Value).To(Equal("We're no strangers to love"))
Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(22801))))
Expect(lyrics[0].Line[1].Value).To(Equal("You know the rules and so do I"))
})
})
})