diff --git a/core/lyrics/sources.go b/core/lyrics/sources.go index 6d4a4cc6f..857dc2eef 100644 --- a/core/lyrics/sources.go +++ b/core/lyrics/sources.go @@ -8,6 +8,7 @@ import ( "github.com/navidrome/navidrome/log" "github.com/navidrome/navidrome/model" + "github.com/navidrome/navidrome/utils/ioutils" ) func fromEmbedded(ctx context.Context, mf *model.MediaFile) (model.LyricList, error) { @@ -27,8 +28,7 @@ func fromExternalFile(ctx context.Context, mf *model.MediaFile, suffix string) ( externalLyric := basePath[0:len(basePath)-len(ext)] + suffix - contents, err := os.ReadFile(externalLyric) - + contents, err := ioutils.UTF8ReadFile(externalLyric) if errors.Is(err, os.ErrNotExist) { log.Trace(ctx, "no lyrics found at path", "path", externalLyric) return nil, nil diff --git a/core/lyrics/sources_test.go b/core/lyrics/sources_test.go index e92564c00..b3d502101 100644 --- a/core/lyrics/sources_test.go +++ b/core/lyrics/sources_test.go @@ -108,5 +108,39 @@ var _ = Describe("sources", func() { }, })) }) + + It("should handle LRC files with UTF-8 BOM marker (issue #4631)", func() { + // The function looks for , so we need to pass + // a MediaFile with .mp3 path and look for .lrc suffix + mf := model.MediaFile{Path: "tests/fixtures/bom-test.mp3"} + lyrics, err := fromExternalFile(ctx, &mf, ".lrc") + + Expect(err).To(BeNil()) + Expect(lyrics).ToNot(BeNil()) + Expect(lyrics).To(HaveLen(1)) + + // The critical assertion: even with BOM, synced should be true + Expect(lyrics[0].Synced).To(BeTrue(), "Lyrics with BOM marker should be recognized as synced") + Expect(lyrics[0].Line).To(HaveLen(1)) + Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(0)))) + Expect(lyrics[0].Line[0].Value).To(ContainSubstring("作曲")) + }) + + It("should handle UTF-16 LE encoded LRC files", func() { + mf := model.MediaFile{Path: "tests/fixtures/bom-utf16-test.mp3"} + lyrics, err := fromExternalFile(ctx, &mf, ".lrc") + + Expect(err).To(BeNil()) + Expect(lyrics).ToNot(BeNil()) + Expect(lyrics).To(HaveLen(1)) + + // UTF-16 should be properly converted to UTF-8 + Expect(lyrics[0].Synced).To(BeTrue(), "UTF-16 encoded lyrics should be recognized as synced") + Expect(lyrics[0].Line).To(HaveLen(2)) + Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(18800)))) + Expect(lyrics[0].Line[0].Value).To(Equal("We're no strangers to love")) + Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(22801)))) + Expect(lyrics[0].Line[1].Value).To(Equal("You know the rules and so do I")) + }) }) }) diff --git a/core/playlists.go b/core/playlists.go index 2eebc94e7..f98179f88 100644 --- a/core/playlists.go +++ b/core/playlists.go @@ -20,6 +20,7 @@ import ( "github.com/navidrome/navidrome/model" "github.com/navidrome/navidrome/model/criteria" "github.com/navidrome/navidrome/model/request" + "github.com/navidrome/navidrome/utils/ioutils" "github.com/navidrome/navidrome/utils/slice" "golang.org/x/text/unicode/norm" ) @@ -97,12 +98,13 @@ func (s *playlists) parsePlaylist(ctx context.Context, playlistFile string, fold } defer file.Close() + reader := ioutils.UTF8Reader(file) extension := strings.ToLower(filepath.Ext(playlistFile)) switch extension { case ".nsp": - err = s.parseNSP(ctx, pls, file) + err = s.parseNSP(ctx, pls, reader) default: - err = s.parseM3U(ctx, pls, folder, file) + err = s.parseM3U(ctx, pls, folder, reader) } return pls, err } diff --git a/core/playlists_test.go b/core/playlists_test.go index 399210ac8..fb42f9c9f 100644 --- a/core/playlists_test.go +++ b/core/playlists_test.go @@ -74,6 +74,24 @@ var _ = Describe("Playlists", func() { Expect(err).ToNot(HaveOccurred()) Expect(pls.Tracks).To(HaveLen(2)) }) + + It("parses playlists with UTF-8 BOM marker", func() { + pls, err := ps.ImportFile(ctx, folder, "bom-test.m3u") + Expect(err).ToNot(HaveOccurred()) + Expect(pls.OwnerID).To(Equal("123")) + Expect(pls.Name).To(Equal("Test Playlist")) + Expect(pls.Tracks).To(HaveLen(1)) + Expect(pls.Tracks[0].Path).To(Equal("tests/fixtures/playlists/test.mp3")) + }) + + It("parses UTF-16 LE encoded playlists with BOM and converts to UTF-8", func() { + pls, err := ps.ImportFile(ctx, folder, "bom-test-utf16.m3u") + Expect(err).ToNot(HaveOccurred()) + Expect(pls.OwnerID).To(Equal("123")) + Expect(pls.Name).To(Equal("UTF-16 Test Playlist")) + Expect(pls.Tracks).To(HaveLen(1)) + Expect(pls.Tracks[0].Path).To(Equal("tests/fixtures/playlists/test.mp3")) + }) }) Describe("NSP", func() { diff --git a/tests/fixtures/bom-test.lrc b/tests/fixtures/bom-test.lrc new file mode 100644 index 000000000..223c37de0 --- /dev/null +++ b/tests/fixtures/bom-test.lrc @@ -0,0 +1,4 @@ +[00:00.00] 作曲 : 柏大輔 +NOTE: This file intentionally contains a UTF-8 BOM (Byte Order Mark) at byte 0. +This tests BOM handling in lyrics parsing (GitHub issue #4631). +The BOM bytes are: 0xEF 0xBB 0xBF \ No newline at end of file diff --git a/tests/fixtures/bom-utf16-test.lrc b/tests/fixtures/bom-utf16-test.lrc new file mode 100644 index 000000000..e40ea3255 Binary files /dev/null and b/tests/fixtures/bom-utf16-test.lrc differ diff --git a/tests/fixtures/playlists/bom-test-utf16.m3u b/tests/fixtures/playlists/bom-test-utf16.m3u new file mode 100644 index 000000000..9c2e9d599 Binary files /dev/null and b/tests/fixtures/playlists/bom-test-utf16.m3u differ diff --git a/tests/fixtures/playlists/bom-test.m3u b/tests/fixtures/playlists/bom-test.m3u new file mode 100644 index 000000000..f5a00806c --- /dev/null +++ b/tests/fixtures/playlists/bom-test.m3u @@ -0,0 +1,6 @@ +#EXTM3U +# NOTE: This file intentionally contains a UTF-8 BOM (Byte Order Mark) at the beginning +# (bytes 0xEF 0xBB 0xBF) to test BOM handling in playlist parsing. +#PLAYLIST:Test Playlist +#EXTINF:123,Test Artist - Test Song +test.mp3 diff --git a/utils/ioutils/ioutils.go b/utils/ioutils/ioutils.go new file mode 100644 index 000000000..89d3997f3 --- /dev/null +++ b/utils/ioutils/ioutils.go @@ -0,0 +1,33 @@ +package ioutils + +import ( + "io" + "os" + + "golang.org/x/text/encoding/unicode" + "golang.org/x/text/transform" +) + +// UTF8Reader wraps an io.Reader to handle Byte Order Mark (BOM) properly. +// It strips UTF-8 BOM if present, and converts UTF-16 (LE/BE) to UTF-8. +// This is particularly useful for reading user-provided text files (like LRC lyrics, +// playlists) that may have been created on Windows, which often adds BOM markers. +// +// Reference: https://en.wikipedia.org/wiki/Byte_order_mark +func UTF8Reader(r io.Reader) io.Reader { + return transform.NewReader(r, unicode.BOMOverride(unicode.UTF8.NewDecoder())) +} + +// UTF8ReadFile reads the named file and returns its contents as a byte slice, +// automatically handling BOM markers. It's similar to os.ReadFile but strips +// UTF-8 BOM and converts UTF-16 encoded files to UTF-8. +func UTF8ReadFile(filename string) ([]byte, error) { + file, err := os.Open(filename) + if err != nil { + return nil, err + } + defer file.Close() + + reader := UTF8Reader(file) + return io.ReadAll(reader) +} diff --git a/utils/ioutils/ioutils_test.go b/utils/ioutils/ioutils_test.go new file mode 100644 index 000000000..7f5483879 --- /dev/null +++ b/utils/ioutils/ioutils_test.go @@ -0,0 +1,117 @@ +package ioutils + +import ( + "bytes" + "io" + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestIOUtils(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "IO Utils Suite") +} + +var _ = Describe("UTF8Reader", func() { + Context("when reading text with UTF-8 BOM", func() { + It("strips the UTF-8 BOM marker", func() { + // UTF-8 BOM is EF BB BF + input := []byte{0xEF, 0xBB, 0xBF, 'h', 'e', 'l', 'l', 'o'} + reader := UTF8Reader(bytes.NewReader(input)) + + output, err := io.ReadAll(reader) + Expect(err).ToNot(HaveOccurred()) + Expect(string(output)).To(Equal("hello")) + }) + + It("strips UTF-8 BOM from multi-line text", func() { + // Test with the actual LRC file format + input := []byte{0xEF, 0xBB, 0xBF, '[', '0', '0', ':', '0', '0', '.', '0', '0', ']', ' ', 't', 'e', 's', 't'} + reader := UTF8Reader(bytes.NewReader(input)) + + output, err := io.ReadAll(reader) + Expect(err).ToNot(HaveOccurred()) + Expect(string(output)).To(Equal("[00:00.00] test")) + }) + }) + + Context("when reading text without BOM", func() { + It("passes through unchanged", func() { + input := []byte("hello world") + reader := UTF8Reader(bytes.NewReader(input)) + + output, err := io.ReadAll(reader) + Expect(err).ToNot(HaveOccurred()) + Expect(string(output)).To(Equal("hello world")) + }) + }) + + Context("when reading UTF-16 LE encoded text", func() { + It("converts to UTF-8 and strips BOM", func() { + // UTF-16 LE BOM (FF FE) followed by "hi" in UTF-16 LE + input := []byte{0xFF, 0xFE, 'h', 0x00, 'i', 0x00} + reader := UTF8Reader(bytes.NewReader(input)) + + output, err := io.ReadAll(reader) + Expect(err).ToNot(HaveOccurred()) + Expect(string(output)).To(Equal("hi")) + }) + }) + + Context("when reading UTF-16 BE encoded text", func() { + It("converts to UTF-8 and strips BOM", func() { + // UTF-16 BE BOM (FE FF) followed by "hi" in UTF-16 BE + input := []byte{0xFE, 0xFF, 0x00, 'h', 0x00, 'i'} + reader := UTF8Reader(bytes.NewReader(input)) + + output, err := io.ReadAll(reader) + Expect(err).ToNot(HaveOccurred()) + Expect(string(output)).To(Equal("hi")) + }) + }) + + Context("when reading empty content", func() { + It("returns empty string", func() { + reader := UTF8Reader(bytes.NewReader([]byte{})) + + output, err := io.ReadAll(reader) + Expect(err).ToNot(HaveOccurred()) + Expect(string(output)).To(Equal("")) + }) + }) +}) + +var _ = Describe("UTF8ReadFile", func() { + Context("when reading a file with UTF-8 BOM", func() { + It("strips the BOM marker", func() { + // Use the actual fixture from issue #4631 + contents, err := UTF8ReadFile("../../tests/fixtures/bom-test.lrc") + Expect(err).ToNot(HaveOccurred()) + + // Should NOT start with BOM + Expect(contents[0]).ToNot(Equal(byte(0xEF))) + // Should start with '[' + Expect(contents[0]).To(Equal(byte('['))) + Expect(string(contents)).To(HavePrefix("[00:00.00]")) + }) + }) + + Context("when reading a file without BOM", func() { + It("reads the file normally", func() { + contents, err := UTF8ReadFile("../../tests/fixtures/test.lrc") + Expect(err).ToNot(HaveOccurred()) + + // Should contain the expected content + Expect(string(contents)).To(ContainSubstring("We're no strangers to love")) + }) + }) + + Context("when reading a non-existent file", func() { + It("returns an error", func() { + _, err := UTF8ReadFile("../../tests/fixtures/nonexistent.lrc") + Expect(err).To(HaveOccurred()) + }) + }) +})