Files
navidrome/utils/str/sanitize_strings_test.go
Deluan Quintão 7e083e0795 fix: split html sanitization from plaintext handling (#5403)
* fix: split html sanitization from plaintext handling

Add a dedicated SanitizeHTML helper for HTML-rendered values so entity-encoded markup is decoded before bluemonday sanitization. Use the new helper for the login welcome message and artist biographies while preserving SanitizeText semantics for lyrics and other plaintext callers. Add regression coverage for both helpers and the serveIndex welcomeMessage path.

* docs: add SanitizeText and SanitizeHTML godoc

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: preserve plain text in artist biographies

Revert artist biography storage to SanitizeText so entity-encoded plain text remains decoded for Subsonic consumers. This avoids double-escaping values like R&B in XML responses while keeping the new welcomeMessage HTML sanitization in place, and adds a regression test covering the biography storage behavior.

---------

Signed-off-by: Deluan <deluan@navidrome.org>
2026-04-23 17:53:28 -04:00

139 lines
4.5 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package str_test
import (
"github.com/navidrome/navidrome/conf"
"github.com/navidrome/navidrome/utils/str"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("Sanitize Strings", func() {
// Specs for str.SanitizeStrings: each case passes one or more raw strings and
// pins the exact single string that comes back.
Describe("SanitizeStrings", func() {
	It("returns all lowercase chars", func() {
		Expect(str.SanitizeStrings("Some Text")).To(Equal("some text"))
	})
	It("removes accents", func() {
		Expect(str.SanitizeStrings("Quintão")).To(Equal("quintao"))
	})
	It("remove extra spaces", func() {
		Expect(str.SanitizeStrings(" some text ", "text some")).To(Equal("some text"))
	})
	It("remove duplicated words", func() {
		Expect(str.SanitizeStrings("legião urbana", "urbana legiÃo")).To(Equal("legiao urbana"))
	})
	It("remove symbols", func() {
		Expect(str.SanitizeStrings("Toms Diner ' “40” A")).To(Equal("40 a diner toms"))
	})
	It("remove opening brackets", func() {
		Expect(str.SanitizeStrings("[Five Years]")).To(Equal("five years"))
	})
	It("remove slashes", func() {
		Expect(str.SanitizeStrings("folder/file\\yyyy")).To(Equal("file folder yyyy"))
	})
	It("normalizes utf chars", func() {
		// The two inputs spell the same word with different hyphen characters:
		// an em dash (U+2014, written as an escape so it cannot be dropped or
		// confused by tooling) and a plain ASCII hyphen-minus. Both must
		// collapse into the single word "k-os".
		Expect(str.SanitizeStrings("k\u2014os", "k-os")).To(Equal("k-os"))
	})
	It("remove commas", func() {
		// This is especially useful for handling cases where the Sort field uses a comma.
		// It reduces the size of the resulting string, thus reducing the size of the DB table and indexes.
		Expect(str.SanitizeStrings("Bob Marley", "Marley, Bob")).To(Equal("bob marley"))
	})
})
// Specs for str.SanitizeFieldForSorting, run with "The O" configured as the
// ignored-articles list. The expectations show the article staying in the
// result (lowercased and without accents).
Describe("SanitizeFieldForSorting", func() {
	BeforeEach(func() {
		// conf.Server is shared package-level state: snapshot the current
		// value and restore it after each spec so this override does not
		// leak into specs that run later.
		previous := conf.Server.IgnoredArticles
		DeferCleanup(func() {
			conf.Server.IgnoredArticles = previous
		})
		conf.Server.IgnoredArticles = "The O"
	})
	It("sanitize accents", func() {
		Expect(str.SanitizeFieldForSorting("Céu")).To(Equal("ceu"))
	})
	It("keeps articles", func() {
		Expect(str.SanitizeFieldForSorting("The Beatles")).To(Equal("the beatles"))
	})
	It("keeps accented articles", func() {
		Expect(str.SanitizeFieldForSorting("Õ Blésq Blom")).To(Equal("o blesq blom"))
	})
})
Describe("SanitizeText", func() {
It("preserves decoded plaintext", func() {
Expect(str.SanitizeText("Tom &amp; Jerry")).To(Equal("Tom & Jerry"))
Expect(str.SanitizeText("Tom & Jerry")).To(Equal("Tom & Jerry"))
})
It("keeps entity-encoded html readable", func() {
Expect(str.SanitizeText(`&lt;b&gt;ok&lt;/b&gt;`)).To(Equal("<b>ok</b>"))
})
})
// Specs for str.SanitizeHTML: for both raw and entity-encoded markup the
// harmless bold tag must survive while the onerror attribute and the script
// tag must be absent from the output.
Describe("SanitizeHTML", func() {
	It("removes dangerous content from raw html", func() {
		raw := `<img src=x onerror=alert(1)><script>alert(2)</script><b>ok</b>`
		Expect(str.SanitizeHTML(raw)).To(SatisfyAll(
			ContainSubstring("<b>ok</b>"),
			Not(ContainSubstring("onerror")),
			Not(ContainSubstring("<script")),
		))
	})
	It("removes dangerous content from entity-encoded html", func() {
		encoded := `&lt;img src=x onerror=alert(1)&gt;&lt;script&gt;alert(2)&lt;/script&gt;&lt;b&gt;ok&lt;/b&gt;`
		Expect(str.SanitizeHTML(encoded)).To(SatisfyAll(
			ContainSubstring("<b>ok</b>"),
			Not(ContainSubstring("onerror")),
			Not(ContainSubstring("<script")),
		))
	})
})
// Specs for str.SanitizeFieldForSortingNoArticle, run with "The O" configured
// as the ignored-articles list. The expectations show a leading article being
// dropped from the sanitized result.
Describe("SanitizeFieldForSortingNoArticle", func() {
	BeforeEach(func() {
		// conf.Server is shared package-level state: snapshot the current
		// value and restore it after each spec so this override does not
		// leak into specs that run later.
		previous := conf.Server.IgnoredArticles
		DeferCleanup(func() {
			conf.Server.IgnoredArticles = previous
		})
		conf.Server.IgnoredArticles = "The O"
	})
	It("sanitize accents", func() {
		Expect(str.SanitizeFieldForSortingNoArticle("Céu")).To(Equal("ceu"))
	})
	It("removes articles", func() {
		Expect(str.SanitizeFieldForSortingNoArticle("The Beatles")).To(Equal("beatles"))
	})
	It("removes accented articles", func() {
		Expect(str.SanitizeFieldForSortingNoArticle("Õ Blésq Blom")).To(Equal("blesq blom"))
	})
})
// Specs for str.RemoveArticle under two configurations of the ignored-articles
// list: empty, and a multi-language list of articles.
Describe("RemoveArticle", func() {
	BeforeEach(func() {
		// Runs before the per-context BeforeEach blocks below. conf.Server is
		// shared package-level state, so snapshot the current value here and
		// restore it after each spec, keeping the overrides from leaking.
		previous := conf.Server.IgnoredArticles
		DeferCleanup(func() {
			conf.Server.IgnoredArticles = previous
		})
	})
	Context("Empty articles list", func() {
		BeforeEach(func() {
			conf.Server.IgnoredArticles = ""
		})
		It("returns empty if string is empty", func() {
			Expect(str.RemoveArticle("")).To(BeEmpty())
		})
		It("returns same string", func() {
			Expect(str.RemoveArticle("The Beatles")).To(Equal("The Beatles"))
		})
	})
	Context("Default articles", func() {
		BeforeEach(func() {
			conf.Server.IgnoredArticles = "The El La Los Las Le Les Os As O A"
		})
		It("returns empty if string is empty", func() {
			Expect(str.RemoveArticle("")).To(BeEmpty())
		})
		It("remove prefix article from string", func() {
			Expect(str.RemoveArticle("Os Paralamas do Sucesso")).To(Equal("Paralamas do Sucesso"))
		})
		It("does not remove article if it is part of the first word", func() {
			// "Thelonious" starts with "The" but is a single word, so it must stay intact.
			Expect(str.RemoveArticle("Thelonious Monk")).To(Equal("Thelonious Monk"))
		})
	})
})
})