From 9d4ec051e7eb9efde78ebaffcb48a0bcb9bf6d81 Mon Sep 17 00:00:00 2001
From: Pascal Bleser
Date: Fri, 17 Oct 2025 17:18:48 +0200
Subject: [PATCH] groupware: add email HTML sanitization
* sanitize email text/html body parts using bluemonday
* deps(groupware):
  - new dependency: github.com/microcosm-cc/bluemonday
  - transitive dependencies:
    - github.com/aymerick/douceur
    - github.com/gorilla/css
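
Illustrative sketch of what the sanitization does (example input/output only,
not the exact implementation; the actual wiring is sanitizeEmail in
groupware_api_emails.go):

    policy := bluemonday.UGCPolicy()
    in := `<a href="http://example.com" onclick="alert('xss')">link</a>`
    out := policy.Sanitize(in)
    // out == `<a href="http://example.com" rel="nofollow">link</a>`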
---
go.mod | 3 +
go.sum | 6 +
.../pkg/groupware/groupware_api_emails.go | 87 +-
.../pkg/groupware/groupware_framework.go | 4 +
.../groupware/pkg/groupware/groupware_test.go | 34 +
vendor/github.com/aymerick/douceur/LICENSE | 22 +
.../aymerick/douceur/css/declaration.go | 60 +
.../github.com/aymerick/douceur/css/rule.go | 230 ++
.../aymerick/douceur/css/stylesheet.go | 25 +
.../aymerick/douceur/parser/parser.go | 409 ++++
vendor/github.com/gorilla/css/LICENSE | 28 +
vendor/github.com/gorilla/css/scanner/doc.go | 33 +
.../github.com/gorilla/css/scanner/scanner.go | 360 +++
.../microcosm-cc/bluemonday/CONTRIBUTING.md | 50 +
.../microcosm-cc/bluemonday/CREDITS.md | 8 +
.../microcosm-cc/bluemonday/LICENSE.md | 28 +
.../microcosm-cc/bluemonday/README.md | 386 ++++
.../microcosm-cc/bluemonday/SECURITY.md | 13 +
.../microcosm-cc/bluemonday/css/handlers.go | 2016 +++++++++++++++++
.../github.com/microcosm-cc/bluemonday/doc.go | 104 +
.../microcosm-cc/bluemonday/helpers.go | 300 +++
.../microcosm-cc/bluemonday/policies.go | 253 +++
.../microcosm-cc/bluemonday/policy.go | 990 ++++++++
.../microcosm-cc/bluemonday/sanitize.go | 1096 +++++++++
vendor/modules.txt | 11 +
25 files changed, 6537 insertions(+), 19 deletions(-)
create mode 100644 services/groupware/pkg/groupware/groupware_test.go
create mode 100644 vendor/github.com/aymerick/douceur/LICENSE
create mode 100644 vendor/github.com/aymerick/douceur/css/declaration.go
create mode 100644 vendor/github.com/aymerick/douceur/css/rule.go
create mode 100644 vendor/github.com/aymerick/douceur/css/stylesheet.go
create mode 100644 vendor/github.com/aymerick/douceur/parser/parser.go
create mode 100644 vendor/github.com/gorilla/css/LICENSE
create mode 100644 vendor/github.com/gorilla/css/scanner/doc.go
create mode 100644 vendor/github.com/gorilla/css/scanner/scanner.go
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/CONTRIBUTING.md
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/CREDITS.md
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/LICENSE.md
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/README.md
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/SECURITY.md
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/css/handlers.go
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/doc.go
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/helpers.go
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/policies.go
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/policy.go
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/sanitize.go
diff --git a/go.mod b/go.mod
index 839a08a52..a9d2d9748 100644
--- a/go.mod
+++ b/go.mod
@@ -142,6 +142,7 @@ require (
github.com/antithesishq/antithesis-sdk-go v0.5.0-default-no-op // indirect
github.com/armon/go-radix v1.0.0 // indirect
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
+ github.com/aymerick/douceur v0.2.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bitly/go-simplejson v0.5.0 // indirect
github.com/bits-and-blooms/bitset v1.22.0 // indirect
@@ -256,6 +257,7 @@ require (
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
github.com/google/renameio/v2 v2.0.1 // indirect
github.com/gookit/goutil v0.7.1 // indirect
+ github.com/gorilla/css v1.0.1 // indirect
github.com/gorilla/handlers v1.5.1 // indirect
github.com/gorilla/schema v1.4.1 // indirect
github.com/gorilla/websocket v1.5.3 // indirect
@@ -301,6 +303,7 @@ require (
github.com/mattn/go-sqlite3 v1.14.33 // indirect
github.com/maxymania/go-system v0.0.0-20170110133659-647cc364bf0b // indirect
github.com/mendsley/gojwk v0.0.0-20141217222730-4d5ec6e58103 // indirect
+ github.com/microcosm-cc/bluemonday v1.0.27 // indirect
github.com/miekg/dns v1.1.57 // indirect
github.com/mileusna/useragent v1.3.5 // indirect
github.com/minio/crc64nvme v1.1.1 // indirect
diff --git a/go.sum b/go.sum
index 95f9ec314..2d79bf545 100644
--- a/go.sum
+++ b/go.sum
@@ -138,6 +138,8 @@ github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkY
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so=
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
github.com/aws/aws-sdk-go v1.37.27/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro=
+github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
+github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
github.com/bbalet/stopwords v1.0.0 h1:0TnGycCtY0zZi4ltKoOGRFIlZHv0WqpoIGUsObjztfo=
github.com/bbalet/stopwords v1.0.0/go.mod h1:sAWrQoDMfqARGIn4s6dp7OW7ISrshUD8IP2q3KoqPjc=
github.com/beevik/etree v1.6.0 h1:u8Kwy8pp9D9XeITj2Z0XtA5qqZEmtJtuXZRQi+j03eE=
@@ -632,6 +634,8 @@ github.com/gophercloud/gophercloud v0.16.0/go.mod h1:wRtmUelyIIv3CSSDI47aUwbs075
github.com/gophercloud/utils v0.0.0-20210216074907-f6de111f2eae/go.mod h1:wx8HMD8oQD0Ryhz6+6ykq75PJ79iPyEqYHfwZ4l7OsA=
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg=
+github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
+github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
github.com/gorilla/handlers v1.5.1 h1:9lRY6j8DEeeBT10CvO9hGW0gmky0BprnvDI5vfhUHH4=
github.com/gorilla/handlers v1.5.1/go.mod h1:t8XrUpc4KVXb7HGyJ4/cEnwQiaxrX/hz1Zv/4g96P1Q=
github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
@@ -876,6 +880,8 @@ github.com/mendsley/gojwk v0.0.0-20141217222730-4d5ec6e58103 h1:Z/i1e+gTZrmcGeZy
github.com/mendsley/gojwk v0.0.0-20141217222730-4d5ec6e58103/go.mod h1:o9YPB5aGP8ob35Vy6+vyq3P3bWe7NQWzf+JLiXCiMaE=
github.com/mfridman/tparse v0.18.0 h1:wh6dzOKaIwkUGyKgOntDW4liXSo37qg5AXbIhkMV3vE=
github.com/mfridman/tparse v0.18.0/go.mod h1:gEvqZTuCgEhPbYk/2lS3Kcxg1GmTxxU7kTC8DvP0i/A=
+github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk=
+github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA=
github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg=
github.com/miekg/dns v1.1.40/go.mod h1:KNUDUusw/aVsxyTYZM1oqvCicbwhgbNgztCETuNZ7xM=
github.com/miekg/dns v1.1.57 h1:Jzi7ApEIzwEPLHWRcafCN9LZSBbqQpxjt/wpgvg7wcM=
diff --git a/services/groupware/pkg/groupware/groupware_api_emails.go b/services/groupware/pkg/groupware/groupware_api_emails.go
index 5576885aa..1ab735c2e 100644
--- a/services/groupware/pkg/groupware/groupware_api_emails.go
+++ b/services/groupware/pkg/groupware/groupware_api_emails.go
@@ -11,6 +11,7 @@ import (
"time"
"github.com/go-chi/chi/v5"
+ "github.com/microcosm-cc/bluemonday"
"github.com/rs/zerolog"
"github.com/opencloud-eu/opencloud/pkg/jmap"
@@ -77,12 +78,12 @@ func (g *Groupware) GetAllEmailsInMailbox(w http.ResponseWriter, r *http.Request
logger := log.From(req.logger.With().Str(HeaderSince, log.SafeString(since)).Str(logAccountId, log.SafeString(accountId)))
- emails, sessionState, lang, jerr := g.jmap.GetMailboxChanges(accountId, req.session, req.ctx, logger, req.language(), mailboxId, since, true, g.maxBodyValueBytes, maxChanges)
+ changes, sessionState, lang, jerr := g.jmap.GetMailboxChanges(accountId, req.session, req.ctx, logger, req.language(), mailboxId, since, true, g.maxBodyValueBytes, maxChanges)
if jerr != nil {
return req.errorResponseFromJmap(jerr)
}
- return etagResponse(emails, sessionState, emails.State, lang)
+ return etagResponse(changes, sessionState, changes.State, lang)
})
} else {
g.respond(w, r, func(req Request) Response {
@@ -119,7 +120,15 @@ func (g *Groupware) GetAllEmailsInMailbox(w http.ResponseWriter, r *http.Request
return req.errorResponseFromJmap(jerr)
}
- return etagResponse(emails, sessionState, emails.State, lang)
+ safe := jmap.Emails{
+ Emails: g.sanitizeEmails(emails.Emails),
+ Total: emails.Total,
+ Limit: emails.Limit,
+ Offset: emails.Offset,
+ State: emails.State,
+ }
+
+ return etagResponse(safe, sessionState, emails.State, lang)
})
}
}
@@ -147,7 +156,7 @@ func (g *Groupware) GetEmailsById(w http.ResponseWriter, r *http.Request) {
if len(emails.Emails) < 1 {
return notFoundResponse(sessionState)
} else {
- return etagResponse(emails.Emails[0], sessionState, emails.State, lang)
+ return etagResponse(g.sanitizeEmail(emails.Emails[0]), sessionState, emails.State, lang)
}
} else {
logger := log.From(l.Array("ids", log.SafeStringArray(ids)))
@@ -158,7 +167,7 @@ func (g *Groupware) GetEmailsById(w http.ResponseWriter, r *http.Request) {
if len(emails.Emails) < 1 {
return notFoundResponse(sessionState)
} else {
- return etagResponse(emails.Emails, sessionState, emails.State, lang)
+ return etagResponse(g.sanitizeEmails(emails.Emails), sessionState, emails.State, lang)
}
}
})
@@ -203,7 +212,7 @@ func (g *Groupware) GetEmailAttachments(w http.ResponseWriter, r *http.Request)
if len(emails.Emails) < 1 {
return notFoundResponse(sessionState)
}
- email := emails.Emails[0]
+ email := g.sanitizeEmail(emails.Emails[0])
return etagResponse(email.Attachments, sessionState, emails.State, lang)
})
} else {
@@ -229,7 +238,7 @@ func (g *Groupware) GetEmailAttachments(w http.ResponseWriter, r *http.Request)
return nil
}
- email := emails.Emails[0]
+ email := g.sanitizeEmail(emails.Emails[0])
var attachment *jmap.EmailBodyPart = nil
for _, part := range email.Attachments {
if attachmentSelector(part) {
@@ -302,12 +311,12 @@ func (g *Groupware) getEmailsSince(w http.ResponseWriter, r *http.Request, since
logger := log.From(l)
- emails, sessionState, lang, jerr := g.jmap.GetEmailsSince(accountId, req.session, req.ctx, logger, req.language(), since, true, g.maxBodyValueBytes, maxChanges)
+ changes, sessionState, lang, jerr := g.jmap.GetEmailsSince(accountId, req.session, req.ctx, logger, req.language(), since, true, g.maxBodyValueBytes, maxChanges)
if jerr != nil {
return req.errorResponseFromJmap(jerr)
}
- return etagResponse(emails, sessionState, emails.State, lang)
+ return etagResponse(changes, sessionState, changes.State, lang)
})
}
@@ -518,8 +527,6 @@ func (g *Groupware) searchEmails(w http.ResponseWriter, r *http.Request) {
}
logger = log.From(logger.With().Str(logAccountId, log.SafeString(accountId)))
- g.jmap.QueryEmails([]string{accountId}, filter, req.session, req.ctx, logger, req.language(), offset, limit, fetchBodies, g.maxBodyValueBytes)
-
resultsByAccount, sessionState, lang, jerr := g.jmap.QueryEmailsWithSnippets([]string{accountId}, filter, req.session, req.ctx, logger, req.language(), offset, limit, fetchBodies, g.maxBodyValueBytes)
if jerr != nil {
return req.errorResponseFromJmap(jerr)
@@ -542,7 +549,7 @@ func (g *Groupware) searchEmails(w http.ResponseWriter, r *http.Request) {
}
flattened[i] = EmailWithSnippets{
// AccountId: accountId,
- Email: result.Email,
+ Email: g.sanitizeEmail(result.Email),
Snippets: snippets,
}
}
@@ -653,7 +660,7 @@ func (g *Groupware) GetEmailsForAllAccounts(w http.ResponseWriter, r *http.Reque
})
flattened[i] = EmailWithSnippets{
AccountId: accountId,
- Email: result.Email,
+ Email: g.sanitizeEmail(result.Email),
Snippets: snippets,
}
}
@@ -701,7 +708,7 @@ func (g *Groupware) GetEmailsForAllAccounts(w http.ResponseWriter, r *http.Reque
i := 0
for _, list := range resultsByAccountId {
for _, e := range list.Emails {
- flattened[i] = e
+ flattened[i] = g.sanitizeEmail(e)
i++
}
}
@@ -1196,7 +1203,7 @@ type AboutEmailResponse struct {
Language jmap.Language `json:"lang"`
}
-func relatedEmails(email jmap.Email, beacon time.Time, days uint) jmap.EmailFilterElement {
+func relatedEmailsFilter(email jmap.Email, beacon time.Time, days uint) jmap.EmailFilterElement {
filters := []jmap.EmailFilterElement{}
for _, from := range email.From {
if from.Email != "" {
@@ -1283,7 +1290,7 @@ func (g *Groupware) RelatedToEmail(w http.ResponseWriter, r *http.Request) {
beacon := email.ReceivedAt // TODO configurable: either relative to when the email was received, or relative to now
//beacon := time.Now()
- filter := relatedEmails(email, beacon, days)
+ filter := relatedEmailsFilter(email, beacon, days)
// bgctx, _ := context.WithTimeout(context.Background(), time.Duration(30)*time.Second) // TODO configurable
bgctx := context.Background()
@@ -1298,7 +1305,7 @@ func (g *Groupware) RelatedToEmail(w http.ResponseWriter, r *http.Request) {
l.Error().Err(jerr).Msgf("failed to query %v emails", RelationTypeSameSender)
} else {
req.observe(g.metrics.EmailSameSenderDuration.WithLabelValues(req.session.JmapEndpoint), duration.Seconds())
- related := filterEmails(results.Emails, email)
+ related := g.sanitizeEmails(filterEmails(results.Emails, email))
l.Trace().Msgf("'%v' found %v other emails", RelationTypeSameSender, len(related))
if len(related) > 0 {
req.push(RelationEntityEmail, AboutEmailsEvent{Id: reqId, Emails: related, Source: RelationTypeSameSender, Language: lang})
@@ -1316,7 +1323,7 @@ func (g *Groupware) RelatedToEmail(w http.ResponseWriter, r *http.Request) {
l.Error().Err(jerr).Msgf("failed to list %v emails", RelationTypeSameThread)
} else {
req.observe(g.metrics.EmailSameThreadDuration.WithLabelValues(req.session.JmapEndpoint), duration.Seconds())
- related := filterEmails(emails, email)
+ related := g.sanitizeEmails(filterEmails(emails, email))
l.Trace().Msgf("'%v' found %v other emails", RelationTypeSameThread, len(related))
if len(related) > 0 {
req.push(RelationEntityEmail, AboutEmailsEvent{Id: reqId, Emails: related, Source: RelationTypeSameThread, Language: lang})
@@ -1325,7 +1332,7 @@ func (g *Groupware) RelatedToEmail(w http.ResponseWriter, r *http.Request) {
})
return etagResponse(AboutEmailResponse{
- Email: email,
+ Email: g.sanitizeEmail(email),
RequestId: reqId,
}, sessionState, emails.State, lang)
})
@@ -1703,3 +1710,45 @@ func squashQueryState[V any](all map[string]V, mapper func(V) jmap.State) jmap.S
}
return jmap.State(strings.Join(parts, ","))
}
+
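+// sanitizationPolicy is the shared bluemonday policy applied to text/html
+// email body parts. UGCPolicy keeps common formatting and links but strips
+// scripts, event handlers and other active content.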
+var sanitizationPolicy *bluemonday.Policy = bluemonday.UGCPolicy()
+
+func (g *Groupware) sanitizeEmail(source jmap.Email) jmap.Email {
+ if !g.sanitize {
+ return source
+ }
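+ // A part can be referenced from both HtmlBody and TextBody; memory caches
+ // the sanitized size per part id so each part is sanitized only once and
+ // every reference reports the new size.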
+ memory := map[string]int{}
+ for _, ref := range []*[]jmap.EmailBodyPart{&source.HtmlBody, &source.TextBody} {
+ newBody := make([]jmap.EmailBodyPart, len(*ref))
+ for i, p := range *ref {
+ if p.Type == "text/html" {
+ if already, done := memory[p.PartId]; !done {
+ if part, ok := source.BodyValues[p.PartId]; ok {
+ safe := sanitizationPolicy.Sanitize(part.Value)
+ part.Value = safe
+ source.BodyValues[p.PartId] = part
+ newLen := len(safe)
+ memory[p.PartId] = newLen
+ p.Size = newLen
+ }
+ } else {
+ p.Size = already
+ }
+ }
+ newBody[i] = p
+ }
+ *ref = newBody
+ }
+ return source
+}
+
+func (g *Groupware) sanitizeEmails(source []jmap.Email) []jmap.Email {
+ if !g.sanitize {
+ return source
+ }
+ result := make([]jmap.Email, len(source))
+ for i, email := range source {
+ result[i] = g.sanitizeEmail(email)
+ }
+ return result
+}
diff --git a/services/groupware/pkg/groupware/groupware_framework.go b/services/groupware/pkg/groupware/groupware_framework.go
index 33613521e..33334257d 100644
--- a/services/groupware/pkg/groupware/groupware_framework.go
+++ b/services/groupware/pkg/groupware/groupware_framework.go
@@ -90,6 +90,7 @@ type Groupware struct {
logger *log.Logger
defaultEmailLimit uint
maxBodyValueBytes uint
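+ // sanitize enables bluemonday sanitization of text/html email body parts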
+ sanitize bool
// Caches successful and failed Sessions by the username.
sessionCache sessionCache
jmap *jmap.Client
@@ -192,6 +193,8 @@ func NewGroupware(config *config.Config, logger *log.Logger, mux *chi.Mux, prome
insecureTls := true // TODO make configurable
+ sanitize := true // TODO make configurable
+
m := metrics.New(prometheusRegistry, logger)
// TODO add timeouts and other meaningful configuration settings for the HTTP client
@@ -339,6 +342,7 @@ func NewGroupware(config *config.Config, logger *log.Logger, mux *chi.Mux, prome
jmap: &jmapClient,
defaultEmailLimit: defaultEmailLimit,
maxBodyValueBytes: maxBodyValueBytes,
+ sanitize: sanitize,
eventChannel: eventChannel,
jobsChannel: jobsChannel,
jobCounter: atomic.Uint64{},
diff --git a/services/groupware/pkg/groupware/groupware_test.go b/services/groupware/pkg/groupware/groupware_test.go
new file mode 100644
index 000000000..1017a1af3
--- /dev/null
+++ b/services/groupware/pkg/groupware/groupware_test.go
@@ -0,0 +1,34 @@
+package groupware
+
+import (
+ "testing"
+
+ "github.com/opencloud-eu/opencloud/pkg/jmap"
+ "github.com/stretchr/testify/require"
+)
+
+func TestSanitizeEmail(t *testing.T) {
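+ // The HTML body part carries a link with an onclick handler. Sanitization
+ // is expected to strip the handler and add rel="nofollow", shrinking the
+ // part from 65 to 57 bytes.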
+ email := jmap.Email{
+ Subject: "test",
+ BodyValues: map[string]jmap.EmailBodyValue{
+ "koze92I1": {
+ Value: `<a href="http://www.google.com" onclick="alert('xss')">Google</a>`,
+ },
+ },
+ HtmlBody: []jmap.EmailBodyPart{
+ {
+ PartId: "koze92I1",
+ Type: "text/html",
+ Size: 65,
+ },
+ },
+ }
+
+ g := &Groupware{sanitize: true}
+
+ safe := g.sanitizeEmail(email)
+
+ require := require.New(t)
+ require.Equal(`<a href="http://www.google.com" rel="nofollow">Google</a>`, safe.BodyValues["koze92I1"].Value)
+ require.Equal(57, safe.HtmlBody[0].Size)
+}
diff --git a/vendor/github.com/aymerick/douceur/LICENSE b/vendor/github.com/aymerick/douceur/LICENSE
new file mode 100644
index 000000000..6ce87cd37
--- /dev/null
+++ b/vendor/github.com/aymerick/douceur/LICENSE
@@ -0,0 +1,22 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Aymerick JEHANNE
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
diff --git a/vendor/github.com/aymerick/douceur/css/declaration.go b/vendor/github.com/aymerick/douceur/css/declaration.go
new file mode 100644
index 000000000..61d29d335
--- /dev/null
+++ b/vendor/github.com/aymerick/douceur/css/declaration.go
@@ -0,0 +1,60 @@
+package css
+
+import "fmt"
+
+// Declaration represents a parsed style property
+type Declaration struct {
+ Property string
+ Value string
+ Important bool
+}
+
+// NewDeclaration instanciates a new Declaration
+func NewDeclaration() *Declaration {
+ return &Declaration{}
+}
+
+// Returns string representation of the Declaration
+func (decl *Declaration) String() string {
+ return decl.StringWithImportant(true)
+}
+
+// StringWithImportant returns string representation with optional !important part
+func (decl *Declaration) StringWithImportant(option bool) string {
+ result := fmt.Sprintf("%s: %s", decl.Property, decl.Value)
+
+ if option && decl.Important {
+ result += " !important"
+ }
+
+ result += ";"
+
+ return result
+}
+
+// Equal returns true if both Declarations are equals
+func (decl *Declaration) Equal(other *Declaration) bool {
+ return (decl.Property == other.Property) && (decl.Value == other.Value) && (decl.Important == other.Important)
+}
+
+//
+// DeclarationsByProperty
+//
+
+// DeclarationsByProperty represents sortable style declarations
+type DeclarationsByProperty []*Declaration
+
+// Implements sort.Interface
+func (declarations DeclarationsByProperty) Len() int {
+ return len(declarations)
+}
+
+// Implements sort.Interface
+func (declarations DeclarationsByProperty) Swap(i, j int) {
+ declarations[i], declarations[j] = declarations[j], declarations[i]
+}
+
+// Implements sort.Interface
+func (declarations DeclarationsByProperty) Less(i, j int) bool {
+ return declarations[i].Property < declarations[j].Property
+}
diff --git a/vendor/github.com/aymerick/douceur/css/rule.go b/vendor/github.com/aymerick/douceur/css/rule.go
new file mode 100644
index 000000000..b5a44b542
--- /dev/null
+++ b/vendor/github.com/aymerick/douceur/css/rule.go
@@ -0,0 +1,230 @@
+package css
+
+import (
+ "fmt"
+ "strings"
+)
+
+const (
+ indentSpace = 2
+)
+
+// RuleKind represents a Rule kind
+type RuleKind int
+
+// Rule kinds
+const (
+ QualifiedRule RuleKind = iota
+ AtRule
+)
+
+// At Rules than have Rules inside their block instead of Declarations
+var atRulesWithRulesBlock = []string{
+ "@document", "@font-feature-values", "@keyframes", "@media", "@supports",
+}
+
+// Rule represents a parsed CSS rule
+type Rule struct {
+ Kind RuleKind
+
+ // At Rule name (eg: "@media")
+ Name string
+
+ // Raw prelude
+ Prelude string
+
+ // Qualified Rule selectors parsed from prelude
+ Selectors []string
+
+ // Style properties
+ Declarations []*Declaration
+
+ // At Rule embedded rules
+ Rules []*Rule
+
+ // Current rule embedding level
+ EmbedLevel int
+}
+
+// NewRule instanciates a new Rule
+func NewRule(kind RuleKind) *Rule {
+ return &Rule{
+ Kind: kind,
+ }
+}
+
+// Returns string representation of rule kind
+func (kind RuleKind) String() string {
+ switch kind {
+ case QualifiedRule:
+ return "Qualified Rule"
+ case AtRule:
+ return "At Rule"
+ default:
+ return "WAT"
+ }
+}
+
+// EmbedsRules returns true if this rule embeds another rules
+func (rule *Rule) EmbedsRules() bool {
+ if rule.Kind == AtRule {
+ for _, atRuleName := range atRulesWithRulesBlock {
+ if rule.Name == atRuleName {
+ return true
+ }
+ }
+ }
+
+ return false
+}
+
+// Equal returns true if both rules are equals
+func (rule *Rule) Equal(other *Rule) bool {
+ if (rule.Kind != other.Kind) ||
+ (rule.Prelude != other.Prelude) ||
+ (rule.Name != other.Name) {
+ return false
+ }
+
+ if (len(rule.Selectors) != len(other.Selectors)) ||
+ (len(rule.Declarations) != len(other.Declarations)) ||
+ (len(rule.Rules) != len(other.Rules)) {
+ return false
+ }
+
+ for i, sel := range rule.Selectors {
+ if sel != other.Selectors[i] {
+ return false
+ }
+ }
+
+ for i, decl := range rule.Declarations {
+ if !decl.Equal(other.Declarations[i]) {
+ return false
+ }
+ }
+
+ for i, rule := range rule.Rules {
+ if !rule.Equal(other.Rules[i]) {
+ return false
+ }
+ }
+
+ return true
+}
+
+// Diff returns a string representation of rules differences
+func (rule *Rule) Diff(other *Rule) []string {
+ result := []string{}
+
+ if rule.Kind != other.Kind {
+ result = append(result, fmt.Sprintf("Kind: %s | %s", rule.Kind.String(), other.Kind.String()))
+ }
+
+ if rule.Prelude != other.Prelude {
+ result = append(result, fmt.Sprintf("Prelude: \"%s\" | \"%s\"", rule.Prelude, other.Prelude))
+ }
+
+ if rule.Name != other.Name {
+ result = append(result, fmt.Sprintf("Name: \"%s\" | \"%s\"", rule.Name, other.Name))
+ }
+
+ if len(rule.Selectors) != len(other.Selectors) {
+ result = append(result, fmt.Sprintf("Selectors: %v | %v", strings.Join(rule.Selectors, ", "), strings.Join(other.Selectors, ", ")))
+ } else {
+ for i, sel := range rule.Selectors {
+ if sel != other.Selectors[i] {
+ result = append(result, fmt.Sprintf("Selector: \"%s\" | \"%s\"", sel, other.Selectors[i]))
+ }
+ }
+ }
+
+ if len(rule.Declarations) != len(other.Declarations) {
+ result = append(result, fmt.Sprintf("Declarations Nb: %d | %d", len(rule.Declarations), len(other.Declarations)))
+ } else {
+ for i, decl := range rule.Declarations {
+ if !decl.Equal(other.Declarations[i]) {
+ result = append(result, fmt.Sprintf("Declaration: \"%s\" | \"%s\"", decl.String(), other.Declarations[i].String()))
+ }
+ }
+ }
+
+ if len(rule.Rules) != len(other.Rules) {
+ result = append(result, fmt.Sprintf("Rules Nb: %d | %d", len(rule.Rules), len(other.Rules)))
+ } else {
+
+ for i, rule := range rule.Rules {
+ if !rule.Equal(other.Rules[i]) {
+ result = append(result, fmt.Sprintf("Rule: \"%s\" | \"%s\"", rule.String(), other.Rules[i].String()))
+ }
+ }
+ }
+
+ return result
+}
+
+// Returns the string representation of a rule
+func (rule *Rule) String() string {
+ result := ""
+
+ if rule.Kind == QualifiedRule {
+ for i, sel := range rule.Selectors {
+ if i != 0 {
+ result += ", "
+ }
+ result += sel
+ }
+ } else {
+ // AtRule
+ result += fmt.Sprintf("%s", rule.Name)
+
+ if rule.Prelude != "" {
+ if result != "" {
+ result += " "
+ }
+ result += fmt.Sprintf("%s", rule.Prelude)
+ }
+ }
+
+ if (len(rule.Declarations) == 0) && (len(rule.Rules) == 0) {
+ result += ";"
+ } else {
+ result += " {\n"
+
+ if rule.EmbedsRules() {
+ for _, subRule := range rule.Rules {
+ result += fmt.Sprintf("%s%s\n", rule.indent(), subRule.String())
+ }
+ } else {
+ for _, decl := range rule.Declarations {
+ result += fmt.Sprintf("%s%s\n", rule.indent(), decl.String())
+ }
+ }
+
+ result += fmt.Sprintf("%s}", rule.indentEndBlock())
+ }
+
+ return result
+}
+
+// Returns identation spaces for declarations and rules
+func (rule *Rule) indent() string {
+ result := ""
+
+ for i := 0; i < ((rule.EmbedLevel + 1) * indentSpace); i++ {
+ result += " "
+ }
+
+ return result
+}
+
+// Returns identation spaces for end of block character
+func (rule *Rule) indentEndBlock() string {
+ result := ""
+
+ for i := 0; i < (rule.EmbedLevel * indentSpace); i++ {
+ result += " "
+ }
+
+ return result
+}
diff --git a/vendor/github.com/aymerick/douceur/css/stylesheet.go b/vendor/github.com/aymerick/douceur/css/stylesheet.go
new file mode 100644
index 000000000..6b32c2ec9
--- /dev/null
+++ b/vendor/github.com/aymerick/douceur/css/stylesheet.go
@@ -0,0 +1,25 @@
+package css
+
+// Stylesheet represents a parsed stylesheet
+type Stylesheet struct {
+ Rules []*Rule
+}
+
+// NewStylesheet instanciate a new Stylesheet
+func NewStylesheet() *Stylesheet {
+ return &Stylesheet{}
+}
+
+// Returns string representation of the Stylesheet
+func (sheet *Stylesheet) String() string {
+ result := ""
+
+ for _, rule := range sheet.Rules {
+ if result != "" {
+ result += "\n"
+ }
+ result += rule.String()
+ }
+
+ return result
+}
diff --git a/vendor/github.com/aymerick/douceur/parser/parser.go b/vendor/github.com/aymerick/douceur/parser/parser.go
new file mode 100644
index 000000000..6c4917ccf
--- /dev/null
+++ b/vendor/github.com/aymerick/douceur/parser/parser.go
@@ -0,0 +1,409 @@
+package parser
+
+import (
+ "errors"
+ "fmt"
+ "regexp"
+ "strings"
+
+ "github.com/gorilla/css/scanner"
+
+ "github.com/aymerick/douceur/css"
+)
+
+const (
+ importantSuffixRegexp = `(?i)\s*!important\s*$`
+)
+
+var (
+ importantRegexp *regexp.Regexp
+)
+
+// Parser represents a CSS parser
+type Parser struct {
+ scan *scanner.Scanner // Tokenizer
+
+ // Tokens parsed but not consumed yet
+ tokens []*scanner.Token
+
+ // Rule embedding level
+ embedLevel int
+}
+
+func init() {
+ importantRegexp = regexp.MustCompile(importantSuffixRegexp)
+}
+
+// NewParser instanciates a new parser
+func NewParser(txt string) *Parser {
+ return &Parser{
+ scan: scanner.New(txt),
+ }
+}
+
+// Parse parses a whole stylesheet
+func Parse(text string) (*css.Stylesheet, error) {
+ result, err := NewParser(text).ParseStylesheet()
+ if err != nil {
+ return nil, err
+ }
+
+ return result, nil
+}
+
+// ParseDeclarations parses CSS declarations
+func ParseDeclarations(text string) ([]*css.Declaration, error) {
+ result, err := NewParser(text).ParseDeclarations()
+ if err != nil {
+ return nil, err
+ }
+
+ return result, nil
+}
+
+// ParseStylesheet parses a stylesheet
+func (parser *Parser) ParseStylesheet() (*css.Stylesheet, error) {
+ result := css.NewStylesheet()
+
+ // Parse BOM
+ if _, err := parser.parseBOM(); err != nil {
+ return result, err
+ }
+
+ // Parse list of rules
+ rules, err := parser.ParseRules()
+ if err != nil {
+ return result, err
+ }
+
+ result.Rules = rules
+
+ return result, nil
+}
+
+// ParseRules parses a list of rules
+func (parser *Parser) ParseRules() ([]*css.Rule, error) {
+ result := []*css.Rule{}
+
+ inBlock := false
+ if parser.tokenChar("{") {
+ // parsing a block of rules
+ inBlock = true
+ parser.embedLevel++
+
+ parser.shiftToken()
+ }
+
+ for parser.tokenParsable() {
+ if parser.tokenIgnorable() {
+ parser.shiftToken()
+ } else if parser.tokenChar("}") {
+ if !inBlock {
+ errMsg := fmt.Sprintf("Unexpected } character: %s", parser.nextToken().String())
+ return result, errors.New(errMsg)
+ }
+
+ parser.shiftToken()
+ parser.embedLevel--
+
+ // finished
+ break
+ } else {
+ rule, err := parser.ParseRule()
+ if err != nil {
+ return result, err
+ }
+
+ rule.EmbedLevel = parser.embedLevel
+ result = append(result, rule)
+ }
+ }
+
+ return result, parser.err()
+}
+
+// ParseRule parses a rule
+func (parser *Parser) ParseRule() (*css.Rule, error) {
+ if parser.tokenAtKeyword() {
+ return parser.parseAtRule()
+ }
+
+ return parser.parseQualifiedRule()
+}
+
+// ParseDeclarations parses a list of declarations
+func (parser *Parser) ParseDeclarations() ([]*css.Declaration, error) {
+ result := []*css.Declaration{}
+
+ if parser.tokenChar("{") {
+ parser.shiftToken()
+ }
+
+ for parser.tokenParsable() {
+ if parser.tokenIgnorable() {
+ parser.shiftToken()
+ } else if parser.tokenChar("}") {
+ // end of block
+ parser.shiftToken()
+ break
+ } else {
+ declaration, err := parser.ParseDeclaration()
+ if err != nil {
+ return result, err
+ }
+
+ result = append(result, declaration)
+ }
+ }
+
+ return result, parser.err()
+}
+
+// ParseDeclaration parses a declaration
+func (parser *Parser) ParseDeclaration() (*css.Declaration, error) {
+ result := css.NewDeclaration()
+ curValue := ""
+
+ for parser.tokenParsable() {
+ if parser.tokenChar(":") {
+ result.Property = strings.TrimSpace(curValue)
+ curValue = ""
+
+ parser.shiftToken()
+ } else if parser.tokenChar(";") || parser.tokenChar("}") {
+ if result.Property == "" {
+ errMsg := fmt.Sprintf("Unexpected ; character: %s", parser.nextToken().String())
+ return result, errors.New(errMsg)
+ }
+
+ if importantRegexp.MatchString(curValue) {
+ result.Important = true
+ curValue = importantRegexp.ReplaceAllString(curValue, "")
+ }
+
+ result.Value = strings.TrimSpace(curValue)
+
+ if parser.tokenChar(";") {
+ parser.shiftToken()
+ }
+
+ // finished
+ break
+ } else {
+ token := parser.shiftToken()
+ curValue += token.Value
+ }
+ }
+
+ // log.Printf("[parsed] Declaration: %s", result.String())
+
+ return result, parser.err()
+}
+
+// Parse an At Rule
+func (parser *Parser) parseAtRule() (*css.Rule, error) {
+ // parse rule name (eg: "@import")
+ token := parser.shiftToken()
+
+ result := css.NewRule(css.AtRule)
+ result.Name = token.Value
+
+ for parser.tokenParsable() {
+ if parser.tokenChar(";") {
+ parser.shiftToken()
+
+ // finished
+ break
+ } else if parser.tokenChar("{") {
+ if result.EmbedsRules() {
+ // parse rules block
+ rules, err := parser.ParseRules()
+ if err != nil {
+ return result, err
+ }
+
+ result.Rules = rules
+ } else {
+ // parse declarations block
+ declarations, err := parser.ParseDeclarations()
+ if err != nil {
+ return result, err
+ }
+
+ result.Declarations = declarations
+ }
+
+ // finished
+ break
+ } else {
+ // parse prelude
+ prelude, err := parser.parsePrelude()
+ if err != nil {
+ return result, err
+ }
+
+ result.Prelude = prelude
+ }
+ }
+
+ // log.Printf("[parsed] Rule: %s", result.String())
+
+ return result, parser.err()
+}
+
+// Parse a Qualified Rule
+func (parser *Parser) parseQualifiedRule() (*css.Rule, error) {
+ result := css.NewRule(css.QualifiedRule)
+
+ for parser.tokenParsable() {
+ if parser.tokenChar("{") {
+ if result.Prelude == "" {
+ errMsg := fmt.Sprintf("Unexpected { character: %s", parser.nextToken().String())
+ return result, errors.New(errMsg)
+ }
+
+ // parse declarations block
+ declarations, err := parser.ParseDeclarations()
+ if err != nil {
+ return result, err
+ }
+
+ result.Declarations = declarations
+
+ // finished
+ break
+ } else {
+ // parse prelude
+ prelude, err := parser.parsePrelude()
+ if err != nil {
+ return result, err
+ }
+
+ result.Prelude = prelude
+ }
+ }
+
+ result.Selectors = strings.Split(result.Prelude, ",")
+ for i, sel := range result.Selectors {
+ result.Selectors[i] = strings.TrimSpace(sel)
+ }
+
+ // log.Printf("[parsed] Rule: %s", result.String())
+
+ return result, parser.err()
+}
+
+// Parse Rule prelude
+func (parser *Parser) parsePrelude() (string, error) {
+ result := ""
+
+ for parser.tokenParsable() && !parser.tokenEndOfPrelude() {
+ token := parser.shiftToken()
+ result += token.Value
+ }
+
+ result = strings.TrimSpace(result)
+
+ // log.Printf("[parsed] prelude: %s", result)
+
+ return result, parser.err()
+}
+
+// Parse BOM
+func (parser *Parser) parseBOM() (bool, error) {
+ if parser.nextToken().Type == scanner.TokenBOM {
+ parser.shiftToken()
+ return true, nil
+ }
+
+ return false, parser.err()
+}
+
+// Returns next token without removing it from tokens buffer
+func (parser *Parser) nextToken() *scanner.Token {
+ if len(parser.tokens) == 0 {
+ // fetch next token
+ nextToken := parser.scan.Next()
+
+ // log.Printf("[token] %s => %v", nextToken.Type.String(), nextToken.Value)
+
+ // queue it
+ parser.tokens = append(parser.tokens, nextToken)
+ }
+
+ return parser.tokens[0]
+}
+
+// Returns next token and remove it from the tokens buffer
+func (parser *Parser) shiftToken() *scanner.Token {
+ var result *scanner.Token
+
+ result, parser.tokens = parser.tokens[0], parser.tokens[1:]
+ return result
+}
+
+// Returns tokenizer error, or nil if no error
+func (parser *Parser) err() error {
+ if parser.tokenError() {
+ token := parser.nextToken()
+ return fmt.Errorf("Tokenizer error: %s", token.String())
+ }
+
+ return nil
+}
+
+// Returns true if next token is Error
+func (parser *Parser) tokenError() bool {
+ return parser.nextToken().Type == scanner.TokenError
+}
+
+// Returns true if next token is EOF
+func (parser *Parser) tokenEOF() bool {
+ return parser.nextToken().Type == scanner.TokenEOF
+}
+
+// Returns true if next token is a whitespace
+func (parser *Parser) tokenWS() bool {
+ return parser.nextToken().Type == scanner.TokenS
+}
+
+// Returns true if next token is a comment
+func (parser *Parser) tokenComment() bool {
+ return parser.nextToken().Type == scanner.TokenComment
+}
+
+// Returns true if next token is a CDO or a CDC
+func (parser *Parser) tokenCDOorCDC() bool {
+ switch parser.nextToken().Type {
+ case scanner.TokenCDO, scanner.TokenCDC:
+ return true
+ default:
+ return false
+ }
+}
+
+// Returns true if next token is ignorable
+func (parser *Parser) tokenIgnorable() bool {
+ return parser.tokenWS() || parser.tokenComment() || parser.tokenCDOorCDC()
+}
+
+// Returns true if next token is parsable
+func (parser *Parser) tokenParsable() bool {
+ return !parser.tokenEOF() && !parser.tokenError()
+}
+
+// Returns true if next token is an At Rule keyword
+func (parser *Parser) tokenAtKeyword() bool {
+ return parser.nextToken().Type == scanner.TokenAtKeyword
+}
+
+// Returns true if next token is given character
+func (parser *Parser) tokenChar(value string) bool {
+ token := parser.nextToken()
+ return (token.Type == scanner.TokenChar) && (token.Value == value)
+}
+
+// Returns true if next token marks the end of a prelude
+func (parser *Parser) tokenEndOfPrelude() bool {
+ return parser.tokenChar(";") || parser.tokenChar("{")
+}
diff --git a/vendor/github.com/gorilla/css/LICENSE b/vendor/github.com/gorilla/css/LICENSE
new file mode 100644
index 000000000..ee0d53cef
--- /dev/null
+++ b/vendor/github.com/gorilla/css/LICENSE
@@ -0,0 +1,28 @@
+Copyright (c) 2023 The Gorilla Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/vendor/github.com/gorilla/css/scanner/doc.go b/vendor/github.com/gorilla/css/scanner/doc.go
new file mode 100644
index 000000000..f19850e15
--- /dev/null
+++ b/vendor/github.com/gorilla/css/scanner/doc.go
@@ -0,0 +1,33 @@
+// Copyright 2012 The Gorilla Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package gorilla/css/scanner generates tokens for a CSS3 input.
+
+It follows the CSS3 specification located at:
+
+ http://www.w3.org/TR/css3-syntax/
+
+To use it, create a new scanner for a given CSS string and call Next() until
+the token returned has type TokenEOF or TokenError:
+
+ s := scanner.New(myCSS)
+ for {
+ token := s.Next()
+ if token.Type == scanner.TokenEOF || token.Type == scanner.TokenError {
+ break
+ }
+ // Do something with the token...
+ }
+
+Following the CSS3 specification, an error can only occur when the scanner
+finds an unclosed quote or unclosed comment. In these cases the text becomes
+"untokenizable". Everything else is tokenizable and it is up to a parser
+to make sense of the token stream (or ignore nonsensical token sequences).
+
+Note: the scanner doesn't perform lexical analysis or, in other words, it
+doesn't care about the token context. It is intended to be used by a
+lexer or parser.
+*/
+package scanner
diff --git a/vendor/github.com/gorilla/css/scanner/scanner.go b/vendor/github.com/gorilla/css/scanner/scanner.go
new file mode 100644
index 000000000..25a7c6576
--- /dev/null
+++ b/vendor/github.com/gorilla/css/scanner/scanner.go
@@ -0,0 +1,360 @@
+// Copyright 2012 The Gorilla Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package scanner
+
+import (
+ "fmt"
+ "regexp"
+ "strings"
+ "unicode"
+ "unicode/utf8"
+)
+
+// tokenType identifies the type of lexical tokens.
+type tokenType int
+
+// String returns a string representation of the token type.
+func (t tokenType) String() string {
+ return tokenNames[t]
+}
+
+// Token represents a token and the corresponding string.
+type Token struct {
+ Type tokenType
+ Value string
+ Line int
+ Column int
+}
+
+// String returns a string representation of the token.
+func (t *Token) String() string {
+ if len(t.Value) > 10 {
+ return fmt.Sprintf("%s (line: %d, column: %d): %.10q...",
+ t.Type, t.Line, t.Column, t.Value)
+ }
+ return fmt.Sprintf("%s (line: %d, column: %d): %q",
+ t.Type, t.Line, t.Column, t.Value)
+}
+
+// All tokens -----------------------------------------------------------------
+
+// The complete list of tokens in CSS3.
+const (
+ // Scanner flags.
+ TokenError tokenType = iota
+ TokenEOF
+ // From now on, only tokens from the CSS specification.
+ TokenIdent
+ TokenAtKeyword
+ TokenString
+ TokenHash
+ TokenNumber
+ TokenPercentage
+ TokenDimension
+ TokenURI
+ TokenUnicodeRange
+ TokenCDO
+ TokenCDC
+ TokenS
+ TokenComment
+ TokenFunction
+ TokenIncludes
+ TokenDashMatch
+ TokenPrefixMatch
+ TokenSuffixMatch
+ TokenSubstringMatch
+ TokenChar
+ TokenBOM
+)
+
+// tokenNames maps tokenType's to their names. Used for conversion to string.
+var tokenNames = map[tokenType]string{
+ TokenError: "error",
+ TokenEOF: "EOF",
+ TokenIdent: "IDENT",
+ TokenAtKeyword: "ATKEYWORD",
+ TokenString: "STRING",
+ TokenHash: "HASH",
+ TokenNumber: "NUMBER",
+ TokenPercentage: "PERCENTAGE",
+ TokenDimension: "DIMENSION",
+ TokenURI: "URI",
+ TokenUnicodeRange: "UNICODE-RANGE",
+ TokenCDO: "CDO",
+ TokenCDC: "CDC",
+ TokenS: "S",
+ TokenComment: "COMMENT",
+ TokenFunction: "FUNCTION",
+ TokenIncludes: "INCLUDES",
+ TokenDashMatch: "DASHMATCH",
+ TokenPrefixMatch: "PREFIXMATCH",
+ TokenSuffixMatch: "SUFFIXMATCH",
+ TokenSubstringMatch: "SUBSTRINGMATCH",
+ TokenChar: "CHAR",
+ TokenBOM: "BOM",
+}
+
+// Macros and productions -----------------------------------------------------
+// http://www.w3.org/TR/css3-syntax/#tokenization
+
+var macroRegexp = regexp.MustCompile(`\{[a-z]+\}`)
+
+// macros maps macro names to patterns to be expanded.
+var macros = map[string]string{
+ // must be escaped: `\.+*?()|[]{}^$`
+ "ident": `-?{nmstart}{nmchar}*`,
+ "name": `{nmchar}+`,
+ "nmstart": `[a-zA-Z_]|{nonascii}|{escape}`,
+ "nonascii": "[\u0080-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]",
+ "unicode": `\\[0-9a-fA-F]{1,6}{wc}?`,
+ "escape": "{unicode}|\\\\[\u0020-\u007E\u0080-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]",
+ "nmchar": `[a-zA-Z0-9_-]|{nonascii}|{escape}`,
+ "num": `[0-9]*\.[0-9]+|[0-9]+`,
+ "string": `"(?:{stringchar}|')*"|'(?:{stringchar}|")*'`,
+ "stringchar": `{urlchar}|[ ]|\\{nl}`,
+ "nl": `[\n\r\f]|\r\n`,
+ "w": `{wc}*`,
+ "wc": `[\t\n\f\r ]`,
+
+ // urlchar should accept [(ascii characters minus those that need escaping)|{nonascii}|{escape}]
+ // ASCII characters range = `[\u0020-\u007e]`
+ // Skip space \u0020 = `[\u0021-\u007e]`
+ // Skip quotation mark \0022 = `[\u0021\u0023-\u007e]`
+ // Skip apostrophe \u0027 = `[\u0021\u0023-\u0026\u0028-\u007e]`
+ // Skip reverse solidus \u005c = `[\u0021\u0023-\u0026\u0028-\u005b\u005d\u007e]`
+ // Finally, the left square bracket (\u005b) and right (\u005d) needs escaping themselves
+ "urlchar": "[\u0021\u0023-\u0026\u0028-\\\u005b\\\u005d-\u007E]|{nonascii}|{escape}",
+}
+
+// productions maps the list of tokens to patterns to be expanded.
+var productions = map[tokenType]string{
+ // Unused regexps (matched using other methods) are commented out.
+ TokenIdent: `{ident}`,
+ TokenAtKeyword: `@{ident}`,
+ TokenString: `{string}`,
+ TokenHash: `#{name}`,
+ TokenNumber: `{num}`,
+ TokenPercentage: `{num}%`,
+ TokenDimension: `{num}{ident}`,
+ TokenURI: `url\({w}(?:{string}|{urlchar}*?){w}\)`,
+ TokenUnicodeRange: `U\+[0-9A-F\?]{1,6}(?:-[0-9A-F]{1,6})?`,
+ //TokenCDO: `<!--`,
+ TokenCDC: `-->`,
+ TokenS: `{wc}+`,
+ TokenComment: `/\*[^\*]*[\*]+(?:[^/][^\*]*[\*]+)*/`,
+ TokenFunction: `{ident}\(`,
+ //TokenIncludes: `~=`,
+ //TokenDashMatch: `\|=`,
+ //TokenPrefixMatch: `\^=`,
+ //TokenSuffixMatch: `\$=`,
+ //TokenSubstringMatch: `\*=`,
+ //TokenChar: `[^"']`,
+ //TokenBOM: "\uFEFF",
+}
+
+// matchers maps the list of tokens to compiled regular expressions.
+//
+// The map is filled on init() using the macros and productions defined in
+// the CSS specification.
+var matchers = map[tokenType]*regexp.Regexp{}
+
+// matchOrder is the order to test regexps when first-char shortcuts
+// can't be used.
+var matchOrder = []tokenType{
+ TokenURI,
+ TokenFunction,
+ TokenUnicodeRange,
+ TokenIdent,
+ TokenDimension,
+ TokenPercentage,
+ TokenNumber,
+ TokenCDC,
+}
+
+func init() {
+ // replace macros and compile regexps for productions.
+ replaceMacro := func(s string) string {
+ return "(?:" + macros[s[1:len(s)-1]] + ")"
+ }
+ for t, s := range productions {
+ for macroRegexp.MatchString(s) {
+ s = macroRegexp.ReplaceAllStringFunc(s, replaceMacro)
+ }
+ matchers[t] = regexp.MustCompile("^(?:" + s + ")")
+ }
+}
+
+// Scanner --------------------------------------------------------------------
+
+// New returns a new CSS scanner for the given input.
+func New(input string) *Scanner {
+ // Normalize newlines.
+ // https://www.w3.org/TR/css-syntax-3/#input-preprocessing
+ input = strings.Replace(input, "\r\n", "\n", -1)
+ input = strings.Replace(input, "\r", "\n", -1)
+ input = strings.Replace(input, "\f", "\n", -1)
+ input = strings.Replace(input, "\u0000", "\ufffd", -1)
+ return &Scanner{
+ input: input,
+ row: 1,
+ col: 1,
+ }
+}
+
+// Scanner scans an input and emits tokens following the CSS3 specification.
+type Scanner struct {
+ input string
+ pos int
+ row int
+ col int
+ err *Token
+}
+
+// Next returns the next token from the input.
+//
+// At the end of the input the token type is TokenEOF.
+//
+// If the input can't be tokenized the token type is TokenError. This occurs
+// in case of unclosed quotation marks or comments.
+func (s *Scanner) Next() *Token {
+ if s.err != nil {
+ return s.err
+ }
+ if s.pos >= len(s.input) {
+ s.err = &Token{TokenEOF, "", s.row, s.col}
+ return s.err
+ }
+ if s.pos == 0 {
+ // Test BOM only once, at the beginning of the file.
+ if strings.HasPrefix(s.input, "\uFEFF") {
+ return s.emitSimple(TokenBOM, "\uFEFF")
+ }
+ }
+ // There's a lot we can guess based on the first byte so we'll take a
+ // shortcut before testing multiple regexps.
+ input := s.input[s.pos:]
+ switch input[0] {
+ case '\t', '\n', ' ':
+ // Whitespace.
+ return s.emitToken(TokenS, matchers[TokenS].FindString(input))
+ case '.':
+ // Dot is too common to not have a quick check.
+ // We'll test if this is a Char; if it is followed by a number it is a
+ // dimension/percentage/number, and this will be matched later.
+ if len(input) > 1 && !unicode.IsDigit(rune(input[1])) {
+ return s.emitSimple(TokenChar, ".")
+ }
+ case '#':
+ // Another common one: Hash or Char.
+ if match := matchers[TokenHash].FindString(input); match != "" {
+ return s.emitToken(TokenHash, match)
+ }
+ return s.emitSimple(TokenChar, "#")
+ case '@':
+ // Another common one: AtKeyword or Char.
+ if match := matchers[TokenAtKeyword].FindString(input); match != "" {
+ return s.emitSimple(TokenAtKeyword, match)
+ }
+ return s.emitSimple(TokenChar, "@")
+ case ':', ',', ';', '%', '&', '+', '=', '>', '(', ')', '[', ']', '{', '}':
+ // More common chars.
+ return s.emitSimple(TokenChar, string(input[0]))
+ case '"', '\'':
+ // String or error.
+ match := matchers[TokenString].FindString(input)
+ if match != "" {
+ return s.emitToken(TokenString, match)
+ }
+
+ s.err = &Token{TokenError, "unclosed quotation mark", s.row, s.col}
+ return s.err
+ case '/':
+ // Comment, error or Char.
+ if len(input) > 1 && input[1] == '*' {
+ match := matchers[TokenComment].FindString(input)
+ if match != "" {
+ return s.emitToken(TokenComment, match)
+ } else {
+ s.err = &Token{TokenError, "unclosed comment", s.row, s.col}
+ return s.err
+ }
+ }
+ return s.emitSimple(TokenChar, "/")
+ case '~':
+ // Includes or Char.
+ return s.emitPrefixOrChar(TokenIncludes, "~=")
+ case '|':
+ // DashMatch or Char.
+ return s.emitPrefixOrChar(TokenDashMatch, "|=")
+ case '^':
+ // PrefixMatch or Char.
+ return s.emitPrefixOrChar(TokenPrefixMatch, "^=")
+ case '$':
+ // SuffixMatch or Char.
+ return s.emitPrefixOrChar(TokenSuffixMatch, "$=")
+ case '*':
+ // SubstringMatch or Char.
+ return s.emitPrefixOrChar(TokenSubstringMatch, "*=")
+ case '<':
+ // CDO or Char.
+ return s.emitPrefixOrChar(TokenCDO, " which includes the use of that to permit
+// conditionals as per https://docs.microsoft.com/en-us/previous-versions/windows/internet-explorer/ie-developer/compatibility/ms537512(v=vs.85)?redirectedfrom=MSDN
+//
+// What is not permitted are CDATA XML comments, as the x/net/html package we depend
+// on does not handle this fully and we are not choosing to take on that work:
+// https://pkg.go.dev/golang.org/x/net/html#Tokenizer.AllowCDATA . If the x/net/html
+// package changes this then these will be considered, otherwise if you AllowComments
+// but provide a CDATA comment, then as per the documentation in x/net/html this will
+// be treated as a plain HTML comment.
+func (p *Policy) AllowComments() {
+ p.allowComments = true
+}
+
+// AllowNoAttrs says that attributes on element are optional.
+//
+// The attribute policy is only added to the core policy when OnElements(...)
+// are called.
+func (p *Policy) AllowNoAttrs() *attrPolicyBuilder {
+
+ p.init()
+
+ abp := attrPolicyBuilder{
+ p: p,
+ allowEmpty: true,
+ }
+ return &abp
+}
+
+// AllowNoAttrs says that attributes on element are optional.
+//
+// The attribute policy is only added to the core policy when OnElements(...)
+// are called.
+func (abp *attrPolicyBuilder) AllowNoAttrs() *attrPolicyBuilder {
+
+ abp.allowEmpty = true
+
+ return abp
+}
+
+// Matching allows a regular expression to be applied to a nascent attribute
+// policy, and returns the attribute policy.
+func (abp *attrPolicyBuilder) Matching(regex *regexp.Regexp) *attrPolicyBuilder {
+
+ abp.regexp = regex
+
+ return abp
+}
+
+// OnElements will bind an attribute policy to a given range of HTML elements
+// and return the updated policy
+func (abp *attrPolicyBuilder) OnElements(elements ...string) *Policy {
+
+ for _, element := range elements {
+ element = strings.ToLower(element)
+
+ for _, attr := range abp.attrNames {
+
+ if _, ok := abp.p.elsAndAttrs[element]; !ok {
+ abp.p.elsAndAttrs[element] = make(map[string][]attrPolicy)
+ }
+
+ ap := attrPolicy{}
+ if abp.regexp != nil {
+ ap.regexp = abp.regexp
+ }
+
+ abp.p.elsAndAttrs[element][attr] = append(abp.p.elsAndAttrs[element][attr], ap)
+ }
+
+ if abp.allowEmpty {
+ abp.p.setOfElementsAllowedWithoutAttrs[element] = struct{}{}
+
+ if _, ok := abp.p.elsAndAttrs[element]; !ok {
+ abp.p.elsAndAttrs[element] = make(map[string][]attrPolicy)
+ }
+ }
+ }
+
+ return abp.p
+}
+
+// OnElementsMatching will bind an attribute policy to all elements matching a given regex
+// and return the updated policy
+func (abp *attrPolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {
+ for _, attr := range abp.attrNames {
+ if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok {
+ abp.p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
+ }
+ ap := attrPolicy{}
+ if abp.regexp != nil {
+ ap.regexp = abp.regexp
+ }
+ abp.p.elsMatchingAndAttrs[regex][attr] = append(abp.p.elsMatchingAndAttrs[regex][attr], ap)
+ }
+
+ if abp.allowEmpty {
+ abp.p.setOfElementsMatchingAllowedWithoutAttrs = append(abp.p.setOfElementsMatchingAllowedWithoutAttrs, regex)
+ if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok {
+ abp.p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
+ }
+ }
+
+ return abp.p
+}
+
+// Globally will bind an attribute policy to all HTML elements and return the
+// updated policy
+func (abp *attrPolicyBuilder) Globally() *Policy {
+
+ for _, attr := range abp.attrNames {
+ if _, ok := abp.p.globalAttrs[attr]; !ok {
+ abp.p.globalAttrs[attr] = []attrPolicy{}
+ }
+
+ ap := attrPolicy{}
+ if abp.regexp != nil {
+ ap.regexp = abp.regexp
+ }
+
+ abp.p.globalAttrs[attr] = append(abp.p.globalAttrs[attr], ap)
+ }
+
+ return abp.p
+}
+
+// AllowStyles takes a range of CSS property names and returns a
+// style policy builder that allows you to specify the pattern and scope of
+// the allowed property.
+//
+// The style policy is only added to the core policy when either Globally()
+// or OnElements(...) are called.
+func (p *Policy) AllowStyles(propertyNames ...string) *stylePolicyBuilder {
+
+ p.init()
+
+ abp := stylePolicyBuilder{
+ p: p,
+ }
+
+ for _, propertyName := range propertyNames {
+ abp.propertyNames = append(abp.propertyNames, strings.ToLower(propertyName))
+ }
+
+ return &abp
+}
+
+// Matching allows a regular expression to be applied to a nascent style
+// policy, and returns the style policy.
+func (spb *stylePolicyBuilder) Matching(regex *regexp.Regexp) *stylePolicyBuilder {
+
+ spb.regexp = regex
+
+ return spb
+}
+
+// MatchingEnum allows a list of allowed values to be applied to a nascent style
+// policy, and returns the style policy.
+func (spb *stylePolicyBuilder) MatchingEnum(enum ...string) *stylePolicyBuilder {
+
+ spb.enum = enum
+
+ return spb
+}
+
+// MatchingHandler allows a handler to be applied to a nascent style
+// policy, and returns the style policy.
+func (spb *stylePolicyBuilder) MatchingHandler(handler func(string) bool) *stylePolicyBuilder {
+
+ spb.handler = handler
+
+ return spb
+}
+
+// OnElements will bind a style policy to a given range of HTML elements
+// and return the updated policy
+func (spb *stylePolicyBuilder) OnElements(elements ...string) *Policy {
+
+ for _, element := range elements {
+ element = strings.ToLower(element)
+
+ for _, attr := range spb.propertyNames {
+
+ if _, ok := spb.p.elsAndStyles[element]; !ok {
+ spb.p.elsAndStyles[element] = make(map[string][]stylePolicy)
+ }
+
+ sp := stylePolicy{}
+ if spb.handler != nil {
+ sp.handler = spb.handler
+ } else if len(spb.enum) > 0 {
+ sp.enum = spb.enum
+ } else if spb.regexp != nil {
+ sp.regexp = spb.regexp
+ } else {
+ sp.handler = css.GetDefaultHandler(attr)
+ }
+ spb.p.elsAndStyles[element][attr] = append(spb.p.elsAndStyles[element][attr], sp)
+ }
+ }
+
+ return spb.p
+}
+
+// OnElementsMatching will bind a style policy to any HTML elements matching the pattern
+// and return the updated policy
+func (spb *stylePolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {
+
+ for _, attr := range spb.propertyNames {
+
+ if _, ok := spb.p.elsMatchingAndStyles[regex]; !ok {
+ spb.p.elsMatchingAndStyles[regex] = make(map[string][]stylePolicy)
+ }
+
+ sp := stylePolicy{}
+ if spb.handler != nil {
+ sp.handler = spb.handler
+ } else if len(spb.enum) > 0 {
+ sp.enum = spb.enum
+ } else if spb.regexp != nil {
+ sp.regexp = spb.regexp
+ } else {
+ sp.handler = css.GetDefaultHandler(attr)
+ }
+ spb.p.elsMatchingAndStyles[regex][attr] = append(spb.p.elsMatchingAndStyles[regex][attr], sp)
+ }
+
+ return spb.p
+}
+
+// Globally will bind a style policy to all HTML elements and return the
+// updated policy
+func (spb *stylePolicyBuilder) Globally() *Policy {
+
+ for _, attr := range spb.propertyNames {
+ if _, ok := spb.p.globalStyles[attr]; !ok {
+ spb.p.globalStyles[attr] = []stylePolicy{}
+ }
+
+ // Use only one strategy for validating styles, fallback to default
+ sp := stylePolicy{}
+ if spb.handler != nil {
+ sp.handler = spb.handler
+ } else if len(spb.enum) > 0 {
+ sp.enum = spb.enum
+ } else if spb.regexp != nil {
+ sp.regexp = spb.regexp
+ } else {
+ sp.handler = css.GetDefaultHandler(attr)
+ }
+ spb.p.globalStyles[attr] = append(spb.p.globalStyles[attr], sp)
+ }
+
+ return spb.p
+}
+
+// AllowElements will append HTML elements to the allowlist without applying an
+// attribute policy to those elements (the elements are permitted
+// sans-attributes)
+func (p *Policy) AllowElements(names ...string) *Policy {
+ p.init()
+
+ for _, element := range names {
+ element = strings.ToLower(element)
+
+ if _, ok := p.elsAndAttrs[element]; !ok {
+ p.elsAndAttrs[element] = make(map[string][]attrPolicy)
+ }
+ }
+
+ return p
+}
+
+// AllowElementsMatching will append HTML elements to the allowlist if they
+// match a regexp.
+func (p *Policy) AllowElementsMatching(regex *regexp.Regexp) *Policy {
+ p.init()
+ if _, ok := p.elsMatchingAndAttrs[regex]; !ok {
+ p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
+ }
+ return p
+}
+
+// AllowURLSchemesMatching will append URL schemes to the allowlist if they
+// match a regexp.
+func (p *Policy) AllowURLSchemesMatching(r *regexp.Regexp) *Policy {
+ p.allowURLSchemeRegexps = append(p.allowURLSchemeRegexps, r)
+ return p
+}
+
+// RewriteSrc will rewrite the src attribute of a resource downloading tag
+// (e.g. <img>, <script>, <iframe>, <audio>, <video>) using the provided
+// function.
+func (p *Policy) RewriteSrc(fn urlRewriter) *Policy {
+ p.srcRewriter = fn
+ return p
+}
+
+// addDefaultSkipElementContent adds the HTML elements that we should skip
+// rendering the character content of, if the element itself is not allowed.
+// This is all character data that the end user would not normally see.
+// i.e. if we exclude a <script> tag then we shouldn't render the JavaScript or
+// anything else until we encounter the closing </script> tag.
+func (p *Policy) addDefaultSkipElementContent() {
+ p.init()
+
+ p.setOfElementsToSkipContent["frame"] = struct{}{}
+ p.setOfElementsToSkipContent["frameset"] = struct{}{}
+ p.setOfElementsToSkipContent["iframe"] = struct{}{}
+ p.setOfElementsToSkipContent["noembed"] = struct{}{}
+ p.setOfElementsToSkipContent["noframes"] = struct{}{}
+ p.setOfElementsToSkipContent["noscript"] = struct{}{}
+ p.setOfElementsToSkipContent["nostyle"] = struct{}{}
+ p.setOfElementsToSkipContent["object"] = struct{}{}
+ p.setOfElementsToSkipContent["script"] = struct{}{}
+ p.setOfElementsToSkipContent["style"] = struct{}{}
+ p.setOfElementsToSkipContent["title"] = struct{}{}
+}
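Assuming the reconstructed RewriteSrc above matches the upstream signature (the urlRewriter callback receives the parsed URL by pointer and mutates it in place), a proxy-style rewrite could look like the sketch below; the proxy host is illustrative only, and the rewriter only runs on src values that have already passed validURL.

    package main

    import (
    	"fmt"
    	"net/url"

    	"github.com/microcosm-cc/bluemonday"
    )

    func main() {
    	p := bluemonday.UGCPolicy()
    	p.AllowImages()
    	// Route every validated src through a hypothetical image proxy.
    	p.RewriteSrc(func(u *url.URL) {
    		orig := u.String()
    		u.Scheme = "https"
    		u.Host = "img-proxy.example.com" // assumed proxy, not part of the library
    		u.Path = "/fetch"
    		u.RawQuery = "url=" + url.QueryEscape(orig)
    	})

    	fmt.Println(p.Sanitize(`<img src="http://evil.example/tracker.png">`))
    }
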
diff --git a/vendor/github.com/microcosm-cc/bluemonday/sanitize.go b/vendor/github.com/microcosm-cc/bluemonday/sanitize.go
new file mode 100644
index 000000000..47c31f7da
--- /dev/null
+++ b/vendor/github.com/microcosm-cc/bluemonday/sanitize.go
@@ -0,0 +1,1096 @@
+// Copyright (c) 2014, David Kitchen
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * Neither the name of the organisation (Microcosm) nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package bluemonday
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "net/url"
+ "regexp"
+ "strconv"
+ "strings"
+
+ "golang.org/x/net/html"
+
+ "github.com/aymerick/douceur/parser"
+)
+
+var (
+ dataAttribute = regexp.MustCompile("^data-.+")
+ dataAttributeXMLPrefix = regexp.MustCompile("^xml.+")
+ dataAttributeInvalidChars = regexp.MustCompile("[A-Z;]+")
+ cssUnicodeChar = regexp.MustCompile(`\\[0-9a-f]{1,6} ?`)
+ dataURIbase64Prefix = regexp.MustCompile(`^data:[^,]*;base64,`)
+)
+
+// Sanitize takes a string that contains an HTML fragment or document and
+// applies the given policy allowlist.
+//
+// It returns an HTML string that has been sanitized by the policy, or an
+// empty string if an error occurred (most likely as a consequence of
+// extremely malformed input).
+func (p *Policy) Sanitize(s string) string {
+ if strings.TrimSpace(s) == "" {
+ return s
+ }
+
+ return p.sanitizeWithBuff(strings.NewReader(s)).String()
+}
+
+// SanitizeBytes takes a []byte that contains an HTML fragment or document
+// and applies the given policy allowlist.
+//
+// It returns a []byte containing the HTML that has been sanitized by the
+// policy, or an empty []byte if an error occurred (most likely as a
+// consequence of extremely malformed input).
+func (p *Policy) SanitizeBytes(b []byte) []byte {
+ if len(bytes.TrimSpace(b)) == 0 {
+ return b
+ }
+
+ return p.sanitizeWithBuff(bytes.NewReader(b)).Bytes()
+}
+
+// SanitizeReader takes an io.Reader that contains an HTML fragment or
+// document and applies the given policy allowlist.
+//
+// It returns a bytes.Buffer containing the HTML that has been sanitized by
+// the policy. Errors during sanitization merely yield an empty result.
+func (p *Policy) SanitizeReader(r io.Reader) *bytes.Buffer {
+ return p.sanitizeWithBuff(r)
+}
+
+// SanitizeReaderToWriter takes an io.Reader that contains an HTML fragment
+// or document, applies the given policy allowlist, and writes the sanitized
+// output to the provided io.Writer, returning an error if one occurs.
+func (p *Policy) SanitizeReaderToWriter(r io.Reader, w io.Writer) error {
+ return p.sanitize(r, w)
+}
+
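The four entry points above differ only in their I/O shape; a quick sketch of three of them in use (UGCPolicy is one of the stock policies from policies.go):

    package main

    import (
    	"fmt"
    	"os"
    	"strings"

    	"github.com/microcosm-cc/bluemonday"
    )

    func main() {
    	p := bluemonday.UGCPolicy()

    	// string in, string out
    	fmt.Println(p.Sanitize(`<a onblur="alert(1)" href="http://example.org">link</a>`))

    	// []byte in, []byte out
    	fmt.Printf("%s\n", p.SanitizeBytes([]byte(`<i>ok</i><script>x()</script>`)))

    	// io.Reader in, io.Writer out; this variant surfaces errors
    	// instead of collapsing them into an empty result
    	if err := p.SanitizeReaderToWriter(strings.NewReader(`<b>hi</b>`), os.Stdout); err != nil {
    		panic(err)
    	}
    }
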
+// Query represents a single part of the query string: a query parameter.
+type Query struct {
+ Key string
+ Value string
+ HasValue bool
+}
+
+func parseQuery(query string) (values []Query, err error) {
+ // This is essentially a copy of parseQuery from
+ // https://golang.org/src/net/url/url.go, adjusted to build the values
+ // using our own type so that the ordering of the query string is
+ // preserved
+ for query != "" {
+ key := query
+ if i := strings.IndexAny(key, "&;"); i >= 0 {
+ key, query = key[:i], key[i+1:]
+ } else {
+ query = ""
+ }
+ if key == "" {
+ continue
+ }
+ value := ""
+ hasValue := false
+ if i := strings.Index(key, "="); i >= 0 {
+ key, value = key[:i], key[i+1:]
+ hasValue = true
+ }
+ key, err1 := url.QueryUnescape(key)
+ if err1 != nil {
+ if err == nil {
+ err = err1
+ }
+ continue
+ }
+ value, err1 = url.QueryUnescape(value)
+ if err1 != nil {
+ if err == nil {
+ err = err1
+ }
+ continue
+ }
+ values = append(values, Query{
+ Key: key,
+ Value: value,
+ HasValue: hasValue,
+ })
+ }
+ return values, err
+}
+
+func encodeQueries(queries []Query) string {
+ var buff bytes.Buffer
+ for i, query := range queries {
+ buff.WriteString(url.QueryEscape(query.Key))
+ if query.HasValue {
+ buff.WriteString("=")
+ buff.WriteString(url.QueryEscape(query.Value))
+ }
+ if i < len(queries)-1 {
+ buff.WriteString("&")
+ }
+ }
+ return buff.String()
+}
+
+func sanitizedURL(val string) (string, error) {
+ u, err := url.Parse(val)
+ if err != nil {
+ return "", err
+ }
+
+ // We use parseQuery rather than u.Query() so that the order of the
+ // parameters is preserved: url.Values is a map, and map iteration
+ // order is randomized.
+ queryValues, err := parseQuery(u.RawQuery)
+ if err != nil {
+ return "", err
+ }
+ // sanitize the url query params
+ for i, query := range queryValues {
+ queryValues[i].Key = html.EscapeString(query.Key)
+ }
+ u.RawQuery = encodeQueries(queryValues)
+ // u.String() will also sanitize host/scheme/user/pass
+ return u.String(), nil
+}
+
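The reason for hand-rolling parseQuery/encodeQueries instead of using u.Query() can be shown in isolation; url.Values is a map, so re-encoding it sorts keys and normalizes valueless parameters:

    package main

    import (
    	"fmt"
    	"net/url"
    )

    func main() {
    	u, _ := url.Parse("https://example.org/?b=2&a=1&flag")

    	// Round-tripping through url.Values reorders the parameters and
    	// turns the bare "flag" into "flag=".
    	fmt.Println(u.Query().Encode()) // a=1&b=2&flag=

    	// sanitizedURL instead walks the raw query left to right with
    	// parseQuery, preserving ordering and the HasValue distinction.
    }
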
+// sanitizeWithBuff performs the actual sanitization, returning an empty
+// buffer if an error occurs.
+func (p *Policy) sanitizeWithBuff(r io.Reader) *bytes.Buffer {
+ var buff bytes.Buffer
+ if err := p.sanitize(r, &buff); err != nil {
+ return &bytes.Buffer{}
+ }
+ return &buff
+}
+
+type asStringWriter struct {
+ io.Writer
+}
+
+func (a *asStringWriter) WriteString(s string) (int, error) {
+ return a.Write([]byte(s))
+}
+
+func (p *Policy) sanitize(r io.Reader, w io.Writer) error {
+ // It is possible that the developer has created the policy via:
+ // p := bluemonday.Policy{}
+ // rather than:
+ // p := bluemonday.NewPolicy()
+ // If this is the case, and if they haven't yet triggered an action that
+ // would initialize the maps, then we need to do that.
+ p.init()
+
+ buff, ok := w.(stringWriterWriter)
+ if !ok {
+ buff = &asStringWriter{w}
+ }
+
+ var (
+ skipElementContent bool
+ skippingElementsCount int64
+ skipClosingTag bool
+ closingTagToSkipStack []string
+ mostRecentlyStartedToken string
+ )
+
+ tokenizer := html.NewTokenizer(r)
+ for {
+ if tokenizer.Next() == html.ErrorToken {
+ err := tokenizer.Err()
+ if err == io.EOF {
+ // End of input means end of processing
+ return nil
+ }
+
+ // Raw tokenizer error
+ return err
+ }
+
+ token := tokenizer.Token()
+ switch token.Type {
+ case html.DoctypeToken:
+
+ // DocType is not handled as there is no safe parsing mechanism
+ // provided by golang.org/x/net/html for the content, and this can
+ // be misused to insert HTML tags that are not then sanitized
+ //
+ // One might wish to recursively sanitize here using the same policy
+ // but I will need to do some further testing before considering
+ // this.
+
+ case html.CommentToken:
+
+ // Comments are ignored by default
+ if p.allowComments {
+ // But if allowed then write the comment out as-is
+ buff.WriteString(token.String())
+ }
+
+ case html.StartTagToken:
+
+ mostRecentlyStartedToken = normaliseElementName(token.Data)
+
+ switch normaliseElementName(token.Data) {
+ case `script`:
+ if !p.allowUnsafe {
+ continue
+ }
+ case `style`:
+ if !p.allowUnsafe {
+ continue
+ }
+ }
+
+ aps, ok := p.elsAndAttrs[token.Data]
+ if !ok {
+ aa, matched := p.matchRegex(token.Data)
+ if !matched {
+ if _, ok := p.setOfElementsToSkipContent[token.Data]; ok {
+ skipElementContent = true
+ skippingElementsCount++
+ }
+ if p.addSpaces {
+ if _, err := buff.WriteString(" "); err != nil {
+ return err
+ }
+ }
+ break
+ }
+ aps = aa
+ }
+ if len(token.Attr) != 0 {
+ token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
+ }
+
+ if len(token.Attr) == 0 {
+ if !p.allowNoAttrs(token.Data) {
+ skipClosingTag = true
+ closingTagToSkipStack = append(closingTagToSkipStack, token.Data)
+ if p.addSpaces {
+ if _, err := buff.WriteString(" "); err != nil {
+ return err
+ }
+ }
+ break
+ }
+ }
+
+ if !skipElementContent {
+ if _, err := buff.WriteString(token.String()); err != nil {
+ return err
+ }
+ }
+
+ case html.EndTagToken:
+
+ if mostRecentlyStartedToken == normaliseElementName(token.Data) {
+ mostRecentlyStartedToken = ""
+ }
+
+ switch normaliseElementName(token.Data) {
+ case `script`:
+ if !p.allowUnsafe {
+ continue
+ }
+ case `style`:
+ if !p.allowUnsafe {
+ continue
+ }
+ }
+
+ if skipClosingTag && closingTagToSkipStack[len(closingTagToSkipStack)-1] == token.Data {
+ closingTagToSkipStack = closingTagToSkipStack[:len(closingTagToSkipStack)-1]
+ if len(closingTagToSkipStack) == 0 {
+ skipClosingTag = false
+ }
+ if p.addSpaces {
+ if _, err := buff.WriteString(" "); err != nil {
+ return err
+ }
+ }
+ break
+ }
+ if _, ok := p.elsAndAttrs[token.Data]; !ok {
+ match := false
+ for regex := range p.elsMatchingAndAttrs {
+ if regex.MatchString(token.Data) {
+ skipElementContent = false
+ match = true
+ break
+ }
+ }
+ if _, ok := p.setOfElementsToSkipContent[token.Data]; ok && !match {
+ skippingElementsCount--
+ if skippingElementsCount == 0 {
+ skipElementContent = false
+ }
+ }
+ if !match {
+ if p.addSpaces {
+ if _, err := buff.WriteString(" "); err != nil {
+ return err
+ }
+ }
+ break
+ }
+ }
+
+ if !skipElementContent {
+ if _, err := buff.WriteString(token.String()); err != nil {
+ return err
+ }
+ }
+
+ case html.SelfClosingTagToken:
+
+ switch normaliseElementName(token.Data) {
+ case `script`:
+ if !p.allowUnsafe {
+ continue
+ }
+ case `style`:
+ if !p.allowUnsafe {
+ continue
+ }
+ }
+
+ aps, ok := p.elsAndAttrs[token.Data]
+ if !ok {
+ aa, matched := p.matchRegex(token.Data)
+ if !matched {
+ if p.addSpaces {
+ if _, err := buff.WriteString(" "); err != nil {
+ return err
+ }
+ }
+ break
+ }
+ aps = aa
+ }
+
+ if len(token.Attr) != 0 {
+ token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
+ }
+
+ if len(token.Attr) == 0 && !p.allowNoAttrs(token.Data) {
+ if p.addSpaces {
+ if _, err := buff.WriteString(" "); err != nil {
+ return err
+ }
+ }
+ break
+ }
+ if !skipElementContent {
+ if _, err := buff.WriteString(token.String()); err != nil {
+ return err
+ }
+ }
+
+ case html.TextToken:
+
+ if !skipElementContent {
+ switch mostRecentlyStartedToken {
+ case `script`:
+ // not encouraged, but if a policy allows JavaScript we
+ // should not HTML escape it as that would break the output
+ //
+ // requires p.AllowUnsafe()
+ if p.allowUnsafe {
+ if _, err := buff.WriteString(token.Data); err != nil {
+ return err
+ }
+ }
+ case "style":
+ // not encouraged, but if a policy allows CSS styles we
+ // should not HTML escape it as that would break the output
+ //
+ // requires p.AllowUnsafe()
+ if p.allowUnsafe {
+ if _, err := buff.WriteString(token.Data); err != nil {
+ return err
+ }
+ }
+ default:
+ // HTML escape the text
+ if _, err := buff.WriteString(token.String()); err != nil {
+ return err
+ }
+ }
+ }
+
+ default:
+ // A token that didn't exist in the html package when we wrote this
+ return fmt.Errorf("unknown token: %v", token)
+ }
+ }
+}
+
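The interplay of the allowlist and the skip-content set in the loop above can be seen with the strict stock policy: disallowed tags are dropped but their text is kept, except for elements registered via addDefaultSkipElementContent, whose text is dropped too.

    package main

    import (
    	"fmt"

    	"github.com/microcosm-cc/bluemonday"
    )

    func main() {
    	p := bluemonday.StrictPolicy() // allows nothing

    	// <b> is dropped but its text survives; <script> is in the
    	// skip-content set, so its content is dropped as well.
    	fmt.Println(p.Sanitize(`<b>keep text</b><script>alert(1)</script>`))
    	// Should print: keep text
    }
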
+// sanitizeAttrs takes a set of element attribute policies and the global
+// attribute policies and applies them to the []html.Attribute returning a set
+// of html.Attributes that match the policies
+func (p *Policy) sanitizeAttrs(
+ elementName string,
+ attrs []html.Attribute,
+ aps map[string][]attrPolicy,
+) []html.Attribute {
+
+ if len(attrs) == 0 {
+ return attrs
+ }
+
+ hasStylePolicies := false
+ sps, elementHasStylePolicies := p.elsAndStyles[elementName]
+ if len(p.globalStyles) > 0 || (elementHasStylePolicies && len(sps) > 0) {
+ hasStylePolicies = true
+ }
+ // no specific element policy found, look for a pattern match
+ if !hasStylePolicies {
+ for k, v := range p.elsMatchingAndStyles {
+ if k.MatchString(elementName) {
+ if len(v) > 0 {
+ hasStylePolicies = true
+ break
+ }
+ }
+ }
+ }
+
+ // Build a new attribute slice based on whether the attribute has been
+ // allowed explicitly or globally.
+ cleanAttrs := []html.Attribute{}
+attrsLoop:
+ for _, htmlAttr := range attrs {
+ if p.allowDataAttributes {
+ // If we see a data attribute, let it through.
+ if isDataAttribute(htmlAttr.Key) {
+ cleanAttrs = append(cleanAttrs, htmlAttr)
+ continue
+ }
+ }
+ // Is this a "style" attribute, and if so, do we need to sanitize it?
+ if htmlAttr.Key == "style" && hasStylePolicies {
+ htmlAttr = p.sanitizeStyles(htmlAttr, elementName)
+ if htmlAttr.Val == "" {
+ // We've sanitized away any and all styles; don't bother to
+ // output the style attribute (even if it's allowed)
+ continue
+ } else {
+ cleanAttrs = append(cleanAttrs, htmlAttr)
+ continue
+ }
+ }
+
+ // Is there an element specific attribute policy that applies?
+ if apl, ok := aps[htmlAttr.Key]; ok {
+ for _, ap := range apl {
+ if ap.regexp != nil {
+ if ap.regexp.MatchString(htmlAttr.Val) {
+ cleanAttrs = append(cleanAttrs, htmlAttr)
+ continue attrsLoop
+ }
+ } else {
+ cleanAttrs = append(cleanAttrs, htmlAttr)
+ continue attrsLoop
+ }
+ }
+ }
+
+ // Is there a global attribute policy that applies?
+ if apl, ok := p.globalAttrs[htmlAttr.Key]; ok {
+ for _, ap := range apl {
+ if ap.regexp != nil {
+ if ap.regexp.MatchString(htmlAttr.Val) {
+ cleanAttrs = append(cleanAttrs, htmlAttr)
+ continue attrsLoop
+ }
+ } else {
+ cleanAttrs = append(cleanAttrs, htmlAttr)
+ continue attrsLoop
+ }
+ }
+ }
+ }
+
+ if len(cleanAttrs) == 0 {
+ // If nothing was allowed, let's get out of here
+ return cleanAttrs
+ }
+ // cleanAttrs now contains the attributes that are permitted
+
+ if linkable(elementName) {
+ if p.requireParseableURLs {
+ // Ensure URLs are parseable:
+ // - a.href
+ // - area.href
+ // - link.href
+ // - blockquote.cite
+ // - q.cite
+ // - img.src
+ // - script.src
+ tmpAttrs := []html.Attribute{}
+ for _, htmlAttr := range cleanAttrs {
+ switch elementName {
+ case "a", "area", "base", "link":
+ if htmlAttr.Key == "href" {
+ if u, ok := p.validURL(htmlAttr.Val); ok {
+ htmlAttr.Val = u
+ tmpAttrs = append(tmpAttrs, htmlAttr)
+ }
+ break
+ }
+ tmpAttrs = append(tmpAttrs, htmlAttr)
+ case "blockquote", "del", "ins", "q":
+ if htmlAttr.Key == "cite" {
+ if u, ok := p.validURL(htmlAttr.Val); ok {
+ htmlAttr.Val = u
+ tmpAttrs = append(tmpAttrs, htmlAttr)
+ }
+ break
+ }
+ tmpAttrs = append(tmpAttrs, htmlAttr)
+ case "audio", "embed", "iframe", "img", "script", "source", "track", "video":
+ if htmlAttr.Key == "src" {
+ if u, ok := p.validURL(htmlAttr.Val); ok {
+ if p.srcRewriter != nil {
+ parsedURL, err := url.Parse(u)
+ if err != nil {
+ fmt.Println(err)
+ }
+ p.srcRewriter(parsedURL)
+ u = parsedURL.String()
+ }
+ htmlAttr.Val = u
+ tmpAttrs = append(tmpAttrs, htmlAttr)
+ }
+ break
+ }
+ tmpAttrs = append(tmpAttrs, htmlAttr)
+ default:
+ tmpAttrs = append(tmpAttrs, htmlAttr)
+ }
+ }
+ cleanAttrs = tmpAttrs
+ }
+
+ if (p.requireNoFollow ||
+ p.requireNoFollowFullyQualifiedLinks ||
+ p.requireNoReferrer ||
+ p.requireNoReferrerFullyQualifiedLinks ||
+ p.addTargetBlankToFullyQualifiedLinks) &&
+ len(cleanAttrs) > 0 {
+
+ // Add rel="nofollow" if a "href" exists
+ switch elementName {
+ case "a", "area", "base", "link":
+ var hrefFound bool
+ var externalLink bool
+ for _, htmlAttr := range cleanAttrs {
+ if htmlAttr.Key == "href" {
+ hrefFound = true
+
+ u, err := url.Parse(htmlAttr.Val)
+ if err != nil {
+ continue
+ }
+ if u.Host != "" {
+ externalLink = true
+ }
+
+ continue
+ }
+ }
+
+ if hrefFound {
+ var (
+ noFollowFound bool
+ noReferrerFound bool
+ targetBlankFound bool
+ )
+
+ addNoFollow := (p.requireNoFollow ||
+ externalLink && p.requireNoFollowFullyQualifiedLinks)
+
+ addNoReferrer := (p.requireNoReferrer ||
+ externalLink && p.requireNoReferrerFullyQualifiedLinks)
+
+ addTargetBlank := (externalLink &&
+ p.addTargetBlankToFullyQualifiedLinks)
+
+ tmpAttrs := []html.Attribute{}
+ for _, htmlAttr := range cleanAttrs {
+
+ var appended bool
+ if htmlAttr.Key == "rel" && (addNoFollow || addNoReferrer) {
+
+ if addNoFollow && !strings.Contains(htmlAttr.Val, "nofollow") {
+ htmlAttr.Val += " nofollow"
+ }
+ if addNoReferrer && !strings.Contains(htmlAttr.Val, "noreferrer") {
+ htmlAttr.Val += " noreferrer"
+ }
+ noFollowFound = addNoFollow
+ noReferrerFound = addNoReferrer
+ tmpAttrs = append(tmpAttrs, htmlAttr)
+ appended = true
+ }
+
+ if elementName == "a" && htmlAttr.Key == "target" {
+ if htmlAttr.Val == "_blank" {
+ targetBlankFound = true
+ }
+ if addTargetBlank && !targetBlankFound {
+ htmlAttr.Val = "_blank"
+ targetBlankFound = true
+ tmpAttrs = append(tmpAttrs, htmlAttr)
+ appended = true
+ }
+ }
+
+ if !appended {
+ tmpAttrs = append(tmpAttrs, htmlAttr)
+ }
+ }
+ if noFollowFound || noReferrerFound || targetBlankFound {
+ cleanAttrs = tmpAttrs
+ }
+
+ if (addNoFollow && !noFollowFound) || (addNoReferrer && !noReferrerFound) {
+ rel := html.Attribute{}
+ rel.Key = "rel"
+ if addNoFollow {
+ rel.Val = "nofollow"
+ }
+ if addNoReferrer {
+ if rel.Val != "" {
+ rel.Val += " "
+ }
+ rel.Val += "noreferrer"
+ }
+ cleanAttrs = append(cleanAttrs, rel)
+ }
+
+ if elementName == "a" && addTargetBlank && !targetBlankFound {
+ rel := html.Attribute{}
+ rel.Key = "target"
+ rel.Val = "_blank"
+ targetBlankFound = true
+ cleanAttrs = append(cleanAttrs, rel)
+ }
+
+ if targetBlankFound {
+ // target="_blank" has a security risk that allows the
+ // opened window/tab to issue JavaScript calls against
+ // window.opener, which in effect allows the destination
+ // of the link to control the source:
+ // https://dev.to/ben/the-targetblank-vulnerability-by-example
+ //
+ // To mitigate this risk, we need to add a specific rel
+ // attribute if it is not already present.
+ // rel="noopener"
+ //
+ // Unfortunately this processes the rel attribute twice (we
+ // already looked at it earlier ^^) because we cannot rely
+ // on the ordering of the href and rel attributes, and so
+ // only know that this is needed once both have been seen.
+ // This double processing only happens *if* target="_blank"
+ // is true.
+ var noOpenerAdded bool
+ tmpAttrs := []html.Attribute{}
+ for _, htmlAttr := range cleanAttrs {
+ var appended bool
+ if htmlAttr.Key == "rel" {
+ if strings.Contains(htmlAttr.Val, "noopener") {
+ noOpenerAdded = true
+ tmpAttrs = append(tmpAttrs, htmlAttr)
+ } else {
+ htmlAttr.Val += " noopener"
+ noOpenerAdded = true
+ tmpAttrs = append(tmpAttrs, htmlAttr)
+ }
+
+ appended = true
+ }
+ if !appended {
+ tmpAttrs = append(tmpAttrs, htmlAttr)
+ }
+ }
+ if noOpenerAdded {
+ cleanAttrs = tmpAttrs
+ } else {
+ // rel attr was not found, or else noopener would
+ // have been added already
+ rel := html.Attribute{}
+ rel.Key = "rel"
+ rel.Val = "noopener"
+ cleanAttrs = append(cleanAttrs, rel)
+ }
+
+ }
+ }
+ default:
+ }
+ }
+ }
+
+ if p.requireCrossOriginAnonymous && len(cleanAttrs) > 0 {
+ switch elementName {
+ case "audio", "img", "link", "script", "video":
+ var crossOriginFound bool
+ for i, htmlAttr := range cleanAttrs {
+ if htmlAttr.Key == "crossorigin" {
+ crossOriginFound = true
+ cleanAttrs[i].Val = "anonymous"
+ }
+ }
+
+ if !crossOriginFound {
+ crossOrigin := html.Attribute{}
+ crossOrigin.Key = "crossorigin"
+ crossOrigin.Val = "anonymous"
+ cleanAttrs = append(cleanAttrs, crossOrigin)
+ }
+ }
+ }
+
+ if p.requireSandboxOnIFrame != nil && elementName == "iframe" {
+ var sandboxFound bool
+ for i, htmlAttr := range cleanAttrs {
+ if htmlAttr.Key == "sandbox" {
+ sandboxFound = true
+ var cleanVals []string
+ cleanValsSet := make(map[string]bool)
+ for _, val := range strings.Fields(htmlAttr.Val) {
+ if p.requireSandboxOnIFrame[val] {
+ if !cleanValsSet[val] {
+ cleanVals = append(cleanVals, val)
+ cleanValsSet[val] = true
+ }
+ }
+ }
+ cleanAttrs[i].Val = strings.Join(cleanVals, " ")
+ }
+ }
+
+ if !sandboxFound {
+ sandbox := html.Attribute{}
+ sandbox.Key = "sandbox"
+ sandbox.Val = ""
+ cleanAttrs = append(cleanAttrs, sandbox)
+ }
+ }
+
+ return cleanAttrs
+}
+
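A sketch of the link-hardening paths above in action; the attribute ordering in the expected output is a guess, but the rel/target handling is what the code above implements:

    package main

    import (
    	"fmt"

    	"github.com/microcosm-cc/bluemonday"
    )

    func main() {
    	p := bluemonday.NewPolicy()
    	p.AllowAttrs("href").OnElements("a")
    	p.AllowStandardURLs()

    	// nofollow on every link, and target="_blank" (plus the implied
    	// rel="noopener") on fully qualified external links.
    	p.RequireNoFollowOnLinks(true)
    	p.AddTargetBlankToFullyQualifiedLinks(true)

    	fmt.Println(p.Sanitize(`<a href="https://example.org/">x</a>`))
    	// Expected, roughly:
    	// <a href="https://example.org/" rel="nofollow noopener" target="_blank">x</a>
    }
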
+func (p *Policy) sanitizeStyles(attr html.Attribute, elementName string) html.Attribute {
+ sps := p.elsAndStyles[elementName]
+ if len(sps) == 0 {
+ sps = map[string][]stylePolicy{}
+ // No element-specific policy was found, so check for any matching
+ // patterns. If multiple patterns match, their policies are all
+ // combined; it is best not to have overlapping matchers.
+ for regex, policies := range p.elsMatchingAndStyles {
+ if regex.MatchString(elementName) {
+ for k, v := range policies {
+ sps[k] = append(sps[k], v...)
+ }
+ }
+ }
+ }
+
+ // Add a trailing semicolon to work around a parsing issue
+ attr.Val = strings.TrimRight(attr.Val, " ")
+ if len(attr.Val) > 0 && attr.Val[len(attr.Val)-1] != ';' {
+ attr.Val = attr.Val + ";"
+ }
+ decs, err := parser.ParseDeclarations(attr.Val)
+ if err != nil {
+ attr.Val = ""
+ return attr
+ }
+ clean := []string{}
+ prefixes := []string{"-webkit-", "-moz-", "-ms-", "-o-", "mso-", "-xv-", "-atsc-", "-wap-", "-khtml-", "prince-", "-ah-", "-hp-", "-ro-", "-rim-", "-tc-"}
+
+decLoop:
+ for _, dec := range decs {
+ tempProperty := strings.ToLower(dec.Property)
+ tempValue := removeUnicode(strings.ToLower(dec.Value))
+ for _, i := range prefixes {
+ tempProperty = strings.TrimPrefix(tempProperty, i)
+ }
+ if spl, ok := sps[tempProperty]; ok {
+ for _, sp := range spl {
+ if sp.handler != nil {
+ if sp.handler(tempValue) {
+ clean = append(clean, dec.Property+": "+dec.Value)
+ continue decLoop
+ }
+ } else if len(sp.enum) > 0 {
+ if stringInSlice(tempValue, sp.enum) {
+ clean = append(clean, dec.Property+": "+dec.Value)
+ continue decLoop
+ }
+ } else if sp.regexp != nil {
+ if sp.regexp.MatchString(tempValue) {
+ clean = append(clean, dec.Property+": "+dec.Value)
+ continue decLoop
+ }
+ }
+ }
+ }
+ if spl, ok := p.globalStyles[tempProperty]; ok {
+ for _, sp := range spl {
+ if sp.handler != nil {
+ if sp.handler(tempValue) {
+ clean = append(clean, dec.Property+": "+dec.Value)
+ continue decLoop
+ }
+ } else if len(sp.enum) > 0 {
+ if stringInSlice(tempValue, sp.enum) {
+ clean = append(clean, dec.Property+": "+dec.Value)
+ continue decLoop
+ }
+ } else if sp.regexp != nil {
+ if sp.regexp.MatchString(tempValue) {
+ clean = append(clean, dec.Property+": "+dec.Value)
+ continue decLoop
+ }
+ }
+ }
+ }
+ }
+ if len(clean) > 0 {
+ attr.Val = strings.Join(clean, "; ")
+ } else {
+ attr.Val = ""
+ }
+ return attr
+}
+
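Note that the vendor prefixes above are stripped only for the policy lookup; a surviving declaration keeps its original property name in the output. A small sketch, with the expected result inferred from the decLoop above:

    package main

    import (
    	"fmt"

    	"github.com/microcosm-cc/bluemonday"
    )

    func main() {
    	p := bluemonday.NewPolicy()
    	p.AllowElements("p")
    	p.AllowStyles("color").MatchingEnum("red", "green").OnElements("p")

    	// "position: fixed" has no matching policy and is dropped; the
    	// surviving declarations are re-joined with "; ".
    	fmt.Println(p.Sanitize(`<p style="color: red; position: fixed">x</p>`))
    	// Expected: <p style="color: red">x</p>
    }
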
+func (p *Policy) allowNoAttrs(elementName string) bool {
+ _, ok := p.setOfElementsAllowedWithoutAttrs[elementName]
+ if !ok {
+ for _, r := range p.setOfElementsMatchingAllowedWithoutAttrs {
+ if r.MatchString(elementName) {
+ ok = true
+ break
+ }
+ }
+ }
+ return ok
+}
+
+func (p *Policy) validURL(rawurl string) (string, bool) {
+ if p.requireParseableURLs {
+ // A URL is considered valid if it is valid once leading and
+ // trailing whitespace has been trimmed
+ rawurl = strings.TrimSpace(rawurl)
+
+ // URLs cannot contain whitespace, unless the URL is a data URI
+ if strings.Contains(rawurl, " ") ||
+ strings.Contains(rawurl, "\t") ||
+ strings.Contains(rawurl, "\n") {
+ if !strings.HasPrefix(rawurl, `data:`) {
+ return "", false
+ }
+
+ // Remove \r and \n from base64 encoded data to pass url.Parse.
+ matched := dataURIbase64Prefix.FindString(rawurl)
+ if matched != "" {
+ rawurl = matched + strings.Replace(
+ strings.Replace(
+ rawurl[len(matched):],
+ "\r",
+ "",
+ -1,
+ ),
+ "\n",
+ "",
+ -1,
+ )
+ }
+ }
+
+ // URLs are valid if they parse
+ u, err := url.Parse(rawurl)
+ if err != nil {
+ return "", false
+ }
+
+ if u.Scheme != "" {
+ urlPolicies, ok := p.allowURLSchemes[u.Scheme]
+ if !ok {
+ for _, r := range p.allowURLSchemeRegexps {
+ if r.MatchString(u.Scheme) {
+ return u.String(), true
+ }
+ }
+
+ return "", false
+ }
+
+ if len(urlPolicies) == 0 {
+ return u.String(), true
+ }
+
+ for _, urlPolicy := range urlPolicies {
+ if urlPolicy(u) {
+ return u.String(), true
+ }
+ }
+
+ return "", false
+ }
+
+ if p.allowRelativeURLs {
+ if u.String() != "" {
+ return u.String(), true
+ }
+ }
+
+ return "", false
+ }
+
+ return rawurl, true
+}
+
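The three scheme-allowlisting mechanisms consulted by validURL, exercised together; the expected behaviour is inferred from the branches above:

    package main

    import (
    	"fmt"
    	"net/url"
    	"regexp"
    	"strings"

    	"github.com/microcosm-cc/bluemonday"
    )

    func main() {
    	p := bluemonday.NewPolicy()
    	p.AllowAttrs("href").OnElements("a")
    	p.RequireParseableURLs(true)

    	// Plain scheme allowlisting, pattern-based allowlisting, and a
    	// custom per-URL policy for one scheme.
    	p.AllowURLSchemes("https")
    	p.AllowURLSchemesMatching(regexp.MustCompile(`^web\+`))
    	p.AllowURLSchemeWithCustomPolicy("mailto", func(u *url.URL) bool {
    		return strings.HasSuffix(u.Opaque, "@example.com")
    	})

    	fmt.Println(p.Sanitize(`<a href="javascript:alert(1)">x</a>`))
    	// Should print: x  (the javascript: link is dropped entirely)
    }
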
+func linkable(elementName string) bool {
+ switch elementName {
+ case "a", "area", "base", "link":
+ // elements that allow .href
+ return true
+ case "blockquote", "del", "ins", "q":
+ // elements that allow .cite
+ return true
+ case "audio", "embed", "iframe", "img", "input", "script", "track", "video":
+ // elements that allow .src
+ return true
+ default:
+ return false
+ }
+}
+
+// stringInSlice returns true if needle exists in haystack (matched
+// case-insensitively)
+func stringInSlice(needle string, haystack []string) bool {
+ for _, straw := range haystack {
+ if strings.EqualFold(straw, needle) {
+ return true
+ }
+ }
+ return false
+}
+
+func isDataAttribute(val string) bool {
+ if !dataAttribute.MatchString(val) {
+ return false
+ }
+ rest := strings.Split(val, "data-")
+ if len(rest) == 1 {
+ return false
+ }
+ // data-xml* is invalid.
+ if dataAttributeXMLPrefix.MatchString(rest[1]) {
+ return false
+ }
+ // no uppercase or semi-colons allowed.
+ if dataAttributeInvalidChars.MatchString(rest[1]) {
+ return false
+ }
+ return true
+}
+
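isDataAttribute is only consulted when a policy opts in via AllowDataAttributes; a sketch of its effect, with the expected output inferred from the checks below:

    package main

    import (
    	"fmt"

    	"github.com/microcosm-cc/bluemonday"
    )

    func main() {
    	p := bluemonday.NewPolicy()
    	p.AllowElements("div")
    	p.AllowDataAttributes()

    	// data-id passes; data-xmlid is rejected by the ^xml.+ prefix rule.
    	fmt.Println(p.Sanitize(`<div data-id="7" data-xmlid="x">t</div>`))
    	// Expected: <div data-id="7">t</div>
    }
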
+func removeUnicode(value string) string {
+ substitutedValue := value
+ currentLoc := cssUnicodeChar.FindStringIndex(substitutedValue)
+ for currentLoc != nil {
+
+ character := substitutedValue[currentLoc[0]+1 : currentLoc[1]]
+ character = strings.TrimSpace(character)
+ if len(character) < 4 {
+ character = strings.Repeat("0", 4-len(character)) + character
+ } else {
+ for len(character) > 4 {
+ if character[0] != '0' {
+ character = ""
+ break
+ } else {
+ character = character[1:]
+ }
+ }
+ }
+ character = "\\u" + character
+ translatedChar, err := strconv.Unquote(`"` + character + `"`)
+ translatedChar = strings.TrimSpace(translatedChar)
+ if err != nil {
+ return ""
+ }
+ substitutedValue = substitutedValue[0:currentLoc[0]] + translatedChar + substitutedValue[currentLoc[1]:]
+ currentLoc = cssUnicodeChar.FindStringIndex(substitutedValue)
+ }
+ return substitutedValue
+}
+
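The decoding trick above can be reproduced in isolation: a CSS escape such as `\72` is padded to four hex digits and unquoted as a Go \u escape, which is why obfuscated values like `\72 ed` compare equal to "red" when matched against a style policy.

    package main

    import (
    	"fmt"
    	"strconv"
    )

    func main() {
    	// `\72` padded to four digits becomes \u0072, i.e. "r".
    	ch, err := strconv.Unquote(`"\u0072"`)
    	if err != nil {
    		panic(err)
    	}
    	fmt.Println(ch + "ed") // prints: red
    }
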
+func (p *Policy) matchRegex(elementName string) (map[string][]attrPolicy, bool) {
+ aps := make(map[string][]attrPolicy)
+ matched := false
+ for regex, attrs := range p.elsMatchingAndAttrs {
+ if regex.MatchString(elementName) {
+ matched = true
+ for k, v := range attrs {
+ aps[k] = append(aps[k], v...)
+ }
+ }
+ }
+ return aps, matched
+}
+
+// normaliseElementName takes an HTML element like