Files
opencloud/vendor/github.com/gogs/chardet/recognizer.go
Pascal Bleser a8c2beac3a test(groupware): add testcontainers based jmap test
* adds pkg/jmap/jmap_integration_test.go

 * uses ghcr.io/stalwartlabs/stalwart:v0.13.2-alpine

 * can be disabled by setting one of the following environment
   variables, in the same fashion as ca0493b28
   - CI=woodpecker
   - CI_SYSTEM_NAME=woodpecker
   - USE_TESTCONTAINERS=false

 * dependencies:
   - bump github.com/go-test/deep from 1.1.0 to 1.1.1
   - add github.com/cention-sany/utf7
   - add github.com/dustinkirkland/golang-petname
   - add github.com/emersion/go-imap/v2
   - add github.com/emersion/go-message
   - add github.com/emersion/go-sasl
   - add github.com/go-crypt/crypt
   - add github.com/go-crypt/x
   - add github.com/gogs/chardet
   - add github.com/inbucket/html2text
   - add github.com/jhilleryerd/enmime/v2
   - add github.com/ssor/bom
   - add gopkg.in/loremipsum.v1
2025-12-09 09:15:37 +01:00

84 lines
1.6 KiB
Go

package chardet
type recognizer interface {
Match(*recognizerInput) recognizerOutput
}
type recognizerOutput Result
type recognizerInput struct {
raw []byte
input []byte
tagStripped bool
byteStats []int
hasC1Bytes bool
}
func newRecognizerInput(raw []byte, stripTag bool) *recognizerInput {
input, stripped := mayStripInput(raw, stripTag)
byteStats := computeByteStats(input)
return &recognizerInput{
raw: raw,
input: input,
tagStripped: stripped,
byteStats: byteStats,
hasC1Bytes: computeHasC1Bytes(byteStats),
}
}
func mayStripInput(raw []byte, stripTag bool) (out []byte, stripped bool) {
const inputBufferSize = 8192
out = make([]byte, 0, inputBufferSize)
var badTags, openTags int32
var inMarkup bool = false
stripped = false
if stripTag {
stripped = true
for _, c := range raw {
if c == '<' {
if inMarkup {
badTags += 1
}
inMarkup = true
openTags += 1
}
if !inMarkup {
out = append(out, c)
if len(out) >= inputBufferSize {
break
}
}
if c == '>' {
inMarkup = false
}
}
}
if openTags < 5 || openTags/5 < badTags || (len(out) < 100 && len(raw) > 600) {
limit := len(raw)
if limit > inputBufferSize {
limit = inputBufferSize
}
out = make([]byte, limit)
copy(out, raw[:limit])
stripped = false
}
return
}
func computeByteStats(input []byte) []int {
r := make([]int, 256)
for _, c := range input {
r[c] += 1
}
return r
}
func computeHasC1Bytes(byteStats []int) bool {
for _, count := range byteStats[0x80 : 0x9F+1] {
if count > 0 {
return true
}
}
return false
}