Files
opencloud/vendor/github.com/gogs/chardet/utf8.go
Pascal Bleser 9680008790 test(groupware): add testcontainers based jmap test
* adds pkg/jmap/jmap_integration_test.go

 * uses ghcr.io/stalwartlabs/stalwart:v0.13.2-alpine

 * can be disabled by setting one of the following environment
   variables, in the same fashion as ca0493b28
   - CI=woodpecker
   - CI_SYSTEM_NAME=woodpecker
   - USE_TESTCONTAINERS=false

 * dependencies:
   - bump github.com/go-test/deep from 1.1.0 to 1.1.1
   - add github.com/cention-sany/utf7
   - add github.com/dustinkirkland/golang-petname
   - add github.com/emersion/go-imap/v2
   - add github.com/emersion/go-message
   - add github.com/emersion/go-sasl
   - add github.com/go-crypt/crypt
   - add github.com/go-crypt/x
   - add github.com/gogs/chardet
   - add github.com/inbucket/html2text
   - add github.com/jhillyerd/enmime/v2
   - add github.com/ssor/bom
   - add gopkg.in/loremipsum.v1
2026-01-22 09:42:19 +01:00

72 lines
1.3 KiB
Go

package chardet
import (
"bytes"
)
// utf8Bom holds the three bytes 0xEF 0xBB 0xBF, the UTF-8 byte order mark
// that Match looks for at the very start of the raw input.
var utf8Bom = []byte("\xEF\xBB\xBF")
// recognizerUtf8 is the UTF-8 recognizer. It has no fields, so every value of
// the type is interchangeable and carries no state between Match calls.
type recognizerUtf8 struct{}
// newRecognizer_utf8 returns a pointer to a zero-valued recognizerUtf8.
func newRecognizer_utf8() *recognizerUtf8 {
	return new(recognizerUtf8)
}
// Match estimates how likely input.raw is to be UTF-8 encoded text.
//
// It makes a single pass over the raw bytes, counting well-formed multi-byte
// sequences (numValid) and malformed ones (numInvalid). Those counts, together
// with the presence of a UTF-8 byte order mark at the start of the input, are
// mapped to the Confidence of the returned output, whose Charset is always
// "UTF-8". The rules are evaluated in this order:
//
//   - BOM and nothing malformed: 100
//   - BOM and numValid > 10*numInvalid: 80
//   - more than three well-formed sequences and nothing malformed: 100
//   - at least one well-formed sequence and nothing malformed: 80
//   - no multi-byte sequences at all (plain ASCII, including empty input): 10
//   - numValid > 10*numInvalid: 25
//   - anything else: Confidence keeps its zero value
//
// A sequence cut short by the end of the input is counted as neither valid
// nor invalid. Scanning stops as soon as an invalid lead byte pushes
// numInvalid above five.
func (*recognizerUtf8) Match(input *recognizerInput) (output recognizerOutput) {
	output = recognizerOutput{
		Charset: "UTF-8",
	}
	hasBom := bytes.HasPrefix(input.raw, utf8Bom)
	inputLen := len(input.raw)
	var numValid, numInvalid uint32
	var trailBytes uint8

scan:
	for i := 0; i < inputLen; i++ {
		c := input.raw[i]

		// Classify the lead byte and work out how many trail bytes must follow.
		switch {
		case c&0x80 == 0:
			// High bit clear: single-byte (ASCII) character.
			continue
		case c&0xE0 == 0xC0:
			trailBytes = 1
		case c&0xF0 == 0xE0:
			trailBytes = 2
		case c&0xF8 == 0xF0:
			trailBytes = 3
		default:
			// Not a legal lead byte (stray continuation byte or 0xF8..0xFF).
			// Count it once and move on to the next byte. Entering the
			// trail-byte loop with trailBytes == 0 would make the uint8
			// counter wrap to 255 on its first decrement, swallow the bytes
			// that follow, and could even report garbage as a valid sequence.
			numInvalid++
			if numInvalid > 5 {
				break scan
			}
			continue
		}

		// Verify that the expected number of continuation bytes (10xxxxxx)
		// follows the lead byte.
		for i++; i < inputLen; i++ {
			if input.raw[i]&0xC0 != 0x80 {
				numInvalid++
				break
			}
			if trailBytes--; trailBytes == 0 {
				numValid++
				break
			}
		}
	}

	// Case order matters: the BOM rules take precedence over the BOM-less ones.
	switch {
	case hasBom && numInvalid == 0:
		output.Confidence = 100
	case hasBom && numValid > numInvalid*10:
		output.Confidence = 80
	case numValid > 3 && numInvalid == 0:
		output.Confidence = 100
	case numValid > 0 && numInvalid == 0:
		output.Confidence = 80
	case numValid == 0 && numInvalid == 0:
		// Plain ASCII
		output.Confidence = 10
	case numValid > numInvalid*10:
		output.Confidence = 25
	}
	return output
}