mirror of
https://github.com/kopia/kopia.git
synced 2026-03-27 18:42:26 -04:00
447 lines
11 KiB
Go
447 lines
11 KiB
Go
// Package wcmatch implements wildcard matching files using .gitignore syntax.
|
|
package wcmatch
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"unicode"
|
|
|
|
"github.com/pkg/errors"
|
|
)
|
|
|
|
// WildcardMatcher represents a wildcard-pattern (in .gitignore syntax) and options used to match against file paths.
|
|
type WildcardMatcher struct {
|
|
pattern string
|
|
tokens []token
|
|
dirOnly bool
|
|
negated bool
|
|
options Options
|
|
}
|
|
|
|
// Options is the set of flags that control how a WildcardMatcher behaves.
type Options struct {
	// IgnoreCase enables case-insensitive matching when true.
	IgnoreCase bool

	// BaseDir, when non-empty, is the directory the pattern is anchored to.
	BaseDir string
}
|
|
|
|
// Option supports the functional option pattern for NewWildcardMatcher.
|
|
type Option func(*Options)
|
|
|
|
// IgnoreCase is used to enable/disable case-insensitive operation when creating a WildcardMatcher.
|
|
func IgnoreCase(enabled bool) Option {
|
|
return func(args *Options) {
|
|
args.IgnoreCase = enabled
|
|
}
|
|
}
|
|
|
|
// BaseDir is used to set the base directory to use when creating a WildcardMatcher.
|
|
// Providing a base dir here means the matcher will not match anything outside of that
|
|
// directory.
|
|
//
|
|
// Examples:
|
|
// The pattern 'a.txt' with the base dir '/my/base' is equivalent to
|
|
// the pattern '/my/base/**/a.txt' without a base dir.
|
|
//
|
|
// The pattern '/a.txt' with the base dir '/my/base/ is equivalent to
|
|
// the pattern '/my/base/a.txt' without a base dir.
|
|
func BaseDir(dir string) Option {
|
|
return func(args *Options) {
|
|
args.BaseDir = dir
|
|
}
|
|
}
|
|
|
|
// Pattern returns the original pattern from which this matcher was created.
|
|
func (matcher *WildcardMatcher) Pattern() string {
|
|
return matcher.pattern
|
|
}
|
|
|
|
// Negated inidicates whether the pattern used by this matcher is a negated pattern, i.e. starts with a '!'.
|
|
func (matcher *WildcardMatcher) Negated() bool {
|
|
return matcher.negated
|
|
}
|
|
|
|
// Options gets the options used when constructing the WildcardMatcher.
|
|
func (matcher *WildcardMatcher) Options() Options {
|
|
return matcher.options
|
|
}
|
|
|
|
// NewWildcardMatcher creates a new WildcardMatcher with the specified pattern and options.
|
|
// The default option is for the matcher to be case-sensitive without a base dir.
|
|
// nolint:funlen,gocognit,gocyclo,cyclop
|
|
func NewWildcardMatcher(pattern string, options ...Option) (matcher *WildcardMatcher, err error) {
|
|
var result []token
|
|
|
|
if len(pattern) > 2 && pattern[len(pattern)-2] != '\\' {
|
|
pattern = strings.TrimSpace(pattern)
|
|
}
|
|
|
|
args := &Options{
|
|
IgnoreCase: false,
|
|
BaseDir: "",
|
|
}
|
|
|
|
for _, option := range options {
|
|
option(args)
|
|
}
|
|
|
|
// Ensure that BaseDir does not have a trailing directory separator if it is non-empty.
|
|
if len(args.BaseDir) > 1 && strings.HasSuffix(args.BaseDir, "/") {
|
|
args.BaseDir = args.BaseDir[:len(args.BaseDir)-1]
|
|
}
|
|
|
|
p := newRuneScanner(pattern, args.IgnoreCase)
|
|
|
|
// Skip leading whitespace
|
|
for unicode.IsSpace(p.peek(0)) {
|
|
p.read()
|
|
}
|
|
|
|
negated := false
|
|
if p.peek(0) == '!' {
|
|
negated = true
|
|
|
|
p.read()
|
|
}
|
|
|
|
isPatternRooted := p.peek(0) == '/'
|
|
|
|
// Prepend the base directory to the pattern if we have one.
|
|
if args.BaseDir != "" && pattern != "" {
|
|
for _, ch := range args.BaseDir {
|
|
if ch == '/' {
|
|
result = append(result, tokenDirSep{})
|
|
} else {
|
|
result = append(result, tokenRune{ch})
|
|
}
|
|
}
|
|
|
|
if !isPatternRooted {
|
|
result = append(result, tokenDirSep{})
|
|
}
|
|
}
|
|
|
|
// If the pattern isn't rooted, i.e. doesn't start with a '/', nor contain a '/' in the middle somewhere, then we want it to match
|
|
// anywhere, so we add an implicit '**/' to the start of the pattern.
|
|
firstSlashIndex := p.indexOf('/')
|
|
if !isPatternRooted && pattern != "**" && pattern != "" {
|
|
if firstSlashIndex == -1 || firstSlashIndex == len(p.text)-1-p.pos {
|
|
result = append(result, tokenStar{true}, tokenDirSep{})
|
|
} else if args.BaseDir == "" {
|
|
// An unrooted pattern that contains a slash in the middle should be considered rooted, so
|
|
// prepend the pattern with a '/', unless we have a baseDir, in which case we already have rooted
|
|
// the pattern above.
|
|
result = append(result, tokenDirSep{})
|
|
}
|
|
}
|
|
|
|
dirOnly := false
|
|
if len(p.text) > 0 && p.text[len(p.text)-1] == '/' {
|
|
dirOnly = true
|
|
p.text = p.text[:len(p.text)-1]
|
|
}
|
|
|
|
for ch := p.read(); ch != 0; ch = p.read() {
|
|
switch ch {
|
|
case '\\':
|
|
if p.eos() {
|
|
return nil, errors.Errorf("invalid pattern \"%v\": end of line found after '\\' character. Use '\\\\' to indicate a literal backslash in a pattern", pattern)
|
|
}
|
|
|
|
ch = p.read()
|
|
result = append(result, tokenRune{ch})
|
|
|
|
case '?':
|
|
result = append(result, tokenAnyChar{})
|
|
|
|
case '/':
|
|
result = append(result, tokenDirSep{})
|
|
|
|
case '*':
|
|
if p.peek(0) == '*' {
|
|
// We have a double star
|
|
prevCh := p.peek(-2)
|
|
|
|
for p.peek(0) == '*' {
|
|
// Just consume contiguous stars
|
|
p.read()
|
|
}
|
|
|
|
if prevCh == 0 || prevCh == '/' && (p.peek(0) == '/' || p.eos()) {
|
|
result = append(result, tokenStar{doubleStar: true})
|
|
continue
|
|
}
|
|
}
|
|
|
|
result = append(result, tokenStar{doubleStar: false})
|
|
|
|
case '[':
|
|
ch = p.read()
|
|
negatedSeq := ch == '!'
|
|
|
|
if negatedSeq {
|
|
ch = p.read()
|
|
}
|
|
|
|
var seq []seqToken
|
|
|
|
for ; ch != ']' && ch != 0; ch = p.read() {
|
|
if ch == 0 {
|
|
return nil, errors.Errorf("invalid pattern \"%v\": end of line found, expected ']'", pattern)
|
|
}
|
|
|
|
if ch == '\\' {
|
|
ch = p.read()
|
|
if ch == 0 {
|
|
return nil, errors.Errorf("invalid pattern \"%v\": end of line found after '\\' character. Use '\\\\' to indicate a literal backslash in a pattern", pattern)
|
|
}
|
|
}
|
|
|
|
switch {
|
|
case p.peek(0) == '-' && p.peek(1) != ']' && p.peek(1) != 0:
|
|
// we have a range
|
|
p.read() // consume the '-'
|
|
endCh := p.read()
|
|
|
|
if endCh == '\\' {
|
|
endCh = p.read()
|
|
if endCh == 0 {
|
|
return nil, errors.Errorf("invalid pattern \"%v\": end of line found after '\\' character. Use '\\\\' to indicate a literal backslash in a pattern", pattern)
|
|
}
|
|
}
|
|
|
|
seq = append(seq, seqTokenRuneRange{ch, endCh, negatedSeq})
|
|
|
|
case ch == '[' && p.peek(0) == ':':
|
|
closingBracketIndex := p.indexOf(']')
|
|
if closingBracketIndex == -1 {
|
|
return nil, errors.Errorf("invalid pattern \"%v\": unterminated sequence, expected ']'", pattern)
|
|
}
|
|
|
|
if closingBracketIndex-1 <= p.pos || p.peek(closingBracketIndex-1) != ':' {
|
|
// treat as normal
|
|
seq = append(seq, seqTokenRune{ch})
|
|
continue
|
|
}
|
|
|
|
class := strings.ToLower(string(p.text[p.pos+1 : closingBracketIndex+p.pos-1]))
|
|
p.pos += closingBracketIndex + 1
|
|
|
|
var m func(ch rune) bool
|
|
|
|
switch class {
|
|
case "alnum":
|
|
m = func(ch rune) bool {
|
|
return unicode.IsLetter(ch) || unicode.IsDigit(ch)
|
|
}
|
|
case "alpha":
|
|
m = unicode.IsLetter
|
|
|
|
case "ascii":
|
|
m = func(ch rune) bool {
|
|
return ch >= 0 && ch <= 127
|
|
}
|
|
case "blank":
|
|
m = func(ch rune) bool {
|
|
return unicode.Is(unicode.Zs, ch) || ch == '\t'
|
|
}
|
|
case "cntrl":
|
|
m = unicode.IsControl
|
|
|
|
case "digit":
|
|
m = unicode.IsDigit
|
|
|
|
case "graph":
|
|
m = unicode.IsGraphic
|
|
|
|
case "lower":
|
|
m = unicode.IsLower
|
|
|
|
case "print":
|
|
m = unicode.IsPrint
|
|
|
|
case "punct":
|
|
m = func(ch rune) bool {
|
|
return unicode.IsPunct(ch) || unicode.IsSymbol(ch)
|
|
}
|
|
case "space":
|
|
m = unicode.IsSpace
|
|
|
|
case "upper":
|
|
if args.IgnoreCase {
|
|
m = func(ch rune) bool {
|
|
return unicode.IsUpper(ch) || unicode.IsLower(ch)
|
|
}
|
|
} else {
|
|
m = unicode.IsUpper
|
|
}
|
|
case "xdigit":
|
|
m = func(ch rune) bool {
|
|
return ch >= 'A' && ch <= 'F' ||
|
|
ch >= 'a' && ch <= 'f' ||
|
|
unicode.IsDigit(ch)
|
|
}
|
|
|
|
default:
|
|
return nil, errors.Errorf("invalid pattern %#v: unrecognized character class [:%v:]", pattern, class)
|
|
}
|
|
|
|
seq = append(seq, seqTokenClass{class, m})
|
|
default:
|
|
seq = append(seq, seqTokenRune{ch})
|
|
}
|
|
}
|
|
|
|
if ch != ']' {
|
|
return nil, errors.Errorf("invalid pattern %#v: unterminated sequence, expected ']'", pattern)
|
|
}
|
|
|
|
result = append(result, tokenSeq{seq, negatedSeq})
|
|
|
|
default:
|
|
result = append(result, tokenRune{ch})
|
|
}
|
|
}
|
|
|
|
return &WildcardMatcher{
|
|
pattern: pattern,
|
|
tokens: result,
|
|
options: *args,
|
|
dirOnly: dirOnly,
|
|
negated: negated,
|
|
}, nil
|
|
}
|
|
|
|
// matchResult is the outcome reported by doMatch.
type matchResult int

const (
	// wcMatch means the text matched the tokens.
	wcMatch matchResult = iota
	// wcNoMatch means the text did not match at the current position.
	wcNoMatch
	// wcAbortAll means the text ran out before the remaining tokens could be satisfied.
	wcAbortAll
	// wcAbortToDoubleStar means a single '*' ran into a '/'; only an enclosing
	// '**' matching loop keeps trying after receiving it.
	wcAbortToDoubleStar
)
|
|
|
|
// Match matches the specified text against the pattern of this WildcardMatcher. Returns true if it is a match,
|
|
// and false if it is not. isDir should be set to true to indicate that the specified text is a directory, and
|
|
// false if it is a file.
|
|
func (matcher *WildcardMatcher) Match(text string, isDir bool) bool {
|
|
if matcher.dirOnly && !isDir {
|
|
return matcher.negated
|
|
}
|
|
|
|
return (doMatch(matcher.tokens, []rune(text), matcher.options.IgnoreCase) == wcMatch) != matcher.negated
|
|
}
|
|
|
|
//nolint:gocognit,gocyclo,cyclop
|
|
func doMatch(tokens []token, text []rune, ignoreCase bool) matchResult {
|
|
t := runeScanner{0, text, ignoreCase}
|
|
|
|
var tch rune
|
|
for pi := 0; pi < len(tokens); pi, _ = pi+1, t.read() {
|
|
tch = t.peek(0)
|
|
|
|
if t.eos() && !isStar(tokens[pi]) {
|
|
// We have reached the end of the text but with pattern still remaining without an '*'. This is not a match.
|
|
return wcAbortAll
|
|
}
|
|
|
|
switch token := tokens[pi].(type) {
|
|
case tokenRune:
|
|
if tch != token.Ch {
|
|
return wcNoMatch
|
|
}
|
|
|
|
case tokenDirSep:
|
|
if tch != '/' {
|
|
return wcNoMatch
|
|
}
|
|
case tokenAnyChar:
|
|
if tch == '/' {
|
|
return wcNoMatch
|
|
}
|
|
|
|
continue
|
|
|
|
case tokenStar:
|
|
if pi == len(tokens)-1 {
|
|
// Trailing ** matches everything. Trailing '*' matches only if there are no more directory separators.
|
|
if !token.doubleStar && indexOf(text[t.pos:], '/') != -1 {
|
|
return wcNoMatch
|
|
}
|
|
|
|
return wcMatch
|
|
}
|
|
|
|
if token.doubleStar && pi+2 < len(tokens) && isDirSep(tokens[pi+1]) && doMatch(tokens[pi+2:], text[t.pos:], ignoreCase) == wcMatch {
|
|
return wcMatch
|
|
}
|
|
|
|
if !token.doubleStar && isDirSep(tokens[pi+1]) {
|
|
// One asterisk followed by a slash
|
|
slashIndex := indexOf(text[t.pos:], '/')
|
|
if slashIndex == -1 {
|
|
return wcNoMatch
|
|
}
|
|
|
|
// Skip all characters up to the upcoming directory sep.
|
|
t.pos += slashIndex - 1
|
|
|
|
break
|
|
}
|
|
|
|
for {
|
|
if t.eos() {
|
|
break
|
|
}
|
|
|
|
matchResult := doMatch(tokens[pi+1:], text[t.pos:], ignoreCase)
|
|
if matchResult != wcNoMatch {
|
|
if !token.doubleStar || matchResult != wcAbortToDoubleStar {
|
|
return matchResult
|
|
}
|
|
} else if !token.doubleStar && tch == '/' {
|
|
// We are working on a single asterisk matching and encountered a '/', so return AbortToStarStar, meaning any
|
|
// recursive calls will abort until we reach a '**' matching loop where we will then continue.
|
|
return wcAbortToDoubleStar
|
|
}
|
|
|
|
tch = t.read()
|
|
}
|
|
|
|
return wcAbortAll
|
|
|
|
case tokenSeq:
|
|
match := false
|
|
|
|
for _, r := range token.items {
|
|
if r.match(tch) {
|
|
match = true
|
|
break
|
|
}
|
|
}
|
|
|
|
if match == token.negated {
|
|
return wcNoMatch
|
|
}
|
|
|
|
default:
|
|
panic(fmt.Sprintf("internal error, unsupported token %T", token))
|
|
}
|
|
}
|
|
|
|
if t.eos() {
|
|
return wcMatch
|
|
}
|
|
|
|
return wcNoMatch
|
|
}
|
|
|
|
// indexOf returns the index of the first occurrence of ch in slice, or -1 when
// ch does not occur in it.
func indexOf(slice []rune, ch rune) int {
	for i := 0; i < len(slice); i++ {
		if slice[i] == ch {
			return i
		}
	}

	return -1
}
|