mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-01 13:42:20 -04:00
* feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
436 lines
12 KiB
Go
436 lines
12 KiB
Go
package peg
|
|
|
|
import (
	"regexp"
	"unicode/utf8"
)
|
|
|
|
// invalidRuleCharsRe matches each run of characters that is not an ASCII
// letter, a digit or '-'. The builder replaces every such run with a single
// "-" when it normalises a rule name.
var invalidRuleCharsRe = regexp.MustCompile(`[^a-zA-Z0-9-]+`)
|
|
|
|
// Builder provides a fluent API for constructing parsers.
|
|
type Builder struct {
|
|
arena Arena
|
|
}
|
|
|
|
func NewBuilder() *Builder {
|
|
return &Builder{
|
|
arena: Arena{
|
|
rules: make(map[string]ParserID),
|
|
root: InvalidParserID,
|
|
},
|
|
}
|
|
}
|
|
|
|
func (b *Builder) add(p Parser) ParserID {
|
|
return b.arena.addParser(p)
|
|
}
|
|
|
|
// Eps matches nothing, always succeeds.
|
|
func (b *Builder) Eps() ParserID {
|
|
return b.add(&EpsilonParser{})
|
|
}
|
|
|
|
// Start matches start of input.
|
|
func (b *Builder) Start() ParserID {
|
|
return b.add(&StartParser{})
|
|
}
|
|
|
|
// End matches end of input.
|
|
func (b *Builder) End() ParserID {
|
|
return b.add(&EndParser{})
|
|
}
|
|
|
|
// Literal matches an exact string.
|
|
func (b *Builder) Literal(s string) ParserID {
|
|
return b.add(&LiteralParser{Literal: s})
|
|
}
|
|
|
|
// Seq matches a sequence of parsers in order.
|
|
func (b *Builder) Seq(children ...ParserID) ParserID {
|
|
// Flatten nested sequences
|
|
var flattened []ParserID
|
|
for _, id := range children {
|
|
if seq, ok := b.arena.parsers[id].(*SequenceParser); ok {
|
|
flattened = append(flattened, seq.Children...)
|
|
} else {
|
|
flattened = append(flattened, id)
|
|
}
|
|
}
|
|
return b.add(&SequenceParser{Children: flattened})
|
|
}
|
|
|
|
// Choice tries alternatives until one succeeds.
|
|
func (b *Builder) Choice(children ...ParserID) ParserID {
|
|
// Flatten nested choices
|
|
var flattened []ParserID
|
|
for _, id := range children {
|
|
if ch, ok := b.arena.parsers[id].(*ChoiceParser); ok {
|
|
flattened = append(flattened, ch.Children...)
|
|
} else {
|
|
flattened = append(flattened, id)
|
|
}
|
|
}
|
|
return b.add(&ChoiceParser{Children: flattened})
|
|
}
|
|
|
|
// Optional matches zero or one occurrence.
|
|
func (b *Builder) Optional(child ParserID) ParserID {
|
|
return b.Repeat(child, 0, 1)
|
|
}
|
|
|
|
// ZeroOrMore matches zero or more occurrences.
|
|
func (b *Builder) ZeroOrMore(child ParserID) ParserID {
|
|
return b.Repeat(child, 0, -1)
|
|
}
|
|
|
|
// OneOrMore matches one or more occurrences.
|
|
func (b *Builder) OneOrMore(child ParserID) ParserID {
|
|
return b.Repeat(child, 1, -1)
|
|
}
|
|
|
|
// Repeat matches between min and max times. Use -1 for unbounded max.
|
|
func (b *Builder) Repeat(child ParserID, min, max int) ParserID {
|
|
return b.add(&RepetitionParser{Child: child, MinCount: min, MaxCount: max})
|
|
}
|
|
|
|
// Peek is a positive lookahead — succeeds if child succeeds, consumes nothing.
|
|
func (b *Builder) Peek(child ParserID) ParserID {
|
|
return b.add(&AndParser{Child: child})
|
|
}
|
|
|
|
// Negate is a negative lookahead — succeeds if child fails, consumes nothing.
|
|
func (b *Builder) Negate(child ParserID) ParserID {
|
|
return b.add(&NotParser{Child: child})
|
|
}
|
|
|
|
// Any matches a single UTF-8 codepoint.
|
|
func (b *Builder) Any() ParserID {
|
|
return b.add(&AnyParser{})
|
|
}
|
|
|
|
// Space matches zero or more whitespace characters.
|
|
func (b *Builder) Space() ParserID {
|
|
return b.add(&SpaceParser{})
|
|
}
|
|
|
|
// Chars matches characters from a character class expression like "[a-z]".
|
|
func (b *Builder) Chars(classes string, min, max int) ParserID {
|
|
ranges, negated := parseCharClasses(classes)
|
|
return b.add(&CharsParser{
|
|
Pattern: classes,
|
|
Ranges: ranges,
|
|
Negated: negated,
|
|
MinCount: min,
|
|
MaxCount: max,
|
|
})
|
|
}
|
|
|
|
// Until matches all characters until a delimiter is found (not consumed).
|
|
func (b *Builder) Until(delimiter string) ParserID {
|
|
return b.add(&UntilParser{Delimiters: []string{delimiter}})
|
|
}
|
|
|
|
// UntilOneOf matches until any of the delimiters is found.
|
|
func (b *Builder) UntilOneOf(delimiters ...string) ParserID {
|
|
return b.add(&UntilParser{Delimiters: delimiters})
|
|
}
|
|
|
|
// Rest matches everything to end of input.
|
|
func (b *Builder) Rest() ParserID {
|
|
return b.add(&UntilParser{Delimiters: nil})
|
|
}
|
|
|
|
// JSONString matches JSON string content (without surrounding quotes).
|
|
func (b *Builder) JSONString() ParserID {
|
|
return b.add(&JSONStringParser{})
|
|
}
|
|
|
|
// JSON matches a complete JSON value.
|
|
func (b *Builder) JSON() ParserID {
|
|
return b.add(&JSONParser{})
|
|
}
|
|
|
|
// JSONNumber matches a JSON number.
|
|
func (b *Builder) JSONNumber() ParserID {
|
|
// We implement this as a dedicated parser entry that delegates to parseJSONNumber
|
|
return b.add(&jsonNumberParser{})
|
|
}
|
|
|
|
// PythonDictString matches single-quoted string content (without quotes).
|
|
func (b *Builder) PythonDictString() ParserID {
|
|
return b.add(&PythonDictStringParser{})
|
|
}
|
|
|
|
// DoubleQuotedString matches a double-quoted string: "content" + space
|
|
func (b *Builder) DoubleQuotedString() ParserID {
|
|
return b.LazyRule("dq-string", func() ParserID {
|
|
return b.Seq(b.Literal(`"`), b.JSONString(), b.Literal(`"`), b.Space())
|
|
})
|
|
}
|
|
|
|
// SingleQuotedString matches a single-quoted string: 'content' + space
|
|
func (b *Builder) SingleQuotedString() ParserID {
|
|
return b.LazyRule("sq-string", func() ParserID {
|
|
return b.Seq(b.Literal("'"), b.PythonDictString(), b.Literal("'"), b.Space())
|
|
})
|
|
}
|
|
|
|
// FlexibleString matches either a double or single-quoted string.
|
|
func (b *Builder) FlexibleString() ParserID {
|
|
return b.LazyRule("flexible-string", func() ParserID {
|
|
return b.Choice(b.DoubleQuotedString(), b.SingleQuotedString())
|
|
})
|
|
}
|
|
|
|
// Marker matches <...> or [...] delimited text.
|
|
func (b *Builder) Marker() ParserID {
|
|
return b.Choice(
|
|
b.Seq(b.Literal("<"), b.Until(">"), b.Literal(">")),
|
|
b.Seq(b.Literal("["), b.Until("]"), b.Literal("]")),
|
|
)
|
|
}
|
|
|
|
// PythonValue matches a Python-style value (dict, array, string, number, bool, None).
|
|
func (b *Builder) PythonValue() ParserID {
|
|
return b.LazyRule("python-value", func() ParserID {
|
|
return b.Choice(
|
|
b.PythonDict(), b.PythonArray(), b.PythonString(),
|
|
b.JSONNumber(), b.PythonBool(), b.PythonNull(),
|
|
)
|
|
})
|
|
}
|
|
|
|
// PythonString matches a Python string (double or single-quoted).
|
|
func (b *Builder) PythonString() ParserID {
|
|
return b.LazyRule("python-string", func() ParserID {
|
|
return b.Choice(b.DoubleQuotedString(), b.SingleQuotedString())
|
|
})
|
|
}
|
|
|
|
// PythonBool matches True or False.
|
|
func (b *Builder) PythonBool() ParserID {
|
|
return b.LazyRule("python-bool", func() ParserID {
|
|
return b.Seq(b.Choice(b.Literal("True"), b.Literal("False")), b.Space())
|
|
})
|
|
}
|
|
|
|
// PythonNull matches None.
|
|
func (b *Builder) PythonNull() ParserID {
|
|
return b.LazyRule("python-none", func() ParserID {
|
|
return b.Seq(b.Literal("None"), b.Space())
|
|
})
|
|
}
|
|
|
|
// PythonDict matches a Python dictionary {key: value, ...}.
|
|
func (b *Builder) PythonDict() ParserID {
|
|
return b.LazyRule("python-dict", func() ParserID {
|
|
member := b.Seq(b.PythonString(), b.Space(), b.Literal(":"), b.Space(), b.PythonValue())
|
|
return b.Seq(
|
|
b.Literal("{"), b.Space(),
|
|
b.Optional(b.Seq(member, b.ZeroOrMore(b.Seq(b.Space(), b.Literal(","), b.Space(), member)))),
|
|
b.Space(), b.Literal("}"), b.Space(),
|
|
)
|
|
})
|
|
}
|
|
|
|
// PythonArray matches a Python array [value, ...].
|
|
func (b *Builder) PythonArray() ParserID {
|
|
return b.LazyRule("python-array", func() ParserID {
|
|
return b.Seq(
|
|
b.Literal("["), b.Space(),
|
|
b.Optional(b.Seq(b.PythonValue(), b.ZeroOrMore(b.Seq(b.Space(), b.Literal(","), b.Space(), b.PythonValue())))),
|
|
b.Space(), b.Literal("]"), b.Space(),
|
|
)
|
|
})
|
|
}
|
|
|
|
// LazyRule creates a named rule with deferred construction to support recursion.
|
|
// If the rule already exists, returns a ref to it. Otherwise, creates a placeholder,
|
|
// builds the child, and replaces the placeholder.
|
|
func (b *Builder) LazyRule(name string, builderFn func() ParserID) ParserID {
|
|
cleanName := invalidRuleCharsRe.ReplaceAllString(name, "-")
|
|
if _, exists := b.arena.rules[cleanName]; exists {
|
|
return b.add(&RefParser{Name: cleanName})
|
|
}
|
|
|
|
// Create placeholder rule to allow recursive references
|
|
placeholderChild := b.add(&AnyParser{})
|
|
ruleID := b.add(&RuleParser{Name: cleanName, Child: placeholderChild})
|
|
b.arena.rules[cleanName] = ruleID
|
|
|
|
// Build the actual parser
|
|
child := builderFn()
|
|
|
|
// Update the rule with the real child
|
|
b.arena.parsers[ruleID] = &RuleParser{Name: cleanName, Child: child}
|
|
|
|
return b.add(&RefParser{Name: cleanName})
|
|
}
|
|
|
|
// Rule creates a named rule and returns a ref to it.
|
|
func (b *Builder) Rule(name string, child ParserID) ParserID {
|
|
cleanName := invalidRuleCharsRe.ReplaceAllString(name, "-")
|
|
ruleID := b.add(&RuleParser{Name: cleanName, Child: child})
|
|
b.arena.rules[cleanName] = ruleID
|
|
return b.add(&RefParser{Name: cleanName})
|
|
}
|
|
|
|
// TriggerRule creates a named rule marked as a trigger (for lazy grammar generation).
|
|
func (b *Builder) TriggerRule(name string, child ParserID) ParserID {
|
|
cleanName := invalidRuleCharsRe.ReplaceAllString(name, "-")
|
|
ruleID := b.add(&RuleParser{Name: cleanName, Child: child, Trigger: true})
|
|
b.arena.rules[cleanName] = ruleID
|
|
return b.add(&RefParser{Name: cleanName})
|
|
}
|
|
|
|
// Ref creates a forward reference to a named rule.
|
|
func (b *Builder) Ref(name string) ParserID {
|
|
return b.add(&RefParser{Name: name})
|
|
}
|
|
|
|
// Atomic creates a parser that suppresses partial AST nodes.
|
|
func (b *Builder) Atomic(child ParserID) ParserID {
|
|
return b.add(&AtomicParser{Child: child})
|
|
}
|
|
|
|
// Tag creates a semantic tag in the AST.
|
|
func (b *Builder) Tag(tag string, child ParserID) ParserID {
|
|
return b.add(&TagParser{Child: child, Tag: tag})
|
|
}
|
|
|
|
// Schema wraps a parser with schema metadata (pass-through at parse time).
|
|
func (b *Builder) Schema(child ParserID, name string) ParserID {
|
|
return b.add(&SchemaParser{Child: child, Name: name})
|
|
}
|
|
|
|
// SetRoot sets the root parser.
|
|
func (b *Builder) SetRoot(id ParserID) {
|
|
b.arena.root = id
|
|
}
|
|
|
|
// Build resolves references and returns the arena.
|
|
func (b *Builder) Build() *Arena {
|
|
b.arena.resolveRefs()
|
|
arena := b.arena
|
|
// Reset builder
|
|
b.arena = Arena{
|
|
rules: make(map[string]ParserID),
|
|
root: InvalidParserID,
|
|
}
|
|
return &arena
|
|
}
|
|
|
|
// parseCharClasses parses a character class expression and returns ranges and negation.
|
|
func parseCharClasses(classes string) ([]CharRange, bool) {
|
|
content := classes
|
|
negated := false
|
|
|
|
if len(content) > 0 && content[0] == '[' {
|
|
content = content[1:]
|
|
}
|
|
if len(content) > 0 && content[len(content)-1] == ']' {
|
|
content = content[:len(content)-1]
|
|
}
|
|
if len(content) > 0 && content[0] == '^' {
|
|
negated = true
|
|
content = content[1:]
|
|
}
|
|
|
|
var ranges []CharRange
|
|
i := 0
|
|
for i < len(content) {
|
|
startChar, startLen := ParseCharClassChar(content, i)
|
|
i += startLen
|
|
|
|
if i+1 < len(content) && content[i] == '-' {
|
|
endChar, endLen := ParseCharClassChar(content, i+1)
|
|
ranges = append(ranges, CharRange{Start: startChar, End: endChar})
|
|
i += 1 + endLen
|
|
} else {
|
|
ranges = append(ranges, CharRange{Start: startChar, End: startChar})
|
|
}
|
|
}
|
|
|
|
return ranges, negated
|
|
}
|
|
|
|
func ParseCharClassChar(content string, pos int) (rune, int) {
|
|
if content[pos] == '\\' && pos+1 < len(content) {
|
|
switch content[pos+1] {
|
|
case 'n':
|
|
return '\n', 2
|
|
case 't':
|
|
return '\t', 2
|
|
case 'r':
|
|
return '\r', 2
|
|
case '\\':
|
|
return '\\', 2
|
|
case ']':
|
|
return ']', 2
|
|
case '[':
|
|
return '[', 2
|
|
case 'x':
|
|
if r, n := parseHexEscape(content, pos+2, 2); n > 0 {
|
|
return r, 2 + n
|
|
}
|
|
return 'x', 2
|
|
case 'u':
|
|
if r, n := parseHexEscape(content, pos+2, 4); n > 0 {
|
|
return r, 2 + n
|
|
}
|
|
return 'u', 2
|
|
case 'U':
|
|
if r, n := parseHexEscape(content, pos+2, 8); n > 0 {
|
|
return r, 2 + n
|
|
}
|
|
return 'U', 2
|
|
default:
|
|
return rune(content[pos+1]), 2
|
|
}
|
|
}
|
|
return rune(content[pos]), 1
|
|
}
|
|
|
|
// parseHexEscape reads exactly count hexadecimal digits of s starting at
// byte offset pos and returns their value together with count.
//
// It returns (0, 0) when fewer than count bytes remain or when any of them
// is not a hex digit; upper- and lower-case digits are both accepted.
func parseHexEscape(s string, pos, count int) (rune, int) {
	end := pos + count
	if end > len(s) {
		return 0, 0
	}

	var value rune
	for idx := pos; idx < end; idx++ {
		var digit rune
		switch c := s[idx]; {
		case '0' <= c && c <= '9':
			digit = rune(c - '0')
		case 'a' <= c && c <= 'f':
			digit = rune(c-'a') + 10
		case 'A' <= c && c <= 'F':
			digit = rune(c-'A') + 10
		default:
			return 0, 0
		}
		value = value<<4 + digit
	}
	return value, count
}
|
|
|
|
// jsonNumberParser is a dedicated parser for JSON numbers used by JSONNumber().
|
|
type jsonNumberParser struct{}
|
|
|
|
func (p *jsonNumberParser) parse(_ *Arena, ctx *ParseContext, start int) ParseResult {
|
|
if start >= len(ctx.Input) {
|
|
if ctx.IsPartial {
|
|
return NewParseResultRange(NeedMoreInput, start, start)
|
|
}
|
|
return NewParseResult(Fail, start)
|
|
}
|
|
if ctx.Input[start] == '-' || (ctx.Input[start] >= '0' && ctx.Input[start] <= '9') {
|
|
return parseJSONNumber(ctx, start, start)
|
|
}
|
|
return NewParseResult(Fail, start)
|
|
}
|
|
|
|
// BuildPegParser is a helper that creates a parser using a builder function.
|
|
func BuildPegParser(fn func(b *Builder) ParserID) *Arena {
|
|
b := NewBuilder()
|
|
root := fn(b)
|
|
b.SetRoot(root)
|
|
return b.Build()
|
|
}
|