Compare commits

..

1 Commits

Author SHA1 Message Date
Ettore Di Giacinto
ff55f311cb refactor: break down json grammar parser in different files
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-24 23:21:25 +02:00
8 changed files with 125 additions and 115 deletions

View File

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=79167d9e49aef9caa98e13ee7ca067ec9f88b4b5
CPPLLAMA_VERSION?=081fe431aa8fb6307145c4feb3eed4f48cab19f8
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

View File

@@ -0,0 +1,47 @@
package functions
import "regexp"
var (
PRIMITIVE_RULES = map[string]string{
"boolean": `("true" | "false") space`,
"number": `("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space`,
"integer": `("-"? ([0-9] | [1-9] [0-9]*)) space`,
"string": `"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space`,
// TODO: we shouldn't forbid \" and \\ or all unicode and have this branch here,
// however, if we don't have it, the grammar will be ambiguous and
// empirically results are way worse.
"freestring": `(
[^\x00] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* space`,
"null": `"null" space`,
}
INVALID_RULE_CHARS_RE = regexp.MustCompile(`[^a-zA-Z0-9-]+`)
GRAMMAR_LITERAL_ESCAPE_RE = regexp.MustCompile(`[\r\n"]`)
GRAMMAR_LITERAL_ESCAPES = map[string]string{
"\r": `\r`,
"\n": `\n`,
`"`: `\"`,
}
)
const (
SPACE_RULE = `" "?`
arrayNewLines = `arr ::=
"[\n" (
realvalue
(",\n" realvalue)*
)? "]"`
array = `arr ::=
"[" (
realvalue
("," realvalue)*
)? "]"`
)

View File

@@ -0,0 +1,22 @@
package functions
import "encoding/json"
type Item struct {
Type string `json:"type"`
Properties map[string]interface{} `json:"properties"`
}
type JSONFunctionStructure struct {
OneOf []Item `json:"oneOf,omitempty"`
AnyOf []Item `json:"anyOf,omitempty"`
Defs map[string]interface{} `json:"$defs,omitempty"`
}
func (j JSONFunctionStructure) Grammar(options ...func(*GrammarOption)) string {
grammarOpts := &GrammarOption{}
grammarOpts.Apply(options...)
dat, _ := json.Marshal(j)
return NewJSONSchemaConverter(grammarOpts.PropOrder).GrammarFromBytes(dat, options...)
}

View File

@@ -18,6 +18,15 @@ type Function struct {
}
type Functions []Function
type FunctionName struct {
Const string `json:"const"`
}
type Argument struct {
Type string `json:"type"`
Properties map[string]interface{} `json:"properties"`
}
type Tool struct {
Type string `json:"type"`
Function Function `json:"function,omitempty"`
@@ -86,3 +95,8 @@ func (f Functions) Select(name string) Functions {
return funcs
}
func jsonString(v interface{}) string {
b, _ := json.Marshal(v)
return string(b)
}

View File

@@ -1,8 +1,10 @@
package functions
package functions_test
import (
"testing"
. "github.com/mudler/LocalAI/pkg/functions"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
@@ -11,3 +13,13 @@ func TestGrammar(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Grammar test suite")
}
func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} {
property := map[string]interface{}{}
property[field1] = FunctionName{Const: name}
property[field2] = Argument{
Type: "object",
Properties: properties,
}
return property
}

View File

@@ -5,70 +5,12 @@ package functions
import (
"encoding/json"
"fmt"
"regexp"
"sort"
"strings"
"github.com/mudler/LocalAI/pkg/utils"
)
const (
JSONBNF = `root ::= object
value ::= object | array | string | number | ("true" | "false" | "null") ws
object ::=
"{" ws (
string ":" ws value
("," ws string ":" ws value)*
)? "}" ws
array ::=
"[" ws (
value
("," ws value)*
)? "]" ws
string ::=
"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
)* "\"" ws
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
ws ::= ([ \t\n] ws)?`
)
var (
SPACE_RULE = `" "?`
PRIMITIVE_RULES = map[string]string{
"boolean": `("true" | "false") space`,
"number": `("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space`,
"integer": `("-"? ([0-9] | [1-9] [0-9]*)) space`,
"string": `"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space`,
// TODO: we shouldn't forbid \" and \\ or all unicode and have this branch here,
// however, if we don't have it, the grammar will be ambiguous and
// empirically results are way worse.
"freestring": `(
[^\x00] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* space`,
"null": `"null" space`,
}
INVALID_RULE_CHARS_RE = regexp.MustCompile(`[^a-zA-Z0-9-]+`)
GRAMMAR_LITERAL_ESCAPE_RE = regexp.MustCompile(`[\r\n"]`)
GRAMMAR_LITERAL_ESCAPES = map[string]string{
"\r": `\r`,
"\n": `\n`,
`"`: `\"`,
}
)
type JSONSchemaConverter struct {
propOrder map[string]int
rules map[string]string
@@ -114,18 +56,6 @@ func (sc *JSONSchemaConverter) addRule(name, rule string) string {
return key
}
const arrayNewLines = `arr ::=
"[\n" (
realvalue
(",\n" realvalue)*
)? "]"`
const array = `arr ::=
"[" (
realvalue
("," realvalue)*
)? "]"`
func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) string {
grammarOpts := &GrammarOption{}
@@ -343,36 +273,3 @@ func (sc *JSONSchemaConverter) GrammarFromBytes(b []byte, options ...func(*Gramm
_ = json.Unmarshal(b, &schema)
return sc.Grammar(schema, options...)
}
func jsonString(v interface{}) string {
b, _ := json.Marshal(v)
return string(b)
}
type FunctionName struct {
Const string `json:"const"`
}
type Argument struct {
Type string `json:"type"`
Properties map[string]interface{} `json:"properties"`
}
type Item struct {
Type string `json:"type"`
Properties map[string]interface{} `json:"properties"`
}
type JSONFunctionStructure struct {
OneOf []Item `json:"oneOf,omitempty"`
AnyOf []Item `json:"anyOf,omitempty"`
Defs map[string]interface{} `json:"$defs,omitempty"`
}
func (j JSONFunctionStructure) Grammar(options ...func(*GrammarOption)) string {
grammarOpts := &GrammarOption{}
grammarOpts.Apply(options...)
dat, _ := json.Marshal(j)
return NewJSONSchemaConverter(grammarOpts.PropOrder).GrammarFromBytes(dat, options...)
}

View File

@@ -9,16 +9,6 @@ import (
. "github.com/onsi/gomega"
)
func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} {
property := map[string]interface{}{}
property[field1] = FunctionName{Const: name}
property[field2] = Argument{
Type: "object",
Properties: properties,
}
return property
}
var testFunctions = []Item{
{
Type: "object",

View File

@@ -0,0 +1,28 @@
package functions
const (
JSONBNF = `root ::= object
value ::= object | array | string | number | ("true" | "false" | "null") ws
object ::=
"{" ws (
string ":" ws value
("," ws string ":" ws value)*
)? "}" ws
array ::=
"[" ws (
value
("," ws value)*
)? "]" ws
string ::=
"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
)* "\"" ws
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
ws ::= ([ \t\n] ws)?`
)