From 35290e146b8b575cd691c844dd611ead3c111c0b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 31 Mar 2024 13:04:09 +0200 Subject: [PATCH] fix(grammar): respect JSONmode and grammar from user input (#1935) * fix(grammar): Fix JSON mode and custom grammar * tests(aio): add jsonmode test * tests(aio): add functioncall test * fix(aio): use hermes-2-pro-mistral as llm for CPU profile * add phi-2-orange --- aio/cpu/text-to-text.yaml | 31 ++++++++-- core/http/endpoints/openai/chat.go | 2 + core/http/endpoints/openai/completion.go | 2 + embedded/models/phi-2-orange.yaml | 30 +++++++++ tests/e2e-aio/e2e_test.go | 79 +++++++++++++++++++++++- 5 files changed, 139 insertions(+), 5 deletions(-) create mode 100644 embedded/models/phi-2-orange.yaml diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml index 4fd88500b..aeb3c8426 100644 --- a/aio/cpu/text-to-text.yaml +++ b/aio/cpu/text-to-text.yaml @@ -1,25 +1,48 @@ name: gpt-4 mmap: true parameters: - model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf + model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q2_K.gguf template: chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} + {{ if .FunctionCall }}{{end}} + {{ if eq .RoleName "tool" }}{{end}} {{if .Content}}{{.Content}}{{end}} + {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}} + {{ if .FunctionCall }}{{end}} + {{ if eq .RoleName "tool" }}{{end}} <|im_end|> + # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling + function: | + <|im_start|>system + You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: + + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + + Use the following pydantic model json schema for each tool call you will make: + {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} + For each function call return a json object with function name and arguments within XML tags as follows: + + {'arguments': , 'name': } + <|im_end|> + {{.Input}} + <|im_start|>assistant + chat: | {{.Input}} <|im_start|>assistant completion: | {{.Input}} -context_size: 2048 +context_size: 4096 f16: true stopwords: - <|im_end|> - usage: | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "phi-2-chat", + "model": "gpt-4", "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] }' diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index f5f03eb4e..837b6e12a 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -185,6 +185,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup input.Grammar = grammar.JSONBNF } + config.Grammar = input.Grammar + // process functions if we have any defined or if we have a function call string if len(input.Functions) > 0 && config.ShouldUseFunctions() { log.Debug().Msgf("Response needs to process functions") diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index a67f09938..699234750 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -73,6 +73,8 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a input.Grammar = grammar.JSONBNF } + config.Grammar = input.Grammar + log.Debug().Msgf("Parameter Config: %+v", config) if input.Stream { diff --git a/embedded/models/phi-2-orange.yaml b/embedded/models/phi-2-orange.yaml new file mode 100644 index 000000000..9207d2832 --- /dev/null +++ b/embedded/models/phi-2-orange.yaml @@ -0,0 +1,30 @@ +name: phi-2-chat +mmap: true +parameters: + model: huggingface://l3utterfly/phi-2-orange-GGUF/phi-2-orange.Q6_K.gguf + +template: + chat_message: | + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} + {{if .Content}}{{.Content}}{{end}} + <|im_end|> + chat: | + {{.Input}} + <|im_start|>assistant + completion: | + {{.Input}} +context_size: 4096 +f16: true +stopwords: +- <|im_end|> +- + +description: | + This model is a chatbot that can be used for general conversation. + [Model card](https://huggingface.co/TheBloke/phi-2-orange-GGUF) + +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "phi-2-chat", + "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] + }' diff --git a/tests/e2e-aio/e2e_test.go b/tests/e2e-aio/e2e_test.go index c52d789ef..8fcd1280d 100644 --- a/tests/e2e-aio/e2e_test.go +++ b/tests/e2e-aio/e2e_test.go @@ -2,6 +2,7 @@ package e2e_test import ( "context" + "encoding/json" "fmt" "io" "net/http" @@ -9,8 +10,8 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - "github.com/sashabaranov/go-openai" + "github.com/sashabaranov/go-openai/jsonschema" ) var _ = Describe("E2E test", func() { @@ -40,6 +41,82 @@ var _ = Describe("E2E test", func() { Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(resp.Choices[0].Message.Content)) }) }) + + Context("function calls", func() { + It("correctly invoke", func() { + params := jsonschema.Definition{ + Type: jsonschema.Object, + Properties: map[string]jsonschema.Definition{ + "location": { + Type: jsonschema.String, + Description: "The city and state, e.g. San Francisco, CA", + }, + "unit": { + Type: jsonschema.String, + Enum: []string{"celsius", "fahrenheit"}, + }, + }, + Required: []string{"location"}, + } + + f := openai.FunctionDefinition{ + Name: "get_current_weather", + Description: "Get the current weather in a given location", + Parameters: params, + } + t := openai.Tool{ + Type: openai.ToolTypeFunction, + Function: &f, + } + + dialogue := []openai.ChatCompletionMessage{ + {Role: openai.ChatMessageRoleUser, Content: "What is the weather in Boston today?"}, + } + resp, err := client.CreateChatCompletion(context.TODO(), + openai.ChatCompletionRequest{ + Model: openai.GPT4, + Messages: dialogue, + Tools: []openai.Tool{t}, + }, + ) + Expect(err).ToNot(HaveOccurred()) + Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp)) + + msg := resp.Choices[0].Message + Expect(len(msg.ToolCalls)).To(Equal(1), fmt.Sprint(msg.ToolCalls)) + Expect(msg.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), fmt.Sprint(msg.ToolCalls[0].Function.Name)) + Expect(msg.ToolCalls[0].Function.Arguments).To(ContainSubstring("Boston"), fmt.Sprint(msg.ToolCalls[0].Function.Arguments)) + }) + }) + Context("json", func() { + It("correctly", func() { + model := "gpt-4" + + req := openai.ChatCompletionRequest{ + ResponseFormat: &openai.ChatCompletionResponseFormat{Type: openai.ChatCompletionResponseFormatTypeJSONObject}, + Model: model, + Messages: []openai.ChatCompletionMessage{ + { + + Role: "user", + Content: "An animal with 'name', 'gender' and 'legs' fields", + }, + }, + } + + resp, err := client.CreateChatCompletion(context.TODO(), req) + Expect(err).ToNot(HaveOccurred()) + Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp)) + + var i map[string]interface{} + err = json.Unmarshal([]byte(resp.Choices[0].Message.Content), &i) + Expect(err).ToNot(HaveOccurred()) + Expect(i).To(HaveKey("name")) + Expect(i).To(HaveKey("gender")) + Expect(i).To(HaveKey("legs")) + }) + }) + Context("images", func() { It("correctly", func() { resp, err := client.CreateImage(context.TODO(),