diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml
index 4fd88500b..aeb3c8426 100644
--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@@ -1,25 +1,48 @@
name: gpt-4
mmap: true
parameters:
- model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
+ model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q2_K.gguf
template:
chat_message: |
- <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
+ <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
+ {{ if .FunctionCall }}<tool_call>{{end}}
+ {{ if eq .RoleName "tool" }}<tool_response>{{end}}
{{if .Content}}{{.Content}}{{end}}
+ {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
+ {{ if .FunctionCall }}</tool_call>{{end}}
+ {{ if eq .RoleName "tool" }}</tool_response>{{end}}
<|im_end|>
+ # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
+ function: |
+ <|im_start|>system
+ You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+ <tools>
+ {{range .Functions}}
+ {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+ {{end}}
+ </tools>
+ Use the following pydantic model json schema for each tool call you will make:
+ {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
+ For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+ <tool_call>
+ {'arguments': <args-dict>, 'name': <function-name>}
+ </tool_call>
+ <|im_end|>
+ {{.Input}}
+ <|im_start|>assistant
+
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
-context_size: 2048
+context_size: 4096
f16: true
stopwords:
- <|im_end|>
-
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
- "model": "phi-2-chat",
+ "model": "gpt-4",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index f5f03eb4e..837b6e12a 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -185,6 +185,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
input.Grammar = grammar.JSONBNF
}
+ config.Grammar = input.Grammar
+
// process functions if we have any defined or if we have a function call string
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
log.Debug().Msgf("Response needs to process functions")
diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go
index a67f09938..699234750 100644
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@@ -73,6 +73,8 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
input.Grammar = grammar.JSONBNF
}
+ config.Grammar = input.Grammar
+
log.Debug().Msgf("Parameter Config: %+v", config)
if input.Stream {
diff --git a/embedded/models/phi-2-orange.yaml b/embedded/models/phi-2-orange.yaml
new file mode 100644
index 000000000..9207d2832
--- /dev/null
+++ b/embedded/models/phi-2-orange.yaml
@@ -0,0 +1,30 @@
+name: phi-2-chat
+mmap: true
+parameters:
+ model: huggingface://l3utterfly/phi-2-orange-GGUF/phi-2-orange.Q6_K.gguf
+
+template:
+ chat_message: |
+ <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
+ {{if .Content}}{{.Content}}{{end}}
+ <|im_end|>
+ chat: |
+ {{.Input}}
+ <|im_start|>assistant
+ completion: |
+ {{.Input}}
+context_size: 4096
+f16: true
+stopwords:
+- <|im_end|>
+-
+
+description: |
+ This model is a chatbot that can be used for general conversation.
+ [Model card](https://huggingface.co/l3utterfly/phi-2-orange-GGUF)
+
+usage: |
+ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "phi-2-chat",
+ "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
+ }'
diff --git a/tests/e2e-aio/e2e_test.go b/tests/e2e-aio/e2e_test.go
index c52d789ef..8fcd1280d 100644
--- a/tests/e2e-aio/e2e_test.go
+++ b/tests/e2e-aio/e2e_test.go
@@ -2,6 +2,7 @@ package e2e_test
import (
"context"
+ "encoding/json"
"fmt"
"io"
"net/http"
@@ -9,8 +10,8 @@ import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
-
"github.com/sashabaranov/go-openai"
+ "github.com/sashabaranov/go-openai/jsonschema"
)
var _ = Describe("E2E test", func() {
@@ -40,6 +41,82 @@ var _ = Describe("E2E test", func() {
Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(resp.Choices[0].Message.Content))
})
})
+
+ Context("function calls", func() {
+ It("correctly invoke", func() {
+ params := jsonschema.Definition{
+ Type: jsonschema.Object,
+ Properties: map[string]jsonschema.Definition{
+ "location": {
+ Type: jsonschema.String,
+ Description: "The city and state, e.g. San Francisco, CA",
+ },
+ "unit": {
+ Type: jsonschema.String,
+ Enum: []string{"celsius", "fahrenheit"},
+ },
+ },
+ Required: []string{"location"},
+ }
+
+ f := openai.FunctionDefinition{
+ Name: "get_current_weather",
+ Description: "Get the current weather in a given location",
+ Parameters: params,
+ }
+ t := openai.Tool{
+ Type: openai.ToolTypeFunction,
+ Function: &f,
+ }
+
+ dialogue := []openai.ChatCompletionMessage{
+ {Role: openai.ChatMessageRoleUser, Content: "What is the weather in Boston today?"},
+ }
+ resp, err := client.CreateChatCompletion(context.TODO(),
+ openai.ChatCompletionRequest{
+ Model: openai.GPT4,
+ Messages: dialogue,
+ Tools: []openai.Tool{t},
+ },
+ )
+ Expect(err).ToNot(HaveOccurred())
+ Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))
+
+ msg := resp.Choices[0].Message
+ Expect(len(msg.ToolCalls)).To(Equal(1), fmt.Sprint(msg.ToolCalls))
+ Expect(msg.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), fmt.Sprint(msg.ToolCalls[0].Function.Name))
+ Expect(msg.ToolCalls[0].Function.Arguments).To(ContainSubstring("Boston"), fmt.Sprint(msg.ToolCalls[0].Function.Arguments))
+ })
+ })
+ Context("json", func() {
+ It("correctly", func() {
+ model := "gpt-4"
+
+ req := openai.ChatCompletionRequest{
+ ResponseFormat: &openai.ChatCompletionResponseFormat{Type: openai.ChatCompletionResponseFormatTypeJSONObject},
+ Model: model,
+ Messages: []openai.ChatCompletionMessage{
+ {
+
+ Role: "user",
+ Content: "An animal with 'name', 'gender' and 'legs' fields",
+ },
+ },
+ }
+
+ resp, err := client.CreateChatCompletion(context.TODO(), req)
+ Expect(err).ToNot(HaveOccurred())
+ Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))
+
+ var i map[string]interface{}
+ err = json.Unmarshal([]byte(resp.Choices[0].Message.Content), &i)
+ Expect(err).ToNot(HaveOccurred())
+ Expect(i).To(HaveKey("name"))
+ Expect(i).To(HaveKey("gender"))
+ Expect(i).To(HaveKey("legs"))
+ })
+ })
+
Context("images", func() {
It("correctly", func() {
resp, err := client.CreateImage(context.TODO(),