Compare commits

...

8 Commits

Author SHA1 Message Date
Jeffrey Morgan
791650ddef sched: only error when over-allocating system memory (#5626) 2024-07-11 00:53:12 -07:00
Jeffrey Morgan
efbf41ed81 llm: dont link cuda with compat libs (#5621) 2024-07-10 20:01:52 -07:00
Michael Yang
cf15589851 Merge pull request #5620 from ollama/mxyng/templates
update embedded templates
2024-07-10 17:16:24 -07:00
Michael Yang
19753c18c0 update embedded templates 2024-07-10 17:03:08 -07:00
Michael Yang
41be28096a add system prompt to first legacy template 2024-07-10 17:03:08 -07:00
Michael Yang
37a570f962 Merge pull request #5612 from ollama/mxyng/mem
chatglm graph
2024-07-10 14:18:33 -07:00
Michael Yang
5a739ff4cb chatglm graph 2024-07-10 13:43:47 -07:00
Jeffrey Morgan
4e262eb2a8 remove GGML_CUDA_FORCE_MMQ=on from build (#5588) 2024-07-10 13:17:13 -07:00
33 changed files with 250 additions and 126 deletions

View File

@@ -178,7 +178,7 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} ${OLLAMA_CUSTOM_CUDA_DEFS}"
echo "Building custom CUDA GPU"
else
CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_FLAGS=-t8 -DGGML_CUDA_FORCE_MMQ=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} -DCMAKE_LIBRARY_PATH=/usr/local/cuda/compat"
CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_FLAGS=-t8 -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}"
fi
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS}"
BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}"

View File

@@ -424,6 +424,32 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
4*batch*(3*embedding+vocab)+embedding*vocab*105/128,
4*batch*(2*embedding+1+2*embeddingHeadsK*headsKV+context+context*headsKV)+4*embeddingHeadsK*context*headsKV+embedding*embeddingHeadsK*headsKV*9/16,
)
case "chatglm":
fullOffload = 4 * batch * (embedding + vocab)
partialOffload = 4*batch*(embedding+vocab) + embedding*vocab*105/128
if qkvBias, ok := layers["blk.0"]["attn_qkv.bias"]; ok {
fullOffload = max(
fullOffload,
4*batch*(2+
2*embedding+
context+
context*heads+
embeddingHeadsK*heads+
qkvBias.Shape[0]),
)
partialOffload = max(
partialOffload,
4*batch*(1+
2*embedding+
embeddingHeadsK*heads+
context+
context*heads)+
4*embeddingHeadsK*context+
4*context*embeddingHeadsK+
4*qkvBias.Shape[0],
)
}
}
return

View File

@@ -122,6 +122,15 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
}
}
// On linux, over-allocating CPU memory will almost always result in an error
if runtime.GOOS == "linux" {
systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize
if systemMemoryRequired > systemTotalMemory {
slog.Warn("model request too large for system", "requested", format.HumanBytes2(systemMemoryRequired), "system", format.HumanBytes2(systemTotalMemory))
return nil, fmt.Errorf("model requires more system memory (%s) than is available (%s)", format.HumanBytes2(systemMemoryRequired), format.HumanBytes2(systemTotalMemory))
}
}
estimate.log()
// Loop through potential servers

View File

@@ -161,7 +161,7 @@ func TestChatPrompt(t *testing.T) {
{Role: "user", Content: "A test. And a thumping good one at that, I'd wager."},
},
expect: expect{
prompt: "You're a test, Harry! I-I'm a what? You are the Test Who Lived. A test. And a thumping good one at that, I'd wager. ",
prompt: "You are the Test Who Lived. You're a test, Harry! I-I'm a what? A test. And a thumping good one at that, I'd wager. ",
},
},
}

View File

@@ -546,8 +546,8 @@ func TestCreateDetectTemplate(t *testing.T) {
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"),
filepath.Join(p, "blobs", "sha256-9512c372dfc7d84d6065b8dd2b601aeed8cc1a78e7a7aa784a42fff37f5524b7"),
filepath.Join(p, "blobs", "sha256-b8b78cb8c6eefd14c06f1af042e6161255bf87bbf2dd14fce57cdac893db8139"),
filepath.Join(p, "blobs", "sha256-68b0323b2f21572bc09ba07554b16b379a5713ee48ef8c25a7661a1f71cfce77"),
filepath.Join(p, "blobs", "sha256-eb72fb7c550ee1f1dec4039bd65382acecf5f7536a30fb7ccace39a8d0cb590b"),
})
})

View File

@@ -135,11 +135,6 @@ func (s *Scheduler) processPending(ctx context.Context) {
}
for {
cpus := s.getCpuFn()
var systemMem gpu.GpuInfo
if len(cpus) > 0 {
systemMem = cpus[0]
}
var runnerToExpire *runnerRef
s.loadedMu.Lock()
runner := s.loaded[pending.model.ModelPath]
@@ -193,38 +188,6 @@ func (s *Scheduler) processPending(ctx context.Context) {
break
}
estimate := llm.EstimateGPULayers(gpus, ggml, pending.model.ProjectorPaths, pending.opts)
maxSize := systemMem.FreeMemory
// Add available GPU memory to the total pool
// macOS hardware has unified memory so don't double count
if runtime.GOOS != "darwin" {
for _, gpu := range gpus {
if gpu.Library == "cpu" {
continue
}
if loadedCount == 0 {
// If no other models are loaded, set the limit based on what's available
maxSize += gpu.FreeMemory
} else {
// Other models could be unloaded, favor total memory for limit
maxSize += gpu.TotalMemory
}
}
}
// Block attempting to load a model larger than system memory + GPU memory
if estimate.TotalSize > maxSize {
slog.Warn("model request too large for system", "requested", format.HumanBytes2(estimate.TotalSize), "system", format.HumanBytes2(maxSize))
// Linux will crash if over-allocating memory - return an error to the user.
// TODO (jmorganca): add reasonable upper limits for darwin and windows as well
if runtime.GOOS == "linux" {
pending.errCh <- fmt.Errorf("requested model (%s) is too large for this system (%s)", format.HumanBytes2(estimate.TotalSize), format.HumanBytes2(maxSize))
break
}
}
// Evaluate if the model will fit in the available system memory, or if we should unload a model first
if len(gpus) == 1 && gpus[0].Library == "cpu" {
// simplifying assumption of defaultParallel when in CPU mode

View File

@@ -3,6 +3,6 @@
{{- end }}
{{- range .Messages }}<start_{{ .Role }}>{{ .Content }}<end_message>
{{- end }}<start_assistant>
{{- else }}
{{- else -}}
{{ if .System }}<start_system>{{ .System }}<end_message>{{ end }}{{ if .Prompt }}<start_user>{{ .Prompt }}<end_message>{{ end }}<start_assistant>{{ .Response }}<end_message>
{{- end }}
{{- end -}}

View File

@@ -1,6 +1,7 @@
{{- if .Messages }}
{{- if .System }}{{ .System }}
{{- end }}
{{ end }}
{{- range .Messages }}
{{- if eq .Role "user" }}### Instruction:
{{- else if eq .Role "assistant" }}### Response:
@@ -8,7 +9,7 @@
{{ .Content }}
{{ end }}### Response:
{{ else }}
{{ else -}}
{{ if .System }}{{ .System }}
{{ end }}{{ if .Prompt }}### Instruction:
@@ -16,4 +17,5 @@
{{ end }}### Response:
{{ .Response }}
{{- end }}
{{ end -}}

View File

@@ -5,11 +5,11 @@
{{- range .Messages }}<|im_start|>{{ .Role }}
{{ .Content }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ else }}
{{ else -}}
{{ if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ .Response }}<|im_end|>
{{- end }}
{{ end -}}

View File

@@ -8,10 +8,11 @@
{{- end }} {{ .Content }}
{{ end }}Assistant:
{{- else }}
{{- else -}}
{{ if .System }}System: {{ .System }}
{{ end }}{{ if .Prompt }}User: {{ .Prompt }}
{{ end }}Assistant: <|begin_of_text|>{{ .Response }}
{{- end }}
{{ end }}Assistant: {{ .Response }}
{{ end -}}

View File

@@ -7,13 +7,13 @@
{{ .Content }} <step> {{ end }}Source: assistant
Destination: user
{{ else }}
{{ if .System }} Source: system
{{ else -}}
{{ if .System }}Source: system
{{ .System }} <step>{{ end }} Source: user
{{ .System }} <step> {{ end }}Source: user
{{ .Prompt }} <step> Source: assistant
Destination: user
{{ .Response }}<step>
{{- end }}
{{ .Response }} <step>
{{- end -}}

View File

@@ -6,8 +6,10 @@
{{ else if eq .Role "assistant" }}Falcon:
{{ end }}{{ .Content }}
{{ end }}Falcon:
{{ else }}
{{ if .System }}{{ .System }}
{{ end }}{{ if .Prompt }}User: {{ .Prompt }}
{{ end }}Assistant: {{ .Response }}
{{- end }}
{{ else -}}
{{ if .System }}System: {{ .System }}
{{ end }}{{ if .Prompt }}User:
{{ .Prompt }}
{{ end }}Falcon:
{{ .Response }}
{{ end -}}

View File

@@ -8,9 +8,10 @@
{{- end }}
{{ .Content }}<end_of_turn>
{{ end }}<start_of_turn>model
{{ else }}
{{ else -}}
<start_of_turn>user
{{ if .System }}{{ .System }} {{ end }}{{ .Prompt }}<end_of_turn>
{{ if .System }}{{ .System }}
{{ end }}{{ .Prompt }}<end_of_turn>
<start_of_turn>model
{{ .Response }}<end_of_turn>
{{- end }}
{{ end -}}

View File

@@ -10,9 +10,8 @@
{{ .Content }}
{{ end }}Answer:
{{ else }}
{{ if .System }}
System:
{{ else -}}
{{ if .System }}System:
{{ .System }}
{{ end }}{{ if .Prompt }}Question:
@@ -20,4 +19,5 @@ System:
{{ end }}Answer:
{{ .Response }}
{{- end }}
{{ end -}}

View File

@@ -9,8 +9,8 @@
{{- else }} [/INST] {{ .Content }}</s><s>
{{- end }}
{{- end }} [/INST]
{{- else }}
[INST] <<SYS>>{{ .System }}<</SYS>>
{{- else -}}
[INST] <<SYS>>{{ if .System }}{{ .System }}{{ end }}<</SYS>>
{{ .Prompt }} [/INST] {{ .Response }}
{{- end }}
{{ .Prompt }} [/INST] {{ .Response }}</s>
{{- end -}}

View File

@@ -8,7 +8,7 @@
{{ .Content }}<|eot_id|>
{{- end }}<|start_header_id|>assistant<|end_header_id|>
{{ else }}
{{ else -}}
{{ if .System }}<|start_header_id|>system<|end_header_id|>
{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
@@ -16,4 +16,4 @@
{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
{{ .Response }}<|eot_id|>
{{- end }}
{{- end -}}

View File

@@ -9,7 +9,7 @@
{{ .Content }}
{{ end }}@@ Response
{{ else }}
{{ else -}}
{{ if .System }}{{ .System }}
{{ end }}{{ if .Prompt }}@@ Instruction
@@ -17,4 +17,5 @@
{{ end }}@@ Response
{{ .Response }}
{{- end }}
{{ end -}}

View File

@@ -5,5 +5,6 @@
{{- else if eq .Role "assistant" }}[/INST] {{ .Content }}</s>
{{- end }}
{{- end }}[/INST]
{{- else }}[INST] {{ if .System }}{{ .System }} {{ end }}{{ .Prompt }} [/INST] {{ .Response }}
{{- end }}
{{- else -}}
[INST] {{ if .System }}{{ .System }} {{ end }}{{ .Prompt }}[/INST] {{ .Response }}</s>
{{- end -}}

View File

@@ -1,11 +1,11 @@
{{- if .Messages }}
{{- if .System }}GPT Correct System: {{ .System }}<|end_of_turn|>
{{- if .System }}GPT4 Correct System: {{ .System }}<|end_of_turn|>
{{- end }}
{{- range .Messages }}GPT Correct
{{- range .Messages }}GPT4 Correct
{{- if eq .Role "user" }} User:
{{- else if eq .Role "assistant" }} Assistant:
{{- end }} {{ .Content }}<|end_of_turn|>
{{- end }}GPT Correct Assistant:
{{- else }}
{{ .System }}<|end_of_turn|>GPT4 Correct User: {{ .Prompt }}<|end_of_turn|>GPT4 Correct Assistant: {{ .Response }}<|end_of_turn|>
{{- end }}
{{- end }}GPT4 Correct Assistant:
{{- else -}}
{{ if .System }}GPT4 Correct System: {{ .System }}<|end_of_turn|>{{ end }}GPT4 Correct User: {{ .Prompt }}<|end_of_turn|>GPT4 Correct Assistant: {{ .Response }}<|end_of_turn|>
{{- end -}}

View File

@@ -5,11 +5,11 @@
{{- range .Messages }}<|{{ .Role }}|>
{{ .Content }}<|end|>
{{ end }}<|assistant|>
{{ else }}
{{ else -}}
{{ if .System }}<|system|>
{{ .System }}<|end|>
{{ end }}{{ if .Prompt }}<|user|>
{{ .Prompt }}<|end|>
{{ end }}<|assistant|>
{{ .Response }}<|end|>
{{- end }}
{{ end -}}

View File

@@ -10,7 +10,7 @@
{{ .Content }}</s>
{{ end }}
{{ end }}### Assistant:
{{ else }}
{{ else -}}
{{ if .System }}### System:
{{ .System }}
@@ -18,5 +18,6 @@
{{ .Prompt }}
{{ end }}### Assistant:
{{ .Response }}
{{- end }}
{{ .Response }}</s>
{{ end -}}

View File

@@ -11,14 +11,13 @@
{{ end }}
{{- end }}### Response
{{ else }}
{{ else -}}
{{ if .System }}{{ .System }}
{{ end }}{{ if .Prompt }}### Instruction
{{ .Prompt }}
{{ end }}### Response
{{ .Response }}<|endoftext|>
{{- end }}
{{ end -}}

View File

@@ -143,11 +143,14 @@ func (t *Template) Vars() []string {
// Values holds the inputs that Template.Execute renders.
type Values struct {
// Messages is the list of messages handed to Execute, which collates them
// before rendering.
Messages []api.Message
// forceLegacy is a flag used to test compatibility with legacy templates.
// When it is set, Execute skips the messages-based rendering path even if
// the template references messages.
forceLegacy bool
}
func (t *Template) Execute(w io.Writer, v Values) error {
system, collated := collate(v.Messages)
if slices.Contains(t.Vars(), "messages") {
if !v.forceLegacy && slices.Contains(t.Vars(), "messages") {
return t.Template.Execute(w, map[string]any{
"System": system,
"Messages": collated,
@@ -157,15 +160,19 @@ func (t *Template) Execute(w io.Writer, v Values) error {
var b bytes.Buffer
var prompt, response string
for i, m := range collated {
if m.Role == "user" {
switch m.Role {
case "user":
prompt = m.Content
} else {
if i != 0 {
system = ""
}
case "assistant":
response = m.Content
}
if i != len(collated)-1 && prompt != "" && response != "" {
if err := t.Template.Execute(&b, map[string]any{
"System": "",
"System": system,
"Prompt": prompt,
"Response": response,
}); err != nil {
@@ -178,18 +185,21 @@ func (t *Template) Execute(w io.Writer, v Values) error {
}
var cut bool
tree := t.Template.Copy()
// for the last message, cut everything after "{{ .Response }}"
tree.Root.Nodes = slices.DeleteFunc(tree.Root.Nodes, func(n parse.Node) bool {
if slices.Contains(parseNode(n), "Response") {
cut = true
nodes := deleteNode(t.Template.Root.Copy(), func(n parse.Node) bool {
switch t := n.(type) {
case *parse.ActionNode:
case *parse.FieldNode:
if slices.Contains(t.Ident, "Response") {
cut = true
}
}
return cut
})
if err := template.Must(template.New("").AddParseTree("", tree)).Execute(&b, map[string]any{
"System": system,
tree := parse.Tree{Root: nodes.(*parse.ListNode)}
if err := template.Must(template.New("").AddParseTree("", &tree)).Execute(&b, map[string]any{
"System": "",
"Prompt": prompt,
}); err != nil {
return err
@@ -286,3 +296,72 @@ func parseNode(n parse.Node) []string {
return nil
}
// deleteNode walks the tree rooted at n and removes every node for which fn
// reports true. It is currently used to remove the {{ .Response }} node (and,
// with a sticky predicate, everything after it) from templates.
//
// fn is called on a node before its children are visited, so a stateful
// predicate sees nodes in document order. Nodes are modified in place; pass a
// copy when the original tree must stay intact.
//
// The result is nil when n itself is removed, or when what n depends on is
// removed:
//   - an action is removed when its pipeline is removed
//   - a pipeline is removed as soon as one of its commands has no arguments left
//   - an if/with/range node is removed when its branch is removed
//
// A branch whose body list is removed keeps an empty body, and a removed else
// list drops the else clause. Earlier versions panicked on a failed type
// assertion in these situations.
func deleteNode(n parse.Node, fn func(parse.Node) bool) parse.Node {
	var walk func(n parse.Node) parse.Node
	walk = func(n parse.Node) parse.Node {
		if fn(n) {
			return nil
		}

		switch t := n.(type) {
		case *parse.ListNode:
			var nodes []parse.Node
			for _, c := range t.Nodes {
				if kept := walk(c); kept != nil {
					nodes = append(nodes, kept)
				}
			}

			t.Nodes = nodes
		case *parse.IfNode:
			// the embedded branch is pruned in place; a nil result means the
			// predicate matched the branch itself, so the whole node goes
			if walk(&t.BranchNode) == nil {
				return nil
			}
		case *parse.WithNode:
			if walk(&t.BranchNode) == nil {
				return nil
			}
		case *parse.RangeNode:
			if walk(&t.BranchNode) == nil {
				return nil
			}
		case *parse.BranchNode:
			if t.List != nil && walk(t.List) == nil {
				// a branch needs a body list to execute, so keep it but empty
				t.List.Nodes = nil
			}

			if t.ElseList != nil && walk(t.ElseList) == nil {
				t.ElseList = nil
			}
		case *parse.ActionNode:
			pipe, ok := walk(t.Pipe).(*parse.PipeNode)
			if !ok {
				return nil
			}

			t.Pipe = pipe
		case *parse.PipeNode:
			var commands []*parse.CommandNode
			for _, c := range t.Cmds {
				var args []parse.Node
				for _, a := range c.Args {
					if kept := walk(a); kept != nil {
						args = append(args, kept)
					}
				}

				// a command without arguments cannot be evaluated, drop the pipeline
				if len(args) == 0 {
					return nil
				}

				c.Args = args
				commands = append(commands, c)
			}

			if len(commands) == 0 {
				return nil
			}

			t.Cmds = commands
		}

		return n
	}

	return walk(n)
}

View File

@@ -105,8 +105,8 @@ func TestTemplate(t *testing.T) {
}
for n, tt := range cases {
var actual bytes.Buffer
t.Run(n, func(t *testing.T) {
var actual bytes.Buffer
if err := tmpl.Execute(&actual, Values{Messages: tt}); err != nil {
t.Fatal(err)
}
@@ -120,6 +120,25 @@ func TestTemplate(t *testing.T) {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
t.Run("legacy", func(t *testing.T) {
var legacy bytes.Buffer
if err := tmpl.Execute(&legacy, Values{Messages: tt, forceLegacy: true}); err != nil {
t.Fatal(err)
}
legacyBytes := legacy.Bytes()
if slices.Contains([]string{"chatqa.gotmpl", "openchat.gotmpl", "vicuna.gotmpl"}, match) && legacyBytes[len(legacyBytes)-1] == ' ' {
t.Log("removing trailing space from legacy output")
legacyBytes = legacyBytes[:len(legacyBytes)-1]
} else if slices.Contains([]string{"codellama-70b-instruct.gotmpl", "llama2-chat.gotmpl", "mistral-instruct.gotmpl"}, match) {
t.Skip("legacy outputs cannot be compared to messages outputs")
}
if diff := cmp.Diff(legacyBytes, actual.Bytes()); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
}
})
}
@@ -136,6 +155,21 @@ func TestParse(t *testing.T) {
{"{{ with .Tools }}{{ . }}{{ end }} {{ .System }} {{ .Prompt }}", []string{"prompt", "response", "system", "tools"}},
{"{{ range .Messages }}{{ .Role }} {{ .Content }}{{ end }}", []string{"content", "messages", "role"}},
{"{{ range .Messages }}{{ if eq .Role \"system\" }}SYSTEM: {{ .Content }}{{ else if eq .Role \"user\" }}USER: {{ .Content }}{{ else if eq .Role \"assistant\" }}ASSISTANT: {{ .Content }}{{ end }}{{ end }}", []string{"content", "messages", "role"}},
{`{{- if .Messages }}
{{- if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}
{{- range .Messages }}<|im_start|>{{ .Role }}
{{ .Content }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ else -}}
{{ if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ .Response }}<|im_end|>
{{- end -}}`, []string{"content", "messages", "prompt", "response", "role", "system"}},
}
for _, tt := range cases {
@@ -145,9 +179,8 @@ func TestParse(t *testing.T) {
t.Fatal(err)
}
vars := tmpl.Vars()
if !slices.Equal(tt.vars, vars) {
t.Errorf("expected %v, got %v", tt.vars, vars)
if diff := cmp.Diff(tmpl.Vars(), tt.vars); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
}
@@ -170,7 +203,7 @@ func TestExecuteWithMessages(t *testing.T) {
{"no response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `},
{"response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`},
{"messages", `{{- range $index, $_ := .Messages }}
{{- if eq .Role "user" }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}{{ "\n\n" }}
{{- if eq .Role "user" }}[INST] {{ if and (eq $index 0) $.System }}{{ $.System }}{{ "\n\n" }}
{{- end }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }}
{{- end }}
{{- end }}`},
@@ -191,7 +224,7 @@ func TestExecuteWithMessages(t *testing.T) {
{"response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`},
{"messages", `
{{- range $index, $_ := .Messages }}
{{- if eq .Role "user" }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}{{ "\n\n" }}
{{- if eq .Role "user" }}[INST] {{ if and (eq $index 0) $.System }}{{ $.System }}{{ "\n\n" }}
{{- end }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }}
{{- end }}
{{- end }}`},
@@ -204,9 +237,9 @@ func TestExecuteWithMessages(t *testing.T) {
{Role: "user", Content: "What is your name?"},
},
},
`[INST] Hello friend![/INST] Hello human![INST] You are a helpful assistant!
`[INST] You are a helpful assistant!
What is your name?[/INST] `,
Hello friend![/INST] Hello human![INST] What is your name?[/INST] `,
},
{
"chatml",
@@ -221,7 +254,7 @@ What is your name?[/INST] `,
`},
{"messages", `
{{- range $index, $_ := .Messages }}
{{- if and (eq .Role "user") (eq (len (slice $.Messages $index)) 1) $.System }}<|im_start|>system
{{- if and (eq .Role "user") (eq $index 0) $.System }}<|im_start|>system
{{ $.System }}<|im_end|>{{ "\n" }}
{{- end }}<|im_start|>{{ .Role }}
{{ .Content }}<|im_end|>{{ "\n" }}
@@ -236,12 +269,12 @@ What is your name?[/INST] `,
{Role: "user", Content: "What is your name?"},
},
},
`<|im_start|>user
`<|im_start|>system
You are a helpful assistant!<|im_end|>
<|im_start|>user
Hello friend!<|im_end|>
<|im_start|>assistant
Hello human!<|im_end|>
<|im_start|>system
You are a helpful assistant!<|im_end|>
<|im_start|>user
What is your name?<|im_end|>
<|im_start|>assistant
@@ -300,8 +333,8 @@ Answer: `,
t.Fatal(err)
}
if b.String() != tt.expected {
t.Errorf("expected\n%s,\ngot\n%s", tt.expected, b.String())
if diff := cmp.Diff(b.String(), tt.expected); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
}

View File

@@ -1,4 +1,6 @@
You are a helpful assistant.### Instruction:
You are a helpful assistant.
### Instruction:
Hello, how are you?
### Response:

View File

@@ -9,3 +9,4 @@ Source: system
I'd like to show off how chat templating works! <step> Source: assistant
Destination: user

View File

@@ -3,3 +3,4 @@ Source: user
Hello, how are you? <step> Source: assistant
Destination: user

View File

@@ -7,3 +7,4 @@ Source: user
I'd like to show off how chat templating works! <step> Source: assistant
Destination: user

View File

@@ -1 +1 @@
GPT Correct System: You are a helpful assistant.<|end_of_turn|>GPT Correct User: Hello, how are you?<|end_of_turn|>GPT Correct Assistant: I'm doing great. How can I help you today?<|end_of_turn|>GPT Correct User: I'd like to show off how chat templating works!<|end_of_turn|>GPT Correct Assistant:
GPT4 Correct System: You are a helpful assistant.<|end_of_turn|>GPT4 Correct User: Hello, how are you?<|end_of_turn|>GPT4 Correct Assistant: I'm doing great. How can I help you today?<|end_of_turn|>GPT4 Correct User: I'd like to show off how chat templating works!<|end_of_turn|>GPT4 Correct Assistant:

View File

@@ -1 +1 @@
GPT Correct User: Hello, how are you?<|end_of_turn|>GPT Correct Assistant:
GPT4 Correct User: Hello, how are you?<|end_of_turn|>GPT4 Correct Assistant:

View File

@@ -1 +1 @@
GPT Correct User: Hello, how are you?<|end_of_turn|>GPT Correct Assistant: I'm doing great. How can I help you today?<|end_of_turn|>GPT Correct User: I'd like to show off how chat templating works!<|end_of_turn|>GPT Correct Assistant:
GPT4 Correct User: Hello, how are you?<|end_of_turn|>GPT4 Correct Assistant: I'm doing great. How can I help you today?<|end_of_turn|>GPT4 Correct User: I'd like to show off how chat templating works!<|end_of_turn|>GPT4 Correct Assistant:

View File

@@ -7,8 +7,9 @@
{{ else if eq .Role "assistant" }}ASSISTANT: {{ .Content }}</s>
{{ end }}
{{- end }}ASSISTANT:
{{- else }}
{{- else -}}
{{ if .System }}{{ .System }}
{{ end }}{{ if .Prompt }}USER: {{ .Prompt }}
{{ end }}ASSISTANT: {{ .Response }}
{{- end }}
{{ end }}ASSISTANT: {{ .Response }}</s>
{{ end -}}

View File

@@ -5,11 +5,11 @@
{{- range .Messages }}<|{{ .Role }}|>
{{ .Content }}</s>
{{ end }}<|assistant|>
{{ else }}
{{ else -}}
{{ if .System }}<|system|>
{{ .System }}</s>
{{ end }}{{ if .Prompt }}<|user|>
{{ .Prompt }}</s>
{{ end }}<|assistant|>
{{ .Response }}</s>
{{- end }}
{{ end -}}