mirror of
https://github.com/ollama/ollama.git
synced 2026-01-03 04:59:19 -05:00
Compare commits
23 Commits
pdevine/fi
...
royh-preci
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e210f8763f | ||
|
|
3971c2333f | ||
|
|
e5c65a85df | ||
|
|
33627331a3 | ||
|
|
36c87c433b | ||
|
|
179737feb7 | ||
|
|
47353f5ee4 | ||
|
|
10e768826c | ||
|
|
5056bb9c01 | ||
|
|
c4cf8ad559 | ||
|
|
57ec6901eb | ||
|
|
e64f9ebb44 | ||
|
|
791650ddef | ||
|
|
efbf41ed81 | ||
|
|
cf15589851 | ||
|
|
19753c18c0 | ||
|
|
41be28096a | ||
|
|
37a570f962 | ||
|
|
5a739ff4cb | ||
|
|
4e262eb2a8 | ||
|
|
c71698426c | ||
|
|
f93cdfdfae | ||
|
|
af370ac178 |
@@ -127,6 +127,10 @@ Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\models"
|
||||
Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\history"
|
||||
; NOTE: if the user has a custom OLLAMA_MODELS it will be preserved
|
||||
|
||||
[InstallDelete]
|
||||
Type: filesandordirs; Name: "{%TEMP}\ollama*"
|
||||
Type: filesandordirs; Name: "{%LOCALAPPDATA}\Programs\Ollama"
|
||||
|
||||
[Messages]
|
||||
WizardReady=Ollama Windows Preview
|
||||
ReadyLabel1=%nLet's get you up and running with your own large language models.
|
||||
|
||||
@@ -657,7 +657,7 @@ func showInfo(resp *api.ShowResponse) {
|
||||
|
||||
modelData := [][]string{
|
||||
{"arch", arch},
|
||||
{"parameters", resp.Details.ParameterSize},
|
||||
{"parameters", format.Parameters(uint64(resp.ModelInfo["general.parameter_count"].(float64)))},
|
||||
{"quantization", resp.Details.QuantizationLevel},
|
||||
{"context length", fmt.Sprintf("%v", resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)].(float64))},
|
||||
{"embedding length", fmt.Sprintf("%v", resp.ModelInfo[fmt.Sprintf("%s.embedding_length", arch)].(float64))},
|
||||
@@ -671,7 +671,7 @@ func showInfo(resp *api.ShowResponse) {
|
||||
if resp.ProjectorInfo != nil {
|
||||
projectorData := [][]string{
|
||||
{"arch", "clip"},
|
||||
{"parameters", format.HumanNumber(uint64(resp.ProjectorInfo["general.parameter_count"].(float64)))},
|
||||
{"parameters", format.Parameters(uint64(resp.ProjectorInfo["general.parameter_count"].(float64)))},
|
||||
}
|
||||
|
||||
if projectorType, ok := resp.ProjectorInfo["clip.projector_type"]; ok {
|
||||
|
||||
@@ -9,9 +9,10 @@ const (
|
||||
Thousand = 1000
|
||||
Million = Thousand * 1000
|
||||
Billion = Million * 1000
|
||||
Trillion = Billion * 1000
|
||||
)
|
||||
|
||||
func HumanNumber(b uint64) string {
|
||||
func RoundedParameter(b uint64) string {
|
||||
switch {
|
||||
case b >= Billion:
|
||||
number := float64(b) / Billion
|
||||
@@ -31,3 +32,33 @@ func HumanNumber(b uint64) string {
|
||||
return fmt.Sprintf("%d", b)
|
||||
}
|
||||
}
|
||||
|
||||
func Parameters(b uint64) string {
|
||||
switch {
|
||||
case b >= Trillion:
|
||||
number := float64(b) / Trillion
|
||||
return fmt.Sprintf("%sT", decimalPlace(number))
|
||||
case b >= Billion:
|
||||
number := float64(b) / Billion
|
||||
return fmt.Sprintf("%sB", decimalPlace(number))
|
||||
case b >= Million:
|
||||
number := float64(b) / Million
|
||||
return fmt.Sprintf("%sM", decimalPlace(number))
|
||||
case b >= Thousand:
|
||||
number := float64(b) / Thousand
|
||||
return fmt.Sprintf("%sK", decimalPlace(number))
|
||||
default:
|
||||
return fmt.Sprintf("%d", b)
|
||||
}
|
||||
}
|
||||
|
||||
// decimalPlace formats number with three significant digits for values in the
// range [0, 1000): two decimals below 10, one decimal below 100 and none from
// 100 upwards.
//
// The precision is picked from the unrounded value, so a value just under a
// threshold can round up into the next magnitude (9.999 -> "10.00",
// 99.96 -> "100.0") and end up with one digit too many. Those two carry cases
// are detected on the formatted text and trimmed back to three digits.
func decimalPlace(number float64) string {
	switch {
	case number >= 100:
		return fmt.Sprintf("%.0f", number)
	case number >= 10:
		s := fmt.Sprintf("%.1f", number)
		if s == "100.0" {
			// Rounding carried into the hundreds; drop the decimal.
			return "100"
		}
		return s
	default:
		s := fmt.Sprintf("%.2f", number)
		if s == "10.00" {
			// Rounding carried into the tens; keep a single decimal.
			return "10.0"
		}
		return s
	}
}
|
||||
|
||||
@@ -4,7 +4,7 @@ import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestHumanNumber(t *testing.T) {
|
||||
func TestRoundedParameter(t *testing.T) {
|
||||
type testCase struct {
|
||||
input uint64
|
||||
expected string
|
||||
@@ -24,7 +24,34 @@ func TestHumanNumber(t *testing.T) {
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.expected, func(t *testing.T) {
|
||||
result := HumanNumber(tc.input)
|
||||
result := RoundedParameter(tc.input)
|
||||
if result != tc.expected {
|
||||
t.Errorf("Expected %s, got %s", tc.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParameters(t *testing.T) {
|
||||
type testCase struct {
|
||||
input uint64
|
||||
expected string
|
||||
}
|
||||
|
||||
testCases := []testCase{
|
||||
{26000000, "26.0M"},
|
||||
{26000000000, "26.0B"},
|
||||
{1000, "1.00K"},
|
||||
{1000000, "1.00M"},
|
||||
{1000000000, "1.00B"},
|
||||
{1000000000000, "1.00T"},
|
||||
{100, "100"},
|
||||
{206000000, "206M"},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.expected, func(t *testing.T) {
|
||||
result := Parameters(tc.input)
|
||||
if result != tc.expected {
|
||||
t.Errorf("Expected %s, got %s", tc.expected, result)
|
||||
}
|
||||
|
||||
@@ -360,14 +360,17 @@ func GetGPUInfo() GpuInfoList {
|
||||
"before",
|
||||
"total", format.HumanBytes2(cpus[0].TotalMemory),
|
||||
"free", format.HumanBytes2(cpus[0].FreeMemory),
|
||||
"free_swap", format.HumanBytes2(cpus[0].FreeSwap),
|
||||
),
|
||||
slog.Group(
|
||||
"now",
|
||||
"total", format.HumanBytes2(mem.TotalMemory),
|
||||
"free", format.HumanBytes2(mem.FreeMemory),
|
||||
"free_swap", format.HumanBytes2(mem.FreeSwap),
|
||||
),
|
||||
)
|
||||
cpus[0].FreeMemory = mem.FreeMemory
|
||||
cpus[0].FreeSwap = mem.FreeSwap
|
||||
}
|
||||
|
||||
var memInfo C.mem_info_t
|
||||
|
||||
@@ -57,6 +57,7 @@ func GetCPUMem() (memInfo, error) {
|
||||
return memInfo{
|
||||
TotalMemory: uint64(C.getPhysicalMemory()),
|
||||
FreeMemory: uint64(C.getFreeMemory()),
|
||||
// FreeSwap omitted as Darwin uses dynamic paging
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -50,7 +50,7 @@ var OneapiMgmtName = "libze_intel_gpu.so"
|
||||
|
||||
func GetCPUMem() (memInfo, error) {
|
||||
var mem memInfo
|
||||
var total, available, free, buffers, cached uint64
|
||||
var total, available, free, buffers, cached, freeSwap uint64
|
||||
f, err := os.Open("/proc/meminfo")
|
||||
if err != nil {
|
||||
return mem, err
|
||||
@@ -70,20 +70,21 @@ func GetCPUMem() (memInfo, error) {
|
||||
_, err = fmt.Sscanf(line, "Buffers:%d", &buffers)
|
||||
case strings.HasPrefix(line, "Cached:"):
|
||||
_, err = fmt.Sscanf(line, "Cached:%d", &cached)
|
||||
case strings.HasPrefix(line, "SwapFree:"):
|
||||
_, err = fmt.Sscanf(line, "SwapFree:%d", &freeSwap)
|
||||
default:
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
return mem, err
|
||||
}
|
||||
|
||||
if total > 0 && available > 0 {
|
||||
mem.TotalMemory = total * format.KibiByte
|
||||
mem.FreeMemory = available * format.KibiByte
|
||||
return mem, nil
|
||||
}
|
||||
}
|
||||
mem.TotalMemory = total * format.KibiByte
|
||||
mem.FreeMemory = (free + buffers + cached) * format.KibiByte
|
||||
mem.FreeSwap = freeSwap * format.KibiByte
|
||||
if available > 0 {
|
||||
mem.FreeMemory = available * format.KibiByte
|
||||
} else {
|
||||
mem.FreeMemory = (free + buffers + cached) * format.KibiByte
|
||||
}
|
||||
return mem, nil
|
||||
}
|
||||
|
||||
@@ -51,5 +51,5 @@ func GetCPUMem() (memInfo, error) {
|
||||
if r1 == 0 {
|
||||
return memInfo{}, fmt.Errorf("GlobalMemoryStatusEx failed: %w", err)
|
||||
}
|
||||
return memInfo{TotalMemory: memStatus.TotalPhys, FreeMemory: memStatus.AvailPhys}, nil
|
||||
return memInfo{TotalMemory: memStatus.TotalPhys, FreeMemory: memStatus.AvailPhys, FreeSwap: memStatus.AvailPageFile}, nil
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
type memInfo struct {
|
||||
TotalMemory uint64 `json:"total_memory,omitempty"`
|
||||
FreeMemory uint64 `json:"free_memory,omitempty"`
|
||||
FreeSwap uint64 `json:"free_swap,omitempty"`
|
||||
}
|
||||
|
||||
// Beginning of an `ollama info` command
|
||||
|
||||
@@ -178,7 +178,7 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
|
||||
CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} ${OLLAMA_CUSTOM_CUDA_DEFS}"
|
||||
echo "Building custom CUDA GPU"
|
||||
else
|
||||
CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_FLAGS=-t8 -DGGML_CUDA_FORCE_MMQ=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} -DCMAKE_LIBRARY_PATH=/usr/local/cuda/compat"
|
||||
CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_FLAGS=-t8 -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}"
|
||||
fi
|
||||
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS}"
|
||||
BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}"
|
||||
|
||||
26
llm/ggml.go
26
llm/ggml.go
@@ -424,6 +424,32 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
|
||||
4*batch*(3*embedding+vocab)+embedding*vocab*105/128,
|
||||
4*batch*(2*embedding+1+2*embeddingHeadsK*headsKV+context+context*headsKV)+4*embeddingHeadsK*context*headsKV+embedding*embeddingHeadsK*headsKV*9/16,
|
||||
)
|
||||
case "chatglm":
|
||||
fullOffload = 4 * batch * (embedding + vocab)
|
||||
partialOffload = 4*batch*(embedding+vocab) + embedding*vocab*105/128
|
||||
if qkvBias, ok := layers["blk.0"]["attn_qkv.bias"]; ok {
|
||||
fullOffload = max(
|
||||
fullOffload,
|
||||
4*batch*(2+
|
||||
2*embedding+
|
||||
context+
|
||||
context*heads+
|
||||
embeddingHeadsK*heads+
|
||||
qkvBias.Shape[0]),
|
||||
)
|
||||
|
||||
partialOffload = max(
|
||||
partialOffload,
|
||||
4*batch*(1+
|
||||
2*embedding+
|
||||
embeddingHeadsK*heads+
|
||||
context+
|
||||
context*heads)+
|
||||
4*embeddingHeadsK*context+
|
||||
4*context*embeddingHeadsK+
|
||||
4*qkvBias.Shape[0],
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
|
||||
@@ -33,7 +33,7 @@ func Quantize(infile, outfile string, ftype fileType) error {
|
||||
params.ftype = ftype.Value()
|
||||
|
||||
if rc := C.llama_model_quantize(cinfile, coutfile, ¶ms); rc != 0 {
|
||||
return fmt.Errorf("llama_model_quantize: %d", rc)
|
||||
return fmt.Errorf("failed to quantize model. This model architecture may not be supported, or you may need to upgrade Ollama to the latest version")
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
@@ -88,6 +88,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
var estimate MemoryEstimate
|
||||
var systemTotalMemory uint64
|
||||
var systemFreeMemory uint64
|
||||
var systemSwapFreeMemory uint64
|
||||
|
||||
systemMemInfo, err := gpu.GetCPUMem()
|
||||
if err != nil {
|
||||
@@ -95,7 +96,8 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
} else {
|
||||
systemTotalMemory = systemMemInfo.TotalMemory
|
||||
systemFreeMemory = systemMemInfo.FreeMemory
|
||||
slog.Debug("system memory", "total", format.HumanBytes2(systemTotalMemory), "free", systemFreeMemory)
|
||||
systemSwapFreeMemory = systemMemInfo.FreeSwap
|
||||
slog.Debug("system memory", "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "free_swap", format.HumanBytes2(systemSwapFreeMemory))
|
||||
}
|
||||
|
||||
// If the user wants zero GPU layers, reset the gpu list to be CPU/system ram info
|
||||
@@ -122,6 +124,16 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
}
|
||||
}
|
||||
|
||||
// On linux, over-allocating CPU memory will almost always result in an error
|
||||
if runtime.GOOS == "linux" {
|
||||
systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize
|
||||
available := min(systemTotalMemory, systemFreeMemory+systemSwapFreeMemory)
|
||||
if systemMemoryRequired > available {
|
||||
slog.Warn("model request too large for system", "requested", format.HumanBytes2(systemMemoryRequired), "available", available, "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "swap", format.HumanBytes2(systemSwapFreeMemory))
|
||||
return nil, fmt.Errorf("model requires more system memory (%s) than is available (%s)", format.HumanBytes2(systemMemoryRequired), format.HumanBytes2(available))
|
||||
}
|
||||
}
|
||||
|
||||
estimate.log()
|
||||
|
||||
// Loop through potential servers
|
||||
|
||||
@@ -466,7 +466,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
|
||||
if baseLayer.GGML != nil {
|
||||
config.ModelFormat = cmp.Or(config.ModelFormat, baseLayer.GGML.Name())
|
||||
config.ModelFamily = cmp.Or(config.ModelFamily, baseLayer.GGML.KV().Architecture())
|
||||
config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(baseLayer.GGML.KV().ParameterCount()))
|
||||
config.ModelType = cmp.Or(config.ModelType, format.RoundedParameter(baseLayer.GGML.KV().ParameterCount()))
|
||||
config.FileType = cmp.Or(config.FileType, baseLayer.GGML.KV().FileType().String())
|
||||
config.ModelFamilies = append(config.ModelFamilies, baseLayer.GGML.KV().Architecture())
|
||||
}
|
||||
|
||||
@@ -161,7 +161,7 @@ func TestChatPrompt(t *testing.T) {
|
||||
{Role: "user", Content: "A test. And a thumping good one at that, I'd wager."},
|
||||
},
|
||||
expect: expect{
|
||||
prompt: "You're a test, Harry! I-I'm a what? You are the Test Who Lived. A test. And a thumping good one at that, I'd wager. ",
|
||||
prompt: "You are the Test Who Lived. You're a test, Harry! I-I'm a what? A test. And a thumping good one at that, I'd wager. ",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -546,8 +546,8 @@ func TestCreateDetectTemplate(t *testing.T) {
|
||||
|
||||
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
|
||||
filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"),
|
||||
filepath.Join(p, "blobs", "sha256-9512c372dfc7d84d6065b8dd2b601aeed8cc1a78e7a7aa784a42fff37f5524b7"),
|
||||
filepath.Join(p, "blobs", "sha256-b8b78cb8c6eefd14c06f1af042e6161255bf87bbf2dd14fce57cdac893db8139"),
|
||||
filepath.Join(p, "blobs", "sha256-c608dc615584cd20d9d830363dabf8a4783ae5d34245c3d8c115edb3bc7b28e4"),
|
||||
filepath.Join(p, "blobs", "sha256-f836ee110db21567f826332e4cedd746c06d10664fd5a9ea3659e3683a944510"),
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
@@ -135,11 +135,6 @@ func (s *Scheduler) processPending(ctx context.Context) {
|
||||
}
|
||||
|
||||
for {
|
||||
cpus := s.getCpuFn()
|
||||
var systemMem gpu.GpuInfo
|
||||
if len(cpus) > 0 {
|
||||
systemMem = cpus[0]
|
||||
}
|
||||
var runnerToExpire *runnerRef
|
||||
s.loadedMu.Lock()
|
||||
runner := s.loaded[pending.model.ModelPath]
|
||||
@@ -193,38 +188,6 @@ func (s *Scheduler) processPending(ctx context.Context) {
|
||||
break
|
||||
}
|
||||
|
||||
estimate := llm.EstimateGPULayers(gpus, ggml, pending.model.ProjectorPaths, pending.opts)
|
||||
maxSize := systemMem.FreeMemory
|
||||
|
||||
// Add available GPU memory to the total pool
|
||||
// macOS hardware has unified memory so don't double count
|
||||
if runtime.GOOS != "darwin" {
|
||||
for _, gpu := range gpus {
|
||||
if gpu.Library == "cpu" {
|
||||
continue
|
||||
}
|
||||
if loadedCount == 0 {
|
||||
// If no other models are loaded, set the limit based on what's available
|
||||
maxSize += gpu.FreeMemory
|
||||
} else {
|
||||
// Other models could be unloaded, favor total memory for limit
|
||||
maxSize += gpu.TotalMemory
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Block attempting to load a model larger than system memory + GPU memory
|
||||
if estimate.TotalSize > maxSize {
|
||||
slog.Warn("model request too large for system", "requested", format.HumanBytes2(estimate.TotalSize), "system", format.HumanBytes2(maxSize))
|
||||
|
||||
// Linux will crash if over-allocating memory - return an error to the user.
|
||||
// TODO (jmorganca): add reasonable upper limits for darwin and windows as well
|
||||
if runtime.GOOS == "linux" {
|
||||
pending.errCh <- fmt.Errorf("requested model (%s) is too large for this system (%s)", format.HumanBytes2(estimate.TotalSize), format.HumanBytes2(maxSize))
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Evaluate if the model will fit in the available system memory, or if we should unload a model first
|
||||
if len(gpus) == 1 && gpus[0].Library == "cpu" {
|
||||
// simplifying assumption of defaultParallel when in CPU mode
|
||||
|
||||
@@ -1,8 +1 @@
|
||||
{{- if .Messages }}
|
||||
{{- if .System }}<start_system>{{ .System }}<end_message>
|
||||
{{- end }}
|
||||
{{- range .Messages }}<start_{{ .Role }}>{{ .Content }}<end_message>
|
||||
{{- end }}<start_assistant>
|
||||
{{- else }}
|
||||
{{ if .System }}<start_system>{{ .System }}<end_message>{{ end }}{{ if .Prompt }}<start_user>{{ .Prompt }}<end_message>{{ end }}<start_assistant>{{ .Response }}<end_message>
|
||||
{{- end }}
|
||||
{{ if .System }}<start_system>{{ .System }}<end_message>{{ end }}{{ if .Prompt }}<start_user>{{ .Prompt }}<end_message>{{ end }}<start_assistant>{{ .Response }}<end_message>
|
||||
@@ -1,14 +1,3 @@
|
||||
{{- if .Messages }}
|
||||
{{- if .System }}{{ .System }}
|
||||
{{- end }}
|
||||
{{- range .Messages }}
|
||||
{{- if eq .Role "user" }}### Instruction:
|
||||
{{- else if eq .Role "assistant" }}### Response:
|
||||
{{- end }}
|
||||
{{ .Content }}
|
||||
|
||||
{{ end }}### Response:
|
||||
{{ else }}
|
||||
{{ if .System }}{{ .System }}
|
||||
|
||||
{{ end }}{{ if .Prompt }}### Instruction:
|
||||
@@ -16,4 +5,4 @@
|
||||
|
||||
{{ end }}### Response:
|
||||
{{ .Response }}
|
||||
{{- end }}
|
||||
|
||||
|
||||
@@ -1,15 +1,6 @@
|
||||
{{- if .Messages }}
|
||||
{{- if .System }}<|im_start|>system
|
||||
{{ .System }}<|im_end|>
|
||||
{{ end }}
|
||||
{{- range .Messages }}<|im_start|>{{ .Role }}
|
||||
{{ .Content }}<|im_end|>
|
||||
{{ end }}<|im_start|>assistant
|
||||
{{ else }}
|
||||
{{ if .System }}<|im_start|>system
|
||||
{{ .System }}<|im_end|>
|
||||
{{ end }}{{ if .Prompt }}<|im_start|>user
|
||||
{{ .Prompt }}<|im_end|>
|
||||
{{ end }}<|im_start|>assistant
|
||||
{{ .Response }}<|im_end|>
|
||||
{{- end }}
|
||||
@@ -1,17 +1,6 @@
|
||||
{{- if .Messages }}
|
||||
{{- if .System }}System: {{ .System }}
|
||||
|
||||
{{ end }}
|
||||
{{- range .Messages }}
|
||||
{{- if eq .Role "user" }}User:
|
||||
{{- else if eq .Role "assistant" }}Assistant:
|
||||
{{- end }} {{ .Content }}
|
||||
|
||||
{{ end }}Assistant:
|
||||
{{- else }}
|
||||
{{ if .System }}System: {{ .System }}
|
||||
|
||||
{{ end }}{{ if .Prompt }}User: {{ .Prompt }}
|
||||
|
||||
{{ end }}Assistant: <|begin_of_text|>{{ .Response }}
|
||||
{{- end }}
|
||||
{{ end }}Assistant: {{ .Response }}
|
||||
|
||||
|
||||
@@ -1,19 +1,10 @@
|
||||
{{- if .Messages }}
|
||||
{{- if .System }}Source: system
|
||||
{{ if .System }}Source: system
|
||||
|
||||
{{ .System }} <step> {{ end }}
|
||||
{{- range .Messages }}Source: {{ .Role }}
|
||||
|
||||
{{ .Content }} <step> {{ end }}Source: assistant
|
||||
Destination: user
|
||||
|
||||
{{ else }}
|
||||
{{ if .System }} Source: system
|
||||
|
||||
{{ .System }} <step>{{ end }} Source: user
|
||||
{{ .System }} <step> {{ end }}Source: user
|
||||
|
||||
{{ .Prompt }} <step> Source: assistant
|
||||
{{- if not .Response }}
|
||||
Destination: user
|
||||
{{- end }}
|
||||
|
||||
{{ .Response }}<step>
|
||||
{{- end }}
|
||||
{{ .Response }} <step>
|
||||
@@ -1,13 +1,5 @@
|
||||
{{- if .Messages }}
|
||||
{{- if .System }}System: {{ .System }}
|
||||
{{ end }}
|
||||
{{- range .Messages }}
|
||||
{{- if eq .Role "user" }}User:
|
||||
{{ else if eq .Role "assistant" }}Falcon:
|
||||
{{ end }}{{ .Content }}
|
||||
{{ if .System }}System: {{ .System }}
|
||||
{{ end }}{{ if .Prompt }}User:
|
||||
{{ .Prompt }}
|
||||
{{ end }}Falcon:
|
||||
{{ else }}
|
||||
{{ if .System }}{{ .System }}
|
||||
{{ end }}{{ if .Prompt }}User: {{ .Prompt }}
|
||||
{{ end }}Assistant: {{ .Response }}
|
||||
{{- end }}
|
||||
{{ .Response }}
|
||||
|
||||
@@ -1,16 +1,5 @@
|
||||
{{- if .Messages }}
|
||||
{{- range $index, $_ := .Messages }}<start_of_turn>
|
||||
{{- if eq .Role "user" }}user
|
||||
{{- if and $.System (eq $index 0) }}
|
||||
{{ $.System }}
|
||||
{{- end }}
|
||||
{{- else if eq .Role "assistant" }}model
|
||||
{{- end }}
|
||||
{{ .Content }}<end_of_turn>
|
||||
{{ end }}<start_of_turn>model
|
||||
{{ else }}
|
||||
<start_of_turn>user
|
||||
{{ if .System }}{{ .System }} {{ end }}{{ .Prompt }}<end_of_turn>
|
||||
{{ if .System }}{{ .System }}
|
||||
{{ end }}{{ .Prompt }}<end_of_turn>
|
||||
<start_of_turn>model
|
||||
{{ .Response }}<end_of_turn>
|
||||
{{- end }}
|
||||
@@ -1,18 +1,4 @@
|
||||
{{- if .Messages }}
|
||||
{{- if .System }}System:
|
||||
{{ .System }}
|
||||
|
||||
{{ end }}
|
||||
{{- range .Messages }}
|
||||
{{- if eq .Role "user" }}Question:
|
||||
{{- else if eq .Role "assistant" }}Answer:
|
||||
{{- end }}
|
||||
{{ .Content }}
|
||||
|
||||
{{ end }}Answer:
|
||||
{{ else }}
|
||||
{{ if .System }}
|
||||
System:
|
||||
{{ if .System }}System:
|
||||
{{ .System }}
|
||||
|
||||
{{ end }}{{ if .Prompt }}Question:
|
||||
@@ -20,4 +6,4 @@ System:
|
||||
|
||||
{{ end }}Answer:
|
||||
{{ .Response }}
|
||||
{{- end }}
|
||||
|
||||
|
||||
@@ -1,16 +1,6 @@
|
||||
{{- if .Messages }}
|
||||
{{- range $index, $_ := .Messages }}
|
||||
{{- if eq .Role "user" }}[INST] {{ if eq $index 0 }}<<SYS>>
|
||||
{{- if $.System }}
|
||||
{{ $.System }}
|
||||
[INST] <<SYS>>
|
||||
{{- if .System }}
|
||||
{{ .System }}
|
||||
{{ end }}<</SYS>>
|
||||
|
||||
{{ end }}{{ .Content }}
|
||||
{{- else }} [/INST] {{ .Content }}</s><s>
|
||||
{{- end }}
|
||||
{{- end }} [/INST]
|
||||
{{- else }}
|
||||
[INST] <<SYS>>{{ .System }}<</SYS>>
|
||||
|
||||
{{ .Prompt }} [/INST] {{ .Response }}
|
||||
{{- end }}
|
||||
{{ .Prompt }} [/INST] {{ .Response }}</s><s>
|
||||
@@ -1,19 +1,7 @@
|
||||
{{- if .Messages }}
|
||||
{{- if .System }}<|start_header_id|>system<|end_header_id|>
|
||||
|
||||
{{ .System }}<|eot_id|>
|
||||
{{- end }}
|
||||
{{- range .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>
|
||||
|
||||
{{ .Content }}<|eot_id|>
|
||||
{{- end }}<|start_header_id|>assistant<|end_header_id|>
|
||||
|
||||
{{ else }}
|
||||
{{ if .System }}<|start_header_id|>system<|end_header_id|>
|
||||
|
||||
{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
|
||||
|
||||
{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
|
||||
|
||||
{{ .Response }}<|eot_id|>
|
||||
{{- end }}
|
||||
{{ .Response }}<|eot_id|>
|
||||
@@ -1,15 +1,3 @@
|
||||
{{- if .Messages }}
|
||||
{{- if .System }}{{ .System }}
|
||||
|
||||
{{ end }}
|
||||
{{- range .Messages }}
|
||||
{{- if eq .Role "user" }}@@ Instruction
|
||||
{{- else if eq .Role "assistant" }}@@ Response
|
||||
{{- end }}
|
||||
{{ .Content }}
|
||||
|
||||
{{ end }}@@ Response
|
||||
{{ else }}
|
||||
{{ if .System }}{{ .System }}
|
||||
|
||||
{{ end }}{{ if .Prompt }}@@ Instruction
|
||||
@@ -17,4 +5,4 @@
|
||||
|
||||
{{ end }}@@ Response
|
||||
{{ .Response }}
|
||||
{{- end }}
|
||||
|
||||
|
||||
@@ -1,9 +1,3 @@
|
||||
{{- if .Messages }}
|
||||
{{- range $index, $_ := .Messages }}
|
||||
{{- if eq .Role "user" }}[INST] {{ if and $.System (eq (len (slice $.Messages $index)) 1) }}{{ $.System }}
|
||||
{{ end }}{{ .Content }}
|
||||
{{- else if eq .Role "assistant" }}[/INST] {{ .Content }}</s>
|
||||
{{- end }}
|
||||
{{- end }}[/INST]
|
||||
{{- else }}[INST] {{ if .System }}{{ .System }} {{ end }}{{ .Prompt }} [/INST] {{ .Response }}
|
||||
{{- end }}
|
||||
[INST] {{ if .System }}{{ .System }}
|
||||
|
||||
{{ end }}{{ .Prompt }}[/INST] {{ .Response }}</s>
|
||||
@@ -1,11 +1 @@
|
||||
{{- if .Messages }}
|
||||
{{- if .System }}GPT Correct System: {{ .System }}<|end_of_turn|>
|
||||
{{- end }}
|
||||
{{- range .Messages }}GPT Correct
|
||||
{{- if eq .Role "user" }} User:
|
||||
{{- else if eq .Role "assistant" }} Assistant:
|
||||
{{- end }} {{ .Content }}<|end_of_turn|>
|
||||
{{- end }}GPT Correct Assistant:
|
||||
{{- else }}
|
||||
{{ .System }}<|end_of_turn|>GPT4 Correct User: {{ .Prompt }}<|end_of_turn|>GPT4 Correct Assistant: {{ .Response }}<|end_of_turn|>
|
||||
{{- end }}
|
||||
{{ if .System }}GPT4 Correct System: {{ .System }}<|end_of_turn|>{{ end }}GPT4 Correct User: {{ .Prompt }}<|end_of_turn|>GPT4 Correct Assistant: {{ .Response }}<|end_of_turn|>
|
||||
@@ -1,15 +1,6 @@
|
||||
{{- if .Messages }}
|
||||
{{- if .System }}<|system|>
|
||||
{{ .System }}<|end|>
|
||||
{{ end }}
|
||||
{{- range .Messages }}<|{{ .Role }}|>
|
||||
{{ .Content }}<|end|>
|
||||
{{ end }}<|assistant|>
|
||||
{{ else }}
|
||||
{{ if .System }}<|system|>
|
||||
{{ .System }}<|end|>
|
||||
{{ end }}{{ if .Prompt }}<|user|>
|
||||
{{ .Prompt }}<|end|>
|
||||
{{ end }}<|assistant|>
|
||||
{{ .Response }}<|end|>
|
||||
{{- end }}
|
||||
@@ -1,16 +1,3 @@
|
||||
{{- if .Messages }}
|
||||
{{- if .System }}### System:
|
||||
{{ .System }}
|
||||
|
||||
{{ end }}
|
||||
{{- range .Messages }}
|
||||
{{- if eq .Role "user" }}### User:
|
||||
{{ .Content }}
|
||||
{{ else if eq .Role "assistant" }}### Assistant:
|
||||
{{ .Content }}</s>
|
||||
{{ end }}
|
||||
{{ end }}### Assistant:
|
||||
{{ else }}
|
||||
{{ if .System }}### System:
|
||||
{{ .System }}
|
||||
|
||||
@@ -18,5 +5,5 @@
|
||||
{{ .Prompt }}
|
||||
|
||||
{{ end }}### Assistant:
|
||||
{{ .Response }}
|
||||
{{- end }}
|
||||
{{ .Response }}</s>
|
||||
|
||||
|
||||
@@ -1,24 +1,8 @@
|
||||
{{- if .Messages }}
|
||||
{{- if .System }}{{ .System }}
|
||||
|
||||
{{ end }}
|
||||
{{- range .Messages }}
|
||||
{{- if eq .Role "user" }}### Instruction
|
||||
{{ .Content }}
|
||||
|
||||
{{ else if eq .Role "assistant" }}### Response
|
||||
{{ .Content }}<|endoftext|>
|
||||
|
||||
{{ end }}
|
||||
{{- end }}### Response
|
||||
{{ else }}
|
||||
{{ if .System }}{{ .System }}
|
||||
|
||||
{{ end }}{{ if .Prompt }}### Instruction
|
||||
{{ .Prompt }}
|
||||
|
||||
|
||||
{{ end }}### Response
|
||||
{{ .Response }}<|endoftext|>
|
||||
|
||||
{{- end }}
|
||||
@@ -143,11 +143,14 @@ func (t *Template) Vars() []string {
|
||||
|
||||
type Values struct {
|
||||
Messages []api.Message
|
||||
|
||||
// forceLegacy is a flag used to test compatibility with legacy templates
|
||||
forceLegacy bool
|
||||
}
|
||||
|
||||
func (t *Template) Execute(w io.Writer, v Values) error {
|
||||
system, collated := collate(v.Messages)
|
||||
if slices.Contains(t.Vars(), "messages") {
|
||||
if !v.forceLegacy && slices.Contains(t.Vars(), "messages") {
|
||||
return t.Template.Execute(w, map[string]any{
|
||||
"System": system,
|
||||
"Messages": collated,
|
||||
@@ -157,39 +160,46 @@ func (t *Template) Execute(w io.Writer, v Values) error {
|
||||
var b bytes.Buffer
|
||||
var prompt, response string
|
||||
for i, m := range collated {
|
||||
if m.Role == "user" {
|
||||
switch m.Role {
|
||||
case "system":
|
||||
system = m.Content
|
||||
case "user":
|
||||
prompt = m.Content
|
||||
} else {
|
||||
case "assistant":
|
||||
response = m.Content
|
||||
}
|
||||
|
||||
if i != len(collated)-1 && prompt != "" && response != "" {
|
||||
if err := t.Template.Execute(&b, map[string]any{
|
||||
"System": "",
|
||||
"System": system,
|
||||
"Prompt": prompt,
|
||||
"Response": response,
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
system = ""
|
||||
prompt = ""
|
||||
response = ""
|
||||
}
|
||||
}
|
||||
|
||||
var cut bool
|
||||
tree := t.Template.Copy()
|
||||
// for the last message, cut everything after "{{ .Response }}"
|
||||
tree.Root.Nodes = slices.DeleteFunc(tree.Root.Nodes, func(n parse.Node) bool {
|
||||
if slices.Contains(parseNode(n), "Response") {
|
||||
cut = true
|
||||
nodes := deleteNode(t.Template.Root.Copy(), func(n parse.Node) bool {
|
||||
switch t := n.(type) {
|
||||
case *parse.ActionNode:
|
||||
case *parse.FieldNode:
|
||||
if slices.Contains(t.Ident, "Response") {
|
||||
cut = true
|
||||
}
|
||||
}
|
||||
|
||||
return cut
|
||||
})
|
||||
|
||||
if err := template.Must(template.New("").AddParseTree("", tree)).Execute(&b, map[string]any{
|
||||
"System": system,
|
||||
tree := parse.Tree{Root: nodes.(*parse.ListNode)}
|
||||
if err := template.Must(template.New("").AddParseTree("", &tree)).Execute(&b, map[string]any{
|
||||
"System": "",
|
||||
"Prompt": prompt,
|
||||
}); err != nil {
|
||||
return err
|
||||
@@ -199,25 +209,16 @@ func (t *Template) Execute(w io.Writer, v Values) error {
|
||||
return err
|
||||
}
|
||||
|
||||
type messages []*api.Message
|
||||
|
||||
// collate messages based on role. consecutive messages of the same role are merged
|
||||
// into a single message. collate also pulls out and merges messages with Role == "system"
|
||||
// which are templated separately. As a side effect, it mangles message content adding image
|
||||
// tags ([img-%d]) as needed
|
||||
func collate(msgs []api.Message) (system string, collated messages) {
|
||||
// into a single message. collate also collects and returns all system messages.
|
||||
// collate mutates message content adding image tags ([img-%d]) as needed
|
||||
func collate(msgs []api.Message) (string, []*api.Message) {
|
||||
var n int
|
||||
|
||||
var system []string
|
||||
var collated []*api.Message
|
||||
for i := range msgs {
|
||||
msg := msgs[i]
|
||||
if msg.Role == "system" {
|
||||
if system != "" {
|
||||
system += "\n\n"
|
||||
}
|
||||
|
||||
system += msg.Content
|
||||
continue
|
||||
}
|
||||
|
||||
for range msg.Images {
|
||||
imageTag := fmt.Sprintf("[img-%d]", n)
|
||||
if !strings.Contains(msg.Content, "[img]") {
|
||||
@@ -228,6 +229,10 @@ func collate(msgs []api.Message) (system string, collated messages) {
|
||||
n++
|
||||
}
|
||||
|
||||
if msg.Role == "system" {
|
||||
system = append(system, msg.Content)
|
||||
}
|
||||
|
||||
if len(collated) > 0 && collated[len(collated)-1].Role == msg.Role {
|
||||
collated[len(collated)-1].Content += "\n\n" + msg.Content
|
||||
} else {
|
||||
@@ -235,7 +240,7 @@ func collate(msgs []api.Message) (system string, collated messages) {
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
return strings.Join(system, "\n\n"), collated
|
||||
}
|
||||
|
||||
func parseNode(n parse.Node) []string {
|
||||
@@ -286,3 +291,72 @@ func parseNode(n parse.Node) []string {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// deleteNode walks the parse tree rooted at n and removes every node for which
// fn reports true. It returns the pruned tree, or nil when n itself is removed.
// The tree is modified in place, so callers that need the original must pass a
// copy. This is currently used to remove the {{ .Response }} node (and what
// follows it) from templates.
//
// Removal propagates upwards where a parent cannot exist without the child:
// a command left with no arguments removes its pipeline, and a pipeline that is
// removed takes its action with it. A branch (if/with/range) whose body list is
// removed keeps an empty body, and a removed else list is simply dropped.
func deleteNode(n parse.Node, fn func(parse.Node) bool) parse.Node {
	var walk func(n parse.Node) parse.Node
	walk = func(n parse.Node) parse.Node {
		if fn(n) {
			return nil
		}

		switch t := n.(type) {
		case *parse.ListNode:
			var nodes []parse.Node
			for _, c := range t.Nodes {
				if kept := walk(c); kept != nil {
					nodes = append(nodes, kept)
				}
			}

			t.Nodes = nodes
			return t
		case *parse.IfNode:
			// The embedded BranchNode is pruned in place; if the predicate
			// removes it, the whole construct goes away.
			if walk(&t.BranchNode) == nil {
				return nil
			}
		case *parse.WithNode:
			if walk(&t.BranchNode) == nil {
				return nil
			}
		case *parse.RangeNode:
			if walk(&t.BranchNode) == nil {
				return nil
			}
		case *parse.BranchNode:
			// walk returns a nil interface for removed nodes, so the result
			// must be checked rather than type-asserted directly.
			if t.List != nil && walk(t.List) == nil {
				t.List.Nodes = nil
			}

			if t.ElseList != nil && walk(t.ElseList) == nil {
				t.ElseList = nil
			}
		case *parse.ActionNode:
			if walk(t.Pipe) == nil {
				return nil
			}
		case *parse.PipeNode:
			var commands []*parse.CommandNode
			for _, c := range t.Cmds {
				var args []parse.Node
				for _, a := range c.Args {
					if kept := walk(a); kept != nil {
						args = append(args, kept)
					}
				}

				// A command stripped of all arguments invalidates the pipeline.
				if len(args) == 0 {
					return nil
				}

				c.Args = args
				commands = append(commands, c)
			}

			if len(commands) == 0 {
				return nil
			}

			t.Cmds = commands
		}

		return n
	}

	return walk(n)
}
|
||||
|
||||
@@ -105,8 +105,8 @@ func TestTemplate(t *testing.T) {
|
||||
}
|
||||
|
||||
for n, tt := range cases {
|
||||
var actual bytes.Buffer
|
||||
t.Run(n, func(t *testing.T) {
|
||||
var actual bytes.Buffer
|
||||
if err := tmpl.Execute(&actual, Values{Messages: tt}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -116,7 +116,34 @@ func TestTemplate(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(actual.Bytes(), expect); diff != "" {
|
||||
bts := actual.Bytes()
|
||||
|
||||
if slices.Contains([]string{"chatqa.gotmpl", "llama2-chat.gotmpl", "mistral-instruct.gotmpl", "openchat.gotmpl", "vicuna.gotmpl"}, match) && bts[len(bts)-1] == ' ' {
|
||||
t.Log("removing trailing space from output")
|
||||
bts = bts[:len(bts)-1]
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(bts, expect); diff != "" {
|
||||
t.Errorf("mismatch (-got +want):\n%s", diff)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("legacy", func(t *testing.T) {
|
||||
t.Skip("legacy outputs are currently default outputs")
|
||||
var legacy bytes.Buffer
|
||||
if err := tmpl.Execute(&legacy, Values{Messages: tt, forceLegacy: true}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
legacyBytes := legacy.Bytes()
|
||||
if slices.Contains([]string{"chatqa.gotmpl", "openchat.gotmpl", "vicuna.gotmpl"}, match) && legacyBytes[len(legacyBytes)-1] == ' ' {
|
||||
t.Log("removing trailing space from legacy output")
|
||||
legacyBytes = legacyBytes[:len(legacyBytes)-1]
|
||||
} else if slices.Contains([]string{"codellama-70b-instruct.gotmpl", "llama2-chat.gotmpl", "mistral-instruct.gotmpl"}, match) {
|
||||
t.Skip("legacy outputs cannot be compared to messages outputs")
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(legacyBytes, actual.Bytes()); diff != "" {
|
||||
t.Errorf("mismatch (-got +want):\n%s", diff)
|
||||
}
|
||||
})
|
||||
@@ -135,7 +162,24 @@ func TestParse(t *testing.T) {
|
||||
{"{{ .System }} {{ .Prompt }} {{ .Response }}", []string{"prompt", "response", "system"}},
|
||||
{"{{ with .Tools }}{{ . }}{{ end }} {{ .System }} {{ .Prompt }}", []string{"prompt", "response", "system", "tools"}},
|
||||
{"{{ range .Messages }}{{ .Role }} {{ .Content }}{{ end }}", []string{"content", "messages", "role"}},
|
||||
{"{{ range .Messages }}{{ if eq .Role \"system\" }}SYSTEM: {{ .Content }}{{ else if eq .Role \"user\" }}USER: {{ .Content }}{{ else if eq .Role \"assistant\" }}ASSISTANT: {{ .Content }}{{ end }}{{ end }}", []string{"content", "messages", "role"}},
|
||||
{`{{- range .Messages }}
|
||||
{{- if eq .Role "system" }}SYSTEM:
|
||||
{{- else if eq .Role "user" }}USER:
|
||||
{{- else if eq .Role "assistant" }}ASSISTANT:
|
||||
{{- end }} {{ .Content }}
|
||||
{{- end }}`, []string{"content", "messages", "role"}},
|
||||
{`{{- if .Messages }}
|
||||
{{- range .Messages }}<|im_start|>{{ .Role }}
|
||||
{{ .Content }}<|im_end|>
|
||||
{{ end }}<|im_start|>assistant
|
||||
{{ else -}}
|
||||
{{ if .System }}<|im_start|>system
|
||||
{{ .System }}<|im_end|>
|
||||
{{ end }}{{ if .Prompt }}<|im_start|>user
|
||||
{{ .Prompt }}<|im_end|>
|
||||
{{ end }}<|im_start|>assistant
|
||||
{{ .Response }}<|im_end|>
|
||||
{{- end -}}`, []string{"content", "messages", "prompt", "response", "role", "system"}},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
@@ -145,9 +189,8 @@ func TestParse(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
vars := tmpl.Vars()
|
||||
if !slices.Equal(tt.vars, vars) {
|
||||
t.Errorf("expected %v, got %v", tt.vars, vars)
|
||||
if diff := cmp.Diff(tmpl.Vars(), tt.vars); diff != "" {
|
||||
t.Errorf("mismatch (-got +want):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -167,12 +210,17 @@ func TestExecuteWithMessages(t *testing.T) {
|
||||
{
|
||||
"mistral",
|
||||
[]template{
|
||||
{"no response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `},
|
||||
{"response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`},
|
||||
{"messages", `{{- range $index, $_ := .Messages }}
|
||||
{{- if eq .Role "user" }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}{{ "\n\n" }}
|
||||
{{- end }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }}
|
||||
{{- end }}
|
||||
{"no response", `[INST] {{ if .System }}{{ .System }}
|
||||
|
||||
{{ end }}{{ .Prompt }}[/INST] `},
|
||||
{"response", `[INST] {{ if .System }}{{ .System }}
|
||||
|
||||
{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`},
|
||||
{"messages", `[INST] {{ if .System }}{{ .System }}
|
||||
|
||||
{{ end }}
|
||||
{{- range .Messages }}
|
||||
{{- if eq .Role "user" }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }}[INST] {{ end }}
|
||||
{{- end }}`},
|
||||
},
|
||||
Values{
|
||||
@@ -187,13 +235,17 @@ func TestExecuteWithMessages(t *testing.T) {
|
||||
{
|
||||
"mistral system",
|
||||
[]template{
|
||||
{"no response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `},
|
||||
{"response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`},
|
||||
{"messages", `
|
||||
{{- range $index, $_ := .Messages }}
|
||||
{{- if eq .Role "user" }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}{{ "\n\n" }}
|
||||
{{- end }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }}
|
||||
{{- end }}
|
||||
{"no response", `[INST] {{ if .System }}{{ .System }}
|
||||
|
||||
{{ end }}{{ .Prompt }}[/INST] `},
|
||||
{"response", `[INST] {{ if .System }}{{ .System }}
|
||||
|
||||
{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`},
|
||||
{"messages", `[INST] {{ if .System }}{{ .System }}
|
||||
|
||||
{{ end }}
|
||||
{{- range .Messages }}
|
||||
{{- if eq .Role "user" }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }}[INST] {{ end }}
|
||||
{{- end }}`},
|
||||
},
|
||||
Values{
|
||||
@@ -204,9 +256,9 @@ func TestExecuteWithMessages(t *testing.T) {
|
||||
{Role: "user", Content: "What is your name?"},
|
||||
},
|
||||
},
|
||||
`[INST] Hello friend![/INST] Hello human![INST] You are a helpful assistant!
|
||||
`[INST] You are a helpful assistant!
|
||||
|
||||
What is your name?[/INST] `,
|
||||
Hello friend![/INST] Hello human![INST] What is your name?[/INST] `,
|
||||
},
|
||||
{
|
||||
"chatml",
|
||||
@@ -220,12 +272,9 @@ What is your name?[/INST] `,
|
||||
{{ .Response }}<|im_end|>
|
||||
`},
|
||||
{"messages", `
|
||||
{{- range $index, $_ := .Messages }}
|
||||
{{- if and (eq .Role "user") (eq (len (slice $.Messages $index)) 1) $.System }}<|im_start|>system
|
||||
{{ $.System }}<|im_end|>{{ "\n" }}
|
||||
{{- end }}<|im_start|>{{ .Role }}
|
||||
{{ .Content }}<|im_end|>{{ "\n" }}
|
||||
{{- end }}<|im_start|>assistant
|
||||
{{- range $index, $_ := .Messages }}<|im_start|>{{ .Role }}
|
||||
{{ .Content }}<|im_end|>
|
||||
{{ end }}<|im_start|>assistant
|
||||
`},
|
||||
},
|
||||
Values{
|
||||
@@ -236,12 +285,12 @@ What is your name?[/INST] `,
|
||||
{Role: "user", Content: "What is your name?"},
|
||||
},
|
||||
},
|
||||
`<|im_start|>user
|
||||
`<|im_start|>system
|
||||
You are a helpful assistant!<|im_end|>
|
||||
<|im_start|>user
|
||||
Hello friend!<|im_end|>
|
||||
<|im_start|>assistant
|
||||
Hello human!<|im_end|>
|
||||
<|im_start|>system
|
||||
You are a helpful assistant!<|im_end|>
|
||||
<|im_start|>user
|
||||
What is your name?<|im_end|>
|
||||
<|im_start|>assistant
|
||||
@@ -258,9 +307,11 @@ What is your name?<|im_end|>
|
||||
`},
|
||||
{"messages", `
|
||||
{{- range .Messages }}
|
||||
{{- if eq .Role "user" }}Question: {{ .Content }}{{ "\n\n" }}
|
||||
{{- else if eq .Role "assistant" }}Answer: {{ .Content }}{{ "\n\n" }}
|
||||
{{- end }}
|
||||
{{- if eq .Role "user" }}Question: {{ .Content }}
|
||||
|
||||
{{ else if eq .Role "assistant" }}Answer: {{ .Content }}
|
||||
|
||||
{{ end }}
|
||||
{{- end }}Answer: `},
|
||||
},
|
||||
Values{
|
||||
@@ -300,8 +351,8 @@ Answer: `,
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if b.String() != tt.expected {
|
||||
t.Errorf("expected\n%s,\ngot\n%s", tt.expected, b.String())
|
||||
if diff := cmp.Diff(b.String(), tt.expected); diff != "" {
|
||||
t.Errorf("mismatch (-got +want):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
You are a helpful assistant.### Instruction:
|
||||
You are a helpful assistant.
|
||||
|
||||
### Instruction:
|
||||
Hello, how are you?
|
||||
|
||||
### Response:
|
||||
|
||||
@@ -9,3 +9,4 @@ Source: system
|
||||
I'd like to show off how chat templating works! <step> Source: assistant
|
||||
Destination: user
|
||||
|
||||
|
||||
@@ -3,3 +3,4 @@ Source: user
|
||||
Hello, how are you? <step> Source: assistant
|
||||
Destination: user
|
||||
|
||||
|
||||
@@ -7,3 +7,4 @@ Source: user
|
||||
I'd like to show off how chat templating works! <step> Source: assistant
|
||||
Destination: user
|
||||
|
||||
|
||||
@@ -2,4 +2,6 @@
|
||||
You are a helpful assistant.
|
||||
<</SYS>>
|
||||
|
||||
Hello, how are you? [/INST] I'm doing great. How can I help you today?</s><s>[INST] I'd like to show off how chat templating works! [/INST]
|
||||
Hello, how are you? [/INST] I'm doing great. How can I help you today?</s><s>[INST] <<SYS>><</SYS>>
|
||||
|
||||
I'd like to show off how chat templating works! [/INST]
|
||||
@@ -1,3 +1,5 @@
|
||||
[INST] <<SYS>><</SYS>>
|
||||
|
||||
Hello, how are you? [/INST] I'm doing great. How can I help you today?</s><s>[INST] I'd like to show off how chat templating works! [/INST]
|
||||
Hello, how are you? [/INST] I'm doing great. How can I help you today?</s><s>[INST] <<SYS>><</SYS>>
|
||||
|
||||
I'd like to show off how chat templating works! [/INST]
|
||||
@@ -1,2 +1,3 @@
|
||||
[INST] Hello, how are you?[/INST] I'm doing great. How can I help you today?</s>[INST] You are a helpful assistant.
|
||||
I'd like to show off how chat templating works![/INST]
|
||||
[INST] You are a helpful assistant.
|
||||
|
||||
Hello, how are you?[/INST] I'm doing great. How can I help you today?</s>[INST] I'd like to show off how chat templating works![/INST]
|
||||
@@ -1 +1 @@
|
||||
GPT Correct System: You are a helpful assistant.<|end_of_turn|>GPT Correct User: Hello, how are you?<|end_of_turn|>GPT Correct Assistant: I'm doing great. How can I help you today?<|end_of_turn|>GPT Correct User: I'd like to show off how chat templating works!<|end_of_turn|>GPT Correct Assistant:
|
||||
GPT4 Correct System: You are a helpful assistant.<|end_of_turn|>GPT4 Correct User: Hello, how are you?<|end_of_turn|>GPT4 Correct Assistant: I'm doing great. How can I help you today?<|end_of_turn|>GPT4 Correct User: I'd like to show off how chat templating works!<|end_of_turn|>GPT4 Correct Assistant:
|
||||
2
template/testdata/openchat.gotmpl/user
vendored
2
template/testdata/openchat.gotmpl/user
vendored
@@ -1 +1 @@
|
||||
GPT Correct User: Hello, how are you?<|end_of_turn|>GPT Correct Assistant:
|
||||
GPT4 Correct User: Hello, how are you?<|end_of_turn|>GPT4 Correct Assistant:
|
||||
@@ -1 +1 @@
|
||||
GPT Correct User: Hello, how are you?<|end_of_turn|>GPT Correct Assistant: I'm doing great. How can I help you today?<|end_of_turn|>GPT Correct User: I'd like to show off how chat templating works!<|end_of_turn|>GPT Correct Assistant:
|
||||
GPT4 Correct User: Hello, how are you?<|end_of_turn|>GPT4 Correct Assistant: I'm doing great. How can I help you today?<|end_of_turn|>GPT4 Correct User: I'd like to show off how chat templating works!<|end_of_turn|>GPT4 Correct Assistant:
|
||||
@@ -1,14 +1,4 @@
|
||||
{{- if .Messages }}
|
||||
{{- if .System }}{{ .System }}
|
||||
|
||||
{{ end }}
|
||||
{{- range .Messages }}
|
||||
{{- if eq .Role "user" }}USER: {{ .Content }}
|
||||
{{ else if eq .Role "assistant" }}ASSISTANT: {{ .Content }}</s>
|
||||
{{ end }}
|
||||
{{- end }}ASSISTANT:
|
||||
{{- else }}
|
||||
{{ if .System }}{{ .System }}
|
||||
|
||||
{{ end }}{{ if .Prompt }}USER: {{ .Prompt }}
|
||||
{{ end }}ASSISTANT: {{ .Response }}
|
||||
{{- end }}
|
||||
{{ end }}ASSISTANT: {{ .Response }}</s>
|
||||
|
||||
@@ -1,15 +1,6 @@
|
||||
{{- if .Messages }}
|
||||
{{- if .System }}<|system|>
|
||||
{{ .System }}</s>
|
||||
{{ end }}
|
||||
{{- range .Messages }}<|{{ .Role }}|>
|
||||
{{ .Content }}</s>
|
||||
{{ end }}<|assistant|>
|
||||
{{ else }}
|
||||
{{ if .System }}<|system|>
|
||||
{{ .System }}</s>
|
||||
{{ end }}{{ if .Prompt }}<|user|>
|
||||
{{ .Prompt }}</s>
|
||||
{{ end }}<|assistant|>
|
||||
{{ .Response }}</s>
|
||||
{{- end }}
|
||||
Reference in New Issue
Block a user