mirror of https://github.com/ollama/ollama.git
synced 2026-02-06 05:34:21 -05:00

Compare commits: jyan/quant ... v0.3.9

20 commits:
a1cef4d0a5
c41f0b9e6c
142cbb722d
9468c6824a
11018196e0
56346ccfa3
8e4e509fa4
47c2b947a9
5eb77bf976
e4d0a9c325
7416ced70f
9cfd2dd3e3
8e6da3cbc5
d9d50c43cc
6c1c1ad6a9
413ae39f3c
60e47573a6
eae3af6807
3eb08377f8
386af6c1a0
1 .gitattributes (vendored)
```
@@ -1,4 +1,3 @@
llm/ext_server/* linguist-vendored
llm/*.h linguist-vendored
* text=auto
*.go text eol=lf
```
```yaml
@@ -32,6 +32,10 @@ linters:

linters-settings:
  gci:
    sections: [standard, default, localmodule]
  staticcheck:
    checks:
      - all
      - -SA1019 # omit Deprecated check

severity:
  default-severity: error
  rules:
```
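For reference, SA1019 is staticcheck's "use of a deprecated identifier" diagnostic; it is silenced here because the API deliberately keeps deprecated fields for backward compatibility (see the `api/types.go` changes below). A minimal sketch of the kind of code SA1019 would otherwise flag:

```go
package main

import "fmt"

// Deprecated: use NewGreeting instead.
func OldGreeting() string { return "hello" }

func NewGreeting() string { return "hello, world" }

func main() {
    // staticcheck reports SA1019 on this call because OldGreeting is
    // marked deprecated in its doc comment.
    fmt.Println(OldGreeting())
}
```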
@@ -337,6 +337,8 @@ See the [API documentation](./docs/api.md) for all endpoints.

- [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
- [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
- [Guix channel](https://codeberg.org/tusharhero/ollama-guix)
- [Nix package](https://search.nixos.org/packages?channel=24.05&show=ollama&from=0&size=50&sort=relevance&type=packages&query=ollama)
- [Flox](https://flox.dev/blog/ollama-part-one)

### Libraries
16 api/types.go
```go
@@ -296,15 +296,17 @@ type EmbeddingResponse struct {

// CreateRequest is the request passed to [Client.Create].
type CreateRequest struct {
    Model     string `json:"model"`
    Path      string `json:"path"`
    Modelfile string `json:"modelfile"`
    Stream    *bool  `json:"stream,omitempty"`
    Quantize  string `json:"quantize,omitempty"`

    // Name is deprecated, see Model
    // Deprecated: set the model name with Model instead
    Name string `json:"name"`

    // Quantization is deprecated, see Quantize
    // Deprecated: set the file content with Modelfile instead
    Path string `json:"path"`

    // Deprecated: use Quantize instead
    Quantization string `json:"quantization,omitempty"`
}

@@ -312,7 +314,7 @@
type DeleteRequest struct {
    Model string `json:"model"`

    // Name is deprecated, see Model
    // Deprecated: set the model name with Model instead
    Name string `json:"name"`
}

@@ -327,7 +329,7 @@ type ShowRequest struct {

    Options map[string]interface{} `json:"options"`

    // Name is deprecated, see Model
    // Deprecated: set the model name with Model instead
    Name string `json:"name"`
}

@@ -359,7 +361,7 @@ type PullRequest struct {
    Password string `json:"password"`
    Stream   *bool  `json:"stream,omitempty"`

    // Name is deprecated, see Model
    // Deprecated: set the model name with Model instead
    Name string `json:"name"`
}

@@ -380,7 +382,7 @@ type PushRequest struct {
    Password string `json:"password"`
    Stream   *bool  `json:"stream,omitempty"`

    // Name is deprecated, see Model
    // Deprecated: set the model name with Model instead
    Name string `json:"name"`
}
```
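For client code migrating off the deprecated fields, a minimal sketch of a create call using the current field names (assuming a running local server; the model name, Modelfile contents, and quantize level are illustrative):

```go
package main

import (
    "context"
    "log"

    "github.com/ollama/ollama/api"
)

func main() {
    client, err := api.ClientFromEnvironment()
    if err != nil {
        log.Fatal(err)
    }

    // Prefer Model, Modelfile and Quantize; Name, Path and Quantization
    // remain only for backward compatibility.
    req := &api.CreateRequest{
        Model:     "mymodel",
        Modelfile: "FROM llama3",
        Quantize:  "q4_K_M",
    }

    progress := func(resp api.ProgressResponse) error {
        log.Println(resp.Status)
        return nil
    }

    if err := client.Create(context.Background(), req, progress); err != nil {
        log.Fatal(err)
    }
}
```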
10 cmd/cmd.go
```go
@@ -124,7 +124,6 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
    }

    bars := make(map[string]*progress.Bar)
    var quantizeSpin *progress.Spinner
    fn := func(resp api.ProgressResponse) error {
        if resp.Digest != "" {
            spinner.Stop()

@@ -137,15 +136,6 @@
            }

            bar.Set(resp.Completed)
        } else if strings.Contains(resp.Status, "quantizing") {
            spinner.Stop()

            if quantizeSpin != nil {
                quantizeSpin.SetMessage(resp.Status)
            } else {
                quantizeSpin = progress.NewSpinner(resp.Status)
                p.Add("quantize", quantizeSpin)
            }
        } else if status != resp.Status {
            spinner.Stop()
```
```go
@@ -89,7 +89,7 @@ func TestMain(m *testing.M) {
    os.Exit(m.Run())
}

func TestConvertFull(t *testing.T) {
func TestConvertModel(t *testing.T) {
    cases := []string{
        "Meta-Llama-3-8B-Instruct",
        "Meta-Llama-3.1-8B-Instruct",

@@ -140,6 +140,107 @@ func TestConvertFull(t *testing.T) {
    }
}

func TestConvertInvalidDatatype(t *testing.T) {
    f, err := os.CreateTemp(t.TempDir(), "testmodel")
    if err != nil {
        t.Fatal(err)
    }
    defer f.Close()

    tempDir := t.TempDir()
    generateSafetensorTestData(t, tempDir)

    err = ConvertModel(os.DirFS(tempDir), f)
    if err == nil || err.Error() != "unsupported safetensors model" {
        t.Errorf("expected error but didn't get one")
    }
}

func generateSafetensorTestData(t *testing.T, tempDir string) {
    type tensorData struct {
        Offsets []int  `json:"data_offsets"`
        Type    string `json:"dtype"`
        Shape   []int  `json:"shape"`
    }
    offset := 4096 * 14336

    td := map[string]*tensorData{}
    td["model.layers.0.mlp.down_proj.weight"] = &tensorData{
        Offsets: []int{0, offset},
        Type:    "I8",
        Shape:   []int{4096, 14336},
    }
    td["model.layers.0.mlp.down_proj.weight_format"] = &tensorData{
        Offsets: []int{offset, offset},
        Type:    "U8",
        Shape:   []int{},
    }

    data, err := json.Marshal(td)
    if err != nil {
        t.Fatal(err)
    }

    var buf bytes.Buffer

    l := int64(len(data))
    err = binary.Write(&buf, binary.LittleEndian, l)
    if err != nil {
        t.Fatal(err)
    }

    _, err = buf.Write(data)
    if err != nil {
        t.Fatal(err)
    }

    fdata, err := os.Create(filepath.Join(tempDir, "model-00001-of-00001.safetensors"))
    if err != nil {
        t.Fatal(err)
    }
    defer fdata.Close()

    _, err = fdata.Write(buf.Bytes())
    if err != nil {
        t.Fatal(err)
    }

    configData := `
{
  "architectures": [
    "LlamaForCausalLM"
  ]
}
`

    f, err := os.Create(filepath.Join(tempDir, "config.json"))
    if err != nil {
        t.Fatal(err)
    }
    defer f.Close()

    _, err = f.WriteString(configData)
    if err != nil {
        t.Fatal(err)
    }

    tokenizerData := `
{
}
`

    f, err = os.Create(filepath.Join(tempDir, "tokenizer.json"))
    if err != nil {
        t.Fatal(err)
    }
    defer f.Close()

    _, err = f.WriteString(tokenizerData)
    if err != nil {
        t.Fatal(err)
    }
}

func TestConvertAdapter(t *testing.T) {
    type AdapterCase struct {
        Name string
```
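For context on what `generateSafetensorTestData` builds: a safetensors file begins with an 8-byte little-endian header length, followed by that many bytes of JSON metadata, then the raw tensor data. A sketch of reading the header back (a hypothetical helper, not part of the diff):

```go
package main

import (
    "encoding/binary"
    "encoding/json"
    "fmt"
    "io"
    "os"
)

// readHeader decodes the JSON header of a safetensors file:
// an 8-byte little-endian length n, then n bytes of JSON.
func readHeader(r io.Reader) (map[string]json.RawMessage, error) {
    var n uint64
    if err := binary.Read(r, binary.LittleEndian, &n); err != nil {
        return nil, err
    }
    raw := make([]byte, n)
    if _, err := io.ReadFull(r, raw); err != nil {
        return nil, err
    }
    var header map[string]json.RawMessage
    if err := json.Unmarshal(raw, &header); err != nil {
        return nil, err
    }
    return header, nil
}

func main() {
    f, err := os.Open("model-00001-of-00001.safetensors")
    if err != nil {
        panic(err)
    }
    defer f.Close()

    header, err := readHeader(f)
    if err != nil {
        panic(err)
    }
    for name := range header {
        fmt.Println(name) // tensor names, e.g. model.layers.0.mlp.down_proj.weight
    }
}
```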
```go
@@ -4,6 +4,7 @@ import (
    "bytes"
    "encoding/binary"
    "encoding/json"
    "errors"
    "fmt"
    "io"
    "io/fs"

@@ -50,6 +51,10 @@ func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]T

    for _, key := range keys {
        if value := headers[key]; value.Type != "" {
            // bitsandbytes quantized models are unsupported
            if len(value.Shape) == 0 {
                return nil, errors.New("unsupported safetensors model")
            }
            ts = append(ts, safetensor{
                fs:   fsys,
                path: p,
```
```go
@@ -100,8 +100,21 @@ func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error)
    }

    if template, ok := p["chat_template"]; ok {
        if err := json.Unmarshal(template, &t.Template); err != nil {
            return nil, err
        var s []struct {
            Name     string `json:"name"`
            Template string `json:"template"`
        }
        if err := json.Unmarshal(template, &t.Template); err == nil {
            // noop
        } else if err := json.Unmarshal(template, &s); err == nil {
            for _, e := range s {
                if e.Name == "default" {
                    t.Template = e.Template
                    break
                }
            }
        } else {
            return nil, fmt.Errorf("invalid chat_template: %w", err)
        }
    }

@@ -141,7 +154,6 @@ func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error)
}

type tokenizer struct {
    Version     string  `json:"version"`
    AddedTokens []token `json:"added_tokens"`
    Model struct {
        Type string `json:"type"`

@@ -239,7 +251,7 @@ func parseVocabulary(fsys fs.FS) (*Vocabulary, error) {
        return pattern.Func(fsys)
    }

    return nil, errors.New("unknown tensor format")
    return nil, errors.New("unknown tokenizer format")
}

type SpecialVocabulary struct {
```
208 convert/tokenizer_test.go (new file)
```go
@@ -0,0 +1,208 @@
package convert

import (
    "io"
    "io/fs"
    "os"
    "path/filepath"
    "strings"
    "testing"

    "github.com/google/go-cmp/cmp"
)

func createTokenizerFS(t *testing.T, dir string, files map[string]io.Reader) fs.FS {
    t.Helper()

    for k, v := range files {
        if err := func() error {
            f, err := os.Create(filepath.Join(dir, k))
            if err != nil {
                return err
            }
            defer f.Close()

            if _, err := io.Copy(f, v); err != nil {
                return err
            }

            return nil
        }(); err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
    }

    return os.DirFS(dir)
}

func TestParseTokenizer(t *testing.T) {
    cases := []struct {
        name              string
        fsys              fs.FS
        specialTokenTypes []string
        want              *Tokenizer
    }{
        {
            name: "string chat template",
            fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
                "tokenizer.json": strings.NewReader(`{}`),
                "tokenizer_config.json": strings.NewReader(`{
                    "chat_template": "<default template>"
                }`),
            }),
            want: &Tokenizer{
                Vocabulary: &Vocabulary{Model: "gpt2"},
                Pre:        "default",
                Template:   "<default template>",
            },
        },
        {
            name: "list chat template",
            fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
                "tokenizer.json": strings.NewReader(`{}`),
                "tokenizer_config.json": strings.NewReader(`{
                    "chat_template": [
                        {
                            "name": "default",
                            "template": "<default template>"
                        },
                        {
                            "name": "tools",
                            "template": "<tools template>"
                        }
                    ]
                }`),
            }),
            want: &Tokenizer{
                Vocabulary: &Vocabulary{Model: "gpt2"},
                Pre:        "default",
                Template:   "<default template>",
            },
        },
        {
            name: "added tokens",
            fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
                "tokenizer.json": strings.NewReader(`{
                    "added_tokens": [
                        {
                            "id": 999,
                            "content": "<unused999>",
                            "special": false
                        }
                    ]
                }`),
            }),
            want: &Tokenizer{
                Vocabulary: &Vocabulary{
                    Model:  "gpt2",
                    Tokens: []string{"<unused999>"},
                    Scores: []float32{999},
                    Types:  []int32{4},
                },
                Pre: "default",
            },
        },
        {
            name: "added tokens overlap vocab",
            fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
                "tokenizer.json": strings.NewReader(`{
                    "added_tokens": [
                        {
                            "id": 0,
                            "content": "<pad>",
                            "special": true
                        }
                    ],
                    "model": {
                        "vocab": {
                            "<pad>": 0
                        }
                    }
                }`),
            }),
            want: &Tokenizer{
                Vocabulary: &Vocabulary{
                    Model:  "gpt2",
                    Tokens: []string{"<pad>"},
                    Scores: []float32{0},
                    Types:  []int32{3},
                },
                Pre: "default",
            },
        },
        {
            name: "special token types",
            fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
                "tokenizer.json": strings.NewReader(`{
                    "added_tokens": [
                        {
                            "id": 0,
                            "content": "<pad>",
                            "special": true
                        },
                        {
                            "id": 1,
                            "content": "<eos>",
                            "special": true
                        },
                        {
                            "id": 2,
                            "content": "<bos>",
                            "special": true
                        },
                        {
                            "id": 3,
                            "content": "<unk>",
                            "special": true
                        }
                    ],
                    "model": {
                        "vocab": {
                            "<pad>": 0,
                            "<eos>": 1,
                            "<bos>": 2,
                            "<unk>": 3
                        }
                    }
                }`),
                "tokenizer_config.json": strings.NewReader(`{
                    "add_bos_token": true,
                    "add_eos_token": false,
                    "bos_token": "<bos>",
                    "eos_token": "<eos>",
                    "pad_token": "<pad>",
                    "unk_token": "<unk>"
                }`),
            }),
            specialTokenTypes: []string{"pad", "eos", "bos", "unk"},
            want: &Tokenizer{
                Vocabulary: &Vocabulary{
                    Model:  "gpt2",
                    Tokens: []string{"<pad>", "<eos>", "<bos>", "<unk>"},
                    Scores: []float32{0, 1, 2, 3},
                    Types:  []int32{3, 3, 3, 3},
                },
                SpecialVocabulary: []*SpecialVocabulary{
                    {Type: "pad", Content: "<pad>", ID: 0, AddToken: false},
                    {Type: "eos", Content: "<eos>", ID: 1, AddToken: false},
                    {Type: "bos", Content: "<bos>", ID: 2, AddToken: true},
                    {Type: "unk", Content: "<unk>", ID: 3, AddToken: false},
                },
                Pre: "default",
            },
        },
    }

    for _, tt := range cases {
        t.Run(tt.name, func(t *testing.T) {
            tokenizer, err := parseTokenizer(tt.fsys, tt.specialTokenTypes)
            if err != nil {
                t.Fatalf("unexpected error: %v", err)
            }

            if diff := cmp.Diff(tt.want, tokenizer); diff != "" {
                t.Errorf("unexpected tokenizer (-want +got):\n%s", diff)
            }
        })
    }
}
```
@@ -300,3 +300,28 @@ curl http://localhost:11434/v1/chat/completions \
  ]
}'
```

### Setting the context size

The OpenAI API does not have a way of setting the context size for a model. If you need to change the context size, create a `Modelfile` that looks like this:

```modelfile
FROM <some model>
PARAMETER num_ctx <context size>
```

Use the `ollama create mymodel -f Modelfile` command to create a new model with the updated context size, then call the API with the updated model name:

```shell
curl http://localhost:11434/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "mymodel",
        "messages": [
            {
                "role": "user",
                "content": "Hello!"
            }
        ]
    }'
```
```go
@@ -30,9 +30,7 @@ func Host() *url.URL {
        defaultPort = "443"
    }

    // trim trailing slashes
    hostport = strings.TrimRight(hostport, "/")

    hostport, path, _ := strings.Cut(hostport, "/")
    host, port, err := net.SplitHostPort(hostport)
    if err != nil {
        host, port = "127.0.0.1", defaultPort

@@ -45,15 +43,13 @@ func Host() *url.URL {

    if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
        slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
        return &url.URL{
            Scheme: scheme,
            Host:   net.JoinHostPort(host, defaultPort),
        }
        port = defaultPort
    }

    return &url.URL{
        Scheme: scheme,
        Host:   net.JoinHostPort(host, port),
        Path:   path,
    }
}
```
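The practical effect of the `strings.Cut` change is that a path suffix on `OLLAMA_HOST` now survives into the returned URL, which the new "proxy path" test case below exercises. A sketch of the behavior (assuming this package's `Host` function as patched):

```go
package main

import (
    "fmt"
    "os"

    "github.com/ollama/ollama/envconfig"
)

func main() {
    // A reverse-proxied deployment that serves ollama under a subpath.
    os.Setenv("OLLAMA_HOST", "https://example.com/ollama")

    u := envconfig.Host()
    fmt.Println(u.String()) // https://example.com:443/ollama
}
```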
```go
@@ -13,34 +13,35 @@ func TestHost(t *testing.T) {
        value  string
        expect string
    }{
        "empty":               {"", "127.0.0.1:11434"},
        "only address":        {"1.2.3.4", "1.2.3.4:11434"},
        "only port":           {":1234", ":1234"},
        "address and port":    {"1.2.3.4:1234", "1.2.3.4:1234"},
        "hostname":            {"example.com", "example.com:11434"},
        "hostname and port":   {"example.com:1234", "example.com:1234"},
        "zero port":           {":0", ":0"},
        "too large port":      {":66000", ":11434"},
        "too small port":      {":-1", ":11434"},
        "ipv6 localhost":      {"[::1]", "[::1]:11434"},
        "ipv6 world open":     {"[::]", "[::]:11434"},
        "ipv6 no brackets":    {"::1", "[::1]:11434"},
        "ipv6 + port":         {"[::1]:1337", "[::1]:1337"},
        "extra space":         {" 1.2.3.4 ", "1.2.3.4:11434"},
        "extra quotes":        {"\"1.2.3.4\"", "1.2.3.4:11434"},
        "extra space+quotes":  {" \" 1.2.3.4 \" ", "1.2.3.4:11434"},
        "extra single quotes": {"'1.2.3.4'", "1.2.3.4:11434"},
        "http":                {"http://1.2.3.4", "1.2.3.4:80"},
        "http port":           {"http://1.2.3.4:4321", "1.2.3.4:4321"},
        "https":               {"https://1.2.3.4", "1.2.3.4:443"},
        "https port":          {"https://1.2.3.4:4321", "1.2.3.4:4321"},
        "empty":               {"", "http://127.0.0.1:11434"},
        "only address":        {"1.2.3.4", "http://1.2.3.4:11434"},
        "only port":           {":1234", "http://:1234"},
        "address and port":    {"1.2.3.4:1234", "http://1.2.3.4:1234"},
        "hostname":            {"example.com", "http://example.com:11434"},
        "hostname and port":   {"example.com:1234", "http://example.com:1234"},
        "zero port":           {":0", "http://:0"},
        "too large port":      {":66000", "http://:11434"},
        "too small port":      {":-1", "http://:11434"},
        "ipv6 localhost":      {"[::1]", "http://[::1]:11434"},
        "ipv6 world open":     {"[::]", "http://[::]:11434"},
        "ipv6 no brackets":    {"::1", "http://[::1]:11434"},
        "ipv6 + port":         {"[::1]:1337", "http://[::1]:1337"},
        "extra space":         {" 1.2.3.4 ", "http://1.2.3.4:11434"},
        "extra quotes":        {"\"1.2.3.4\"", "http://1.2.3.4:11434"},
        "extra space+quotes":  {" \" 1.2.3.4 \" ", "http://1.2.3.4:11434"},
        "extra single quotes": {"'1.2.3.4'", "http://1.2.3.4:11434"},
        "http":                {"http://1.2.3.4", "http://1.2.3.4:80"},
        "http port":           {"http://1.2.3.4:4321", "http://1.2.3.4:4321"},
        "https":               {"https://1.2.3.4", "https://1.2.3.4:443"},
        "https port":          {"https://1.2.3.4:4321", "https://1.2.3.4:4321"},
        "proxy path":          {"https://example.com/ollama", "https://example.com:443/ollama"},
    }

    for name, tt := range cases {
        t.Run(name, func(t *testing.T) {
            t.Setenv("OLLAMA_HOST", tt.value)
            if host := Host(); host.Host != tt.expect {
                t.Errorf("%s: expected %s, got %s", name, tt.expect, host.Host)
            if host := Host(); host.String() != tt.expect {
                t.Errorf("%s: expected %s, got %s", name, tt.expect, host.String())
            }
        })
    }
```
```sh
@@ -87,6 +87,8 @@ apply_patches() {
build() {
    cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}
    cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
    # remove unnecessary build artifacts
    rm -f ${BUILD_DIR}/bin/ggml-common.h ${BUILD_DIR}/bin/ggml-metal.metal
}

compress() {
```
1227 llm/llama.h (diff suppressed because it is too large)
50 llm/llm.go
```go
@@ -1,6 +1,6 @@
package llm

// #cgo CPPFLAGS: -Illama.cpp/ggml/include
// #cgo CFLAGS: -Illama.cpp -Illama.cpp/include -Illama.cpp/ggml/include
// #cgo LDFLAGS: -lllama -lggml -lstdc++ -lpthread
// #cgo darwin,arm64 LDFLAGS: -L${SRCDIR}/build/darwin/arm64_static -L${SRCDIR}/build/darwin/arm64_static/src -L${SRCDIR}/build/darwin/arm64_static/ggml/src -framework Accelerate -framework Metal
// #cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/build/darwin/x86_64_static -L${SRCDIR}/build/darwin/x86_64_static/src -L${SRCDIR}/build/darwin/x86_64_static/ggml/src

@@ -9,24 +9,12 @@ package llm
// #cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/linux/x86_64_static -L${SRCDIR}/build/linux/x86_64_static/src -L${SRCDIR}/build/linux/x86_64_static/ggml/src
// #cgo linux,arm64 LDFLAGS: -L${SRCDIR}/build/linux/arm64_static -L${SRCDIR}/build/linux/arm64_static/src -L${SRCDIR}/build/linux/arm64_static/ggml/src
// #include <stdlib.h>
// #include <stdatomic.h>
// #include "llama.h"
// bool update_quantize_progress(float progress, void* data) {
//     atomic_int* atomicData = (atomic_int*)data;
//     int intProgress = *((int*)&progress);
//     atomic_store(atomicData, intProgress);
//     return true;
// }
import "C"

import (
    "errors"
    "fmt"
    "sync/atomic"
    "time"
    "unsafe"

    "github.com/ollama/ollama/api"
)

// SystemInfo is an unused example of calling llama.cpp functions using CGo

@@ -34,49 +22,17 @@ func SystemInfo() string {
    return C.GoString(C.llama_print_system_info())
}

func Quantize(infile, outfile string, ftype fileType, fn func(resp api.ProgressResponse), tensorCount int) error {
func Quantize(infile, outfile string, ftype fileType) error {
    cinfile := C.CString(infile)
    defer C.free(unsafe.Pointer(cinfile))

    coutfile := C.CString(outfile)
    defer C.free(unsafe.Pointer(coutfile))

    params := C.llama_model_quantize_default_params()
    params.nthread = -1
    params.ftype = ftype.Value()

    // Initialize "global" to store progress
    store := (*int32)(C.malloc(C.sizeof_int))
    defer C.free(unsafe.Pointer(store))

    // Initialize store value, e.g., setting initial progress to 0
    atomic.StoreInt32(store, 0)

    params.quantize_callback_data = unsafe.Pointer(store)
    params.quantize_callback = (C.llama_progress_callback)(C.update_quantize_progress)

    ticker := time.NewTicker(30 * time.Millisecond)
    done := make(chan struct{})
    defer close(done)

    go func() {
        defer ticker.Stop()
        for {
            select {
            case <-ticker.C:
                progressInt := atomic.LoadInt32(store)
                progress := *(*float32)(unsafe.Pointer(&progressInt))
                fn(api.ProgressResponse{
                    Status: fmt.Sprintf("quantizing model %d%%", 100*int(progress)/tensorCount),
                })
            case <-done:
                fn(api.ProgressResponse{
                    Status: fmt.Sprintf("quantizing model 100%%"),
                })
                return
            }
        }
    }()

    if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 {
        return errors.New("failed to quantize model. This model architecture may not be supported, or you may need to upgrade Ollama to the latest version")
    }
```
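An aside on the removed plumbing: it smuggled a float32 through an atomic int32 by reinterpreting the bits with `unsafe.Pointer`. In pure Go the same round trip is usually written with `math.Float32bits`; a small sketch:

```go
package main

import (
    "fmt"
    "math"
    "sync/atomic"
)

func main() {
    var store atomic.Uint32

    // Writer side: publish the raw bits of a float32 progress value.
    store.Store(math.Float32bits(0.42))

    // Reader side: recover the float32 from the stored bits.
    progress := math.Float32frombits(store.Load())
    fmt.Println(progress) // 0.42
}
```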
```diff
@@ -1,52 +0,0 @@
From ed941590d59fc07b1ad21d6aa458588e47d1e446 Mon Sep 17 00:00:00 2001
From: Josh Yan <jyan00017@gmail.com>
Date: Wed, 10 Jul 2024 13:39:39 -0700
Subject: [PATCH] quantize progress

---
 include/llama.h | 3 +++
 src/llama.cpp   | 8 ++++++++
 2 files changed, 11 insertions(+)

diff --git a/include/llama.h b/include/llama.h
index bb4b05ba..613db68e 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -349,6 +349,9 @@ extern "C" {
         bool keep_split;     // quantize to the same number of shards
         void * imatrix;      // pointer to importance matrix data
         void * kv_overrides; // pointer to vector containing overrides
+
+        llama_progress_callback quantize_callback; // callback to report quantization progress
+        void * quantize_callback_data;             // user data for the callback
     } llama_model_quantize_params;

     // grammar types
diff --git a/src/llama.cpp b/src/llama.cpp
index 2b9ace28..ac640c02 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -18252,6 +18252,12 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     const auto tn = LLM_TN(model.arch);
     new_ofstream(0);
     for (int i = 0; i < ml.n_tensors; ++i) {
+        if (params->quantize_callback){
+            if (!params->quantize_callback(i, params->quantize_callback_data)) {
+                return;
+            }
+        }
+
         auto weight = ml.get_weight(i);
         struct ggml_tensor * tensor = weight->tensor;
         if (weight->idx != cur_split && params->keep_split) {
@@ -18789,6 +18795,8 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
         /*.keep_split   =*/ false,
         /*.imatrix      =*/ nullptr,
         /*.kv_overrides =*/ nullptr,
+        /*.quantize_callback      =*/ nullptr,
+        /*.quantize_callback_data =*/ nullptr,
     };

     return result;
--
2.39.3 (Apple Git-146)
```
```sh
@@ -30,7 +30,7 @@ if grep -i "centos" /etc/system-release >/dev/null; then
        dnf install -y rh-git227-git
        ln -s /opt/rh/rh-git227/root/usr/bin/git /usr/local/bin/git
    fi
    dnf install -y devtoolset-10-gcc devtoolset-10-gcc-c++ pigz
    dnf install -y devtoolset-10-gcc devtoolset-10-gcc-c++ pigz findutils
elif grep -i "rocky" /etc/system-release >/dev/null; then
    # Temporary workaround until rocky 8 AppStream ships GCC 10.4 (10.3 is incompatible with NVCC)
    cat << EOF > /etc/yum.repos.d/Rocky-Vault.repo

@@ -45,6 +45,7 @@ EOF
    dnf install -y git \
        gcc-toolset-10-gcc-10.2.1-8.2.el8 \
        gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 \
        findutils \
        pigz
else
    echo "ERROR Unexpected distro"
```
```go
@@ -435,14 +435,11 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
            return err
        }

        tensorCount := len(baseLayer.GGML.Tensors().Items)
        ft := baseLayer.GGML.KV().FileType()
        if !slices.Contains([]string{"F16", "F32"}, ft.String()) {
            return errors.New("quantization is only supported for F16 and F32 models")
        } else if want != ft {
            fn(api.ProgressResponse{
                Status: "quantizing model tensors",
            })
            fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", ft, quantization)})

            blob, err := GetBlobsPath(baseLayer.Digest)
            if err != nil {

@@ -456,7 +453,7 @@
            defer temp.Close()
            defer os.Remove(temp.Name())

            if err := llm.Quantize(blob, temp.Name(), want, fn, tensorCount); err != nil {
            if err := llm.Quantize(blob, temp.Name(), want); err != nil {
                return err
            }
```
```go
@@ -139,6 +139,7 @@ The temperature in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.`,

func TestParseFromFileFromLayer(t *testing.T) {
    tempModels := t.TempDir()
    t.Setenv("OLLAMA_MODELS", tempModels)

    file, err := os.CreateTemp(tempModels, "")
    if err != nil {

@@ -189,6 +190,7 @@ func TestParseFromFileFromLayer(t *testing.T) {

func TestParseLayerFromCopy(t *testing.T) {
    tempModels := t.TempDir()
    t.Setenv("OLLAMA_MODELS", tempModels)

    file2, err := os.CreateTemp(tempModels, "")
    if err != nil {
```
```go
@@ -73,18 +73,6 @@ func ParseModelPath(name string) ModelPath {

var errModelPathInvalid = errors.New("invalid model path")

func (mp ModelPath) Validate() error {
    if mp.Repository == "" {
        return fmt.Errorf("%w: model repository name is required", errModelPathInvalid)
    }

    if strings.Contains(mp.Tag, ":") {
        return fmt.Errorf("%w: ':' (colon) is not allowed in tag names", errModelPathInvalid)
    }

    return nil
}

func (mp ModelPath) GetNamespaceRepository() string {
    return fmt.Sprintf("%s/%s", mp.Namespace, mp.Repository)
}

@@ -105,7 +93,11 @@ func (mp ModelPath) GetShortTagname() string {

// GetManifestPath returns the path to the manifest file for the given model path, it is up to the caller to create the directory if it does not exist.
func (mp ModelPath) GetManifestPath() (string, error) {
    return filepath.Join(envconfig.Models(), "manifests", mp.Registry, mp.Namespace, mp.Repository, mp.Tag), nil
    if p := filepath.Join(mp.Registry, mp.Namespace, mp.Repository, mp.Tag); filepath.IsLocal(p) {
        return filepath.Join(envconfig.Models(), "manifests", p), nil
    }

    return "", errModelPathInvalid
}

func (mp ModelPath) BaseURL() *url.URL {
```
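The fix hinges on `filepath.IsLocal` (Go 1.20+), which reports whether a path stays inside the directory it is joined to; anything absolute or escaping via `..` is rejected before it reaches the models directory. A quick sketch of the behavior:

```go
package main

import (
    "fmt"
    "path/filepath"
)

func main() {
    fmt.Println(filepath.IsLocal("registry/namespace/repo/tag")) // true
    fmt.Println(filepath.IsLocal("../../../etc/passwd"))         // false: escapes the root
    fmt.Println(filepath.IsLocal("/etc/passwd"))                 // false: absolute
}
```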
```go
@@ -1,6 +1,7 @@
package server

import (
    "errors"
    "os"
    "path/filepath"
    "testing"

@@ -154,3 +155,10 @@ func TestParseModelPath(t *testing.T) {
        })
    }
}

func TestInsecureModelpath(t *testing.T) {
    mp := ParseModelPath("../../..:something")
    if _, err := mp.GetManifestPath(); !errors.Is(err, errModelPathInvalid) {
        t.Errorf("expected error: %v", err)
    }
}
```
```go
@@ -593,9 +593,9 @@ func TestCreateDetectTemplate(t *testing.T) {

        checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
            filepath.Join(p, "blobs", "sha256-0d79f567714c62c048378f2107fb332dabee0135d080c302d884317da9433cc5"),
            filepath.Join(p, "blobs", "sha256-35360843d0c84fb1506952a131bbef13cd2bb4a541251f22535170c05b56e672"),
            filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"),
            filepath.Join(p, "blobs", "sha256-c608dc615584cd20d9d830363dabf8a4783ae5d34245c3d8c115edb3bc7b28e4"),
            filepath.Join(p, "blobs", "sha256-ea34c57ba5b78b740aafe2aeb74dc6507fc3ad14170b64c26a04fb9e36c88d75"),
            filepath.Join(p, "blobs", "sha256-de3959f841e9ef6b4b6255fa41cb9e0a45da89c3066aa72bdd07a4747f848990"),
        })
    })
```
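The remaining hunks migrate the built-in prompt templates from the legacy single-turn `.Prompt`/`.Response` form to ranging over `.Messages`, folding consecutive system messages into one block where needed. A minimal sketch of how such a template renders (a toy chatml-style template, not one of the shipped files):

```go
package main

import (
    "os"
    "text/template"
)

type Message struct {
    Role    string
    Content string
}

func main() {
    // Toy Messages-style template in the chatml shape.
    tmpl := template.Must(template.New("chat").Parse(
        `{{- range .Messages }}<|im_start|>{{ .Role }}
{{ .Content }}<|im_end|>
{{ end }}<|im_start|>assistant
`))

    _ = tmpl.Execute(os.Stdout, map[string]any{
        "Messages": []Message{
            {Role: "system", Content: "You are a helpful assistant."},
            {Role: "user", Content: "Hello!"},
        },
    })
}
```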
```
@@ -1 +1,2 @@
{{ if .System }}<start_system>{{ .System }}<end_message>{{ end }}{{ if .Prompt }}<start_user>{{ .Prompt }}<end_message>{{ end }}<start_assistant>{{ .Response }}<end_message>
{{- range .Messages }}<start_{{ .Role }}>{{ .Content }}<end_message>
{{- end }}<start_assistant>

@@ -1,8 +1,18 @@
{{ if .System }}{{ .System }}
{{- $system := "" }}
{{- range .Messages }}
{{- if eq .Role "system" }}
{{- if not $system }}{{ $system = .Content }}
{{- else }}{{ $system = printf "%s\n\n%s" $system .Content }}
{{- end }}
{{- else if eq .Role "user" }}
{{- if $system }}{{ $system }}

{{ end }}{{ if .Prompt }}### Instruction:
{{ .Prompt }}
{{ $system = "" }}
{{- end }}### Instruction:
{{ .Content }}

{{ end }}### Response:
{{ .Response }}
{{ else if eq .Role "assistant" }}### Response:
{{ .Content }}

{{ end }}
{{- end }}### Response:

@@ -1,6 +1,3 @@
{{ if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{- range .Messages }}<|im_start|>{{ .Role }}
{{ .Content }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ .Response }}<|im_end|>

@@ -1,6 +1,7 @@
{{ if .System }}System: {{ .System }}

{{ end }}{{ if .Prompt }}User: {{ .Prompt }}

{{ end }}Assistant: {{ .Response }}
{{- range .Messages }}
{{- if eq .Role "system" }}System:
{{- else if eq .Role "user" }}User:
{{- else if eq .Role "assistant" }}Assistant:
{{- end }} {{ .Content }}

{{ end }}Assistant:

@@ -1,10 +1,10 @@
{{ if .System }}Source: system

{{ .System }} <step> {{ end }}Source: user

{{ .Prompt }} <step> Source: assistant
{{- if not .Response }}
Destination: user
{{- range .Messages }}Source:
{{- if eq .Role "system" }} system
{{- else if eq .Role "user" }} user
{{- else if eq .Role "assistant" }} assistant
{{- end }}

{{ .Response }} <step>
{{ .Content }} <step> {{ end }}Source: assistant
Destination: user

@@ -1,5 +1,8 @@
{{ if .System }}System: {{ .System }}
{{ end }}{{ if .Prompt }}User:
{{ .Prompt }}
{{- range .Messages }}
{{- if eq .Role "system" }}System: {{ .Content }}
{{ continue }}
{{- else if eq .Role "user" }}User:
{{- else if eq .Role "assistant" }}Falcon:
{{- end }}
{{ .Content }}
{{ end }}Falcon:
{{ .Response }}

@@ -1,5 +1,16 @@
<start_of_turn>user
{{ if .System }}{{ .System }}
{{ end }}{{ .Prompt }}<end_of_turn>
<start_of_turn>model
{{ .Response }}<end_of_turn>
{{- $system := "" }}
{{- range .Messages }}
{{- if eq .Role "system" }}
{{- if not $system }}{{ $system = .Content }}
{{- else }}{{ $system = printf "%s\n\n%s" $system .Content }}
{{- end }}
{{- continue }}
{{- else if eq .Role "user" }}<start_of_turn>user
{{- if $system }}
{{ $system }}
{{- $system = "" }}
{{- end }}
{{- else if eq .Role "assistant" }}<start_of_turn>model
{{- end }}
{{ .Content }}<end_of_turn>
{{ end }}<start_of_turn>model

@@ -1,9 +1,8 @@
{{ if .System }}System:
{{ .System }}

{{ end }}{{ if .Prompt }}Question:
{{ .Prompt }}
{{- range .Messages }}
{{- if eq .Role "system" }}System:
{{- else if eq .Role "user" }}Question:
{{- else if eq .Role "assistant" }}Answer:
{{- end }}
{{ .Content }}

{{ end }}Answer:
{{ .Response }}

@@ -91,6 +91,10 @@
"template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
"name": "llama3-instruct"
},
{
"template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n    {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n    {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n    {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content']|trim %}\n    {%- set messages = messages[1:] %}\n{%- else %}\n    {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n    {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n    {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n    {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\\n\\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\\n\\n\" }}\n    {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n    {#- Extract the first user message so we can plug it in here #}\n    {%- if messages | length != 0 %}\n        {%- set first_user_message = messages[0]['content']|trim %}\n        {%- set messages = messages[1:] %}\n    {%- else %}\n        {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n    {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n    {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n    {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\\n\\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\\n\\n\" }}\n    {%- endfor %}\n    {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n    {%- elif 'tool_calls' in message %}\n        {%- if not message.tool_calls|length == 1 %}\n            {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n        {%- endif %}\n        {%- set tool_call = message.tool_calls[0].function %}\n        {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n            {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n            {%- for arg_name, arg_val in tool_call.arguments | items %}\n                {{- arg_name + '=\"' + arg_val + '\"' }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- endif %}\n            {%- endfor %}\n            {{- \")\" }}\n        {%- else %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n            {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n            {{- '\"parameters\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- \"}\" }}\n        {%- endif %}\n        {%- if builtin_tools is defined %}\n            {#- This means we're in ipython mode #}\n            {{- \"<|eom_id|>\" }}\n        {%- else %}\n            {{- \"<|eot_id|>\" }}\n        {%- endif %}\n    {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n        {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n        {%- if message.content is mapping or message.content is iterable %}\n            {{- message.content | tojson }}\n        {%- else %}\n            {{- message.content }}\n        {%- endif %}\n        {{- \"<|eot_id|>\" }}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
"name": "llama3-instruct"
},
{
"template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ 'Question:\n' + message['content'] + '\n\n' }}{% elif message['role'] == 'system' %}\n{{ 'System:\n' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Answer:\n' + message['content'] + '\n\n' }}{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ 'Answer:\n' }}{% endif %}{% endfor %}",
"name": "granite-instruct"

@@ -1,6 +1,14 @@
[INST] <<SYS>>
{{- if .System }}
{{ .System }}
{{ end }}<</SYS>>
{{- $system := "" }}[INST] {{ range .Messages }}
{{- if eq .Role "system" }}
{{- if not $system }}{{ $system = .Content }}
{{- else }}{{ $system = printf "%s\n\n%s" $system .Content }}
{{- end }}
{{- else if eq .Role "user" }}<<SYS>>
{{- if $system }}
{{ $system }}
{{ $system = "" }}
{{- end }}<</SYS>>

{{ .Prompt }} [/INST] {{ .Response }}</s><s>
{{ .Content }} [/INST]
{{- else if eq .Role "assistant" }} {{ .Content }}</s><s>[INST] {{ end }}
{{- end }}

@@ -1,7 +1,5 @@
{{ if .System }}<|start_header_id|>system<|end_header_id|>
{{- range .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>

{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
{{ .Content }}<|eot_id|>
{{- end }}<|start_header_id|>assistant<|end_header_id|>

{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>

{{ .Response }}<|eot_id|>

@@ -1,8 +1,17 @@
{{ if .System }}{{ .System }}
{{- $system := "" }}
{{- range .Messages }}
{{- if eq .Role "system" }}
{{- if not $system }}{{ $system = .Content }}
{{- else }}{{ $system = printf "%s\n\n%s" $system .Content }}
{{- end }}
{{- continue }}
{{- else if eq .Role "user" }}
{{- if $system }}{{ $system }}

{{ end }}{{ if .Prompt }}@@ Instruction
{{ .Prompt }}
{{ $system = "" }}
{{- end }}@@ Instruction
{{- else if eq .Role "assistant" }}@@ Response
{{- end }}
{{ .Content }}

{{ end }}@@ Response
{{ .Response }}

@@ -1,3 +1,6 @@
[INST] {{ if .System }}{{ .System }}
[INST] {{ range $index, $_ := .Messages }}
{{- if eq .Role "system" }}{{ .Content }}

{{ end }}{{ .Prompt }}[/INST] {{ .Response }}</s>
{{ else if eq .Role "user" }}{{ .Content }}[/INST]
{{- else if eq .Role "assistant" }} {{ .Content }}</s>[INST] {{ end }}
{{- end }}

@@ -1 +1,6 @@
{{ if .System }}GPT4 Correct System: {{ .System }}<|end_of_turn|>{{ end }}GPT4 Correct User: {{ .Prompt }}<|end_of_turn|>GPT4 Correct Assistant: {{ .Response }}<|end_of_turn|>
{{- range .Messages }}GPT4 Correct
{{- if eq .Role "system" }} System:
{{- else if eq .Role "user" }} User:
{{- else if eq .Role "assistant" }} Assistant:
{{- end }} {{ .Content }}<|end_of_turn|>
{{- end }}GPT4 Correct Assistant:

@@ -1,6 +1,3 @@
{{ if .System }}<|system|>
{{ .System }}<|end|>
{{ end }}{{ if .Prompt }}<|user|>
{{ .Prompt }}<|end|>
{{- range .Messages }}<|{{ .Role }}|>
{{ .Content }}<|end|>
{{ end }}<|assistant|>
{{ .Response }}<|end|>

@@ -1,9 +1,11 @@
{{ if .System }}### System:
{{ .System }}
{{- range .Messages }}
{{- if eq .Role "system" }}### System:
{{- else if eq .Role "user" }}### User:
{{- else if eq .Role "assistant" }}### Assistant:
{{ .Content }}</s>

{{ end }}{{ if .Prompt }}### User:
{{ .Prompt }}
{{ continue }}
{{- end }}
{{ .Content }}

{{ end }}### Assistant:
{{ .Response }}</s>

@@ -1,8 +1,18 @@
{{ if .System }}{{ .System }}
{{- $system := "" }}
{{- range .Messages }}
{{- if eq .Role "system" }}
{{- if not $system }}{{ $system = .Content }}
{{- else }}{{ $system = printf "%s\n\n%s" $system .Content }}
{{- end }}
{{- else if eq .Role "user" }}
{{- if $system }}{{ $system }}

{{ end }}{{ if .Prompt }}### Instruction
{{ .Prompt }}
{{ $system = "" }}
{{- end }}### Instruction
{{ .Content }}

{{ end }}### Response
{{ .Response }}<|endoftext|>
{{ else if eq .Role "assistant" }}### Response
{{ .Content }}<|endoftext|>

{{ end }}
{{- end }}### Response

@@ -1,4 +1,14 @@
{{ if .System }}{{ .System }}
{{- $system := "" }}
{{- range .Messages }}
{{- if eq .Role "system" }}
{{- if not $system }}{{ $system = .Content }}
{{- else }}{{ $system = printf "%s\n\n%s" $system .Content }}
{{- end }}
{{- else if eq .Role "user" }}
{{- if $system }}{{ $system }}

{{ end }}{{ if .Prompt }}USER: {{ .Prompt }}
{{ end }}ASSISTANT: {{ .Response }}</s>
{{ $system = "" }}
{{- end }}USER: {{ .Content }}
{{ else if eq .Role "assistant" }}ASSISTANT: {{ .Content }}</s>
{{ end }}
{{- end }}ASSISTANT:

@@ -1,6 +1,3 @@
{{ if .System }}<|system|>
{{ .System }}</s>
{{ end }}{{ if .Prompt }}<|user|>
{{ .Prompt }}</s>
{{- range .Messages }}<|{{ .Role }}|>
{{ .Content }}</s>
{{ end }}<|assistant|>
{{ .Response }}</s>
```