Compare commits


23 Commits

Author SHA1 Message Date
Roy Han
2647a0e443 num parallel embed 2024-07-26 15:18:35 -07:00
Michael Yang
ec4c35fe99 Merge pull request #5512 from ollama/mxyng/detect-stop
autodetect stop parameters from template
2024-07-26 13:48:23 -07:00
Jeffrey Morgan
f5e3939220 Update api.md (#5968) 2024-07-25 23:10:18 -04:00
Jeffrey Morgan
ae27d9dcfd Update openai.md 2024-07-25 20:27:33 -04:00
Michael Yang
37096790a7 Merge pull request #5552 from ollama/mxyng/messages-docs
docs
2024-07-25 16:26:19 -07:00
Michael Yang
997c903884 Update docs/template.md
Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2024-07-25 16:23:40 -07:00
Blake Mizerany
c8af3c2d96 server: reuse original download URL for images (#5962)
This changes the registry client to reuse the original download URL
it gets on the first redirect response for all subsequent requests,
preventing thundering herd issues when hot new LLMs are released.
2024-07-25 15:58:30 -07:00
Jeffrey Morgan
455e61170d Update openai.md 2024-07-25 18:34:47 -04:00
royjhan
4de1370a9d openai tools doc (#5617) 2024-07-25 18:34:06 -04:00
Jeffrey Morgan
bbf8f102ee Revert "llm(llama): pass rope factors (#5924)" (#5963)
This reverts commit bb46bbcf5e.
2024-07-25 18:24:55 -04:00
Michael Yang
bb46bbcf5e llm(llama): pass rope factors (#5924) 2024-07-24 16:05:59 -04:00
royjhan
ac33aa7d37 Fix Embed Test Flakes (#5893)
* float cmp

* increase tolerance
2024-07-24 11:15:46 -07:00
Ajay Chintala
a6cd8f6169 Update README.md to add LLMStack integration (#5799) 2024-07-23 14:40:23 -04:00
Daniel Hiltgen
c78089263a Merge pull request #5864 from dhiltgen/bump_go
Bump Go patch version
2024-07-22 16:34:18 -07:00
Daniel Hiltgen
3e5ea035d5 Merge pull request #5757 from lreed-mdsol/lreed/bump-go-version-fix-vulnerabilities
bump go version to 1.22.5 to fix security vulnerabilities in docker
2024-07-22 16:32:43 -07:00
Daniel Hiltgen
5d604eec5b Bump Go patch version 2024-07-22 16:16:28 -07:00
Josh
db0968f30c fix dupe err message (#5857) 2024-07-22 15:48:15 -07:00
Michael Yang
9b60a038e5 update api.md 2024-07-22 13:49:51 -07:00
Michael Yang
83a0cb8d88 docs 2024-07-22 13:38:09 -07:00
royjhan
c0648233f2 api embed docs (#5282) 2024-07-22 13:37:08 -07:00
Jeffrey Morgan
d835368eb8 convert: capture head_dim for mistral (#5818) 2024-07-22 16:16:22 -04:00
lreed
f02f83660c bump go version to 1.22.5 to fix security vulnerabilities 2024-07-17 21:44:19 +00:00
Michael Yang
ebc529cbb3 autodetect stop parameters from template 2024-07-12 16:01:23 -07:00
42 changed files with 767 additions and 483 deletions

View File

@@ -31,7 +31,7 @@ jobs:
security set-keychain-settings -lut 3600 build.keychain
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: true
- name: Build Darwin
env:
@@ -87,7 +87,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: true
- run: go get ./...
- run: |
@@ -141,7 +141,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: true
- name: 'Install ROCm'
run: |
@@ -218,7 +218,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: true
- name: 'Install CUDA'
run: |
@@ -306,7 +306,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: true
- run: go get
- uses: actions/download-artifact@v4

View File

@@ -63,7 +63,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: true
- run: go get ./...
- run: |
@@ -163,7 +163,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: true
- name: 'Install ROCm'
run: |
@@ -200,7 +200,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: true
- name: 'Install CUDA'
run: |
@@ -255,7 +255,7 @@ jobs:
submodules: recursive
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: false
- run: |
case ${{ matrix.arch }} in
@@ -297,7 +297,7 @@ jobs:
submodules: recursive
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: true
- run: |
case ${{ matrix.arch }} in

View File

@@ -1,4 +1,4 @@
ARG GOLANG_VERSION=1.22.1
ARG GOLANG_VERSION=1.22.5
ARG CMAKE_VERSION=3.22.1
# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
ARG CUDA_VERSION=11.3.1

View File

@@ -64,7 +64,8 @@ Here are some example models that can be downloaded:
| LLaVA | 7B | 4.5GB | `ollama run llava` |
| Solar | 10.7B | 6.1GB | `ollama run solar` |
> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
> [!NOTE]
> You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
## Customize a model
@@ -296,6 +297,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
- [AI Studio](https://github.com/MindWorkAI/AI-Studio)
- [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
- [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
### Terminal

View File

@@ -17,7 +17,6 @@ import (
"bufio"
"bytes"
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io"
@@ -25,10 +24,7 @@ import (
"net/http"
"net/url"
"runtime"
"strings"
"time"
"github.com/ollama/ollama/auth"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/version"
@@ -387,16 +383,3 @@ func (c *Client) Version(ctx context.Context) (string, error) {
return version.Version, nil
}
func Authorization(ctx context.Context, request *http.Request) (string, error) {
data := []byte(fmt.Sprintf("%s,%s,%d", request.Method, request.URL.RequestURI(), time.Now().Unix()))
token, err := auth.Sign(ctx, data)
if err != nil {
return "", err
}
// interleave request data into the token
key, sig, _ := strings.Cut(token, ":")
return fmt.Sprintf("%s:%s:%s", key, base64.StdEncoding.EncodeToString(data), sig), nil
}

View File

@@ -3,67 +3,49 @@ package auth
import (
"bytes"
"context"
"crypto/ed25519"
"crypto/rand"
"encoding/base64"
"encoding/pem"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"strings"
"golang.org/x/crypto/ssh"
)
const defaultPrivateKey = "id_ed25519"
func privateKey() (ssh.Signer, error) {
func keyPath() (string, error) {
home, err := os.UserHomeDir()
if err != nil {
return nil, err
return "", err
}
keyPath := filepath.Join(home, ".ollama", defaultPrivateKey)
privateKeyFile, err := os.ReadFile(keyPath)
if os.IsNotExist(err) {
err := initializeKeypair()
if err != nil {
return nil, err
}
return privateKey()
} else if err != nil {
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
return nil, err
}
return ssh.ParsePrivateKey(privateKeyFile)
return filepath.Join(home, ".ollama", defaultPrivateKey), nil
}
func GetPublicKey() (ssh.PublicKey, error) {
// try to read pubkey first
home, err := os.UserHomeDir()
func GetPublicKey() (string, error) {
keyPath, err := keyPath()
if err != nil {
return nil, err
return "", err
}
pubkeyPath := filepath.Join(home, ".ollama", defaultPrivateKey+".pub")
pubKeyFile, err := os.ReadFile(pubkeyPath)
if os.IsNotExist(err) {
// try from privateKey
privateKey, err := privateKey()
if err != nil {
return nil, fmt.Errorf("failed to read public key: %w", err)
}
return privateKey.PublicKey(), nil
} else if err != nil {
return nil, fmt.Errorf("failed to read public key: %w", err)
privateKeyFile, err := os.ReadFile(keyPath)
if err != nil {
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
return "", err
}
pubKey, _, _, _, err := ssh.ParseAuthorizedKey(pubKeyFile)
return pubKey, err
privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
if err != nil {
return "", err
}
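	// Render the derived public key in authorized_keys form; ssh.MarshalAuthorizedKey
	// appends a trailing newline, which the TrimSpace below strips for callers.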
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
return strings.TrimSpace(string(publicKey)), nil
}
func NewNonce(r io.Reader, length int) (string, error) {
@@ -76,20 +58,25 @@ func NewNonce(r io.Reader, length int) (string, error) {
}
func Sign(ctx context.Context, bts []byte) (string, error) {
privateKey, err := privateKey()
keyPath, err := keyPath()
if err != nil {
return "", err
}
privateKeyFile, err := os.ReadFile(keyPath)
if err != nil {
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
return "", err
}
privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
if err != nil {
return "", err
}
// get the pubkey, but remove the type
publicKey, err := GetPublicKey()
if err != nil {
return "", err
}
publicKeyBytes := ssh.MarshalAuthorizedKey(publicKey)
parts := bytes.Split(publicKeyBytes, []byte(" "))
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
parts := bytes.Split(publicKey, []byte(" "))
if len(parts) < 2 {
return "", fmt.Errorf("malformed public key")
}
@@ -102,49 +89,3 @@ func Sign(ctx context.Context, bts []byte) (string, error) {
// signature is <pubkey>:<signature>
return fmt.Sprintf("%s:%s", bytes.TrimSpace(parts[1]), base64.StdEncoding.EncodeToString(signedData.Blob)), nil
}
func initializeKeypair() error {
home, err := os.UserHomeDir()
if err != nil {
return err
}
privKeyPath := filepath.Join(home, ".ollama", "id_ed25519")
pubKeyPath := filepath.Join(home, ".ollama", "id_ed25519.pub")
_, err = os.Stat(privKeyPath)
if os.IsNotExist(err) {
fmt.Printf("Couldn't find '%s'. Generating new private key.\n", privKeyPath)
cryptoPublicKey, cryptoPrivateKey, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
return err
}
privateKeyBytes, err := ssh.MarshalPrivateKey(cryptoPrivateKey, "")
if err != nil {
return err
}
if err := os.MkdirAll(filepath.Dir(privKeyPath), 0o755); err != nil {
return fmt.Errorf("could not create directory %w", err)
}
if err := os.WriteFile(privKeyPath, pem.EncodeToMemory(privateKeyBytes), 0o600); err != nil {
return err
}
sshPublicKey, err := ssh.NewPublicKey(cryptoPublicKey)
if err != nil {
return err
}
publicKeyBytes := ssh.MarshalAuthorizedKey(sshPublicKey)
if err := os.WriteFile(pubKeyPath, publicKeyBytes, 0o644); err != nil {
return err
}
fmt.Printf("Your new public key is: \n\n%s\n", publicKeyBytes)
}
return nil
}

View File

@@ -4,8 +4,10 @@ import (
"archive/zip"
"bytes"
"context"
"crypto/ed25519"
"crypto/rand"
"crypto/sha256"
"encoding/json"
"encoding/pem"
"errors"
"fmt"
"io"
@@ -13,7 +15,6 @@ import (
"math"
"net"
"net/http"
"net/url"
"os"
"os/signal"
"path/filepath"
@@ -262,8 +263,6 @@ func tempZipFiles(path string) (string, error) {
return tempfile.Name(), nil
}
var ErrBlobExists = errors.New("blob exists")
func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
bin, err := os.Open(path)
if err != nil {
@@ -281,120 +280,12 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, er
}
digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
// We check if we can find the models directory locally
// If we can, we return the path to the directory
// If we can't, we return an error
// If the blob exists already, we return the digest
dest, err := getLocalPath(cmd.Context(), digest)
if errors.Is(err, ErrBlobExists) {
return digest, nil
}
// Successfully found the model directory
if err == nil {
// Copy blob in via OS specific copy
// Linux errors out to use io.copy
err = localCopy(path, dest)
if err == nil {
return digest, nil
}
// Default copy using io.copy
err = defaultCopy(path, dest)
if err == nil {
return digest, nil
}
}
// If at any point copying the blob over locally fails, we default to the copy through the server
if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
return "", err
}
return digest, nil
}
func getLocalPath(ctx context.Context, digest string) (string, error) {
ollamaHost := envconfig.Host
client := http.DefaultClient
base := &url.URL{
Scheme: ollamaHost.Scheme,
Host: net.JoinHostPort(ollamaHost.Host, ollamaHost.Port),
}
data, err := json.Marshal(digest)
if err != nil {
return "", err
}
reqBody := bytes.NewReader(data)
path := fmt.Sprintf("/api/blobs/%s", digest)
requestURL := base.JoinPath(path)
request, err := http.NewRequestWithContext(ctx, http.MethodPost, requestURL.String(), reqBody)
if err != nil {
return "", err
}
authz, err := api.Authorization(ctx, request)
if err != nil {
return "", err
}
request.Header.Set("Authorization", authz)
request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
request.Header.Set("X-Redirect-Create", "1")
resp, err := client.Do(request)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode == http.StatusTemporaryRedirect {
dest := resp.Header.Get("LocalLocation")
return dest, nil
}
return "", ErrBlobExists
}
func defaultCopy(path string, dest string) error {
// This function should be called if the server is local
// It should find the model directory, copy the blob over, and return the digest
dirPath := filepath.Dir(dest)
if err := os.MkdirAll(dirPath, 0o755); err != nil {
return err
}
// Copy blob over
sourceFile, err := os.Open(path)
if err != nil {
return fmt.Errorf("could not open source file: %v", err)
}
defer sourceFile.Close()
destFile, err := os.Create(dest)
if err != nil {
return fmt.Errorf("could not create destination file: %v", err)
}
defer destFile.Close()
_, err = io.CopyBuffer(destFile, sourceFile, make([]byte, 4*1024*1024))
if err != nil {
return fmt.Errorf("error copying file: %v", err)
}
err = destFile.Sync()
if err != nil {
return fmt.Errorf("error flushing file: %v", err)
}
return nil
}
func RunHandler(cmd *cobra.Command, args []string) error {
interactive := true
@@ -488,12 +379,11 @@ func errFromUnknownKey(unknownKeyErr error) error {
if len(matches) > 0 {
serverPubKey := matches[0]
publicKey, err := auth.GetPublicKey()
localPubKey, err := auth.GetPublicKey()
if err != nil {
return unknownKeyErr
}
localPubKey := strings.TrimSpace(string(ssh.MarshalAuthorizedKey(publicKey)))
if runtime.GOOS == "linux" && serverPubKey != localPubKey {
// try the ollama service public key
svcPubKey, err := os.ReadFile("/usr/share/ollama/.ollama/id_ed25519.pub")
@@ -1182,7 +1072,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
}
func RunServer(cmd *cobra.Command, _ []string) error {
if _, err := auth.GetPublicKey(); err != nil {
if err := initializeKeypair(); err != nil {
return err
}
@@ -1199,6 +1089,52 @@ func RunServer(cmd *cobra.Command, _ []string) error {
return err
}
func initializeKeypair() error {
home, err := os.UserHomeDir()
if err != nil {
return err
}
privKeyPath := filepath.Join(home, ".ollama", "id_ed25519")
pubKeyPath := filepath.Join(home, ".ollama", "id_ed25519.pub")
_, err = os.Stat(privKeyPath)
if os.IsNotExist(err) {
fmt.Printf("Couldn't find '%s'. Generating new private key.\n", privKeyPath)
cryptoPublicKey, cryptoPrivateKey, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
return err
}
privateKeyBytes, err := ssh.MarshalPrivateKey(cryptoPrivateKey, "")
if err != nil {
return err
}
if err := os.MkdirAll(filepath.Dir(privKeyPath), 0o755); err != nil {
return fmt.Errorf("could not create directory %w", err)
}
if err := os.WriteFile(privKeyPath, pem.EncodeToMemory(privateKeyBytes), 0o600); err != nil {
return err
}
sshPublicKey, err := ssh.NewPublicKey(cryptoPublicKey)
if err != nil {
return err
}
publicKeyBytes := ssh.MarshalAuthorizedKey(sshPublicKey)
if err := os.WriteFile(pubKeyPath, publicKeyBytes, 0o644); err != nil {
return err
}
fmt.Printf("Your new public key is: \n\n%s\n", publicKeyBytes)
}
return nil
}
func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
client, err := api.ClientFromEnvironment()
if err != nil {

View File

@@ -1,23 +0,0 @@
package cmd
import (
"os"
"path/filepath"
"golang.org/x/sys/unix"
)
func localCopy(src, target string) error {
dirPath := filepath.Dir(target)
if err := os.MkdirAll(dirPath, 0o755); err != nil {
return err
}
err := unix.Clonefile(src, target, 0)
if err != nil {
return err
}
return nil
}

View File

@@ -1,7 +0,0 @@
package cmd
import "errors"
func localCopy(src, target string) error {
return errors.New("no local copy implementation for linux")
}

View File

@@ -1,67 +0,0 @@
//go:build windows
// +build windows
package cmd
import (
"os"
"path/filepath"
"syscall"
"unsafe"
)
func localCopy(src, target string) error {
// Create target directory if it doesn't exist
dirPath := filepath.Dir(target)
if err := os.MkdirAll(dirPath, 0o755); err != nil {
return err
}
// Open source file
sourceFile, err := os.Open(src)
if err != nil {
return err
}
defer sourceFile.Close()
// Create target file
targetFile, err := os.Create(target)
if err != nil {
return err
}
defer targetFile.Close()
// Use CopyFileExW to copy the file
err = copyFileEx(src, target)
if err != nil {
return err
}
return nil
}
func copyFileEx(src, dst string) error {
kernel32 := syscall.NewLazyDLL("kernel32.dll")
copyFileEx := kernel32.NewProc("CopyFileExW")
srcPtr, err := syscall.UTF16PtrFromString(src)
if err != nil {
return err
}
dstPtr, err := syscall.UTF16PtrFromString(dst)
if err != nil {
return err
}
r1, _, err := copyFileEx.Call(
uintptr(unsafe.Pointer(srcPtr)),
uintptr(unsafe.Pointer(dstPtr)),
0, 0, 0, 0)
if r1 == 0 {
return err
}
return nil
}

View File

@@ -71,6 +71,11 @@ func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
"tokenizer.ggml.unknown_token_id": uint32(0),
}
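	// Emit explicit attention key/value lengths only when the converter captured
	// a head dimension from the source model's config.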
if m.Params.HeadDimension > 0 {
kv["llama.attention.key_length"] = uint32(m.Params.HeadDimension)
kv["llama.attention.value_length"] = uint32(m.Params.HeadDimension)
}
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
}

View File

@@ -40,6 +40,7 @@ Generate a response for a given prompt with a provided model. This is a streamin
- `model`: (required) the [model name](#model-names)
- `prompt`: the prompt to generate a response for
- `suffix`: the text after the model response
- `images`: (optional) a list of base64-encoded images (for multimodal models such as `llava`)
Advanced parameters (optional):
@@ -57,7 +58,8 @@ Advanced parameters (optional):
Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as a valid JSON object. See the JSON mode [example](#request-json-mode) below.
> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.
> [!IMPORTANT]
> It's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.
### Examples
@@ -148,8 +150,44 @@ If `stream` is set to `false`, the response will be a single JSON object:
}
```
#### Request (with suffix)
##### Request
```shell
curl http://localhost:11434/api/generate -d '{
"model": "codellama:code",
"prompt": "def compute_gcd(a, b):",
"suffix": " return result",
"options": {
"temperature": 0
},
"stream": false
}'
```
##### Response
```json
{
"model": "codellama:code",
"created_at": "2024-07-22T20:47:51.147561Z",
"response": "\n if a == 0:\n return b\n else:\n return compute_gcd(b % a, a)\n\ndef compute_lcm(a, b):\n result = (a * b) / compute_gcd(a, b)\n",
"done": true,
"done_reason": "stop",
"context": [...],
"total_duration": 1162761250,
"load_duration": 6683708,
"prompt_eval_count": 17,
"prompt_eval_duration": 201222000,
"eval_count": 63,
"eval_duration": 953997000
}
```
#### Request (JSON mode)
> [!IMPORTANT]
> When `format` is set to `json`, the output will always be a well-formed JSON object. It's important to also instruct the model to respond in JSON.
##### Request
@@ -380,12 +418,14 @@ Generate the next message in a chat with a provided model. This is a streaming e
- `model`: (required) the [model name](#model-names)
- `messages`: the messages of the chat, this can be used to keep a chat memory
- `tools`: tools for the model to use if supported. Requires `stream` to be set to `false`
The `message` object has the following fields:
- `role`: the role of the message, either `system`, `user` or `assistant`
- `role`: the role of the message, either `system`, `user`, `assistant`, or `tool`
- `content`: the content of the message
- `images` (optional): a list of images to include in the message (for multimodal models such as `llava`)
- `tool_calls` (optional): a list of tools the model wants to use
Advanced parameters (optional):
@@ -622,6 +662,79 @@ curl http://localhost:11434/api/chat -d '{
}
```
#### Chat request (with tools)
##### Request
```
curl http://localhost:11434/api/chat -d '{
"model": "mistral",
"messages": [
{
"role": "user",
"content": "What is the weather today in Paris?"
}
],
"stream": false,
"tools": [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather for a location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The location to get the weather for, e.g. San Francisco, CA"
},
"format": {
"type": "string",
"description": "The format to return the weather in, e.g. 'celsius' or 'fahrenheit'",
"enum": ["celsius", "fahrenheit"]
}
},
"required": ["location", "format"]
}
}
}
]
}'
```
##### Response
```json
{
"model": "mistral:7b-instruct-v0.3-q4_K_M",
"created_at": "2024-07-22T20:33:28.123648Z",
"message": {
"role": "assistant",
"content": "",
"tool_calls": [
{
"function": {
"name": "get_current_weather",
"arguments": {
"format": "celsius",
"location": "Paris, FR"
}
}
}
]
},
"done_reason": "stop",
"done": true,
"total_duration": 885095291,
"load_duration": 3753500,
"prompt_eval_count": 122,
"prompt_eval_duration": 328493000,
"eval_count": 33,
"eval_duration": 552222000
}
```
## Create a Model
```shell
@@ -1026,7 +1139,7 @@ If `stream` is set to `false`, then the response is a single JSON object:
## Generate Embeddings
```shell
POST /api/embeddings
POST /api/embed
```
Generate embeddings from a model
@@ -1034,10 +1147,11 @@ Generate embeddings from a model
### Parameters
- `model`: name of model to generate embeddings from
- `prompt`: text to generate embeddings for
- `input`: text or list of text to generate embeddings for
Advanced parameters:
- `truncate`: truncates the end of each input to fit within context length. Returns error if `false` and context length is exceeded. Defaults to `true`
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
@@ -1046,9 +1160,9 @@ Advanced parameters:
#### Request
```shell
curl http://localhost:11434/api/embeddings -d '{
curl http://localhost:11434/api/embed -d '{
"model": "all-minilm",
"prompt": "Here is an article about llamas..."
"input": "Why is the sky blue?"
}'
```
@@ -1056,10 +1170,35 @@ curl http://localhost:11434/api/embeddings -d '{
```json
{
"embedding": [
0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
]
"model": "all-minilm",
"embeddings": [[
0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
]]
}
```
#### Request (Multiple input)
```shell
curl http://localhost:11434/api/embed -d '{
"model": "all-minilm",
"input": ["Why is the sky blue?", "Why is the grass green?"]
}'
```
#### Response
```json
{
"model": "all-minilm",
"embeddings": [[
0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
],[
-0.0098027075, 0.06042469, 0.025257962, -0.006364387, 0.07272725,
0.017194884, 0.09032035, -0.051705178, 0.09951512, 0.09072481
]]
}
```
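For Go programs, the new endpoint can also be reached through the ollama API client. The sketch below is a hedged illustration, assuming the client exposes an `Embed` method and an `api.EmbedRequest` type with the `Model` and `Input` fields shown above (the integration tests in this change use the same shapes):

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	// Reads OLLAMA_HOST from the environment, defaulting to localhost:11434.
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	// Batch two inputs in one call; the response carries one embedding per input.
	res, err := client.Embed(context.Background(), &api.EmbedRequest{
		Model: "all-minilm",
		Input: []string{"Why is the sky blue?", "Why is the grass green?"},
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(len(res.Embeddings))
}
```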
@@ -1106,3 +1245,45 @@ A single JSON object will be returned.
]
}
```
## Generate Embedding
> Note: this endpoint has been superseded by `/api/embed`
```shell
POST /api/embeddings
```
Generate embeddings from a model
### Parameters
- `model`: name of model to generate embeddings from
- `prompt`: text to generate embeddings for
Advanced parameters:
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
### Examples
#### Request
```shell
curl http://localhost:11434/api/embeddings -d '{
"model": "all-minilm",
"prompt": "Here is an article about llamas..."
}'
```
#### Response
```json
{
"embedding": [
0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
]
}
```

View File

@@ -1,6 +1,7 @@
# Ollama Model File
> Note: `Modelfile` syntax is in development
> [!NOTE]
> `Modelfile` syntax is in development
A model file is the blueprint to create and share models with Ollama.

View File

@@ -78,8 +78,8 @@ curl http://localhost:11434/v1/chat/completions \
- [x] Streaming
- [x] JSON mode
- [x] Reproducible outputs
- [x] Tools (streaming support coming soon)
- [ ] Vision
- [ ] Function calling
- [ ] Logprobs
#### Supported request fields
@@ -97,9 +97,9 @@ curl http://localhost:11434/v1/chat/completions \
- [x] `temperature`
- [x] `top_p`
- [x] `max_tokens`
- [ ] `logit_bias`
- [ ] `tools`
- [x] `tools`
- [ ] `tool_choice`
- [ ] `logit_bias`
- [ ] `user`
- [ ] `n`
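Since this table only marks `tools` as supported without showing a request, here is a minimal, assumption-laden sketch that exercises the field against the OpenAI-compatible endpoint with plain `net/http`; the body shape is borrowed from the `/api/chat` tools example elsewhere in this change:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"log"
	"net/http"
)

func main() {
	// A chat completion request carrying a single hypothetical tool definition.
	body := []byte(`{
		"model": "mistral",
		"messages": [{"role": "user", "content": "What is the weather today in Paris?"}],
		"tools": [{
			"type": "function",
			"function": {
				"name": "get_current_weather",
				"description": "Get the current weather for a location",
				"parameters": {
					"type": "object",
					"properties": {"location": {"type": "string"}},
					"required": ["location"]
				}
			}
		}]
	}`)

	resp, err := http.Post("http://localhost:11434/v1/chat/completions",
		"application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out)) // tool calls arrive in the assistant message
}
```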

docs/template.md (new file, 173 lines)
View File

@@ -0,0 +1,173 @@
# Template
Ollama provides a powerful templating engine backed by Go's built-in `text/template` package to construct prompts for your large language model. This feature is a valuable tool to get the most out of your models.
## Basic Template Structure
A basic Go template consists of three main parts:
* **Layout**: The overall structure of the template.
* **Variables**: Placeholders for dynamic data that will be replaced with actual values when the template is rendered.
* **Functions**: Custom functions or logic that can be used to manipulate the template's content.
Here's an example of a simple chat template:
```gotmpl
{{- range .Messages }}
{{ .Role }}: {{ .Content }}
{{- end }}
```
In this example, we have:
* A basic messages structure (layout)
* Three variables: `Messages`, `Role`, and `Content` (variables)
* A custom function (action) that iterates over an array of items (`range .Messages`) and displays each item
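To see the same structure outside of Ollama, here is a minimal sketch that renders it with Go's standard `text/template` package; Ollama's engine is built on this package but adds functions of its own, so this is an illustration rather than the real rendering path:

```go
package main

import (
	"log"
	"os"
	"text/template"
)

type Message struct {
	Role    string
	Content string
}

// The same layout as the chat template above.
const chatTmpl = `{{- range .Messages }}
{{ .Role }}: {{ .Content }}
{{- end }}
`

func main() {
	tmpl := template.Must(template.New("chat").Parse(chatTmpl))

	// Render with a small set of example messages.
	data := struct{ Messages []Message }{
		Messages: []Message{
			{Role: "user", Content: "Why is the sky blue?"},
			{Role: "assistant", Content: "Rayleigh scattering."},
		},
	}
	if err := tmpl.Execute(os.Stdout, data); err != nil {
		log.Fatal(err)
	}
}
```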
## Adding templates to your model
By default, models imported into Ollama have a default template of `{{ .Prompt }}`, i.e. user inputs are sent verbatim to the LLM. This is appropriate for text or code completion models but lacks essential markers for chat or instruction models.
Omitting a template in these models puts the responsibility of correctly templating input onto the user. Adding a template allows users to easily get the best results from the model.
To add templates in your model, you'll need to add a `TEMPLATE` command to the Modelfile. Here's an example using Meta's Llama 3.
```dockerfile
FROM llama3
TEMPLATE """{{- if .System }}<|start_header_id|>system<|end_header_id|>
{{ .System }}<|eot_id|>
{{- end }}
{{- range .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>
{{ .Content }}<|eot_id|>
{{- end }}<|start_header_id|>assistant<|end_header_id|>
"""
```
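After saving this to a `Modelfile`, `ollama create <name> -f Modelfile` builds a model that renders every chat request through this template.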
## Variables
`System` (string): system prompt
`Prompt` (string): user prompt
`Response` (string): assistant response
`Suffix` (string): text inserted after the assistant's response
`Messages` (list): list of messages
`Messages[].Role` (string): role which can be one of `system`, `user`, `assistant`, or `tool`
`Messages[].Content` (string): message content
`Messages[].ToolCalls` (list): list of tools the model wants to call
`Messages[].ToolCalls[].Function` (object): function to call
`Messages[].ToolCalls[].Function.Name` (string): function name
`Messages[].ToolCalls[].Function.Arguments` (map): mapping of argument name to argument value
`Tools` (list): list of tools the model can access
`Tools[].Type` (string): schema type. `type` is always `function`
`Tools[].Function` (object): function definition
`Tools[].Function.Name` (string): function name
`Tools[].Function.Description` (string): function description
`Tools[].Function.Parameters` (object): function parameters
`Tools[].Function.Parameters.Type` (string): schema type. `type` is always `object`
`Tools[].Function.Parameters.Required` (list): list of required properties
`Tools[].Function.Parameters.Properties` (map): mapping of property name to property definition
`Tools[].Function.Parameters.Properties[].Type` (string): property type
`Tools[].Function.Parameters.Properties[].Description` (string): property description
`Tools[].Function.Parameters.Properties[].Enum` (list): list of valid values
## Tips and Best Practices
Keep the following tips and best practices in mind when working with Go templates:
* **Be mindful of dot**: Control flow structures like `range` and `with` change the value of `.`
* **Out-of-scope variables**: Use `$.` to reference variables not currently in scope, starting from the root
* **Whitespace control**: Use `-` to trim leading (`{{-`) and trailing (`-}}`) whitespace
## Examples
### Example Messages
#### ChatML
ChatML is a popular template format. It can be used for models such as Databricks' DBRX, Intel's Neural Chat, and Microsoft's Orca 2.
```gotmpl
{{- if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}
{{- range .Messages }}<|im_start|>{{ .Role }}
{{ .Content }}<|im_end|>
{{ end }}<|im_start|>assistant
```
### Example Tools
Tools support can be added to a model by adding a `{{ .Tools }}` node to the template. This feature is useful for models trained to call external tools and can be a powerful tool for retrieving real-time data or performing complex tasks.
#### Mistral
Mistral v0.3 and Mixtral 8x22B support tool calling.
```gotmpl
{{- range $index, $_ := .Messages }}
{{- if eq .Role "user" }}
{{- if and (le (len (slice $.Messages $index)) 2) $.Tools }}[AVAILABLE_TOOLS] {{ json $.Tools }}[/AVAILABLE_TOOLS]
{{- end }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}
{{ end }}{{ .Content }}[/INST]
{{- else if eq .Role "assistant" }}
{{- if .Content }} {{ .Content }}</s>
{{- else if .ToolCalls }}[TOOL_CALLS] [
{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ json .Function.Arguments }}}
{{- end }}]</s>
{{- end }}
{{- else if eq .Role "tool" }}[TOOL_RESULTS] {"content": {{ .Content }}}[/TOOL_RESULTS]
{{- end }}
{{- end }}
```
### Example Fill-in-Middle
Fill-in-middle support can be added to a model by adding a `{{ .Suffix }}` node to the template. This feature is useful for models that are trained to generate text in the middle of user input, such as code completion models.
#### CodeLlama
CodeLlama [7B](https://ollama.com/library/codellama:7b-code) and [13B](https://ollama.com/library/codellama:13b-code) code completion models support fill-in-middle.
```gotmpl
<PRE> {{ .Prompt }} <SUF>{{ .Suffix }} <MID>
```
> [!NOTE]
> CodeLlama 34B and 70B code completion and all instruct and Python fine-tuned models do not support fill-in-middle.
#### Codestral
Codestral [22B](https://ollama.com/library/codestral:22b) supports fill-in-middle.
```gotmpl
[SUFFIX]{{ .Suffix }}[PREFIX] {{ .Prompt }}
```

View File

@@ -4,12 +4,45 @@ package integration
import (
"context"
"math"
"testing"
"time"
"github.com/ollama/ollama/api"
)
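// The helpers below compare floats with a loose absolute tolerance so the
// embedding tests stop flaking on small numeric differences between runs.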
func floatsEqual32(a, b float32) bool {
return math.Abs(float64(a-b)) <= 1e-4
}
func floatsEqual64(a, b float64) bool {
return math.Abs(a-b) <= 1e-4
}
func TestAllMiniLMEmbeddings(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
req := api.EmbeddingRequest{
Model: "all-minilm",
Prompt: "why is the sky blue?",
}
res, err := embeddingTestHelper(ctx, t, req)
if err != nil {
t.Fatalf("error: %v", err)
}
if len(res.Embedding) != 384 {
t.Fatalf("expected 384 floats, got %d", len(res.Embedding))
}
if !floatsEqual64(res.Embedding[0], 0.06642947345972061) {
t.Fatalf("expected 0.06642947345972061, got %.16f", res.Embedding[0])
}
}
func TestAllMiniLMEmbed(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
@@ -33,8 +66,8 @@ func TestAllMiniLMEmbed(t *testing.T) {
t.Fatalf("expected 384 floats, got %d", len(res.Embeddings[0]))
}
if res.Embeddings[0][0] != 0.010071031 {
t.Fatalf("expected 0.010071031, got %f", res.Embeddings[0][0])
if !floatsEqual32(res.Embeddings[0][0], 0.010071031) {
t.Fatalf("expected 0.010071031, got %.8f", res.Embeddings[0][0])
}
}
@@ -61,12 +94,12 @@ func TestAllMiniLMBatchEmbed(t *testing.T) {
t.Fatalf("expected 384 floats, got %d", len(res.Embeddings[0]))
}
if res.Embeddings[0][0] != 0.010071031 || res.Embeddings[1][0] != -0.009802706 {
t.Fatalf("expected 0.010071031 and -0.009802706, got %f and %f", res.Embeddings[0][0], res.Embeddings[1][0])
if !floatsEqual32(res.Embeddings[0][0], 0.010071031) || !floatsEqual32(res.Embeddings[1][0], -0.009802706) {
t.Fatalf("expected 0.010071031 and -0.009802706, got %.8f and %.8f", res.Embeddings[0][0], res.Embeddings[1][0])
}
}
func TestAllMiniLmEmbedTruncate(t *testing.T) {
func TestAllMiniLMEmbedTruncate(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
@@ -135,6 +168,22 @@ func TestAllMiniLmEmbedTruncate(t *testing.T) {
}
}
func embeddingTestHelper(ctx context.Context, t *testing.T, req api.EmbeddingRequest) (*api.EmbeddingResponse, error) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatalf("failed to pull model %s: %v", req.Model, err)
}
response, err := client.Embeddings(ctx, &req)
if err != nil {
return nil, err
}
return response, nil
}
func embedTestHelper(ctx context.Context, t *testing.T, req api.EmbedRequest) (*api.EmbedResponse, error) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()

View File

@@ -8,6 +8,7 @@ import (
"io"
"log/slog"
"math"
"math/rand/v2"
"net/http"
"net/url"
"os"
@@ -141,6 +142,32 @@ func (b *blobDownload) Run(ctx context.Context, requestURL *url.URL, opts *regis
b.err = b.run(ctx, requestURL, opts)
}
func newBackoff(maxBackoff time.Duration) func(ctx context.Context) error {
var n int
return func(ctx context.Context) error {
if ctx.Err() != nil {
return ctx.Err()
}
n++
// n^2 backoff timer is a little smoother than the
// common choice of 2^n.
d := min(time.Duration(n*n)*10*time.Millisecond, maxBackoff)
// Randomize the delay between 0.5-1.5 x msec, in order
// to prevent accidental "thundering herd" problems.
d = time.Duration(float64(d) * (rand.Float64() + 0.5))
t := time.NewTimer(d)
defer t.Stop()
select {
case <-ctx.Done():
return ctx.Err()
case <-t.C:
return nil
}
}
}
func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *registryOptions) error {
defer blobDownloadManager.Delete(b.Digest)
ctx, b.CancelFunc = context.WithCancel(ctx)
@@ -153,6 +180,52 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
_ = file.Truncate(b.Total)
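	// Resolve the registry redirect once up front and reuse the resulting direct
	// URL for every part download below, so a popular new model release doesn't
	// turn each chunk into a fresh redirect request.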
directURL, err := func() (*url.URL, error) {
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
backoff := newBackoff(10 * time.Second)
for {
// shallow clone opts to be used in the closure
// without affecting the outer opts.
newOpts := new(registryOptions)
*newOpts = *opts
newOpts.CheckRedirect = func(req *http.Request, via []*http.Request) error {
if len(via) > 10 {
return errors.New("maxium redirects exceeded (10) for directURL")
}
// if the hostname is the same, allow the redirect
if req.URL.Hostname() == requestURL.Hostname() {
return nil
}
// stop at the first redirect that is not
// the same hostname as the original
// request.
return http.ErrUseLastResponse
}
resp, err := makeRequestWithRetry(ctx, http.MethodGet, requestURL, nil, nil, newOpts)
if err != nil {
slog.Warn("failed to get direct URL; backing off and retrying", "err", err)
if err := backoff(ctx); err != nil {
return nil, err
}
continue
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusTemporaryRedirect {
return nil, fmt.Errorf("unexpected status code %d", resp.StatusCode)
}
return resp.Location()
}
}()
if err != nil {
return err
}
g, inner := errgroup.WithContext(ctx)
g.SetLimit(numDownloadParts)
for i := range b.Parts {
@@ -165,7 +238,7 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
var err error
for try := 0; try < maxRetries; try++ {
w := io.NewOffsetWriter(file, part.StartsAt())
err = b.downloadChunk(inner, requestURL, w, part, opts)
err = b.downloadChunk(inner, directURL, w, part, opts)
switch {
case errors.Is(err, context.Canceled), errors.Is(err, syscall.ENOSPC):
// return immediately if the context is canceled or the device is out of space

View File

@@ -32,7 +32,6 @@ import (
"github.com/ollama/ollama/types/errtypes"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
"golang.org/x/crypto/ssh"
)
var (
@@ -55,6 +54,8 @@ type registryOptions struct {
Username string
Password string
Token string
CheckRedirect func(req *http.Request, via []*http.Request) error
}
type Model struct {
@@ -1089,12 +1090,11 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
if anonymous {
// no user is associated with the public key, and the request requires non-anonymous access
pubKey, nestedErr := auth.GetPublicKey()
localPubKey := strings.TrimSpace(string(ssh.MarshalAuthorizedKey(pubKey)))
if nestedErr != nil {
slog.Error(fmt.Sprintf("couldn't get public key: %v", nestedErr))
return nil, errUnauthorized
}
return nil, &errtypes.UnknownOllamaKey{Key: localPubKey}
return nil, &errtypes.UnknownOllamaKey{Key: pubKey}
}
// user is associated with the public key, but is not authorized to make the request
return nil, errUnauthorized
@@ -1133,7 +1133,9 @@ func makeRequest(ctx context.Context, method string, requestURL *url.URL, header
req.ContentLength = contentLength
}
resp, err := http.DefaultClient.Do(req)
resp, err := (&http.Client{
CheckRedirect: regOpts.CheckRedirect,
}).Do(req)
if err != nil {
return nil, err
}

View File

@@ -263,13 +263,27 @@ func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) {
if t, err := template.Named(s); err != nil {
slog.Debug("template detection", "error", err)
} else {
tmpl, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
layer, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
if err != nil {
return nil, err
}
tmpl.status = fmt.Sprintf("using autodetected template %s", t.Name)
layers = append(layers, &layerGGML{tmpl, nil})
layer.status = fmt.Sprintf("using autodetected template %s", t.Name)
layers = append(layers, &layerGGML{layer, nil})
if t.Parameters != nil {
var b bytes.Buffer
if err := json.NewEncoder(&b).Encode(t.Parameters); err != nil {
return nil, err
}
layer, err := NewLayer(&b, "application/vnd.ollama.image.params")
if err != nil {
return nil, err
}
layers = append(layers, &layerGGML{layer, nil})
}
}
}
}

View File

@@ -4,7 +4,6 @@ import (
"bytes"
"cmp"
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
@@ -24,10 +23,8 @@ import (
"github.com/gin-contrib/cors"
"github.com/gin-gonic/gin"
"golang.org/x/crypto/ssh"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/auth"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/gpu"
"github.com/ollama/ollama/llm"
@@ -612,10 +609,9 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
defer cancel()
quantization := cmp.Or(r.Quantize, r.Quantization)
if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); err != nil {
if errors.Is(err, errBadTemplate) {
ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
}
if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); errors.Is(err, errBadTemplate) {
ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
} else if err != nil {
ch <- gin.H{"error": err.Error()}
}
}()
@@ -931,6 +927,7 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
_, err = os.Stat(path)
switch {
case errors.Is(err, os.ErrNotExist):
@@ -942,11 +939,6 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
c.Status(http.StatusOK)
return
}
if c.GetHeader("X-Redirect-Create") == "1" && s.isLocal(c) {
c.Header("LocalLocation", path)
c.Status(http.StatusTemporaryRedirect)
return
}
layer, err := NewLayer(c.Request.Body, "")
if err != nil {
@@ -962,54 +954,6 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
c.Status(http.StatusCreated)
}
func (s *Server) isLocal(c *gin.Context) bool {
if authz := c.GetHeader("Authorization"); authz != "" {
parts := strings.Split(authz, ":")
if len(parts) != 3 {
return false
}
clientPublicKey, _, _, _, err := ssh.ParseAuthorizedKey([]byte(fmt.Sprintf("ssh-ed25519 %s", parts[0])))
if err != nil {
return false
}
// partialRequestData is formatted as http.Method,http.requestURI,timestamp,nonce
requestData, err := base64.StdEncoding.DecodeString(parts[1])
if err != nil {
return false
}
partialRequestDataParts := strings.Split(string(requestData), ",")
if len(partialRequestDataParts) != 3 {
return false
}
signature, err := base64.StdEncoding.DecodeString(parts[2])
if err != nil {
return false
}
if err := clientPublicKey.Verify(requestData, &ssh.Signature{Format: clientPublicKey.Type(), Blob: signature}); err != nil {
return false
}
serverPublicKey, err := auth.GetPublicKey()
if err != nil {
slog.Error(fmt.Sprintf("failed to get server public key: %v", err))
return false
}
if bytes.Equal(serverPublicKey.Marshal(), clientPublicKey.Marshal()) {
return true
}
return false
}
return false
}
func isLocalIP(ip netip.Addr) bool {
if interfaces, err := net.Interfaces(); err == nil {
for _, iface := range interfaces {

View File

@@ -599,9 +599,10 @@ func TestCreateDetectTemplate(t *testing.T) {
}
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-0d79f567714c62c048378f2107fb332dabee0135d080c302d884317da9433cc5"),
filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"),
filepath.Join(p, "blobs", "sha256-c608dc615584cd20d9d830363dabf8a4783ae5d34245c3d8c115edb3bc7b28e4"),
filepath.Join(p, "blobs", "sha256-f836ee110db21567f826332e4cedd746c06d10664fd5a9ea3659e3683a944510"),
filepath.Join(p, "blobs", "sha256-ea34c57ba5b78b740aafe2aeb74dc6507fc3ad14170b64c26a04fb9e36c88d75"),
})
})

View File

@@ -10,18 +10,15 @@ import (
"math"
"net/http"
"net/http/httptest"
"net/url"
"os"
"sort"
"strings"
"testing"
"github.com/gin-gonic/gin"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/auth"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/openai"
@@ -530,62 +527,3 @@ func TestNormalize(t *testing.T) {
})
}
}
func TestIsLocalReal(t *testing.T) {
gin.SetMode(gin.TestMode)
clientPubLoc := t.TempDir()
t.Setenv("HOME", clientPubLoc)
_, err := auth.GetPublicKey()
if err != nil {
t.Fatal(err)
}
w := httptest.NewRecorder()
ctx, _ := gin.CreateTestContext(w)
ctx.Request = &http.Request{
Header: make(http.Header),
}
requestURL := url.URL{
Scheme: "http",
Host: "localhost:8080",
Path: "/api/blobs",
}
request := &http.Request{
Method: http.MethodPost,
URL: &requestURL,
}
s := &Server{}
authz, err := api.Authorization(ctx, request)
if err != nil {
t.Fatal(err)
}
// Set client authorization header
ctx.Request.Header.Set("Authorization", authz)
if !s.isLocal(ctx) {
t.Fatal("Expected isLocal to return true")
}
t.Run("different server pubkey", func(t *testing.T) {
serverPubLoc := t.TempDir()
t.Setenv("HOME", serverPubLoc)
_, err := auth.GetPublicKey()
if err != nil {
t.Fatal(err)
}
if s.isLocal(ctx) {
t.Fatal("Expected isLocal to return false")
}
})
t.Run("invalid pubkey", func(t *testing.T) {
ctx.Request.Header.Set("Authorization", "sha-25616:invalid")
if s.isLocal(ctx) {
t.Fatal("Expected isLocal to return false")
}
})
}

View File

@@ -132,6 +132,8 @@ func (s *Scheduler) processPending(ctx context.Context) {
if len(pending.model.ProjectorPaths) > 0 && numParallel != 1 {
numParallel = 1
slog.Warn("multimodal models don't support parallel requests yet")
} else if strings.Contains(pending.model.Config.ModelFamily, "bert") {
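	// embedding (bert-family) models serve many small requests, so allow
	// one parallel slot per CPU core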
numParallel = runtime.NumCPU()
}
for {

template/alfred.json (new file, 8 lines)
View File

@@ -0,0 +1,8 @@
{
"stop": [
"<start_system>",
"<end_message>",
"<start_user>",
"<start_assistant>"
]
}

template/alpaca.json (new file, 6 lines)
View File

@@ -0,0 +1,6 @@
{
"stop": [
"### Instruction:",
"### Response"
]
}

template/chatml.json (new file, 6 lines)
View File

@@ -0,0 +1,6 @@
{
"stop": [
"<|im_start|>",
"<|im_end|>"
]
}

template/chatqa.json (new file, 8 lines)
View File

@@ -0,0 +1,8 @@
{
"stop": [
"System:",
"User:",
"Assistant:",
"<|begin_of_text|>"
]
}

View File

@@ -0,0 +1,7 @@
{
"stop": [
"Source:",
"Destination:",
"<step>"
]
}

View File

@@ -0,0 +1,6 @@
{
"stop": [
"User:",
"Assistant:"
]
}

View File

@@ -0,0 +1,6 @@
{
"stop": [
"<start_of_turn>",
"<end_of_turn>"
]
}

View File

@@ -0,0 +1,7 @@
{
"stop": [
"System:",
"Question:",
"Answer:"
]
}

View File

@@ -0,0 +1,8 @@
{
"stop": [
"[INST]",
"[/INST]",
"<<SYS>>",
"<</SYS>>"
]
}

View File

@@ -0,0 +1,7 @@
{
"stop": [
"<|start_header_id|>",
"<|end_header_id|>",
"<|eot_id|>"
]
}

template/magicoder.json (new file, 6 lines)
View File

@@ -0,0 +1,6 @@
{
"stop": [
"@@ Instruction",
"@@ Response"
]
}

View File

@@ -0,0 +1,6 @@
{
"stop": [
"<|im_start|>",
"<|im_end|>"
]
}

template/openchat.json (new file, 5 lines)
View File

@@ -0,0 +1,5 @@
{
"stop": [
"<|end_of_turn|>"
]
}

template/phi-3.json (new file, 8 lines)
View File

@@ -0,0 +1,8 @@
{
"stop": [
"<|end|>",
"<|system|>",
"<|user|>",
"<|assistant|>"
]
}

View File

@@ -0,0 +1,7 @@
{
"stop": [
"### System:",
"### User:",
"### Assistant"
]
}

View File

@@ -0,0 +1,7 @@
{
"stop": [
"### Instruction",
"### Response",
"<|endoftext|>"
]
}

View File

@@ -23,6 +23,7 @@ import (
var indexBytes []byte
//go:embed *.gotmpl
//go:embed *.json
var templatesFS embed.FS
var templatesOnce = sync.OnceValues(func() ([]*named, error) {
@@ -39,6 +40,15 @@ var templatesOnce = sync.OnceValues(func() ([]*named, error) {
// normalize line endings
t.Bytes = bytes.ReplaceAll(bts, []byte("\r\n"), []byte("\n"))
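	// a sidecar <name>.json, when present, supplies the template's stop
	// parameters; templates without one are kept with no parameters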
params, err := templatesFS.ReadFile(t.Name + ".json")
if err != nil {
continue
}
if err := json.Unmarshal(params, &t.Parameters); err != nil {
return nil, err
}
}
return templates, nil
@@ -48,6 +58,10 @@ type named struct {
Name string `json:"name"`
Template string `json:"template"`
Bytes []byte
Parameters *struct {
Stop []string `json:"stop"`
}
}
func (t named) Reader() io.Reader {

template/vicuna.json (new file, 6 lines)
View File

@@ -0,0 +1,6 @@
{
"stop": [
"USER:",
"ASSISTANT:"
]
}

template/zephyr.json (new file, 8 lines)
View File

@@ -0,0 +1,8 @@
{
"stop": [
"<|system|>",
"</s>",
"<|user|>",
"<|assistant|>"
]
}