Compare commits

..

43 Commits

Author SHA1 Message Date
Josh Yan
f30b54209c revert other pr change 2024-07-24 11:11:59 -07:00
Josh Yan
e39be4f63a short circuit 2024-07-23 17:14:34 -07:00
Josh Yan
b8c3d54f7a set homedir for windows --test 2024-07-23 16:28:42 -07:00
Josh Yan
c8434b0e69 rmv unsued 2024-07-23 16:03:19 -07:00
Josh Yan
65658e4077 default to post 2024-07-23 14:05:48 -07:00
Josh Yan
b29382b86f bin 2024-07-23 13:57:40 -07:00
Josh Yan
2efe2013a1 test 2024-07-23 13:54:22 -07:00
Josh Yan
5c3786f4d5 comments 2024-07-23 13:46:34 -07:00
Josh Yan
33848ad10f serverside copy 2024-07-23 12:26:05 -07:00
Josh Yan
ff06a2916d changes 2024-07-22 15:51:52 -07:00
Josh Yan
d923a59356 testing auth 2024-07-22 15:51:52 -07:00
Josh Yan
2b42ad5754 auth changes' 2024-07-22 15:51:52 -07:00
Josh Yan
e3253e5469 isLocal testing 2024-07-22 15:51:52 -07:00
Josh Yan
35b49739ec timecheck 2024-07-22 15:51:52 -07:00
Josh Yan
bd8596d32b cmt 2024-07-22 15:51:52 -07:00
Josh Yan
b85705162f remove knownhosts 2024-07-22 15:51:52 -07:00
Josh Yan
d62a3a1e2b lint 2024-07-22 15:51:52 -07:00
Josh Yan
de48cd681f clean 2024-07-22 15:51:52 -07:00
Josh Yan
5d0e078057 removed cmt and prints 2024-07-22 15:51:52 -07:00
Josh Yan
8d5739b833 removed client isLocal() 2024-07-22 15:51:52 -07:00
Josh Yan
b5ff0ed4ff lint 2024-07-22 15:51:52 -07:00
Josh Yan
857054f9fa lint 2024-07-22 15:51:52 -07:00
Josh Yan
6dd9be55e2 lint 2024-07-22 15:51:52 -07:00
Josh Yan
d70707a668 syscopy windows 2024-07-22 15:51:52 -07:00
Josh Yan
c88774ffeb os copy 2024-07-22 15:51:52 -07:00
Josh Yan
34d197000d rmv prints 2024-07-22 15:51:52 -07:00
Josh Yan
6c0a8379f6 local copy 2024-07-22 15:51:52 -07:00
Josh Yan
163ee9a8b0 isLocal firstdraft 2024-07-22 15:51:52 -07:00
Josh Yan
de7b2f3948 clean 2024-07-22 15:51:52 -07:00
Josh Yan
f27c66fb0c rm bench 2024-07-22 15:51:52 -07:00
Josh Yan
a238191798 rm config 2024-07-22 15:51:52 -07:00
Josh Yan
6436c7a375 rm config 2024-07-22 15:51:52 -07:00
Josh Yan
896a15874e clean 2024-07-22 15:51:52 -07:00
Josh Yan
56008688a1 local path 2024-07-22 15:51:52 -07:00
Josh Yan
d14d38e940 still works 2024-07-22 15:51:52 -07:00
Josh Yan
03df02883d rebase 2024-07-22 15:51:52 -07:00
Josh Yan
ae49abf80a benchmark 2024-07-22 15:51:52 -07:00
Josh Yan
2c450502db on disk copy 2024-07-22 15:51:52 -07:00
Josh Yan
46b76aeb46 start tests 2024-07-22 15:51:52 -07:00
Josh Yan
0e01da82d6 errorsis 2024-07-22 15:51:31 -07:00
Josh Yan
6b1b85ba3d hide initialize keypair 2024-07-22 15:41:04 -07:00
Josh Yan
5603441538 test 2024-07-22 13:58:50 -07:00
Josh Yan
76b4dfcc9e auth 2024-07-22 13:54:02 -07:00
42 changed files with 490 additions and 770 deletions

View File

@@ -31,7 +31,7 @@ jobs:
security set-keychain-settings -lut 3600 build.keychain
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: true
- name: Build Darwin
env:
@@ -87,7 +87,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: true
- run: go get ./...
- run: |
@@ -141,7 +141,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: true
- name: 'Install ROCm'
run: |
@@ -218,7 +218,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: true
- name: 'Install CUDA'
run: |
@@ -306,7 +306,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: true
- run: go get
- uses: actions/download-artifact@v4

View File

@@ -63,7 +63,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: true
- run: go get ./...
- run: |
@@ -163,7 +163,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: true
- name: 'Install ROCm'
run: |
@@ -200,7 +200,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: true
- name: 'Install CUDA'
run: |
@@ -255,7 +255,7 @@ jobs:
submodules: recursive
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: false
- run: |
case ${{ matrix.arch }} in
@@ -297,7 +297,7 @@ jobs:
submodules: recursive
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: true
- run: |
case ${{ matrix.arch }} in

View File

@@ -1,4 +1,4 @@
ARG GOLANG_VERSION=1.22.5
ARG GOLANG_VERSION=1.22.1
ARG CMAKE_VERSION=3.22.1
# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
ARG CUDA_VERSION=11.3.1

View File

@@ -64,8 +64,7 @@ Here are some example models that can be downloaded:
| LLaVA | 7B | 4.5GB | `ollama run llava` |
| Solar | 10.7B | 6.1GB | `ollama run solar` |
> [!NOTE]
> You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
## Customize a model
@@ -297,7 +296,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
- [AI Studio](https://github.com/MindWorkAI/AI-Studio)
- [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
- [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
### Terminal

View File

@@ -17,6 +17,7 @@ import (
"bufio"
"bytes"
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io"
@@ -24,7 +25,10 @@ import (
"net/http"
"net/url"
"runtime"
"strings"
"time"
"github.com/ollama/ollama/auth"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/version"
@@ -383,3 +387,16 @@ func (c *Client) Version(ctx context.Context) (string, error) {
return version.Version, nil
}
// Authorization builds the value of the Authorization header for request.
//
// It signs the string "<method>,<request URI>,<unix timestamp>" with auth.Sign
// and returns "<public key>:<base64 of the signed data>:<signature>", i.e. the
// signed request data placed between the two halves of the
// "<public key>:<signature>" token returned by auth.Sign.
//
// An error is returned when signing fails or when the token does not contain
// the ":" separator.
func Authorization(ctx context.Context, request *http.Request) (string, error) {
	data := []byte(fmt.Sprintf("%s,%s,%d", request.Method, request.URL.RequestURI(), time.Now().Unix()))

	token, err := auth.Sign(ctx, data)
	if err != nil {
		return "", err
	}

	// interleave request data into the token
	key, sig, ok := strings.Cut(token, ":")
	if !ok {
		// Without a separator the whole token would be treated as the key and
		// the signature would silently be empty.
		return "", fmt.Errorf("malformed signature token")
	}

	return fmt.Sprintf("%s:%s:%s", key, base64.StdEncoding.EncodeToString(data), sig), nil
}

View File

@@ -3,49 +3,68 @@ package auth
import (
"bytes"
"context"
"crypto/ed25519"
"crypto/rand"
"encoding/base64"
"encoding/pem"
"errors"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"strings"
"golang.org/x/crypto/ssh"
)
const defaultPrivateKey = "id_ed25519"
func keyPath() (string, error) {
func privateKey() (ssh.Signer, error) {
home, err := os.UserHomeDir()
if err != nil {
return "", err
return nil, err
}
return filepath.Join(home, ".ollama", defaultPrivateKey), nil
keyPath := filepath.Join(home, ".ollama", defaultPrivateKey)
privateKeyFile, err := os.ReadFile(keyPath)
if errors.Is(err, os.ErrNotExist) {
err := initializeKeypair()
if err != nil {
return nil, err
}
return privateKey()
} else if err != nil {
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
return nil, err
}
return ssh.ParsePrivateKey(privateKeyFile)
}
func GetPublicKey() (string, error) {
keyPath, err := keyPath()
func GetPublicKey() (ssh.PublicKey, error) {
// try to read pubkey first
home, err := os.UserHomeDir()
if err != nil {
return "", err
return nil, err
}
privateKeyFile, err := os.ReadFile(keyPath)
if err != nil {
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
return "", err
pubkeyPath := filepath.Join(home, ".ollama", defaultPrivateKey+".pub")
pubKeyFile, err := os.ReadFile(pubkeyPath)
if errors.Is(err, os.ErrNotExist) {
// try from privateKey
privateKey, err := privateKey()
if err != nil {
return nil, fmt.Errorf("failed to read public key: %w", err)
}
return privateKey.PublicKey(), nil
} else if err != nil {
return nil, fmt.Errorf("failed to read public key: %w", err)
}
privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
if err != nil {
return "", err
}
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
return strings.TrimSpace(string(publicKey)), nil
pubKey, _, _, _, err := ssh.ParseAuthorizedKey(pubKeyFile)
return pubKey, err
}
func NewNonce(r io.Reader, length int) (string, error) {
@@ -58,25 +77,20 @@ func NewNonce(r io.Reader, length int) (string, error) {
}
func Sign(ctx context.Context, bts []byte) (string, error) {
keyPath, err := keyPath()
if err != nil {
return "", err
}
privateKeyFile, err := os.ReadFile(keyPath)
if err != nil {
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
return "", err
}
privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
privateKey, err := privateKey()
if err != nil {
return "", err
}
// get the pubkey, but remove the type
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
parts := bytes.Split(publicKey, []byte(" "))
publicKey, err := GetPublicKey()
if err != nil {
return "", err
}
publicKeyBytes := ssh.MarshalAuthorizedKey(publicKey)
parts := bytes.Split(publicKeyBytes, []byte(" "))
if len(parts) < 2 {
return "", fmt.Errorf("malformed public key")
}
@@ -89,3 +103,49 @@ func Sign(ctx context.Context, bts []byte) (string, error) {
// signature is <pubkey>:<signature>
return fmt.Sprintf("%s:%s", bytes.TrimSpace(parts[1]), base64.StdEncoding.EncodeToString(signedData.Blob)), nil
}
// initializeKeypair generates an ed25519 key pair in the ".ollama" directory
// of the user's home directory when the private key file does not exist.
//
// The private key is written PEM-encoded with mode 0o600 and the public key is
// written in authorized_keys format with mode 0o644. Progress messages and the
// new public key are printed to standard output. If the private key file is
// already present, or os.Stat fails for any reason other than the file not
// existing, nothing is written and nil is returned.
func initializeKeypair() error {
	home, err := os.UserHomeDir()
	if err != nil {
		return err
	}

	privKeyPath := filepath.Join(home, ".ollama", "id_ed25519")
	pubKeyPath := filepath.Join(home, ".ollama", "id_ed25519.pub")

	if _, statErr := os.Stat(privKeyPath); !errors.Is(statErr, os.ErrNotExist) {
		// Only a missing private key triggers generation.
		return nil
	}

	fmt.Printf("Couldn't find '%s'. Generating new private key.\n", privKeyPath)

	pub, priv, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		return err
	}

	privBlock, err := ssh.MarshalPrivateKey(priv, "")
	if err != nil {
		return err
	}

	if err := os.MkdirAll(filepath.Dir(privKeyPath), 0o755); err != nil {
		return fmt.Errorf("could not create directory %w", err)
	}

	if err := os.WriteFile(privKeyPath, pem.EncodeToMemory(privBlock), 0o600); err != nil {
		return err
	}

	sshPub, err := ssh.NewPublicKey(pub)
	if err != nil {
		return err
	}

	authorizedKey := ssh.MarshalAuthorizedKey(sshPub)
	if err := os.WriteFile(pubKeyPath, authorizedKey, 0o644); err != nil {
		return err
	}

	fmt.Printf("Your new public key is: \n\n%s\n", authorizedKey)
	return nil
}

View File

@@ -4,10 +4,7 @@ import (
"archive/zip"
"bytes"
"context"
"crypto/ed25519"
"crypto/rand"
"crypto/sha256"
"encoding/pem"
"errors"
"fmt"
"io"
@@ -15,6 +12,7 @@ import (
"math"
"net"
"net/http"
"net/url"
"os"
"os/signal"
"path/filepath"
@@ -112,7 +110,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
path = tempfile
}
digest, err := createBlob(cmd, client, path)
digest, err := createBlob(cmd, path)
if err != nil {
return err
}
@@ -263,7 +261,9 @@ func tempZipFiles(path string) (string, error) {
return tempfile.Name(), nil
}
func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
var ErrBlobExists = errors.New("blob exists")
func createBlob(cmd *cobra.Command, path string) (string, error) {
bin, err := os.Open(path)
if err != nil {
return "", err
@@ -280,12 +280,65 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, er
}
digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
// Use our new CreateBlob request which will include the file path
// The server checks for that file and if the server is local, it will copy the file over
// If the local copy fails, the server will continue to the default local copy
// If that fails, it will continue with the server POST
err = CreateBlob(cmd.Context(), path, digest, bin)
if errors.Is(err, ErrBlobExists) {
return digest, nil
}
if err != nil {
return "", err
}
return digest, nil
}
// CreateBlob uploads the contents of r to the server at /api/blobs/<digest>.
//
// The request is a POST to the host configured in envconfig.Host, carrying an
// Authorization header produced by api.Authorization and the local path src in
// the X-Ollama-File header.
//
// It returns nil when the server answers 201 Created, ErrBlobExists when it
// answers 200 OK, and an error describing the status for any other response.
func CreateBlob(ctx context.Context, src, digest string, r *os.File) error {
	ollamaHost := envconfig.Host

	client := http.DefaultClient
	base := &url.URL{
		Scheme: ollamaHost.Scheme,
		Host:   net.JoinHostPort(ollamaHost.Host, ollamaHost.Port),
	}

	path := fmt.Sprintf("/api/blobs/%s", digest)
	requestURL := base.JoinPath(path)
	request, err := http.NewRequestWithContext(ctx, http.MethodPost, requestURL.String(), r)
	if err != nil {
		return err
	}

	authz, err := api.Authorization(ctx, request)
	if err != nil {
		return err
	}

	request.Header.Set("Authorization", authz)
	request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
	request.Header.Set("X-Ollama-File", src)

	resp, err := client.Do(request)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	switch resp.StatusCode {
	case http.StatusCreated:
		return nil
	case http.StatusOK:
		return ErrBlobExists
	default:
		// err is nil here, so returning it would report every other status
		// (including 4xx/5xx) as success.
		return fmt.Errorf("create blob %s: unexpected status %s", digest, resp.Status)
	}
}
func RunHandler(cmd *cobra.Command, args []string) error {
interactive := true
@@ -379,11 +432,12 @@ func errFromUnknownKey(unknownKeyErr error) error {
if len(matches) > 0 {
serverPubKey := matches[0]
localPubKey, err := auth.GetPublicKey()
publicKey, err := auth.GetPublicKey()
if err != nil {
return unknownKeyErr
}
localPubKey := strings.TrimSpace(string(ssh.MarshalAuthorizedKey(publicKey)))
if runtime.GOOS == "linux" && serverPubKey != localPubKey {
// try the ollama service public key
svcPubKey, err := os.ReadFile("/usr/share/ollama/.ollama/id_ed25519.pub")
@@ -1072,7 +1126,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
}
func RunServer(cmd *cobra.Command, _ []string) error {
if err := initializeKeypair(); err != nil {
if _, err := auth.GetPublicKey(); err != nil {
return err
}
@@ -1089,52 +1143,6 @@ func RunServer(cmd *cobra.Command, _ []string) error {
return err
}
// initializeKeypair generates an ed25519 key pair in the ".ollama" directory
// of the user's home directory when the private key file does not exist.
//
// The private key is written PEM-encoded with mode 0o600 and the public key is
// written in authorized_keys format with mode 0o644. If the private key file
// is already present nothing is written and nil is returned.
func initializeKeypair() error {
	home, err := os.UserHomeDir()
	if err != nil {
		return err
	}

	privKeyPath := filepath.Join(home, ".ollama", "id_ed25519")
	pubKeyPath := filepath.Join(home, ".ollama", "id_ed25519.pub")

	_, err = os.Stat(privKeyPath)
	// errors.Is also matches wrapped errors, which os.IsNotExist does not.
	if errors.Is(err, os.ErrNotExist) {
		fmt.Printf("Couldn't find '%s'. Generating new private key.\n", privKeyPath)
		cryptoPublicKey, cryptoPrivateKey, err := ed25519.GenerateKey(rand.Reader)
		if err != nil {
			return err
		}

		privateKeyBytes, err := ssh.MarshalPrivateKey(cryptoPrivateKey, "")
		if err != nil {
			return err
		}

		if err := os.MkdirAll(filepath.Dir(privKeyPath), 0o755); err != nil {
			return fmt.Errorf("could not create directory %w", err)
		}

		if err := os.WriteFile(privKeyPath, pem.EncodeToMemory(privateKeyBytes), 0o600); err != nil {
			return err
		}

		sshPublicKey, err := ssh.NewPublicKey(cryptoPublicKey)
		if err != nil {
			return err
		}

		publicKeyBytes := ssh.MarshalAuthorizedKey(sshPublicKey)

		if err := os.WriteFile(pubKeyPath, publicKeyBytes, 0o644); err != nil {
			return err
		}

		fmt.Printf("Your new public key is: \n\n%s\n", publicKeyBytes)
	}
	return nil
}
func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
client, err := api.ClientFromEnvironment()
if err != nil {

View File

@@ -71,11 +71,6 @@ func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
"tokenizer.ggml.unknown_token_id": uint32(0),
}
if m.Params.HeadDimension > 0 {
kv["llama.attention.key_length"] = uint32(m.Params.HeadDimension)
kv["llama.attention.value_length"] = uint32(m.Params.HeadDimension)
}
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
}

View File

@@ -40,7 +40,6 @@ Generate a response for a given prompt with a provided model. This is a streamin
- `model`: (required) the [model name](#model-names)
- `prompt`: the prompt to generate a response for
- `suffix`: the text after the model response
- `images`: (optional) a list of base64-encoded images (for multimodal models such as `llava`)
Advanced parameters (optional):
@@ -58,8 +57,7 @@ Advanced parameters (optional):
Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as a valid JSON object. See the JSON mode [example](#request-json-mode) below.
> [!IMPORTANT]
> It's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.
> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.
### Examples
@@ -150,44 +148,8 @@ If `stream` is set to `false`, the response will be a single JSON object:
}
```
#### Request (with suffix)
##### Request
```shell
curl http://localhost:11434/api/generate -d '{
"model": "codellama:code",
"prompt": "def compute_gcd(a, b):",
"suffix": " return result",
"options": {
"temperature": 0
},
"stream": false
}'
```
##### Response
```json
{
"model": "codellama:code",
"created_at": "2024-07-22T20:47:51.147561Z",
"response": "\n if a == 0:\n return b\n else:\n return compute_gcd(b % a, a)\n\ndef compute_lcm(a, b):\n result = (a * b) / compute_gcd(a, b)\n",
"done": true,
"done_reason": "stop",
"context": [...],
"total_duration": 1162761250,
"load_duration": 6683708,
"prompt_eval_count": 17,
"prompt_eval_duration": 201222000,
"eval_count": 63,
"eval_duration": 953997000
}
```
#### Request (JSON mode)
> [!IMPORTANT]
> When `format` is set to `json`, the output will always be a well-formed JSON object. It's important to also instruct the model to respond in JSON.
##### Request
@@ -418,14 +380,12 @@ Generate the next message in a chat with a provided model. This is a streaming e
- `model`: (required) the [model name](#model-names)
- `messages`: the messages of the chat, this can be used to keep a chat memory
- `tools`: tools for the model to use if supported. Requires `stream` to be set to `false`
The `message` object has the following fields:
- `role`: the role of the message, either `system`, `user`, `assistant`, or `tool`
- `role`: the role of the message, either `system`, `user` or `assistant`
- `content`: the content of the message
- `images` (optional): a list of images to include in the message (for multimodal models such as `llava`)
- `tool_calls` (optional): a list of tools the model wants to use
Advanced parameters (optional):
@@ -662,79 +622,6 @@ curl http://localhost:11434/api/chat -d '{
}
```
#### Chat request (with tools)
##### Request
```
curl http://localhost:11434/api/chat -d '{
"model": "mistral",
"messages": [
{
"role": "user",
"content": "What is the weather today in Paris?"
}
],
"stream": false,
"tools": [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather for a location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The location to get the weather for, e.g. San Francisco, CA"
},
"format": {
"type": "string",
"description": "The format to return the weather in, e.g. 'celsius' or 'fahrenheit'",
"enum": ["celsius", "fahrenheit"]
}
},
"required": ["location", "format"]
}
}
}
]
}'
```
##### Response
```json
{
"model": "mistral:7b-instruct-v0.3-q4_K_M",
"created_at": "2024-07-22T20:33:28.123648Z",
"message": {
"role": "assistant",
"content": "",
"tool_calls": [
{
"function": {
"name": "get_current_weather",
"arguments": {
"format": "celsius",
"location": "Paris, FR"
}
}
}
]
},
"done_reason": "stop",
"done": true,
"total_duration": 885095291,
"load_duration": 3753500,
"prompt_eval_count": 122,
"prompt_eval_duration": 328493000,
"eval_count": 33,
"eval_duration": 552222000
}
```
## Create a Model
```shell
@@ -1139,7 +1026,7 @@ If `stream` is set to `false`, then the response is a single JSON object:
## Generate Embeddings
```shell
POST /api/embed
POST /api/embeddings
```
Generate embeddings from a model
@@ -1147,11 +1034,10 @@ Generate embeddings from a model
### Parameters
- `model`: name of model to generate embeddings from
- `input`: text or list of text to generate embeddings for
- `prompt`: text to generate embeddings for
Advanced parameters:
- `truncate`: truncates the end of each input to fit within context length. Returns error if `false` and context length is exceeded. Defaults to `true`
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
@@ -1160,9 +1046,9 @@ Advanced parameters:
#### Request
```shell
curl http://localhost:11434/api/embed -d '{
curl http://localhost:11434/api/embeddings -d '{
"model": "all-minilm",
"input": "Why is the sky blue?"
"prompt": "Here is an article about llamas..."
}'
```
@@ -1170,35 +1056,10 @@ curl http://localhost:11434/api/embed -d '{
```json
{
"model": "all-minilm",
"embeddings": [[
0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
]]
}
```
#### Request (Multiple input)
```shell
curl http://localhost:11434/api/embed -d '{
"model": "all-minilm",
"input": ["Why is the sky blue?", "Why is the grass green?"]
}'
```
#### Response
```json
{
"model": "all-minilm",
"embeddings": [[
0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
],[
-0.0098027075, 0.06042469, 0.025257962, -0.006364387, 0.07272725,
0.017194884, 0.09032035, -0.051705178, 0.09951512, 0.09072481
]]
"embedding": [
0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
]
}
```
@@ -1245,45 +1106,3 @@ A single JSON object will be returned.
]
}
```
## Generate Embedding
> Note: this endpoint has been superseded by `/api/embed`
```shell
POST /api/embeddings
```
Generate embeddings from a model
### Parameters
- `model`: name of model to generate embeddings from
- `prompt`: text to generate embeddings for
Advanced parameters:
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
### Examples
#### Request
```shell
curl http://localhost:11434/api/embeddings -d '{
"model": "all-minilm",
"prompt": "Here is an article about llamas..."
}'
```
#### Response
```json
{
"embedding": [
0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
]
}
```

View File

@@ -1,7 +1,6 @@
# Ollama Model File
> [!NOTE]
> `Modelfile` syntax is in development
> Note: `Modelfile` syntax is in development
A model file is the blueprint to create and share models with Ollama.

View File

@@ -78,8 +78,8 @@ curl http://localhost:11434/v1/chat/completions \
- [x] Streaming
- [x] JSON mode
- [x] Reproducible outputs
- [x] Tools (streaming support coming soon)
- [ ] Vision
- [ ] Function calling
- [ ] Logprobs
#### Supported request fields
@@ -97,9 +97,9 @@ curl http://localhost:11434/v1/chat/completions \
- [x] `temperature`
- [x] `top_p`
- [x] `max_tokens`
- [x] `tools`
- [ ] `tool_choice`
- [ ] `logit_bias`
- [ ] `tools`
- [ ] `tool_choice`
- [ ] `user`
- [ ] `n`

View File

@@ -1,173 +0,0 @@
# Template
Ollama provides a powerful templating engine backed by Go's built-in templating engine to construct prompts for your large language model. This feature is a valuable tool to get the most out of your models.
## Basic Template Structure
A basic Go template consists of three main parts:
* **Layout**: The overall structure of the template.
* **Variables**: Placeholders for dynamic data that will be replaced with actual values when the template is rendered.
* **Functions**: Custom functions or logic that can be used to manipulate the template's content.
Here's an example of a simple chat template:
```gotmpl
{{- range .Messages }}
{{ .Role }}: {{ .Content }}
{{- end }}
```
In this example, we have:
* A basic messages structure (layout)
* Three variables: `Messages`, `Role`, and `Content` (variables)
* A custom function (action) that iterates over an array of items (`range .Messages`) and displays each item
## Adding templates to your model
By default, models imported into Ollama have a default template of `{{ .Prompt }}`, i.e. user inputs are sent verbatim to the LLM. This is appropriate for text or code completion models but lacks essential markers for chat or instruction models.
Omitting a template in these models puts the responsibility of correctly templating input onto the user. Adding a template allows users to easily get the best results from the model.
To add templates in your model, you'll need to add a `TEMPLATE` command to the Modelfile. Here's an example using Meta's Llama 3.
```dockerfile
FROM llama3
TEMPLATE """{{- if .System }}<|start_header_id|>system<|end_header_id|>
{{ .System }}<|eot_id|>
{{- end }}
{{- range .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>
{{ .Content }}<|eot_id|>
{{- end }}<|start_header_id|>assistant<|end_header_id|>
"""
```
## Variables
`System` (string): system prompt
`Prompt` (string): user prompt
`Response` (string): assistant response
`Suffix` (string): text inserted after the assistant's response
`Messages` (list): list of messages
`Messages[].Role` (string): role which can be one of `system`, `user`, `assistant`, or `tool`
`Messages[].Content` (string): message content
`Messages[].ToolCalls` (list): list of tools the model wants to call
`Messages[].ToolCalls[].Function` (object): function to call
`Messages[].ToolCalls[].Function.Name` (string): function name
`Messages[].ToolCalls[].Function.Arguments` (map): mapping of argument name to argument value
`Tools` (list): list of tools the model can access
`Tools[].Type` (string): schema type. `type` is always `function`
`Tools[].Function` (object): function definition
`Tools[].Function.Name` (string): function name
`Tools[].Function.Description` (string): function description
`Tools[].Function.Parameters` (object): function parameters
`Tools[].Function.Parameters.Type` (string): schema type. `type` is always `object`
`Tools[].Function.Parameters.Required` (list): list of required properties
`Tools[].Function.Parameters.Properties` (map): mapping of property name to property definition
`Tools[].Function.Parameters.Properties[].Type` (string): property type
`Tools[].Function.Parameters.Properties[].Description` (string): property description
`Tools[].Function.Parameters.Properties[].Enum` (list): list of valid values
## Tips and Best Practices
Keep the following tips and best practices in mind when working with Go templates:
* **Be mindful of dot**: Control flow structures like `range` and `with` change the value of `.`
* **Out-of-scope variables**: Use `$.` to reference variables not currently in scope, starting from the root
* **Whitespace control**: Use `-` to trim leading (`{{-`) and trailing (`-}}`) whitespace
## Examples
### Example Messages
#### ChatML
ChatML is a popular template format. It can be used for models such as Databricks' DBRX, Intel's Neural Chat, and Microsoft's Orca 2.
```gotmpl
{{- if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}
{{- range .Messages }}<|im_start|>{{ .Role }}
{{ .Content }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ else }}
{{ if .System }}<|im_start|>system
{{ .System }}<|im_end|>
```
### Example Tools
Tools support can be added to a model by adding a `{{ .Tools }}` node to the template. This feature is useful for models trained to call external tools and can be a powerful tool for retrieving real-time data or performing complex tasks.
#### Mistral
Mistral v0.3 and Mixtral 8x22B support tool calling.
```gotmpl
{{- range $index, $_ := .Messages }}
{{- if eq .Role "user" }}
{{- if and (le (len (slice $.Messages $index)) 2) $.Tools }}[AVAILABLE_TOOLS] {{ json $.Tools }}[/AVAILABLE_TOOLS]
{{- end }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}
{{ end }}{{ .Content }}[/INST]
{{- else if eq .Role "assistant" }}
{{- if .Content }} {{ .Content }}</s>
{{- else if .ToolCalls }}[TOOL_CALLS] [
{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ json .Function.Arguments }}}
{{- end }}]</s>
{{- end }}
{{- else if eq .Role "tool" }}[TOOL_RESULTS] {"content": {{ .Content }}}[/TOOL_RESULTS]
{{- end }}
{{- end }}
```
### Example Fill-in-Middle
Fill-in-middle support can be added to a model by adding a `{{ .Suffix }}` node to the template. This feature is useful for models that are trained to generate text in the middle of user input, such as code completion models.
#### CodeLlama
CodeLlama [7B](https://ollama.com/library/codellama:7b-code) and [13B](https://ollama.com/library/codellama:13b-code) code completion models support fill-in-middle.
```gotmpl
<PRE> {{ .Prompt }} <SUF>{{ .Suffix }} <MID>
```
> [!NOTE]
> CodeLlama 34B and 70B code completion and all instruct and Python fine-tuned models do not support fill-in-middle.
#### Codestral
Codestral [22B](https://ollama.com/library/codestral:22b) supports fill-in-middle.
```gotmpl
[SUFFIX]{{ .Suffix }}[PREFIX] {{ .Prompt }}
```

View File

@@ -4,45 +4,12 @@ package integration
import (
"context"
"math"
"testing"
"time"
"github.com/ollama/ollama/api"
)
// floatsEqual32 reports whether a and b differ by at most 1e-4. The
// difference is taken in float32 and then widened to float64 for comparison.
func floatsEqual32(a, b float32) bool {
	const tolerance = 1e-4
	delta := float64(a - b)
	return math.Abs(delta) <= tolerance
}
// floatsEqual64 reports whether a and b differ by at most 1e-4.
func floatsEqual64(a, b float64) bool {
	const tolerance = 1e-4
	delta := a - b
	return math.Abs(delta) <= tolerance
}
func TestAllMiniLMEmbeddings(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
req := api.EmbeddingRequest{
Model: "all-minilm",
Prompt: "why is the sky blue?",
}
res, err := embeddingTestHelper(ctx, t, req)
if err != nil {
t.Fatalf("error: %v", err)
}
if len(res.Embedding) != 384 {
t.Fatalf("expected 384 floats, got %d", len(res.Embedding))
}
if !floatsEqual64(res.Embedding[0], 0.06642947345972061) {
t.Fatalf("expected 0.06642947345972061, got %.16f", res.Embedding[0])
}
}
func TestAllMiniLMEmbed(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
@@ -66,8 +33,8 @@ func TestAllMiniLMEmbed(t *testing.T) {
t.Fatalf("expected 384 floats, got %d", len(res.Embeddings[0]))
}
if !floatsEqual32(res.Embeddings[0][0], 0.010071031) {
t.Fatalf("expected 0.010071031, got %.8f", res.Embeddings[0][0])
if res.Embeddings[0][0] != 0.010071031 {
t.Fatalf("expected 0.010071031, got %f", res.Embeddings[0][0])
}
}
@@ -94,12 +61,12 @@ func TestAllMiniLMBatchEmbed(t *testing.T) {
t.Fatalf("expected 384 floats, got %d", len(res.Embeddings[0]))
}
if !floatsEqual32(res.Embeddings[0][0], 0.010071031) || !floatsEqual32(res.Embeddings[1][0], -0.009802706) {
t.Fatalf("expected 0.010071031 and -0.009802706, got %.8f and %.8f", res.Embeddings[0][0], res.Embeddings[1][0])
if res.Embeddings[0][0] != 0.010071031 || res.Embeddings[1][0] != -0.009802706 {
t.Fatalf("expected 0.010071031 and -0.009802706, got %f and %f", res.Embeddings[0][0], res.Embeddings[1][0])
}
}
func TestAllMiniLMEmbedTruncate(t *testing.T) {
func TestAllMiniLmEmbedTruncate(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
@@ -168,22 +135,6 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) {
}
}
// embeddingTestHelper opens a server connection for the test, pulls req.Model
// if it is missing (failing the test when the pull fails), and returns the
// result of client.Embeddings for req. On error the response is nil.
func embeddingTestHelper(ctx context.Context, t *testing.T, req api.EmbeddingRequest) (*api.EmbeddingResponse, error) {
	client, _, cleanup := InitServerConnection(ctx, t)
	defer cleanup()

	if pullErr := PullIfMissing(ctx, client, req.Model); pullErr != nil {
		t.Fatalf("failed to pull model %s: %v", req.Model, pullErr)
	}

	resp, err := client.Embeddings(ctx, &req)
	if err == nil {
		return resp, nil
	}
	return nil, err
}
func embedTestHelper(ctx context.Context, t *testing.T, req api.EmbedRequest) (*api.EmbedResponse, error) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()

23
server/copy_darwin.go Normal file
View File

@@ -0,0 +1,23 @@
package server
import (
"os"
"path/filepath"
"golang.org/x/sys/unix"
)
// localCopy copies src to target on darwin by calling unix.Clonefile with
// no flags, after making sure target's parent directory exists.
// Any error from creating the directory or from the clone is returned as-is.
func localCopy(src, target string) error {
	parent := filepath.Dir(target)
	if mkErr := os.MkdirAll(parent, 0o755); mkErr != nil {
		return mkErr
	}

	return unix.Clonefile(src, target, 0)
}

7
server/copy_linux.go Normal file
View File

@@ -0,0 +1,7 @@
package server
import "errors"
// localCopy is the linux variant of the platform-specific copy. It performs
// no work and always returns an error, ignoring both arguments.
func localCopy(src, target string) error {
	const msg = "no local copy implementation for linux"
	return errors.New(msg)
}

67
server/copy_windows.go Normal file
View File

@@ -0,0 +1,67 @@
//go:build windows
// +build windows
package server
import (
"os"
"path/filepath"
"syscall"
"unsafe"
)
// localCopy copies src to target on Windows by delegating to copyFileEx
// (CopyFileExW), after making sure target's parent directory exists.
//
// No file handles are opened here: copyFileEx works from the two paths, so
// opening src is redundant, and pre-creating target would leave an empty
// file behind at the destination whenever the copy itself fails.
//
// It returns the error from creating the directory or from the copy.
func localCopy(src, target string) error {
	// Create target directory if it doesn't exist.
	if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
		return err
	}

	// Use CopyFileExW to copy the file; it creates (or overwrites) target.
	return copyFileEx(src, target)
}
func copyFileEx(src, dst string) error {
kernel32 := syscall.NewLazyDLL("kernel32.dll")
copyFileEx := kernel32.NewProc("CopyFileExW")
srcPtr, err := syscall.UTF16PtrFromString(src)
if err != nil {
return err
}
dstPtr, err := syscall.UTF16PtrFromString(dst)
if err != nil {
return err
}
r1, _, err := copyFileEx.Call(
uintptr(unsafe.Pointer(srcPtr)),
uintptr(unsafe.Pointer(dstPtr)),
0, 0, 0, 0)
if r1 == 0 {
return err
}
return nil
}

View File

@@ -8,7 +8,6 @@ import (
"io"
"log/slog"
"math"
"math/rand/v2"
"net/http"
"net/url"
"os"
@@ -142,32 +141,6 @@ func (b *blobDownload) Run(ctx context.Context, requestURL *url.URL, opts *regis
b.err = b.run(ctx, requestURL, opts)
}
func newBackoff(maxBackoff time.Duration) func(ctx context.Context) error {
var n int
return func(ctx context.Context) error {
if ctx.Err() != nil {
return ctx.Err()
}
n++
// n^2 backoff timer is a little smoother than the
// common choice of 2^n.
d := min(time.Duration(n*n)*10*time.Millisecond, maxBackoff)
// Randomize the delay between 0.5-1.5 x msec, in order
// to prevent accidental "thundering herd" problems.
d = time.Duration(float64(d) * (rand.Float64() + 0.5))
t := time.NewTimer(d)
defer t.Stop()
select {
case <-ctx.Done():
return ctx.Err()
case <-t.C:
return nil
}
}
}
func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *registryOptions) error {
defer blobDownloadManager.Delete(b.Digest)
ctx, b.CancelFunc = context.WithCancel(ctx)
@@ -180,52 +153,6 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
_ = file.Truncate(b.Total)
directURL, err := func() (*url.URL, error) {
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
backoff := newBackoff(10 * time.Second)
for {
// shallow clone opts to be used in the closure
// without affecting the outer opts.
newOpts := new(registryOptions)
*newOpts = *opts
newOpts.CheckRedirect = func(req *http.Request, via []*http.Request) error {
if len(via) > 10 {
return errors.New("maxium redirects exceeded (10) for directURL")
}
// if the hostname is the same, allow the redirect
if req.URL.Hostname() == requestURL.Hostname() {
return nil
}
// stop at the first redirect that is not
// the same hostname as the original
// request.
return http.ErrUseLastResponse
}
resp, err := makeRequestWithRetry(ctx, http.MethodGet, requestURL, nil, nil, newOpts)
if err != nil {
slog.Warn("failed to get direct URL; backing off and retrying", "err", err)
if err := backoff(ctx); err != nil {
return nil, err
}
continue
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusTemporaryRedirect {
return nil, fmt.Errorf("unexpected status code %d", resp.StatusCode)
}
return resp.Location()
}
}()
if err != nil {
return err
}
g, inner := errgroup.WithContext(ctx)
g.SetLimit(numDownloadParts)
for i := range b.Parts {
@@ -238,7 +165,7 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
var err error
for try := 0; try < maxRetries; try++ {
w := io.NewOffsetWriter(file, part.StartsAt())
err = b.downloadChunk(inner, directURL, w, part, opts)
err = b.downloadChunk(inner, requestURL, w, part, opts)
switch {
case errors.Is(err, context.Canceled), errors.Is(err, syscall.ENOSPC):
// return immediately if the context is canceled or the device is out of space

View File

@@ -32,6 +32,7 @@ import (
"github.com/ollama/ollama/types/errtypes"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
"golang.org/x/crypto/ssh"
)
var (
@@ -54,8 +55,6 @@ type registryOptions struct {
Username string
Password string
Token string
CheckRedirect func(req *http.Request, via []*http.Request) error
}
type Model struct {
@@ -1090,11 +1089,12 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
if anonymous {
// no user is associated with the public key, and the request requires non-anonymous access
pubKey, nestedErr := auth.GetPublicKey()
localPubKey := strings.TrimSpace(string(ssh.MarshalAuthorizedKey(pubKey)))
if nestedErr != nil {
slog.Error(fmt.Sprintf("couldn't get public key: %v", nestedErr))
return nil, errUnauthorized
}
return nil, &errtypes.UnknownOllamaKey{Key: pubKey}
return nil, &errtypes.UnknownOllamaKey{Key: localPubKey}
}
// user is associated with the public key, but is not authorized to make the request
return nil, errUnauthorized
@@ -1133,9 +1133,7 @@ func makeRequest(ctx context.Context, method string, requestURL *url.URL, header
req.ContentLength = contentLength
}
resp, err := (&http.Client{
CheckRedirect: regOpts.CheckRedirect,
}).Do(req)
resp, err := http.DefaultClient.Do(req)
if err != nil {
return nil, err
}

View File

@@ -263,27 +263,13 @@ func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) {
if t, err := template.Named(s); err != nil {
slog.Debug("template detection", "error", err)
} else {
layer, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
tmpl, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
if err != nil {
return nil, err
}
layer.status = fmt.Sprintf("using autodetected template %s", t.Name)
layers = append(layers, &layerGGML{layer, nil})
if t.Parameters != nil {
var b bytes.Buffer
if err := json.NewEncoder(&b).Encode(t.Parameters); err != nil {
return nil, err
}
layer, err := NewLayer(&b, "application/vnd.ollama.image.params")
if err != nil {
return nil, err
}
layers = append(layers, &layerGGML{layer, nil})
}
tmpl.status = fmt.Sprintf("using autodetected template %s", t.Name)
layers = append(layers, &layerGGML{tmpl, nil})
}
}
}

View File

@@ -4,6 +4,7 @@ import (
"bytes"
"cmp"
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
@@ -23,8 +24,10 @@ import (
"github.com/gin-contrib/cors"
"github.com/gin-gonic/gin"
"golang.org/x/crypto/ssh"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/auth"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/gpu"
"github.com/ollama/ollama/llm"
@@ -609,9 +612,10 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
defer cancel()
quantization := cmp.Or(r.Quantize, r.Quantization)
if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); errors.Is(err, errBadTemplate) {
ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
} else if err != nil {
if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); err != nil {
if errors.Is(err, errBadTemplate) {
ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
}
ch <- gin.H{"error": err.Error()}
}
}()
@@ -927,7 +931,6 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
_, err = os.Stat(path)
switch {
case errors.Is(err, os.ErrNotExist):
@@ -940,6 +943,14 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
return
}
if c.GetHeader("X-Ollama-File") != "" && s.isLocal(c) {
err = localBlobCopy(c.GetHeader("X-Ollama-File"), path)
if err == nil {
c.Status(http.StatusCreated)
return
}
}
layer, err := NewLayer(c.Request.Body, "")
if err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
@@ -954,6 +965,108 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
c.Status(http.StatusCreated)
}
// localBlobCopy copies the file at src to dest without streaming it through
// the request body. It first checks that src can be stat'ed, then tries the
// platform-specific localCopy and, if that fails, falls back to defaultCopy.
//
// It returns nil as soon as one strategy succeeds. If src cannot be stat'ed
// that error is returned unchanged; if both strategies fail, the returned
// error wraps both causes so callers and logs can see why the copy failed.
//
// NOTE(review): nothing in this function checks that the contents of src
// match the digest implied by dest — confirm the caller validates this.
func localBlobCopy(src, dest string) error {
	if _, err := os.Stat(src); err != nil {
		return err
	}

	cloneErr := localCopy(src, dest)
	if cloneErr == nil {
		return nil
	}

	copyErr := defaultCopy(src, dest)
	if copyErr == nil {
		return nil
	}

	// Keep both failures: the first explains why the fast path was skipped,
	// the second why the fallback did not work either.
	return fmt.Errorf("failed to copy blob: %w", errors.Join(cloneErr, copyErr))
}
// isLocal reports whether the request carries an Authorization header that
// was signed by the same key pair the server itself uses.
//
// The header must have exactly three colon-separated fields: a public key
// (parsed as an ssh-ed25519 authorized key), base64-encoded request data,
// and a base64-encoded signature. The decoded request data must contain
// exactly three comma-separated fields and the signature must verify against
// the client key. Only then is the client key compared byte-for-byte with
// the key returned by auth.GetPublicKey. Any missing header, malformed
// field, or failed check yields false.
//
// NOTE(review): the individual request-data fields are only counted, not
// inspected — confirm whether they should be validated against the request.
func (s *Server) isLocal(c *gin.Context) bool {
	authz := c.GetHeader("Authorization")
	if authz == "" {
		return false
	}

	fields := strings.Split(authz, ":")
	if len(fields) != 3 {
		return false
	}
	encodedKey, encodedData, encodedSig := fields[0], fields[1], fields[2]

	clientPublicKey, _, _, _, err := ssh.ParseAuthorizedKey([]byte("ssh-ed25519 " + encodedKey))
	if err != nil {
		return false
	}

	requestData, err := base64.StdEncoding.DecodeString(encodedData)
	if err != nil {
		return false
	}

	// Exactly three comma-separated fields means exactly two commas.
	if strings.Count(string(requestData), ",") != 2 {
		return false
	}

	signature, err := base64.StdEncoding.DecodeString(encodedSig)
	if err != nil {
		return false
	}

	sig := &ssh.Signature{Format: clientPublicKey.Type(), Blob: signature}
	if verifyErr := clientPublicKey.Verify(requestData, sig); verifyErr != nil {
		return false
	}

	serverPublicKey, err := auth.GetPublicKey()
	if err != nil {
		slog.Error(fmt.Sprintf("failed to get server public key: %v", err))
		return false
	}

	return bytes.Equal(serverPublicKey.Marshal(), clientPublicKey.Marshal())
}
// defaultCopy copies the file at path to dest with a plain buffered stream
// copy, creating dest's parent directory first if needed. It is the portable
// fallback for when no platform-specific copy is available.
//
// The destination is synced and closed before returning. On any failure
// after dest has been created, the partially written file is removed so a
// truncated file is not left behind at dest. Underlying errors are wrapped
// with %w so callers can inspect them with errors.Is / errors.As.
func defaultCopy(path string, dest string) error {
	if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
		return err
	}

	sourceFile, err := os.Open(path)
	if err != nil {
		return fmt.Errorf("could not open source file: %w", err)
	}
	defer sourceFile.Close()

	destFile, err := os.Create(dest)
	if err != nil {
		return fmt.Errorf("could not create destination file: %w", err)
	}

	// Copy, then flush to stable storage; keep only the first failure.
	if _, err = io.CopyBuffer(destFile, sourceFile, make([]byte, 4*1024*1024)); err != nil {
		err = fmt.Errorf("error copying file: %w", err)
	} else if err = destFile.Sync(); err != nil {
		err = fmt.Errorf("error flushing file: %w", err)
	}

	// Close explicitly (not via defer) so a close failure is reported.
	if closeErr := destFile.Close(); err == nil && closeErr != nil {
		err = fmt.Errorf("error closing file: %w", closeErr)
	}

	if err != nil {
		// Best-effort cleanup; the copy error is the one worth reporting.
		_ = os.Remove(dest)
		return err
	}
	return nil
}
func isLocalIP(ip netip.Addr) bool {
if interfaces, err := net.Interfaces(); err == nil {
for _, iface := range interfaces {

View File

@@ -599,10 +599,9 @@ func TestCreateDetectTemplate(t *testing.T) {
}
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-0d79f567714c62c048378f2107fb332dabee0135d080c302d884317da9433cc5"),
filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"),
filepath.Join(p, "blobs", "sha256-c608dc615584cd20d9d830363dabf8a4783ae5d34245c3d8c115edb3bc7b28e4"),
filepath.Join(p, "blobs", "sha256-ea34c57ba5b78b740aafe2aeb74dc6507fc3ad14170b64c26a04fb9e36c88d75"),
filepath.Join(p, "blobs", "sha256-f836ee110db21567f826332e4cedd746c06d10664fd5a9ea3659e3683a944510"),
})
})

View File

@@ -10,15 +10,18 @@ import (
"math"
"net/http"
"net/http/httptest"
"net/url"
"os"
"sort"
"strings"
"testing"
"github.com/gin-gonic/gin"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/auth"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/openai"
@@ -527,3 +530,64 @@ func TestNormalize(t *testing.T) {
})
}
}
// TestIsLocalReal exercises Server.isLocal with an Authorization header
// produced by api.Authorization. HOME and USERPROFILE are pointed at a
// temporary directory before auth.GetPublicKey is called (presumably so the
// key pair lives under that directory — confirm against the auth package).
// It expects isLocal to be true while the environment is unchanged, and
// false both after the environment is switched to a second temporary
// directory and when the header is malformed.
func TestIsLocalReal(t *testing.T) {
	gin.SetMode(gin.TestMode)

	clientHome := t.TempDir()
	t.Setenv("HOME", clientHome)
	t.Setenv("USERPROFILE", clientHome)
	if _, err := auth.GetPublicKey(); err != nil {
		t.Fatal(err)
	}

	ctx, _ := gin.CreateTestContext(httptest.NewRecorder())
	ctx.Request = &http.Request{Header: make(http.Header)}

	request := &http.Request{
		Method: http.MethodPost,
		URL: &url.URL{
			Scheme: "http",
			Host:   "localhost:8080",
			Path:   "/api/blobs",
		},
	}

	authz, err := api.Authorization(ctx, request)
	if err != nil {
		t.Fatal(err)
	}

	// Attach the signed header to the request that isLocal inspects.
	ctx.Request.Header.Set("Authorization", authz)

	s := &Server{}
	if !s.isLocal(ctx) {
		t.Fatal("Expected isLocal to return true")
	}

	t.Run("different server pubkey", func(t *testing.T) {
		serverHome := t.TempDir()
		t.Setenv("HOME", serverHome)
		t.Setenv("USERPROFILE", serverHome)
		if _, err := auth.GetPublicKey(); err != nil {
			t.Fatal(err)
		}

		if s.isLocal(ctx) {
			t.Fatal("Expected isLocal to return false")
		}
	})

	t.Run("invalid pubkey", func(t *testing.T) {
		ctx.Request.Header.Set("Authorization", "sha-25616:invalid")
		if s.isLocal(ctx) {
			t.Fatal("Expected isLocal to return false")
		}
	})
}

View File

@@ -132,8 +132,6 @@ func (s *Scheduler) processPending(ctx context.Context) {
if len(pending.model.ProjectorPaths) > 0 && numParallel != 1 {
numParallel = 1
slog.Warn("multimodal models don't support parallel requests yet")
} else if strings.Contains(pending.model.Config.ModelFamily, "bert") {
numParallel = runtime.NumCPU()
}
for {

View File

@@ -1,8 +0,0 @@
{
"stop": [
"<start_system>",
"<end_message>",
"<start_user>",
"<start_assistant>"
]
}

View File

@@ -1,6 +0,0 @@
{
"stop": [
"### Instruction:",
"### Response"
]
}

View File

@@ -1,6 +0,0 @@
{
"stop": [
"<|im_start|>",
"<|im_end|>"
]
}

View File

@@ -1,8 +0,0 @@
{
"stop": [
"System:",
"User:",
"Assistant:",
"<|begin_of_text|>"
]
}

View File

@@ -1,7 +0,0 @@
{
"stop": [
"Source:",
"Destination:",
"<step>"
]
}

View File

@@ -1,6 +0,0 @@
{
"stop": [
"User:",
"Assistant:"
]
}

View File

@@ -1,6 +0,0 @@
{
"stop": [
"<start_of_turn>",
"<end_of_turn>"
]
}

View File

@@ -1,7 +0,0 @@
{
"stop": [
"System:",
"Question:",
"Answer:"
]
}

View File

@@ -1,8 +0,0 @@
{
"stop": [
"[INST]",
"[/INST]",
"<<SYS>>",
"<</SYS>>"
]
}

View File

@@ -1,7 +0,0 @@
{
"stop": [
"<|start_header_id|>",
"<|end_header_id|>",
"<|eot_id|>"
]
}

View File

@@ -1,6 +0,0 @@
{
"stop": [
"@@ Instruction",
"@@ Response"
]
}

View File

@@ -1,6 +0,0 @@
{
"stop": [
"<|im_start|>",
"<|im_end|>"
]
}

View File

@@ -1,5 +0,0 @@
{
"stop": [
"<|end_of_turn|>"
]
}

View File

@@ -1,8 +0,0 @@
{
"stop": [
"<|end|>",
"<|system|>",
"<|user|>",
"<|assistant|>"
]
}

View File

@@ -1,7 +0,0 @@
{
"stop": [
"### System:",
"### User:",
"### Assistant"
]
}

View File

@@ -1,7 +0,0 @@
{
"stop": [
"### Instruction",
"### Response",
"<|endoftext|>"
]
}

View File

@@ -23,7 +23,6 @@ import (
var indexBytes []byte
//go:embed *.gotmpl
//go:embed *.json
var templatesFS embed.FS
var templatesOnce = sync.OnceValues(func() ([]*named, error) {
@@ -40,15 +39,6 @@ var templatesOnce = sync.OnceValues(func() ([]*named, error) {
// normalize line endings
t.Bytes = bytes.ReplaceAll(bts, []byte("\r\n"), []byte("\n"))
params, err := templatesFS.ReadFile(t.Name + ".json")
if err != nil {
continue
}
if err := json.Unmarshal(params, &t.Parameters); err != nil {
return nil, err
}
}
return templates, nil
@@ -58,10 +48,6 @@ type named struct {
Name string `json:"name"`
Template string `json:"template"`
Bytes []byte
Parameters *struct {
Stop []string `json:"stop"`
}
}
func (t named) Reader() io.Reader {

View File

@@ -1,6 +0,0 @@
{
"stop": [
"USER:",
"ASSISTANT:"
]
}

View File

@@ -1,8 +0,0 @@
{
"stop": [
"<|system|>",
"</s>",
"<|user|>",
"<|assistant|>"
]
}