revert other pr change

short circuit
set homedir for windows --test
2026-01-03 04:59:19 -05:00 · 2024-07-24 11:11:59 -07:00 · 2024-07-23 17:14:34 -07:00 · 2024-07-23 16:28:42 -07:00 · 2024-07-23 16:03:19 -07:00 · 2024-07-23 14:05:48 -07:00
42 changed files with 490 additions and 770 deletions
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -31,7 +31,7 @@ jobs:
          security set-keychain-settings -lut 3600 build.keychain
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - name: Build Darwin
        env:
@@ -87,7 +87,7 @@ jobs:
          write-host "plugin installed"
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - run: go get ./...
      - run: |
@@ -141,7 +141,7 @@ jobs:
          write-host "plugin installed"
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - name: 'Install ROCm'
        run: |
@@ -218,7 +218,7 @@ jobs:
          write-host "plugin installed"
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - name: 'Install CUDA'
        run: |
@@ -306,7 +306,7 @@ jobs:
          write-host "plugin installed"
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - run: go get
      - uses: actions/download-artifact@v4
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -63,7 +63,7 @@ jobs:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - run: go get ./...
      - run: |
@@ -163,7 +163,7 @@ jobs:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - name: 'Install ROCm'
        run: |
@@ -200,7 +200,7 @@ jobs:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - name: 'Install CUDA'
        run: |
@@ -255,7 +255,7 @@ jobs:
          submodules: recursive
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: false
      - run: |
          case ${{ matrix.arch }} in
@@ -297,7 +297,7 @@ jobs:
          submodules: recursive
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - run: |
          case ${{ matrix.arch }} in
--- a/2
+++ b/2
@@ -1,4 +1,4 @@
-ARG GOLANG_VERSION=1.22.5
+ARG GOLANG_VERSION=1.22.1
 ARG CMAKE_VERSION=3.22.1
 # this CUDA_VERSION corresponds with the one specified in docs/gpu.md
 ARG CUDA_VERSION=11.3.1
--- a/README.md
+++ b/README.md
@@ -64,8 +64,7 @@ Here are some example models that can be downloaded:
 | LLaVA              | 7B         | 4.5GB | `ollama run llava`             |
 | Solar              | 10.7B      | 6.1GB | `ollama run solar`             |

-> [!NOTE]
-> You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
+> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.

 ## Customize a model

@@ -297,7 +296,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
 - [AI Studio](https://github.com/MindWorkAI/AI-Studio)
 - [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
- [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)

 ### Terminal

--- a/api/client.go
+++ b/api/client.go
@@ -17,6 +17,7 @@ import (
 	"bufio"
 	"bytes"
 	"context"
+	"encoding/base64"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -24,7 +25,10 @@ import (
 	"net/http"
 	"net/url"
 	"runtime"
+	"strings"
+	"time"

+	"github.com/ollama/ollama/auth"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/version"
@@ -383,3 +387,16 @@ func (c *Client) Version(ctx context.Context) (string, error) {

 	return version.Version, nil
 }
+
+// Authorization signs "<method>,<request URI>,<unix seconds>" for request via
+// auth.Sign and returns "<key>:<base64 of that payload>:<signature>".
+func Authorization(ctx context.Context, request *http.Request) (string, error) {
+	payload := fmt.Appendf(nil, "%s,%s,%d", request.Method, request.URL.RequestURI(), time.Now().Unix())
+	signed, err := auth.Sign(ctx, payload)
+	if err != nil {
+		return "", err
+	}
+	// auth.Sign yields "<key>:<signature>"; splice the encoded payload between the halves
+	pubKey, signature, _ := strings.Cut(signed, ":")
+	return pubKey + ":" + base64.StdEncoding.EncodeToString(payload) + ":" + signature, nil
+}
--- a/auth/auth.go
+++ b/auth/auth.go
@@ -3,49 +3,68 @@ package auth
 import (
 	"bytes"
 	"context"
+	"crypto/ed25519"
 	"crypto/rand"
 	"encoding/base64"
+	"encoding/pem"
+	"errors"
 	"fmt"
 	"io"
 	"log/slog"
 	"os"
 	"path/filepath"
-	"strings"

 	"golang.org/x/crypto/ssh"
 )

 const defaultPrivateKey = "id_ed25519"

-func keyPath() (string, error) {
+func privateKey() (ssh.Signer, error) {
 	home, err := os.UserHomeDir()
 	if err != nil {
-		return "", err
+		return nil, err
 	}

-	return filepath.Join(home, ".ollama", defaultPrivateKey), nil
+	keyPath := filepath.Join(home, ".ollama", defaultPrivateKey)
+	privateKeyFile, err := os.ReadFile(keyPath)
+	if errors.Is(err, os.ErrNotExist) {
+		err := initializeKeypair()
+		if err != nil {
+			return nil, err
+		}
+
+		return privateKey()
+	} else if err != nil {
+		slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
+		return nil, err
+	}
+
+	return ssh.ParsePrivateKey(privateKeyFile)
 }

-func GetPublicKey() (string, error) {
-	keyPath, err := keyPath()
+func GetPublicKey() (ssh.PublicKey, error) {
+	// try to read pubkey first
+	home, err := os.UserHomeDir()
 	if err != nil {
-		return "", err
+		return nil, err
 	}

-	privateKeyFile, err := os.ReadFile(keyPath)
-	if err != nil {
-		slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
-		return "", err
+	pubkeyPath := filepath.Join(home, ".ollama", defaultPrivateKey+".pub")
+	pubKeyFile, err := os.ReadFile(pubkeyPath)
+	if errors.Is(err, os.ErrNotExist) {
+		// try from privateKey
+		privateKey, err := privateKey()
+		if err != nil {
+			return nil, fmt.Errorf("failed to read public key: %w", err)
+		}
+
+		return privateKey.PublicKey(), nil
+	} else if err != nil {
+		return nil, fmt.Errorf("failed to read public key: %w", err)
 	}

-	privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
-	if err != nil {
-		return "", err
-	}
-
-	publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
-
-	return strings.TrimSpace(string(publicKey)), nil
+	pubKey, _, _, _, err := ssh.ParseAuthorizedKey(pubKeyFile)
+	return pubKey, err
 }

 func NewNonce(r io.Reader, length int) (string, error) {
@@ -58,25 +77,20 @@ func NewNonce(r io.Reader, length int) (string, error) {
 }

 func Sign(ctx context.Context, bts []byte) (string, error) {
-	keyPath, err := keyPath()
-	if err != nil {
-		return "", err
-	}
-
-	privateKeyFile, err := os.ReadFile(keyPath)
-	if err != nil {
-		slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
-		return "", err
-	}
-
-	privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
+	privateKey, err := privateKey()
 	if err != nil {
 		return "", err
 	}

 	// get the pubkey, but remove the type
-	publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
-	parts := bytes.Split(publicKey, []byte(" "))
+	publicKey, err := GetPublicKey()
+	if err != nil {
+		return "", err
+	}
+
+	publicKeyBytes := ssh.MarshalAuthorizedKey(publicKey)
+
+	parts := bytes.Split(publicKeyBytes, []byte(" "))
 	if len(parts) < 2 {
 		return "", fmt.Errorf("malformed public key")
 	}
@@ -89,3 +103,49 @@ func Sign(ctx context.Context, bts []byte) (string, error) {
 	// signature is <pubkey>:<signature>
 	return fmt.Sprintf("%s:%s", bytes.TrimSpace(parts[1]), base64.StdEncoding.EncodeToString(signedData.Blob)), nil
 }
+
+// initializeKeypair writes a fresh ed25519 keypair to ~/.ollama/id_ed25519 and
+// ~/.ollama/id_ed25519.pub, but only when os.Stat reports the private key missing (otherwise it returns nil).
+func initializeKeypair() error {
+	homeDir, err := os.UserHomeDir()
+	if err != nil {
+		return err
+	}
+
+	privPath := filepath.Join(homeDir, ".ollama", "id_ed25519")
+	pubPath := filepath.Join(homeDir, ".ollama", "id_ed25519.pub")
+	if _, statErr := os.Stat(privPath); !errors.Is(statErr, os.ErrNotExist) {
+		return nil
+	}
+
+	fmt.Printf("Couldn't find '%s'. Generating new private key.\n", privPath)
+	pub, priv, err := ed25519.GenerateKey(rand.Reader)
+	if err != nil {
+		return err
+	}
+
+	pemBlock, err := ssh.MarshalPrivateKey(priv, "")
+	if err != nil {
+		return err
+	}
+
+	if err := os.MkdirAll(filepath.Dir(privPath), 0o755); err != nil {
+		return fmt.Errorf("could not create directory %w", err)
+	}
+	if err := os.WriteFile(privPath, pem.EncodeToMemory(pemBlock), 0o600); err != nil {
+		return err
+	}
+
+	sshPub, err := ssh.NewPublicKey(pub)
+	if err != nil {
+		return err
+	}
+
+	authorizedKey := ssh.MarshalAuthorizedKey(sshPub)
+	if err := os.WriteFile(pubPath, authorizedKey, 0o644); err != nil {
+		return err
+	}
+
+	fmt.Printf("Your new public key is: \n\n%s\n", authorizedKey)
+	return nil
+}
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -4,10 +4,7 @@ import (
 	"archive/zip"
 	"bytes"
 	"context"
-	"crypto/ed25519"
-	"crypto/rand"
 	"crypto/sha256"
-	"encoding/pem"
 	"errors"
 	"fmt"
 	"io"
@@ -15,6 +12,7 @@ import (
 	"math"
 	"net"
 	"net/http"
+	"net/url"
 	"os"
 	"os/signal"
 	"path/filepath"
@@ -112,7 +110,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 				path = tempfile
 			}

-			digest, err := createBlob(cmd, client, path)
+			digest, err := createBlob(cmd, path)
 			if err != nil {
 				return err
 			}
@@ -263,7 +261,9 @@ func tempZipFiles(path string) (string, error) {
 	return tempfile.Name(), nil
 }

-func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
+var ErrBlobExists = errors.New("blob exists")
+
+func createBlob(cmd *cobra.Command, path string) (string, error) {
 	bin, err := os.Open(path)
 	if err != nil {
 		return "", err
@@ -280,12 +280,65 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, er
 	}

 	digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
-	if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
+
+	// Use our new CreateBlob request which will include the file path
+	// The server checks for that file and if the server is local, it will copy the file over
+	// If the local copy fails, the server will continue to the default local copy
+	// If that fails, it will continue with the server POST
+	err = CreateBlob(cmd.Context(), path, digest, bin)
+	if errors.Is(err, ErrBlobExists) {
+		return digest, nil
+	}
+
+	if err != nil {
 		return "", err
 	}
+
 	return digest, nil
 }

+// CreateBlob POSTs r to /api/blobs/<digest> on envconfig.Host with a signed
+// Authorization header and src in X-Ollama-File. It returns nil on 201,
+// ErrBlobExists on 200, and an error naming any other response status.
+func CreateBlob(ctx context.Context, src, digest string, r *os.File) error {
+	ollamaHost := envconfig.Host
+	base := &url.URL{
+		Scheme: ollamaHost.Scheme,
+		Host:   net.JoinHostPort(ollamaHost.Host, ollamaHost.Port),
+	}
+
+	requestURL := base.JoinPath(fmt.Sprintf("/api/blobs/%s", digest))
+	request, err := http.NewRequestWithContext(ctx, http.MethodPost, requestURL.String(), r)
+	if err != nil {
+		return err
+	}
+
+	authz, err := api.Authorization(ctx, request)
+	if err != nil {
+		return err
+	}
+
+	request.Header.Set("Authorization", authz)
+	request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
+	request.Header.Set("X-Ollama-File", src)
+
+	resp, err := http.DefaultClient.Do(request)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+
+	switch resp.StatusCode {
+	case http.StatusCreated:
+		return nil
+	case http.StatusOK:
+		return ErrBlobExists
+	default:
+		// err is nil at this point, so returning it would report success
+		return fmt.Errorf("create blob %s: unexpected status %s", digest, resp.Status)
+	}
+}
+
 func RunHandler(cmd *cobra.Command, args []string) error {
 	interactive := true

@@ -379,11 +432,12 @@ func errFromUnknownKey(unknownKeyErr error) error {
 	if len(matches) > 0 {
 		serverPubKey := matches[0]

-		localPubKey, err := auth.GetPublicKey()
+		publicKey, err := auth.GetPublicKey()
 		if err != nil {
 			return unknownKeyErr
 		}

+		localPubKey := strings.TrimSpace(string(ssh.MarshalAuthorizedKey(publicKey)))
 		if runtime.GOOS == "linux" && serverPubKey != localPubKey {
 			// try the ollama service public key
 			svcPubKey, err := os.ReadFile("/usr/share/ollama/.ollama/id_ed25519.pub")
@@ -1072,7 +1126,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 }

 func RunServer(cmd *cobra.Command, _ []string) error {
-	if err := initializeKeypair(); err != nil {
+	if _, err := auth.GetPublicKey(); err != nil {
 		return err
 	}

@@ -1089,52 +1143,6 @@ func RunServer(cmd *cobra.Command, _ []string) error {
 	return err
 }

-func initializeKeypair() error {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return err
-	}
-
-	privKeyPath := filepath.Join(home, ".ollama", "id_ed25519")
-	pubKeyPath := filepath.Join(home, ".ollama", "id_ed25519.pub")
-
-	_, err = os.Stat(privKeyPath)
-	if os.IsNotExist(err) {
-		fmt.Printf("Couldn't find '%s'. Generating new private key.\n", privKeyPath)
-		cryptoPublicKey, cryptoPrivateKey, err := ed25519.GenerateKey(rand.Reader)
-		if err != nil {
-			return err
-		}
-
-		privateKeyBytes, err := ssh.MarshalPrivateKey(cryptoPrivateKey, "")
-		if err != nil {
-			return err
-		}
-
-		if err := os.MkdirAll(filepath.Dir(privKeyPath), 0o755); err != nil {
-			return fmt.Errorf("could not create directory %w", err)
-		}
-
-		if err := os.WriteFile(privKeyPath, pem.EncodeToMemory(privateKeyBytes), 0o600); err != nil {
-			return err
-		}
-
-		sshPublicKey, err := ssh.NewPublicKey(cryptoPublicKey)
-		if err != nil {
-			return err
-		}
-
-		publicKeyBytes := ssh.MarshalAuthorizedKey(sshPublicKey)
-
-		if err := os.WriteFile(pubKeyPath, publicKeyBytes, 0o644); err != nil {
-			return err
-		}
-
-		fmt.Printf("Your new public key is: \n\n%s\n", publicKeyBytes)
-	}
-	return nil
-}
-
 func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
--- a/convert/mistral.go
+++ b/convert/mistral.go
@@ -71,11 +71,6 @@ func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
 		"tokenizer.ggml.unknown_token_id": uint32(0),
 	}

-	if m.Params.HeadDimension > 0 {
-		kv["llama.attention.key_length"] = uint32(m.Params.HeadDimension)
-		kv["llama.attention.value_length"] = uint32(m.Params.HeadDimension)
-	}
-
 	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
 }

--- a/docs/api.md
+++ b/docs/api.md
@@ -40,7 +40,6 @@ Generate a response for a given prompt with a provided model. This is a streamin

 - `model`: (required) the [model name](#model-names)
 - `prompt`: the prompt to generate a response for
- `suffix`: the text after the model response
 - `images`: (optional) a list of base64-encoded images (for multimodal models such as `llava`)

 Advanced parameters (optional):
@@ -58,8 +57,7 @@ Advanced parameters (optional):

 Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as a valid JSON object. See the JSON mode [example](#request-json-mode) below.

-> [!IMPORTANT]
-> It's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts whitespace.
+> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.

 ### Examples

@@ -150,44 +148,8 @@ If `stream` is set to `false`, the response will be a single JSON object:
 }
 ```

-#### Request (with suffix)
-
-##### Request
-
-```shell
-curl http://localhost:11434/api/generate -d '{
-  "model": "codellama:code",
-  "prompt": "def compute_gcd(a, b):",
-  "suffix": "    return result",
-  "options": {
-    "temperature": 0
-  },
-  "stream": false
-}'
-```
-
-##### Response
-
-```json
-{
-  "model": "codellama:code",
-  "created_at": "2024-07-22T20:47:51.147561Z",
-  "response": "\n  if a == 0:\n    return b\n  else:\n    return compute_gcd(b % a, a)\n\ndef compute_lcm(a, b):\n  result = (a * b) / compute_gcd(a, b)\n",
-  "done": true,
-  "done_reason": "stop",
-  "context": [...],
-  "total_duration": 1162761250,
-  "load_duration": 6683708,
-  "prompt_eval_count": 17,
-  "prompt_eval_duration": 201222000,
-  "eval_count": 63,
-  "eval_duration": 953997000
-}
-```
-
 #### Request (JSON mode)

-> [!IMPORTANT]
 > When `format` is set to `json`, the output will always be a well-formed JSON object. It's important to also instruct the model to respond in JSON.

 ##### Request
@@ -418,14 +380,12 @@ Generate the next message in a chat with a provided model. This is a streaming e

 - `model`: (required) the [model name](#model-names)
 - `messages`: the messages of the chat, this can be used to keep a chat memory
- `tools`: tools for the model to use if supported. Requires `stream` to be set to `false`

 The `message` object has the following fields:

- `role`: the role of the message, either `system`, `user`, `assistant`, or `tool`
+- `role`: the role of the message, either `system`, `user` or `assistant`
 - `content`: the content of the message
 - `images` (optional): a list of images to include in the message (for multimodal models such as `llava`)
- `tool_calls` (optional): a list of tools the model wants to use

 Advanced parameters (optional):

@@ -662,79 +622,6 @@ curl http://localhost:11434/api/chat -d '{
 }
 ```

-#### Chat request (with tools)
-
-##### Request
-
-```
-curl http://localhost:11434/api/chat -d '{
-  "model": "mistral",
-  "messages": [
-    {
-      "role": "user",
-      "content": "What is the weather today in Paris?"
-    }
-  ],
-  "stream": false,
-  "tools": [
-    {
-      "type": "function",
-      "function": {
-        "name": "get_current_weather",
-        "description": "Get the current weather for a location",
-        "parameters": {
-          "type": "object",
-          "properties": {
-            "location": {
-              "type": "string",
-              "description": "The location to get the weather for, e.g. San Francisco, CA"
-            },
-            "format": {
-              "type": "string",
-              "description": "The format to return the weather in, e.g. 'celsius' or 'fahrenheit'",
-              "enum": ["celsius", "fahrenheit"]
-            }
-          },
-          "required": ["location", "format"]
-        }
-      }
-    }
-  ]
-}'
-```
-
-##### Response
-
-```json
-{
-  "model": "mistral:7b-instruct-v0.3-q4_K_M",
-  "created_at": "2024-07-22T20:33:28.123648Z",
-  "message": {
-    "role": "assistant",
-    "content": "",
-    "tool_calls": [
-      {
-        "function": {
-          "name": "get_current_weather",
-          "arguments": {
-            "format": "celsius",
-            "location": "Paris, FR"
-          }
-        }
-      }
-    ]
-  },
-  "done_reason": "stop",
-  "done": true,
-  "total_duration": 885095291,
-  "load_duration": 3753500,
-  "prompt_eval_count": 122,
-  "prompt_eval_duration": 328493000,
-  "eval_count": 33,
-  "eval_duration": 552222000
-}
-```
-
 ## Create a Model

 ```shell
@@ -1139,7 +1026,7 @@ If `stream` is set to `false`, then the response is a single JSON object:
 ## Generate Embeddings

 ```shell
-POST /api/embed
+POST /api/embeddings
 ```

 Generate embeddings from a model
@@ -1147,11 +1034,10 @@ Generate embeddings from a model
 ### Parameters

 - `model`: name of model to generate embeddings from
- `input`: text or list of text to generate embeddings for
+- `prompt`: text to generate embeddings for

 Advanced parameters:

- `truncate`: truncates the end of each input to fit within context length. Returns error if `false` and context length is exceeded. Defaults to `true`
 - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
 - `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)

@@ -1160,9 +1046,9 @@ Advanced parameters:
 #### Request

 ```shell
-curl http://localhost:11434/api/embed -d '{
+curl http://localhost:11434/api/embeddings -d '{
  "model": "all-minilm",
-  "input": "Why is the sky blue?"
+  "prompt": "Here is an article about llamas..."
 }'
 ```

@@ -1170,35 +1056,10 @@ curl http://localhost:11434/api/embed -d '{

 ```json
 {
-  "model": "all-minilm",
-  "embeddings": [[
-    0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
-    0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
-  ]]
-}
-```
-
-#### Request (Multiple input)
-
-```shell
-curl http://localhost:11434/api/embed -d '{
-  "model": "all-minilm",
-  "input": ["Why is the sky blue?", "Why is the grass green?"]
-}'
-```
-
-#### Response
-
-```json
-{
-  "model": "all-minilm",
-  "embeddings": [[
-    0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
-    0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
-  ],[
-    -0.0098027075, 0.06042469, 0.025257962, -0.006364387, 0.07272725,
-    0.017194884, 0.09032035, -0.051705178, 0.09951512, 0.09072481
-  ]]
+  "embedding": [
+    0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
+    0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
+  ]
 }
 ```

@@ -1245,45 +1106,3 @@ A single JSON object will be returned.
  ]
 }
 ```
-
-## Generate Embedding
-
-> Note: this endpoint has been superseded by `/api/embed`
-
-```shell
-POST /api/embeddings
-```
-
-Generate embeddings from a model
-
-### Parameters
-
- `model`: name of model to generate embeddings from
- `prompt`: text to generate embeddings for
-
-Advanced parameters:
-
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
-
-### Examples
-
-#### Request
-
-```shell
-curl http://localhost:11434/api/embeddings -d '{
-  "model": "all-minilm",
-  "prompt": "Here is an article about llamas..."
-}'
-```
-
-#### Response
-
-```json
-{
-  "embedding": [
-    0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
-    0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
-  ]
-}
-```
--- a/docs/modelfile.md
+++ b/docs/modelfile.md
@@ -1,7 +1,6 @@
 # Ollama Model File

-> [!NOTE]
-> `Modelfile` syntax is in development
+> Note: `Modelfile` syntax is in development

 A model file is the blueprint to create and share models with Ollama.

--- a/docs/openai.md
+++ b/docs/openai.md
@@ -78,8 +78,8 @@ curl http://localhost:11434/v1/chat/completions \
 - [x] Streaming
 - [x] JSON mode
 - [x] Reproducible outputs
- [x] Tools (streaming support coming soon)
 - [ ] Vision
+- [ ] Function calling
 - [ ] Logprobs

 #### Supported request fields
@@ -97,9 +97,9 @@ curl http://localhost:11434/v1/chat/completions \
 - [x] `temperature`
 - [x] `top_p`
 - [x] `max_tokens`
- [x] `tools`
- [ ] `tool_choice`
 - [ ] `logit_bias`
+- [ ] `tools`
+- [ ] `tool_choice`
 - [ ] `user`
 - [ ] `n`

--- a/docs/template.md
+++ b/docs/template.md
@@ -1,173 +0,0 @@
-# Template
-
-Ollama provides a powerful templating engine backed by Go's built-in templating engine to construct prompts for your large language model. This feature is a valuable tool to get the most out of your models.
-
-## Basic Template Structure
-
-A basic Go template consists of three main parts:
-
-* **Layout**: The overall structure of the template.
-* **Variables**: Placeholders for dynamic data that will be replaced with actual values when the template is rendered.
-* **Functions**: Custom functions or logic that can be used to manipulate the template's content.
-
-Here's an example of a simple chat template:
-
-```gotmpl
-{{- range .Messages }}
-{{ .Role }}: {{ .Content }}
-{{- end }}
-```
-
-In this example, we have:
-
-* A basic messages structure (layout)
-* Three variables: `Messages`, `Role`, and `Content` (variables)
-* A custom function (action) that iterates over an array of items (`range .Messages`) and displays each item
-
-## Adding templates to your model
-
-By default, models imported into Ollama have a default template of `{{ .Prompt }}`, i.e. user inputs are sent verbatim to the LLM. This is appropriate for text or code completion models but lacks essential markers for chat or instruction models.
-
-Omitting a template in these models puts the responsibility of correctly templating input onto the user. Adding a template allows users to easily get the best results from the model.
-
-To add templates in your model, you'll need to add a `TEMPLATE` command to the Modelfile. Here's an example using Meta's Llama 3.
-
-```dockerfile
-FROM llama3
-
-TEMPLATE """{{- if .System }}<|start_header_id|>system<|end_header_id|>
-
-{{ .System }}<|eot_id|>
-{{- end }}
-{{- range .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>
-
-{{ .Content }}<|eot_id|>
-{{- end }}<|start_header_id|>assistant<|end_header_id|>
-
-"""
-```
-
-## Variables
-
-`System` (string): system prompt
-
-`Prompt` (string): user prompt
-
-`Response` (string): assistant response
-
-`Suffix` (string): text inserted after the assistant's response
-
-`Messages` (list): list of messages
-
-`Messages[].Role` (string): role which can be one of `system`, `user`, `assistant`, or `tool`
-
-`Messages[].Content` (string):  message content
-
-`Messages[].ToolCalls` (list): list of tools the model wants to call
-
-`Messages[].ToolCalls[].Function` (object): function to call
-
-`Messages[].ToolCalls[].Function.Name` (string): function name
-
-`Messages[].ToolCalls[].Function.Arguments` (map): mapping of argument name to argument value
-
-`Tools` (list): list of tools the model can access
-
-`Tools[].Type` (string): schema type. `type` is always `function`
-
-`Tools[].Function` (object): function definition
-
-`Tools[].Function.Name` (string): function name
-
-`Tools[].Function.Description` (string): function description
-
-`Tools[].Function.Parameters` (object): function parameters
-
-`Tools[].Function.Parameters.Type` (string): schema type. `type` is always `object`
-
-`Tools[].Function.Parameters.Required` (list): list of required properties
-
-`Tools[].Function.Parameters.Properties` (map): mapping of property name to property definition
-
-`Tools[].Function.Parameters.Properties[].Type` (string): property type
-
-`Tools[].Function.Parameters.Properties[].Description` (string): property description
-
-`Tools[].Function.Parameters.Properties[].Enum` (list): list of valid values
-
-## Tips and Best Practices
-
-Keep the following tips and best practices in mind when working with Go templates:
-
-* **Be mindful of dot**: Control flow structures like `range` and `with` changes the value `.`
-* **Out-of-scope variables**: Use `$.` to reference variables not currently in scope, starting from the root
-* **Whitespace control**: Use `-` to trim leading (`{{-`) and trailing (`-}}`) whitespace
-
-## Examples
-
-### Example Messages
-
-#### ChatML
-
-ChatML is a popular template format. It can be used for models such as Databrick's DBRX, Intel's Neural Chat, and Microsoft's Orca 2.
-
-```gotmpl
-{{- if .System }}<|im_start|>system
-{{ .System }}<|im_end|>
-{{ end }}
-{{- range .Messages }}<|im_start|>{{ .Role }}
-{{ .Content }}<|im_end|>
-{{ end }}<|im_start|>assistant
-{{ else }}
-{{ if .System }}<|im_start|>system
-{{ .System }}<|im_end|>
-```
-
-### Example Tools
-
-Tools support can be added to a model by adding a `{{ .Tools }}` node to the template. This feature is useful for models trained to call external tools and can a powerful tool for retrieving real-time data or performing complex tasks.
-
-#### Mistral
-
-Mistral v0.3 and Mixtral 8x22B supports tool calling.
-
-```gotmpl
-{{- range $index, $_ := .Messages }}
-{{- if eq .Role "user" }}
-{{- if and (le (len (slice $.Messages $index)) 2) $.Tools }}[AVAILABLE_TOOLS] {{ json $.Tools }}[/AVAILABLE_TOOLS]
-{{- end }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}
-
-{{ end }}{{ .Content }}[/INST]
-{{- else if eq .Role "assistant" }}
-{{- if .Content }} {{ .Content }}</s>
-{{- else if .ToolCalls }}[TOOL_CALLS] [
-{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ json .Function.Arguments }}}
-{{- end }}]</s>
-{{- end }}
-{{- else if eq .Role "tool" }}[TOOL_RESULTS] {"content": {{ .Content }}}[/TOOL_RESULTS]
-{{- end }}
-{{- end }}
-```
-
-### Example Fill-in-Middle
-
-Fill-in-middle support can be added to a model by adding a `{{ .Suffix }}` node to the template. This feature is useful for models that are trained to generate text in the middle of user input, such as code completion models.
-
-#### CodeLlama
-
-CodeLlama [7B](https://ollama.com/library/codellama:7b-code) and [13B](https://ollama.com/library/codellama:13b-code) code completion models support fill-in-middle.
-
-```gotmpl
-<PRE> {{ .Prompt }} <SUF>{{ .Suffix }} <MID>
-```
-
-> [!NOTE]
-> CodeLlama 34B and 70B code completion and all instruct and Python fine-tuned models do not support fill-in-middle.
-
-#### Codestral
-
-Codestral [22B](https://ollama.com/library/codestral:22b) supports fill-in-middle.
-
-```gotmpl
-[SUFFIX]{{ .Suffix }}[PREFIX] {{ .Prompt }}
-```
--- a/integration/embed_test.go
+++ b/integration/embed_test.go
@@ -4,45 +4,12 @@ package integration

 import (
 	"context"
-	"math"
 	"testing"
 	"time"

 	"github.com/ollama/ollama/api"
 )

-func floatsEqual32(a, b float32) bool {
-	return math.Abs(float64(a-b)) <= 1e-4
-}
-
-func floatsEqual64(a, b float64) bool {
-	return math.Abs(a-b) <= 1e-4
-}
-
-func TestAllMiniLMEmbeddings(t *testing.T) {
-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
-	defer cancel()
-
-	req := api.EmbeddingRequest{
-		Model:  "all-minilm",
-		Prompt: "why is the sky blue?",
-	}
-
-	res, err := embeddingTestHelper(ctx, t, req)
-
-	if err != nil {
-		t.Fatalf("error: %v", err)
-	}
-
-	if len(res.Embedding) != 384 {
-		t.Fatalf("expected 384 floats, got %d", len(res.Embedding))
-	}
-
-	if !floatsEqual64(res.Embedding[0], 0.06642947345972061) {
-		t.Fatalf("expected 0.06642947345972061, got %.16f", res.Embedding[0])
-	}
-}
-
 func TestAllMiniLMEmbed(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
 	defer cancel()
@@ -66,8 +33,8 @@ func TestAllMiniLMEmbed(t *testing.T) {
 		t.Fatalf("expected 384 floats, got %d", len(res.Embeddings[0]))
 	}

-	if !floatsEqual32(res.Embeddings[0][0], 0.010071031) {
-		t.Fatalf("expected 0.010071031, got %.8f", res.Embeddings[0][0])
+	if res.Embeddings[0][0] != 0.010071031 {
+		t.Fatalf("expected 0.010071031, got %f", res.Embeddings[0][0])
 	}
 }

@@ -94,12 +61,12 @@ func TestAllMiniLMBatchEmbed(t *testing.T) {
 		t.Fatalf("expected 384 floats, got %d", len(res.Embeddings[0]))
 	}

-	if !floatsEqual32(res.Embeddings[0][0], 0.010071031) || !floatsEqual32(res.Embeddings[1][0], -0.009802706) {
-		t.Fatalf("expected 0.010071031 and -0.009802706, got %.8f and %.8f", res.Embeddings[0][0], res.Embeddings[1][0])
+	if res.Embeddings[0][0] != 0.010071031 || res.Embeddings[1][0] != -0.009802706 {
+		t.Fatalf("expected 0.010071031 and -0.009802706, got %f and %f", res.Embeddings[0][0], res.Embeddings[1][0])
 	}
 }

-func TestAllMiniLMEmbedTruncate(t *testing.T) {
+func TestAllMiniLmEmbedTruncate(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
 	defer cancel()

@@ -168,22 +135,6 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) {
 	}
 }

-func embeddingTestHelper(ctx context.Context, t *testing.T, req api.EmbeddingRequest) (*api.EmbeddingResponse, error) {
-	client, _, cleanup := InitServerConnection(ctx, t)
-	defer cleanup()
-	if err := PullIfMissing(ctx, client, req.Model); err != nil {
-		t.Fatalf("failed to pull model %s: %v", req.Model, err)
-	}
-
-	response, err := client.Embeddings(ctx, &req)
-
-	if err != nil {
-		return nil, err
-	}
-
-	return response, nil
-}
-
 func embedTestHelper(ctx context.Context, t *testing.T, req api.EmbedRequest) (*api.EmbedResponse, error) {
 	client, _, cleanup := InitServerConnection(ctx, t)
 	defer cleanup()
--- a/server/copy_darwin.go
+++ b/server/copy_darwin.go
@@ -0,0 +1,23 @@
+package server
+
+import (
+	"os"
+	"path/filepath"
+
+	"golang.org/x/sys/unix"
+)
+
+// localCopy clones src to target with the clonefile system call.
+//
+// The parent directory of target is created (mode 0o755) when it is missing.
+// Any error from os.MkdirAll or unix.Clonefile is returned unchanged.
+func localCopy(src, target string) error {
+	parent := filepath.Dir(target)
+	if mkErr := os.MkdirAll(parent, 0o755); mkErr != nil {
+		return mkErr
+	}
+
+	// No clone flags are passed (flags == 0).
+	return unix.Clonefile(src, target, 0)
+}
--- a/server/copy_linux.go
+++ b/server/copy_linux.go
@@ -0,0 +1,7 @@
+package server
+
+import "errors"
+
+// localCopy is the Linux build's placeholder for a platform-specific copy.
+// It ignores both arguments, performs no I/O, and always returns an error.
+func localCopy(src, target string) error {
+	return errors.New("no local copy implementation for linux")
+}
--- a/server/copy_windows.go
+++ b/server/copy_windows.go
@@ -0,0 +1,67 @@
+//go:build windows
+// +build windows
+
+package server
+
+import (
+	"os"
+	"path/filepath"
+	"syscall"
+	"unsafe"
+)
+
+// localCopy copies src to target by delegating to copyFileEx (CopyFileExW).
+//
+// The parent directory of target is created (mode 0o755) when it is missing.
+// The source and destination are deliberately not opened here: copyFileEx is
+// handed the two paths and the system call opens the files itself. Holding
+// extra handles on either file is redundant, may make the copy fail with a
+// sharing violation, and pre-creating the target would leave an empty file
+// behind whenever the copy fails.
+//
+// Errors from os.MkdirAll and copyFileEx are returned unchanged.
+func localCopy(src, target string) error {
+	if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
+		return err
+	}
+
+	return copyFileEx(src, target)
+}
+
+// copyFileEx copies the file at src to dst by calling kernel32's CopyFileExW.
+//
+// Both paths are converted to UTF-16 first; a conversion failure is returned
+// as is. The four arguments after the two paths are passed as zero. A zero
+// result from the call is treated as failure, in which case the error value
+// reported by Call is returned.
+func copyFileEx(src, dst string) error {
+	from, err := syscall.UTF16PtrFromString(src)
+	if err != nil {
+		return err
+	}
+
+	to, err := syscall.UTF16PtrFromString(dst)
+	if err != nil {
+		return err
+	}
+
+	// Called proc, not copyFileEx, so the local does not shadow this function.
+	proc := syscall.NewLazyDLL("kernel32.dll").NewProc("CopyFileExW")
+	ok, _, callErr := proc.Call(
+		uintptr(unsafe.Pointer(from)),
+		uintptr(unsafe.Pointer(to)),
+		0, 0, 0, 0)
+	if ok != 0 {
+		return nil
+	}
+
+	return callErr
+}
--- a/server/download.go
+++ b/server/download.go
@@ -8,7 +8,6 @@ import (
 	"io"
 	"log/slog"
 	"math"
-	"math/rand/v2"
 	"net/http"
 	"net/url"
 	"os"
@@ -142,32 +141,6 @@ func (b *blobDownload) Run(ctx context.Context, requestURL *url.URL, opts *regis
 	b.err = b.run(ctx, requestURL, opts)
 }

-func newBackoff(maxBackoff time.Duration) func(ctx context.Context) error {
-	var n int
-	return func(ctx context.Context) error {
-		if ctx.Err() != nil {
-			return ctx.Err()
-		}
-
-		n++
-
-		// n^2 backoff timer is a little smoother than the
-		// common choice of 2^n.
-		d := min(time.Duration(n*n)*10*time.Millisecond, maxBackoff)
-		// Randomize the delay between 0.5-1.5 x msec, in order
-		// to prevent accidental "thundering herd" problems.
-		d = time.Duration(float64(d) * (rand.Float64() + 0.5))
-		t := time.NewTimer(d)
-		defer t.Stop()
-		select {
-		case <-ctx.Done():
-			return ctx.Err()
-		case <-t.C:
-			return nil
-		}
-	}
-}
-
 func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *registryOptions) error {
 	defer blobDownloadManager.Delete(b.Digest)
 	ctx, b.CancelFunc = context.WithCancel(ctx)
@@ -180,52 +153,6 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis

 	_ = file.Truncate(b.Total)

-	directURL, err := func() (*url.URL, error) {
-		ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
-		defer cancel()
-
-		backoff := newBackoff(10 * time.Second)
-		for {
-			// shallow clone opts to be used in the closure
-			// without affecting the outer opts.
-			newOpts := new(registryOptions)
-			*newOpts = *opts
-
-			newOpts.CheckRedirect = func(req *http.Request, via []*http.Request) error {
-				if len(via) > 10 {
-					return errors.New("maxium redirects exceeded (10) for directURL")
-				}
-
-				// if the hostname is the same, allow the redirect
-				if req.URL.Hostname() == requestURL.Hostname() {
-					return nil
-				}
-
-				// stop at the first redirect that is not
-				// the same hostname as the original
-				// request.
-				return http.ErrUseLastResponse
-			}
-
-			resp, err := makeRequestWithRetry(ctx, http.MethodGet, requestURL, nil, nil, newOpts)
-			if err != nil {
-				slog.Warn("failed to get direct URL; backing off and retrying", "err", err)
-				if err := backoff(ctx); err != nil {
-					return nil, err
-				}
-				continue
-			}
-			defer resp.Body.Close()
-			if resp.StatusCode != http.StatusTemporaryRedirect {
-				return nil, fmt.Errorf("unexpected status code %d", resp.StatusCode)
-			}
-			return resp.Location()
-		}
-	}()
-	if err != nil {
-		return err
-	}
-
 	g, inner := errgroup.WithContext(ctx)
 	g.SetLimit(numDownloadParts)
 	for i := range b.Parts {
@@ -238,7 +165,7 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
 			var err error
 			for try := 0; try < maxRetries; try++ {
 				w := io.NewOffsetWriter(file, part.StartsAt())
-				err = b.downloadChunk(inner, directURL, w, part, opts)
+				err = b.downloadChunk(inner, requestURL, w, part, opts)
 				switch {
 				case errors.Is(err, context.Canceled), errors.Is(err, syscall.ENOSPC):
 					// return immediately if the context is canceled or the device is out of space
--- a/server/images.go
+++ b/server/images.go
@@ -32,6 +32,7 @@ import (
 	"github.com/ollama/ollama/types/errtypes"
 	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/version"
+	"golang.org/x/crypto/ssh"
 )

 var (
@@ -54,8 +55,6 @@ type registryOptions struct {
 	Username string
 	Password string
 	Token    string
-
-	CheckRedirect func(req *http.Request, via []*http.Request) error
 }

 type Model struct {
@@ -1090,11 +1089,12 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
 	if anonymous {
 		// no user is associated with the public key, and the request requires non-anonymous access
 		pubKey, nestedErr := auth.GetPublicKey()
+		localPubKey := strings.TrimSpace(string(ssh.MarshalAuthorizedKey(pubKey)))
 		if nestedErr != nil {
 			slog.Error(fmt.Sprintf("couldn't get public key: %v", nestedErr))
 			return nil, errUnauthorized
 		}
-		return nil, &errtypes.UnknownOllamaKey{Key: pubKey}
+		return nil, &errtypes.UnknownOllamaKey{Key: localPubKey}
 	}
 	// user is associated with the public key, but is not authorized to make the request
 	return nil, errUnauthorized
@@ -1133,9 +1133,7 @@ func makeRequest(ctx context.Context, method string, requestURL *url.URL, header
 		req.ContentLength = contentLength
 	}

-	resp, err := (&http.Client{
-		CheckRedirect: regOpts.CheckRedirect,
-	}).Do(req)
+	resp, err := http.DefaultClient.Do(req)
 	if err != nil {
 		return nil, err
 	}
--- a/server/model.go
+++ b/server/model.go
@@ -263,27 +263,13 @@ func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) {
 			if t, err := template.Named(s); err != nil {
 				slog.Debug("template detection", "error", err)
 			} else {
-				layer, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
+				tmpl, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
 				if err != nil {
 					return nil, err
 				}

-				layer.status = fmt.Sprintf("using autodetected template %s", t.Name)
-				layers = append(layers, &layerGGML{layer, nil})
-
-				if t.Parameters != nil {
-					var b bytes.Buffer
-					if err := json.NewEncoder(&b).Encode(t.Parameters); err != nil {
-						return nil, err
-					}
-
-					layer, err := NewLayer(&b, "application/vnd.ollama.image.params")
-					if err != nil {
-						return nil, err
-					}
-
-					layers = append(layers, &layerGGML{layer, nil})
-				}
+				tmpl.status = fmt.Sprintf("using autodetected template %s", t.Name)
+				layers = append(layers, &layerGGML{tmpl, nil})
 			}
 		}
 	}
--- a/server/routes.go
+++ b/server/routes.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"cmp"
 	"context"
+	"encoding/base64"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -23,8 +24,10 @@ import (

 	"github.com/gin-contrib/cors"
 	"github.com/gin-gonic/gin"
+	"golang.org/x/crypto/ssh"

 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/auth"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/gpu"
 	"github.com/ollama/ollama/llm"
@@ -609,9 +612,10 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
 		defer cancel()

 		quantization := cmp.Or(r.Quantize, r.Quantization)
-		if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); errors.Is(err, errBadTemplate) {
-			ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
-		} else if err != nil {
+		if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); err != nil {
+			if errors.Is(err, errBadTemplate) {
+				ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
+			}
 			ch <- gin.H{"error": err.Error()}
 		}
 	}()
@@ -927,7 +931,6 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
 		return
 	}
-
 	_, err = os.Stat(path)
 	switch {
 	case errors.Is(err, os.ErrNotExist):
@@ -940,6 +943,14 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
 		return
 	}

+	if c.GetHeader("X-Ollama-File") != "" && s.isLocal(c) {
+		err = localBlobCopy(c.GetHeader("X-Ollama-File"), path)
+		if err == nil {
+			c.Status(http.StatusCreated)
+			return
+		}
+	}
+
 	layer, err := NewLayer(c.Request.Body, "")
 	if err != nil {
 		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
@@ -954,6 +965,108 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
 	c.Status(http.StatusCreated)
 }

+// localBlobCopy copies the file at src to dest.
+//
+// It first tries the platform-specific localCopy and, if that fails, falls
+// back to defaultCopy. src must exist and be a regular file (os.Stat follows
+// symlinks, so a symlink to a regular file is accepted). When both strategies
+// fail, the returned error wraps both causes so the reason is not lost.
+//
+// NOTE(review): nothing here verifies the contents of src (for example
+// against an expected digest); confirm the caller does so if it needs to.
+func localBlobCopy(src, dest string) error {
+	info, err := os.Stat(src)
+	if err != nil {
+		return err
+	}
+	if !info.Mode().IsRegular() {
+		return fmt.Errorf("failed to copy blob: %q is not a regular file", src)
+	}
+
+	localErr := localCopy(src, dest)
+	if localErr == nil {
+		return nil
+	}
+
+	defaultErr := defaultCopy(src, dest)
+	if defaultErr == nil {
+		return nil
+	}
+
+	return fmt.Errorf("failed to copy blob: %w", errors.Join(localErr, defaultErr))
+}
+
+// isLocal reports whether the request's Authorization header carries a valid
+// signature made with the same public key that auth.GetPublicKey returns.
+//
+// The header must split on ":" into exactly three parts: a public key, a
+// base64 payload, and a base64 signature. The key is parsed as an ssh-ed25519
+// authorized key, the decoded payload must consist of exactly three
+// comma-separated fields, and the signature must verify against the decoded
+// payload. Only then is the client key compared with the server key. A
+// missing header, a malformed part, or any failed step yields false.
+//
+// NOTE(review): the payload fields are only counted, never compared with the
+// actual request, and their expected layout is not established here — confirm
+// that this is intended.
+func (s *Server) isLocal(c *gin.Context) bool {
+	header := c.GetHeader("Authorization")
+	if header == "" {
+		return false
+	}
+
+	fields := strings.Split(header, ":")
+	if len(fields) != 3 {
+		return false
+	}
+	encodedKey, encodedPayload, encodedSig := fields[0], fields[1], fields[2]
+
+	clientKey, _, _, _, err := ssh.ParseAuthorizedKey([]byte(fmt.Sprintf("ssh-ed25519 %s", encodedKey)))
+	if err != nil {
+		return false
+	}
+
+	payload, err := base64.StdEncoding.DecodeString(encodedPayload)
+	if err != nil {
+		return false
+	}
+
+	// Exactly two commas means exactly three comma-separated fields.
+	if strings.Count(string(payload), ",") != 2 {
+		return false
+	}
+
+	sig, err := base64.StdEncoding.DecodeString(encodedSig)
+	if err != nil {
+		return false
+	}
+
+	if clientKey.Verify(payload, &ssh.Signature{Format: clientKey.Type(), Blob: sig}) != nil {
+		return false
+	}
+
+	serverKey, err := auth.GetPublicKey()
+	if err != nil {
+		slog.Error(fmt.Sprintf("failed to get server public key: %v", err))
+		return false
+	}
+
+	return bytes.Equal(serverKey.Marshal(), clientKey.Marshal())
+}
+
+// defaultCopy copies the file at path to dest with ordinary reads and writes.
+//
+// The parent directory of dest is created (mode 0o755) when it is missing,
+// and dest itself is created or truncated. The data is synced before the
+// function returns. If copying, syncing, or closing fails, the incomplete
+// dest is removed (best effort) so that a partial file is not left behind,
+// and the returned error wraps the underlying cause.
+func defaultCopy(path string, dest string) error {
+	if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
+		return err
+	}
+
+	sourceFile, err := os.Open(path)
+	if err != nil {
+		return fmt.Errorf("could not open source file: %w", err)
+	}
+	defer sourceFile.Close()
+
+	destFile, err := os.Create(dest)
+	if err != nil {
+		return fmt.Errorf("could not create destination file: %w", err)
+	}
+
+	_, err = io.CopyBuffer(destFile, sourceFile, make([]byte, 4*1024*1024))
+	if err != nil {
+		err = fmt.Errorf("error copying file: %w", err)
+	} else if err = destFile.Sync(); err != nil {
+		err = fmt.Errorf("error flushing file: %w", err)
+	}
+
+	// Close explicitly rather than via defer so its error is not lost; an
+	// earlier failure takes precedence over a close failure.
+	if closeErr := destFile.Close(); err == nil && closeErr != nil {
+		err = fmt.Errorf("error closing destination file: %w", closeErr)
+	}
+
+	if err != nil {
+		// Best effort: the copy error is what the caller needs to see.
+		_ = os.Remove(dest)
+		return err
+	}
+
+	return nil
+}
+
 func isLocalIP(ip netip.Addr) bool {
 	if interfaces, err := net.Interfaces(); err == nil {
 		for _, iface := range interfaces {
--- a/server/routes_create_test.go
+++ b/server/routes_create_test.go
@@ -599,10 +599,9 @@ func TestCreateDetectTemplate(t *testing.T) {
 		}

 		checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
-			filepath.Join(p, "blobs", "sha256-0d79f567714c62c048378f2107fb332dabee0135d080c302d884317da9433cc5"),
 			filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"),
 			filepath.Join(p, "blobs", "sha256-c608dc615584cd20d9d830363dabf8a4783ae5d34245c3d8c115edb3bc7b28e4"),
-			filepath.Join(p, "blobs", "sha256-ea34c57ba5b78b740aafe2aeb74dc6507fc3ad14170b64c26a04fb9e36c88d75"),
+			filepath.Join(p, "blobs", "sha256-f836ee110db21567f826332e4cedd746c06d10664fd5a9ea3659e3683a944510"),
 		})
 	})

--- a/server/routes_test.go
+++ b/server/routes_test.go
@@ -10,15 +10,18 @@ import (
 	"math"
 	"net/http"
 	"net/http/httptest"
+	"net/url"
 	"os"
 	"sort"
 	"strings"
 	"testing"

+	"github.com/gin-gonic/gin"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"

 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/auth"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/openai"
@@ -527,3 +530,64 @@ func TestNormalize(t *testing.T) {
 		})
 	}
 }
+
+// TestIsLocalReal checks Server.isLocal against an Authorization header
+// produced by api.Authorization.
+//
+// HOME and USERPROFILE are pointed at temporary directories so that whatever
+// the auth package resolves relative to the home directory (presumably its
+// key pair) stays inside the test. The subtests reuse the same gin context:
+// one switches the home directory so the server key no longer matches the
+// key that signed the header, and the others replace the header with
+// malformed values.
+func TestIsLocalReal(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	clientPubLoc := t.TempDir()
+	t.Setenv("HOME", clientPubLoc)
+	t.Setenv("USERPROFILE", clientPubLoc)
+
+	_, err := auth.GetPublicKey()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	w := httptest.NewRecorder()
+	ctx, _ := gin.CreateTestContext(w)
+	ctx.Request = &http.Request{
+		Header: make(http.Header),
+	}
+
+	requestURL := url.URL{
+		Scheme: "http",
+		Host:   "localhost:8080",
+		Path:   "/api/blobs",
+	}
+	request := &http.Request{
+		Method: http.MethodPost,
+		URL:    &requestURL,
+	}
+	s := &Server{}
+
+	authz, err := api.Authorization(ctx, request)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Set client authorization header
+	ctx.Request.Header.Set("Authorization", authz)
+	if !s.isLocal(ctx) {
+		t.Fatal("Expected isLocal to return true")
+	}
+
+	t.Run("different server pubkey", func(t *testing.T) {
+		serverPubLoc := t.TempDir()
+		t.Setenv("HOME", serverPubLoc)
+		t.Setenv("USERPROFILE", serverPubLoc)
+		_, err := auth.GetPublicKey()
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		if s.isLocal(ctx) {
+			t.Fatal("Expected isLocal to return false")
+		}
+	})
+
+	// Only two ":"-separated parts, so this is rejected before key parsing.
+	t.Run("malformed header", func(t *testing.T) {
+		ctx.Request.Header.Set("Authorization", "sha-25616:invalid")
+		if s.isLocal(ctx) {
+			t.Fatal("Expected isLocal to return false")
+		}
+	})
+
+	// Three parts, so the header shape is accepted and the key itself is
+	// what has to be rejected.
+	t.Run("invalid pubkey", func(t *testing.T) {
+		ctx.Request.Header.Set("Authorization", "invalid:aW52YWxpZA==:aW52YWxpZA==")
+		if s.isLocal(ctx) {
+			t.Fatal("Expected isLocal to return false")
+		}
+	})
+}
--- a/server/sched.go
+++ b/server/sched.go
@@ -132,8 +132,6 @@ func (s *Scheduler) processPending(ctx context.Context) {
 			if len(pending.model.ProjectorPaths) > 0 && numParallel != 1 {
 				numParallel = 1
 				slog.Warn("multimodal models don't support parallel requests yet")
-			} else if strings.Contains(pending.model.Config.ModelFamily, "bert") {
-				numParallel = runtime.NumCPU()
 			}

 			for {
--- a/template/alfred.json
+++ b/template/alfred.json
@@ -1,8 +0,0 @@
-{
-  "stop": [
-    "<start_system>",
-    "<end_message>",
-    "<start_user>",
-    "<start_assistant>"
-  ]
-}
--- a/template/alpaca.json
+++ b/template/alpaca.json
@@ -1,6 +0,0 @@
-{
-  "stop": [
-    "### Instruction:",
-    "### Response"
-  ]
-}
--- a/template/chatml.json
+++ b/template/chatml.json
@@ -1,6 +0,0 @@
-{
-  "stop": [
-    "<|im_start|>",
-    "<|im_end|>"
-  ]
-}
--- a/template/chatqa.json
+++ b/template/chatqa.json
@@ -1,8 +0,0 @@
-{
-  "stop": [
-    "System:",
-    "User:",
-    "Assistant:",
-    "<|begin_of_text|>"
-  ]
-}
--- a/template/codellama-70b-instruct.json
+++ b/template/codellama-70b-instruct.json
@@ -1,7 +0,0 @@
-{
-  "stop": [
-    "Source:",
-    "Destination:",
-    "<step>"
-  ]
-}
--- a/template/falcon-instruct.json
+++ b/template/falcon-instruct.json
@@ -1,6 +0,0 @@
-{
-  "stop": [
-    "User:",
-    "Assistant:"
-  ]
-}
--- a/template/gemma-instruct.json
+++ b/template/gemma-instruct.json
@@ -1,6 +0,0 @@
-{
-  "stop": [
-    "<start_of_turn>",
-    "<end_of_turn>"
-  ]
-}
--- a/template/granite-instruct.json
+++ b/template/granite-instruct.json
@@ -1,7 +0,0 @@
-{
-  "stop": [
-    "System:",
-    "Question:",
-    "Answer:"
-  ]
-}
--- a/template/llama2-chat.json
+++ b/template/llama2-chat.json
@@ -1,8 +0,0 @@
-{
-  "stop": [
-    "[INST]",
-    "[/INST]",
-    "<<SYS>>",
-    "<</SYS>>"
-  ]
-}
--- a/template/llama3-instruct.json
+++ b/template/llama3-instruct.json
@@ -1,7 +0,0 @@
-{
-  "stop": [
-    "<|start_header_id|>",
-    "<|end_header_id|>",
-    "<|eot_id|>"
-  ]
-}
--- a/template/magicoder.json
+++ b/template/magicoder.json
@@ -1,6 +0,0 @@
-{
-  "stop": [
-    "@@ Instruction",
-    "@@ Response"
-  ]
-}
--- a/template/mistral-instruct.json
+++ b/template/mistral-instruct.json
@@ -1,6 +0,0 @@
-{
-  "stop": [
-    "<|im_start|>",
-    "<|im_end|>"
-  ]
-}
--- a/template/openchat.json
+++ b/template/openchat.json
@@ -1,5 +0,0 @@
-{
-  "stop": [
-    "<|end_of_turn|>"
-  ]
-}
--- a/template/phi-3.json
+++ b/template/phi-3.json
@@ -1,8 +0,0 @@
-{
-  "stop": [
-    "<|end|>",
-    "<|system|>",
-    "<|user|>",
-    "<|assistant|>"
-  ]
-}
--- a/template/solar-instruct.json
+++ b/template/solar-instruct.json
@@ -1,7 +0,0 @@
-{
-  "stop": [
-    "### System:",
-    "### User:",
-    "### Assistant"
-  ]
-}
--- a/template/starcoder2-instruct.json
+++ b/template/starcoder2-instruct.json
@@ -1,7 +0,0 @@
-{
-  "stop": [
-    "### Instruction",
-    "### Response",
-    "<|endoftext|>"
-  ]
-}
--- a/template/template.go
+++ b/template/template.go
@@ -23,7 +23,6 @@ import (
 var indexBytes []byte

 //go:embed *.gotmpl
-//go:embed *.json
 var templatesFS embed.FS

 var templatesOnce = sync.OnceValues(func() ([]*named, error) {
@@ -40,15 +39,6 @@ var templatesOnce = sync.OnceValues(func() ([]*named, error) {

 		// normalize line endings
 		t.Bytes = bytes.ReplaceAll(bts, []byte("\r\n"), []byte("\n"))
-
-		params, err := templatesFS.ReadFile(t.Name + ".json")
-		if err != nil {
-			continue
-		}
-
-		if err := json.Unmarshal(params, &t.Parameters); err != nil {
-			return nil, err
-		}
 	}

 	return templates, nil
@@ -58,10 +48,6 @@ type named struct {
 	Name     string `json:"name"`
 	Template string `json:"template"`
 	Bytes    []byte
-
-	Parameters *struct {
-		Stop []string `json:"stop"`
-	}
 }

 func (t named) Reader() io.Reader {
--- a/template/vicuna.json
+++ b/template/vicuna.json
@@ -1,6 +0,0 @@
-{
-  "stop": [
-    "USER:",
-    "ASSISTANT:"
-  ]
-}
--- a/template/zephyr.json
+++ b/template/zephyr.json
@@ -1,8 +0,0 @@
-{
-  "stop": [
-    "<|system|>",
-    "</s>",
-    "<|user|>",
-    "<|assistant|>"
-  ]
-}
Author	SHA1	Message	Date
Josh Yan	f30b54209c	revert other pr change	2024-07-24 11:11:59 -07:00
Josh Yan	e39be4f63a	short circuit	2024-07-23 17:14:34 -07:00
Josh Yan	b8c3d54f7a	set homedir for windows --test	2024-07-23 16:28:42 -07:00
Josh Yan	c8434b0e69	rmv unsued	2024-07-23 16:03:19 -07:00
Josh Yan	65658e4077	default to post	2024-07-23 14:05:48 -07:00
Josh Yan	b29382b86f	bin	2024-07-23 13:57:40 -07:00
Josh Yan	2efe2013a1	test	2024-07-23 13:54:22 -07:00
Josh Yan	5c3786f4d5	comments	2024-07-23 13:46:34 -07:00
Josh Yan	33848ad10f	serverside copy	2024-07-23 12:26:05 -07:00
Josh Yan	ff06a2916d	changes	2024-07-22 15:51:52 -07:00
Josh Yan	d923a59356	testing auth	2024-07-22 15:51:52 -07:00
Josh Yan	2b42ad5754	auth changes'	2024-07-22 15:51:52 -07:00
Josh Yan	e3253e5469	isLocal testing	2024-07-22 15:51:52 -07:00
Josh Yan	35b49739ec	timecheck	2024-07-22 15:51:52 -07:00
Josh Yan	bd8596d32b	cmt	2024-07-22 15:51:52 -07:00
Josh Yan	b85705162f	remove knownhosts	2024-07-22 15:51:52 -07:00
Josh Yan	d62a3a1e2b	lint	2024-07-22 15:51:52 -07:00
Josh Yan	de48cd681f	clean	2024-07-22 15:51:52 -07:00
Josh Yan	5d0e078057	removed cmt and prints	2024-07-22 15:51:52 -07:00
Josh Yan	8d5739b833	removed client isLocal()	2024-07-22 15:51:52 -07:00
Josh Yan	b5ff0ed4ff	lint	2024-07-22 15:51:52 -07:00
Josh Yan	857054f9fa	lint	2024-07-22 15:51:52 -07:00
Josh Yan	6dd9be55e2	lint	2024-07-22 15:51:52 -07:00
Josh Yan	d70707a668	syscopy windows	2024-07-22 15:51:52 -07:00
Josh Yan	c88774ffeb	os copy	2024-07-22 15:51:52 -07:00
Josh Yan	34d197000d	rmv prints	2024-07-22 15:51:52 -07:00
Josh Yan	6c0a8379f6	local copy	2024-07-22 15:51:52 -07:00
Josh Yan	163ee9a8b0	isLocal firstdraft	2024-07-22 15:51:52 -07:00
Josh Yan	de7b2f3948	clean	2024-07-22 15:51:52 -07:00
Josh Yan	f27c66fb0c	rm bench	2024-07-22 15:51:52 -07:00
Josh Yan	a238191798	rm config	2024-07-22 15:51:52 -07:00
Josh Yan	6436c7a375	rm config	2024-07-22 15:51:52 -07:00
Josh Yan	896a15874e	clean	2024-07-22 15:51:52 -07:00
Josh Yan	56008688a1	local path	2024-07-22 15:51:52 -07:00
Josh Yan	d14d38e940	still works	2024-07-22 15:51:52 -07:00
Josh Yan	03df02883d	rebase	2024-07-22 15:51:52 -07:00
Josh Yan	ae49abf80a	benchmark	2024-07-22 15:51:52 -07:00
Josh Yan	2c450502db	on disk copy	2024-07-22 15:51:52 -07:00
Josh Yan	46b76aeb46	start tests	2024-07-22 15:51:52 -07:00
Josh Yan	0e01da82d6	errorsis	2024-07-22 15:51:31 -07:00
Josh Yan	6b1b85ba3d	hide initialize keypair	2024-07-22 15:41:04 -07:00
Josh Yan	5603441538	test	2024-07-22 13:58:50 -07:00
Josh Yan	76b4dfcc9e	auth	2024-07-22 13:54:02 -07:00