errorsis

hide initialize keypair
test
2026-01-01 20:18:52 -05:00 · 2024-07-22 15:51:31 -07:00 · 2024-07-22 15:41:04 -07:00 · 2024-07-22 13:58:50 -07:00 · 2024-07-22 13:54:02 -07:00
37 changed files with 140 additions and 766 deletions
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -31,7 +31,7 @@ jobs:
          security set-keychain-settings -lut 3600 build.keychain
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - name: Build Darwin
        env:
@@ -87,7 +87,7 @@ jobs:
          write-host "plugin installed"
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - run: go get ./...
      - run: |
@@ -141,7 +141,7 @@ jobs:
          write-host "plugin installed"
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - name: 'Install ROCm'
        run: |
@@ -218,7 +218,7 @@ jobs:
          write-host "plugin installed"
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - name: 'Install CUDA'
        run: |
@@ -306,7 +306,7 @@ jobs:
          write-host "plugin installed"
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - run: go get
      - uses: actions/download-artifact@v4
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -63,7 +63,7 @@ jobs:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - run: go get ./...
      - run: |
@@ -163,7 +163,7 @@ jobs:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - name: 'Install ROCm'
        run: |
@@ -200,7 +200,7 @@ jobs:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - name: 'Install CUDA'
        run: |
@@ -255,7 +255,7 @@ jobs:
          submodules: recursive
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: false
      - run: |
          case ${{ matrix.arch }} in
@@ -297,7 +297,7 @@ jobs:
          submodules: recursive
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - run: |
          case ${{ matrix.arch }} in
--- a/2
+++ b/2
@@ -1,4 +1,4 @@
-ARG GOLANG_VERSION=1.22.5
+ARG GOLANG_VERSION=1.22.1
 ARG CMAKE_VERSION=3.22.1
 # this CUDA_VERSION corresponds with the one specified in docs/gpu.md
 ARG CUDA_VERSION=11.3.1
--- a/README.md
+++ b/README.md
@@ -64,8 +64,7 @@ Here are some example models that can be downloaded:
 | LLaVA              | 7B         | 4.5GB | `ollama run llava`             |
 | Solar              | 10.7B      | 6.1GB | `ollama run solar`             |

-> [!NOTE]
-> You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
+> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.

 ## Customize a model

@@ -297,7 +296,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
 - [AI Studio](https://github.com/MindWorkAI/AI-Studio)
 - [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
-- [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)

 ### Terminal

--- a/auth/auth.go
+++ b/auth/auth.go
@@ -3,49 +3,68 @@ package auth
 import (
 	"bytes"
 	"context"
+	"crypto/ed25519"
 	"crypto/rand"
 	"encoding/base64"
+	"encoding/pem"
+	"errors"
 	"fmt"
 	"io"
 	"log/slog"
 	"os"
 	"path/filepath"
-	"strings"

 	"golang.org/x/crypto/ssh"
 )

 const defaultPrivateKey = "id_ed25519"

-func keyPath() (string, error) {
+func privateKey() (ssh.Signer, error) {
 	home, err := os.UserHomeDir()
 	if err != nil {
-		return "", err
+		return nil, err
 	}

-	return filepath.Join(home, ".ollama", defaultPrivateKey), nil
+	keyPath := filepath.Join(home, ".ollama", defaultPrivateKey)
+	privateKeyFile, err := os.ReadFile(keyPath)
+	if errors.Is(err, os.ErrNotExist) {
+		err := initializeKeypair()
+		if err != nil {
+			return nil, err
+		}
+
+		return privateKey()
+	} else if err != nil {
+		slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
+		return nil, err
+	}
+
+	return ssh.ParsePrivateKey(privateKeyFile)
 }

-func GetPublicKey() (string, error) {
-	keyPath, err := keyPath()
+func GetPublicKey() (ssh.PublicKey, error) {
+	// try to read pubkey first
+	home, err := os.UserHomeDir()
 	if err != nil {
-		return "", err
+		return nil, err
 	}

-	privateKeyFile, err := os.ReadFile(keyPath)
-	if err != nil {
-		slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
-		return "", err
+	pubkeyPath := filepath.Join(home, ".ollama", defaultPrivateKey+".pub")
+	pubKeyFile, err := os.ReadFile(pubkeyPath)
+	if errors.Is(err, os.ErrNotExist) {
+		// try from privateKey
+		privateKey, err := privateKey()
+		if err != nil {
+			return nil, fmt.Errorf("failed to read public key: %w", err)
+		}
+
+		return privateKey.PublicKey(), nil
+	} else if err != nil {
+		return nil, fmt.Errorf("failed to read public key: %w", err)
 	}

-	privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
-	if err != nil {
-		return "", err
-	}
-
-	publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
-
-	return strings.TrimSpace(string(publicKey)), nil
+	pubKey, _, _, _, err := ssh.ParseAuthorizedKey(pubKeyFile)
+	return pubKey, err
 }

 func NewNonce(r io.Reader, length int) (string, error) {
@@ -58,25 +77,20 @@ func NewNonce(r io.Reader, length int) (string, error) {
 }

 func Sign(ctx context.Context, bts []byte) (string, error) {
-	keyPath, err := keyPath()
-	if err != nil {
-		return "", err
-	}
-
-	privateKeyFile, err := os.ReadFile(keyPath)
-	if err != nil {
-		slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
-		return "", err
-	}
-
-	privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
+	privateKey, err := privateKey()
 	if err != nil {
 		return "", err
 	}

 	// get the pubkey, but remove the type
-	publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
-	parts := bytes.Split(publicKey, []byte(" "))
+	publicKey, err := GetPublicKey()
+	if err != nil {
+		return "", err
+	}
+
+	publicKeyBytes := ssh.MarshalAuthorizedKey(publicKey)
+
+	parts := bytes.Split(publicKeyBytes, []byte(" "))
 	if len(parts) < 2 {
 		return "", fmt.Errorf("malformed public key")
 	}
@@ -89,3 +103,49 @@ func Sign(ctx context.Context, bts []byte) (string, error) {
 	// signature is <pubkey>:<signature>
 	return fmt.Sprintf("%s:%s", bytes.TrimSpace(parts[1]), base64.StdEncoding.EncodeToString(signedData.Blob)), nil
 }
+
+func initializeKeypair() error {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return err
+	}
+
+	privKeyPath := filepath.Join(home, ".ollama", "id_ed25519")
+	pubKeyPath := filepath.Join(home, ".ollama", "id_ed25519.pub")
+
+	_, err = os.Stat(privKeyPath)
+	if errors.Is(err, os.ErrNotExist) {
+		fmt.Printf("Couldn't find '%s'. Generating new private key.\n", privKeyPath)
+		cryptoPublicKey, cryptoPrivateKey, err := ed25519.GenerateKey(rand.Reader)
+		if err != nil {
+			return err
+		}
+
+		privateKeyBytes, err := ssh.MarshalPrivateKey(cryptoPrivateKey, "")
+		if err != nil {
+			return err
+		}
+
+		if err := os.MkdirAll(filepath.Dir(privKeyPath), 0o755); err != nil {
+			return fmt.Errorf("could not create directory %w", err)
+		}
+
+		if err := os.WriteFile(privKeyPath, pem.EncodeToMemory(privateKeyBytes), 0o600); err != nil {
+			return err
+		}
+
+		sshPublicKey, err := ssh.NewPublicKey(cryptoPublicKey)
+		if err != nil {
+			return err
+		}
+
+		publicKeyBytes := ssh.MarshalAuthorizedKey(sshPublicKey)
+
+		if err := os.WriteFile(pubKeyPath, publicKeyBytes, 0o644); err != nil {
+			return err
+		}
+
+		fmt.Printf("Your new public key is: \n\n%s\n", publicKeyBytes)
+	}
+	return nil
+}
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -4,10 +4,7 @@ import (
 	"archive/zip"
 	"bytes"
 	"context"
-	"crypto/ed25519"
-	"crypto/rand"
 	"crypto/sha256"
-	"encoding/pem"
 	"errors"
 	"fmt"
 	"io"
@@ -379,11 +376,12 @@ func errFromUnknownKey(unknownKeyErr error) error {
 	if len(matches) > 0 {
 		serverPubKey := matches[0]

-		localPubKey, err := auth.GetPublicKey()
+		publicKey, err := auth.GetPublicKey()
 		if err != nil {
 			return unknownKeyErr
 		}

+		localPubKey := strings.TrimSpace(string(ssh.MarshalAuthorizedKey(publicKey)))
 		if runtime.GOOS == "linux" && serverPubKey != localPubKey {
 			// try the ollama service public key
 			svcPubKey, err := os.ReadFile("/usr/share/ollama/.ollama/id_ed25519.pub")
@@ -1072,7 +1070,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 }

 func RunServer(cmd *cobra.Command, _ []string) error {
-	if err := initializeKeypair(); err != nil {
+	if _, err := auth.GetPublicKey(); err != nil {
 		return err
 	}

@@ -1089,52 +1087,6 @@ func RunServer(cmd *cobra.Command, _ []string) error {
 	return err
 }

-func initializeKeypair() error {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return err
-	}
-
-	privKeyPath := filepath.Join(home, ".ollama", "id_ed25519")
-	pubKeyPath := filepath.Join(home, ".ollama", "id_ed25519.pub")
-
-	_, err = os.Stat(privKeyPath)
-	if os.IsNotExist(err) {
-		fmt.Printf("Couldn't find '%s'. Generating new private key.\n", privKeyPath)
-		cryptoPublicKey, cryptoPrivateKey, err := ed25519.GenerateKey(rand.Reader)
-		if err != nil {
-			return err
-		}
-
-		privateKeyBytes, err := ssh.MarshalPrivateKey(cryptoPrivateKey, "")
-		if err != nil {
-			return err
-		}
-
-		if err := os.MkdirAll(filepath.Dir(privKeyPath), 0o755); err != nil {
-			return fmt.Errorf("could not create directory %w", err)
-		}
-
-		if err := os.WriteFile(privKeyPath, pem.EncodeToMemory(privateKeyBytes), 0o600); err != nil {
-			return err
-		}
-
-		sshPublicKey, err := ssh.NewPublicKey(cryptoPublicKey)
-		if err != nil {
-			return err
-		}
-
-		publicKeyBytes := ssh.MarshalAuthorizedKey(sshPublicKey)
-
-		if err := os.WriteFile(pubKeyPath, publicKeyBytes, 0o644); err != nil {
-			return err
-		}
-
-		fmt.Printf("Your new public key is: \n\n%s\n", publicKeyBytes)
-	}
-	return nil
-}
-
 func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
--- a/convert/mistral.go
+++ b/convert/mistral.go
@@ -71,11 +71,6 @@ func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
 		"tokenizer.ggml.unknown_token_id": uint32(0),
 	}

-	if m.Params.HeadDimension > 0 {
-		kv["llama.attention.key_length"] = uint32(m.Params.HeadDimension)
-		kv["llama.attention.value_length"] = uint32(m.Params.HeadDimension)
-	}
-
 	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
 }

--- a/docs/api.md
+++ b/docs/api.md
@@ -40,7 +40,6 @@ Generate a response for a given prompt with a provided model. This is a streamin

 - `model`: (required) the [model name](#model-names)
 - `prompt`: the prompt to generate a response for
-- `suffix`: the text after the model response
 - `images`: (optional) a list of base64-encoded images (for multimodal models such as `llava`)

 Advanced parameters (optional):
@@ -58,8 +57,7 @@ Advanced parameters (optional):

 Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as a valid JSON object. See the JSON mode [example](#request-json-mode) below.

-> [!IMPORTANT]
-> It's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts whitespace.
+> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.

 ### Examples

@@ -150,44 +148,8 @@ If `stream` is set to `false`, the response will be a single JSON object:
 }
 ```

-#### Request (with suffix)
-
-##### Request
-
-```shell
-curl http://localhost:11434/api/generate -d '{
-  "model": "codellama:code",
-  "prompt": "def compute_gcd(a, b):",
-  "suffix": "    return result",
-  "options": {
-    "temperature": 0
-  },
-  "stream": false
-}'
-```
-
-##### Response
-
-```json
-{
-  "model": "codellama:code",
-  "created_at": "2024-07-22T20:47:51.147561Z",
-  "response": "\n  if a == 0:\n    return b\n  else:\n    return compute_gcd(b % a, a)\n\ndef compute_lcm(a, b):\n  result = (a * b) / compute_gcd(a, b)\n",
-  "done": true,
-  "done_reason": "stop",
-  "context": [...],
-  "total_duration": 1162761250,
-  "load_duration": 6683708,
-  "prompt_eval_count": 17,
-  "prompt_eval_duration": 201222000,
-  "eval_count": 63,
-  "eval_duration": 953997000
-}
-```
-
 #### Request (JSON mode)

-> [!IMPORTANT]
 > When `format` is set to `json`, the output will always be a well-formed JSON object. It's important to also instruct the model to respond in JSON.

 ##### Request
@@ -418,14 +380,12 @@ Generate the next message in a chat with a provided model. This is a streaming e

 - `model`: (required) the [model name](#model-names)
 - `messages`: the messages of the chat, this can be used to keep a chat memory
-- `tools`: tools for the model to use if supported. Requires `stream` to be set to `false`

 The `message` object has the following fields:

-- `role`: the role of the message, either `system`, `user`, `assistant`, or `tool`
+- `role`: the role of the message, either `system`, `user` or `assistant`
 - `content`: the content of the message
 - `images` (optional): a list of images to include in the message (for multimodal models such as `llava`)
-- `tool_calls` (optional): a list of tools the model wants to use

 Advanced parameters (optional):

@@ -662,79 +622,6 @@ curl http://localhost:11434/api/chat -d '{
 }
 ```

-#### Chat request (with tools)
-
-##### Request
-
-```
-curl http://localhost:11434/api/chat -d '{
-  "model": "mistral",
-  "messages": [
-    {
-      "role": "user",
-      "content": "What is the weather today in Paris?"
-    }
-  ],
-  "stream": false,
-  "tools": [
-    {
-      "type": "function",
-      "function": {
-        "name": "get_current_weather",
-        "description": "Get the current weather for a location",
-        "parameters": {
-          "type": "object",
-          "properties": {
-            "location": {
-              "type": "string",
-              "description": "The location to get the weather for, e.g. San Francisco, CA"
-            },
-            "format": {
-              "type": "string",
-              "description": "The format to return the weather in, e.g. 'celsius' or 'fahrenheit'",
-              "enum": ["celsius", "fahrenheit"]
-            }
-          },
-          "required": ["location", "format"]
-        }
-      }
-    }
-  ]
-}'
-```
-
-##### Response
-
-```json
-{
-  "model": "mistral:7b-instruct-v0.3-q4_K_M",
-  "created_at": "2024-07-22T20:33:28.123648Z",
-  "message": {
-    "role": "assistant",
-    "content": "",
-    "tool_calls": [
-      {
-        "function": {
-          "name": "get_current_weather",
-          "arguments": {
-            "format": "celsius",
-            "location": "Paris, FR"
-          }
-        }
-      }
-    ]
-  },
-  "done_reason": "stop",
-  "done": true,
-  "total_duration": 885095291,
-  "load_duration": 3753500,
-  "prompt_eval_count": 122,
-  "prompt_eval_duration": 328493000,
-  "eval_count": 33,
-  "eval_duration": 552222000
-}
-```
-
 ## Create a Model

 ```shell
@@ -1139,7 +1026,7 @@ If `stream` is set to `false`, then the response is a single JSON object:
 ## Generate Embeddings

 ```shell
-POST /api/embed
+POST /api/embeddings
 ```

 Generate embeddings from a model
@@ -1147,11 +1034,10 @@ Generate embeddings from a model
 ### Parameters

 - `model`: name of model to generate embeddings from
-- `input`: text or list of text to generate embeddings for
+- `prompt`: text to generate embeddings for

 Advanced parameters:

-- `truncate`: truncates the end of each input to fit within context length. Returns error if `false` and context length is exceeded. Defaults to `true`
 - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
 - `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)

@@ -1160,9 +1046,9 @@ Advanced parameters:
 #### Request

 ```shell
-curl http://localhost:11434/api/embed -d '{
+curl http://localhost:11434/api/embeddings -d '{
  "model": "all-minilm",
-  "input": "Why is the sky blue?"
+  "prompt": "Here is an article about llamas..."
 }'
 ```

@@ -1170,35 +1056,10 @@ curl http://localhost:11434/api/embed -d '{

 ```json
 {
-  "model": "all-minilm",
-  "embeddings": [[
-    0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
-    0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
-  ]]
-}
-```
-
-#### Request (Multiple input)
-
-```shell
-curl http://localhost:11434/api/embed -d '{
-  "model": "all-minilm",
-  "input": ["Why is the sky blue?", "Why is the grass green?"]
-}'
-```
-
-#### Response
-
-```json
-{
-  "model": "all-minilm",
-  "embeddings": [[
-    0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
-    0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
-  ],[
-    -0.0098027075, 0.06042469, 0.025257962, -0.006364387, 0.07272725,
-    0.017194884, 0.09032035, -0.051705178, 0.09951512, 0.09072481
-  ]]
+  "embedding": [
+    0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
+    0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
+  ]
 }
 ```

@@ -1245,45 +1106,3 @@ A single JSON object will be returned.
  ]
 }
 ```
-
-## Generate Embedding
-
-> Note: this endpoint has been superseded by `/api/embed`
-
-```shell
-POST /api/embeddings
-```
-
-Generate embeddings from a model
-
-### Parameters
-
-- `model`: name of model to generate embeddings from
-- `prompt`: text to generate embeddings for
-
-Advanced parameters:
-
-- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
-- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
-
-### Examples
-
-#### Request
-
-```shell
-curl http://localhost:11434/api/embeddings -d '{
-  "model": "all-minilm",
-  "prompt": "Here is an article about llamas..."
-}'
-```
-
-#### Response
-
-```json
-{
-  "embedding": [
-    0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
-    0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
-  ]
-}
-```
--- a/docs/modelfile.md
+++ b/docs/modelfile.md
@@ -1,7 +1,6 @@
 # Ollama Model File

-> [!NOTE]
-> `Modelfile` syntax is in development
+> Note: `Modelfile` syntax is in development

 A model file is the blueprint to create and share models with Ollama.

--- a/docs/openai.md
+++ b/docs/openai.md
@@ -78,8 +78,8 @@ curl http://localhost:11434/v1/chat/completions \
 - [x] Streaming
 - [x] JSON mode
 - [x] Reproducible outputs
-- [x] Tools (streaming support coming soon)
 - [ ] Vision
+- [ ] Function calling
 - [ ] Logprobs

 #### Supported request fields
@@ -97,9 +97,9 @@ curl http://localhost:11434/v1/chat/completions \
 - [x] `temperature`
 - [x] `top_p`
 - [x] `max_tokens`
-- [x] `tools`
-- [ ] `tool_choice`
 - [ ] `logit_bias`
+- [ ] `tools`
+- [ ] `tool_choice`
 - [ ] `user`
 - [ ] `n`

--- a/docs/template.md
+++ b/docs/template.md
@@ -1,173 +0,0 @@
-# Template
-
-Ollama provides a powerful templating engine backed by Go's built-in templating engine to construct prompts for your large language model. This feature is a valuable tool to get the most out of your models.
-
-## Basic Template Structure
-
-A basic Go template consists of three main parts:
-
-* **Layout**: The overall structure of the template.
-* **Variables**: Placeholders for dynamic data that will be replaced with actual values when the template is rendered.
-* **Functions**: Custom functions or logic that can be used to manipulate the template's content.
-
-Here's an example of a simple chat template:
-
-```gotmpl
-{{- range .Messages }}
-{{ .Role }}: {{ .Content }}
-{{- end }}
-```
-
-In this example, we have:
-
-* A basic messages structure (layout)
-* Three variables: `Messages`, `Role`, and `Content` (variables)
-* A custom function (action) that iterates over an array of items (`range .Messages`) and displays each item
-
-## Adding templates to your model
-
-By default, models imported into Ollama have a default template of `{{ .Prompt }}`, i.e. user inputs are sent verbatim to the LLM. This is appropriate for text or code completion models but lacks essential markers for chat or instruction models.
-
-Omitting a template in these models puts the responsibility of correctly templating input onto the user. Adding a template allows users to easily get the best results from the model.
-
-To add templates in your model, you'll need to add a `TEMPLATE` command to the Modelfile. Here's an example using Meta's Llama 3.
-
-```dockerfile
-FROM llama3
-
-TEMPLATE """{{- if .System }}<|start_header_id|>system<|end_header_id|>
-
-{{ .System }}<|eot_id|>
-{{- end }}
-{{- range .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>
-
-{{ .Content }}<|eot_id|>
-{{- end }}<|start_header_id|>assistant<|end_header_id|>
-
-"""
-```
-
-## Variables
-
-`System` (string): system prompt
-
-`Prompt` (string): user prompt
-
-`Response` (string): assistant response
-
-`Suffix` (string): text inserted after the assistant's response
-
-`Messages` (list): list of messages
-
-`Messages[].Role` (string): role which can be one of `system`, `user`, `assistant`, or `tool`
-
-`Messages[].Content` (string):  message content
-
-`Messages[].ToolCalls` (list): list of tools the model wants to call
-
-`Messages[].ToolCalls[].Function` (object): function to call
-
-`Messages[].ToolCalls[].Function.Name` (string): function name
-
-`Messages[].ToolCalls[].Function.Arguments` (map): mapping of argument name to argument value
-
-`Tools` (list): list of tools the model can access
-
-`Tools[].Type` (string): schema type. `type` is always `function`
-
-`Tools[].Function` (object): function definition
-
-`Tools[].Function.Name` (string): function name
-
-`Tools[].Function.Description` (string): function description
-
-`Tools[].Function.Parameters` (object): function parameters
-
-`Tools[].Function.Parameters.Type` (string): schema type. `type` is always `object`
-
-`Tools[].Function.Parameters.Required` (list): list of required properties
-
-`Tools[].Function.Parameters.Properties` (map): mapping of property name to property definition
-
-`Tools[].Function.Parameters.Properties[].Type` (string): property type
-
-`Tools[].Function.Parameters.Properties[].Description` (string): property description
-
-`Tools[].Function.Parameters.Properties[].Enum` (list): list of valid values
-
-## Tips and Best Practices
-
-Keep the following tips and best practices in mind when working with Go templates:
-
-* **Be mindful of dot**: Control flow structures like `range` and `with` changes the value `.`
-* **Out-of-scope variables**: Use `$.` to reference variables not currently in scope, starting from the root
-* **Whitespace control**: Use `-` to trim leading (`{{-`) and trailing (`-}}`) whitespace
-
-## Examples
-
-### Example Messages
-
-#### ChatML
-
-ChatML is a popular template format. It can be used for models such as Databrick's DBRX, Intel's Neural Chat, and Microsoft's Orca 2.
-
-```gotmpl
-{{- if .System }}<|im_start|>system
-{{ .System }}<|im_end|>
-{{ end }}
-{{- range .Messages }}<|im_start|>{{ .Role }}
-{{ .Content }}<|im_end|>
-{{ end }}<|im_start|>assistant
-{{ else }}
-{{ if .System }}<|im_start|>system
-{{ .System }}<|im_end|>
-```
-
-### Example Tools
-
-Tools support can be added to a model by adding a `{{ .Tools }}` node to the template. This feature is useful for models trained to call external tools and can a powerful tool for retrieving real-time data or performing complex tasks.
-
-#### Mistral
-
-Mistral v0.3 and Mixtral 8x22B supports tool calling.
-
-```gotmpl
-{{- range $index, $_ := .Messages }}
-{{- if eq .Role "user" }}
-{{- if and (le (len (slice $.Messages $index)) 2) $.Tools }}[AVAILABLE_TOOLS] {{ json $.Tools }}[/AVAILABLE_TOOLS]
-{{- end }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}
-
-{{ end }}{{ .Content }}[/INST]
-{{- else if eq .Role "assistant" }}
-{{- if .Content }} {{ .Content }}</s>
-{{- else if .ToolCalls }}[TOOL_CALLS] [
-{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ json .Function.Arguments }}}
-{{- end }}]</s>
-{{- end }}
-{{- else if eq .Role "tool" }}[TOOL_RESULTS] {"content": {{ .Content }}}[/TOOL_RESULTS]
-{{- end }}
-{{- end }}
-```
-
-### Example Fill-in-Middle
-
-Fill-in-middle support can be added to a model by adding a `{{ .Suffix }}` node to the template. This feature is useful for models that are trained to generate text in the middle of user input, such as code completion models.
-
-#### CodeLlama
-
-CodeLlama [7B](https://ollama.com/library/codellama:7b-code) and [13B](https://ollama.com/library/codellama:13b-code) code completion models support fill-in-middle.
-
-```gotmpl
-<PRE> {{ .Prompt }} <SUF>{{ .Suffix }} <MID>
-```
-
-> [!NOTE]
-> CodeLlama 34B and 70B code completion and all instruct and Python fine-tuned models do not support fill-in-middle.
-
-#### Codestral
-
-Codestral [22B](https://ollama.com/library/codestral:22b) supports fill-in-middle.
-
-```gotmpl
-[SUFFIX]{{ .Suffix }}[PREFIX] {{ .Prompt }}
-```
--- a/integration/embed_test.go
+++ b/integration/embed_test.go
@@ -4,45 +4,12 @@ package integration

 import (
 	"context"
-	"math"
 	"testing"
 	"time"

 	"github.com/ollama/ollama/api"
 )

-func floatsEqual32(a, b float32) bool {
-	return math.Abs(float64(a-b)) <= 1e-4
-}
-
-func floatsEqual64(a, b float64) bool {
-	return math.Abs(a-b) <= 1e-4
-}
-
-func TestAllMiniLMEmbeddings(t *testing.T) {
-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
-	defer cancel()
-
-	req := api.EmbeddingRequest{
-		Model:  "all-minilm",
-		Prompt: "why is the sky blue?",
-	}
-
-	res, err := embeddingTestHelper(ctx, t, req)
-
-	if err != nil {
-		t.Fatalf("error: %v", err)
-	}
-
-	if len(res.Embedding) != 384 {
-		t.Fatalf("expected 384 floats, got %d", len(res.Embedding))
-	}
-
-	if !floatsEqual64(res.Embedding[0], 0.06642947345972061) {
-		t.Fatalf("expected 0.06642947345972061, got %.16f", res.Embedding[0])
-	}
-}
-
 func TestAllMiniLMEmbed(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
 	defer cancel()
@@ -66,8 +33,8 @@ func TestAllMiniLMEmbed(t *testing.T) {
 		t.Fatalf("expected 384 floats, got %d", len(res.Embeddings[0]))
 	}

-	if !floatsEqual32(res.Embeddings[0][0], 0.010071031) {
-		t.Fatalf("expected 0.010071031, got %.8f", res.Embeddings[0][0])
+	if res.Embeddings[0][0] != 0.010071031 {
+		t.Fatalf("expected 0.010071031, got %f", res.Embeddings[0][0])
 	}
 }

@@ -94,12 +61,12 @@ func TestAllMiniLMBatchEmbed(t *testing.T) {
 		t.Fatalf("expected 384 floats, got %d", len(res.Embeddings[0]))
 	}

-	if !floatsEqual32(res.Embeddings[0][0], 0.010071031) || !floatsEqual32(res.Embeddings[1][0], -0.009802706) {
-		t.Fatalf("expected 0.010071031 and -0.009802706, got %.8f and %.8f", res.Embeddings[0][0], res.Embeddings[1][0])
+	if res.Embeddings[0][0] != 0.010071031 || res.Embeddings[1][0] != -0.009802706 {
+		t.Fatalf("expected 0.010071031 and -0.009802706, got %f and %f", res.Embeddings[0][0], res.Embeddings[1][0])
 	}
 }

-func TestAllMiniLMEmbedTruncate(t *testing.T) {
+func TestAllMiniLmEmbedTruncate(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
 	defer cancel()

@@ -168,22 +135,6 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) {
 	}
 }

-func embeddingTestHelper(ctx context.Context, t *testing.T, req api.EmbeddingRequest) (*api.EmbeddingResponse, error) {
-	client, _, cleanup := InitServerConnection(ctx, t)
-	defer cleanup()
-	if err := PullIfMissing(ctx, client, req.Model); err != nil {
-		t.Fatalf("failed to pull model %s: %v", req.Model, err)
-	}
-
-	response, err := client.Embeddings(ctx, &req)
-
-	if err != nil {
-		return nil, err
-	}
-
-	return response, nil
-}
-
 func embedTestHelper(ctx context.Context, t *testing.T, req api.EmbedRequest) (*api.EmbedResponse, error) {
 	client, _, cleanup := InitServerConnection(ctx, t)
 	defer cleanup()
--- a/server/download.go
+++ b/server/download.go
@@ -8,7 +8,6 @@ import (
 	"io"
 	"log/slog"
 	"math"
-	"math/rand/v2"
 	"net/http"
 	"net/url"
 	"os"
@@ -142,32 +141,6 @@ func (b *blobDownload) Run(ctx context.Context, requestURL *url.URL, opts *regis
 	b.err = b.run(ctx, requestURL, opts)
 }

-func newBackoff(maxBackoff time.Duration) func(ctx context.Context) error {
-	var n int
-	return func(ctx context.Context) error {
-		if ctx.Err() != nil {
-			return ctx.Err()
-		}
-
-		n++
-
-		// n^2 backoff timer is a little smoother than the
-		// common choice of 2^n.
-		d := min(time.Duration(n*n)*10*time.Millisecond, maxBackoff)
-		// Randomize the delay between 0.5-1.5 x msec, in order
-		// to prevent accidental "thundering herd" problems.
-		d = time.Duration(float64(d) * (rand.Float64() + 0.5))
-		t := time.NewTimer(d)
-		defer t.Stop()
-		select {
-		case <-ctx.Done():
-			return ctx.Err()
-		case <-t.C:
-			return nil
-		}
-	}
-}
-
 func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *registryOptions) error {
 	defer blobDownloadManager.Delete(b.Digest)
 	ctx, b.CancelFunc = context.WithCancel(ctx)
@@ -180,52 +153,6 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis

 	_ = file.Truncate(b.Total)

-	directURL, err := func() (*url.URL, error) {
-		ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
-		defer cancel()
-
-		backoff := newBackoff(10 * time.Second)
-		for {
-			// shallow clone opts to be used in the closure
-			// without affecting the outer opts.
-			newOpts := new(registryOptions)
-			*newOpts = *opts
-
-			newOpts.CheckRedirect = func(req *http.Request, via []*http.Request) error {
-				if len(via) > 10 {
-					return errors.New("maxium redirects exceeded (10) for directURL")
-				}
-
-				// if the hostname is the same, allow the redirect
-				if req.URL.Hostname() == requestURL.Hostname() {
-					return nil
-				}
-
-				// stop at the first redirect that is not
-				// the same hostname as the original
-				// request.
-				return http.ErrUseLastResponse
-			}
-
-			resp, err := makeRequestWithRetry(ctx, http.MethodGet, requestURL, nil, nil, newOpts)
-			if err != nil {
-				slog.Warn("failed to get direct URL; backing off and retrying", "err", err)
-				if err := backoff(ctx); err != nil {
-					return nil, err
-				}
-				continue
-			}
-			defer resp.Body.Close()
-			if resp.StatusCode != http.StatusTemporaryRedirect {
-				return nil, fmt.Errorf("unexpected status code %d", resp.StatusCode)
-			}
-			return resp.Location()
-		}
-	}()
-	if err != nil {
-		return err
-	}
-
 	g, inner := errgroup.WithContext(ctx)
 	g.SetLimit(numDownloadParts)
 	for i := range b.Parts {
@@ -238,7 +165,7 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
 			var err error
 			for try := 0; try < maxRetries; try++ {
 				w := io.NewOffsetWriter(file, part.StartsAt())
-				err = b.downloadChunk(inner, directURL, w, part, opts)
+				err = b.downloadChunk(inner, requestURL, w, part, opts)
 				switch {
 				case errors.Is(err, context.Canceled), errors.Is(err, syscall.ENOSPC):
 					// return immediately if the context is canceled or the device is out of space
--- a/server/images.go
+++ b/server/images.go
@@ -32,6 +32,7 @@ import (
 	"github.com/ollama/ollama/types/errtypes"
 	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/version"
+	"golang.org/x/crypto/ssh"
 )

 var (
@@ -54,8 +55,6 @@ type registryOptions struct {
 	Username string
 	Password string
 	Token    string
-
-	CheckRedirect func(req *http.Request, via []*http.Request) error
 }

 type Model struct {
@@ -1090,11 +1089,12 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
 	if anonymous {
 		// no user is associated with the public key, and the request requires non-anonymous access
 		pubKey, nestedErr := auth.GetPublicKey()
 		if nestedErr != nil {
 			slog.Error(fmt.Sprintf("couldn't get public key: %v", nestedErr))
 			return nil, errUnauthorized
 		}
+		localPubKey := strings.TrimSpace(string(ssh.MarshalAuthorizedKey(pubKey)))
-		return nil, &errtypes.UnknownOllamaKey{Key: pubKey}
+		return nil, &errtypes.UnknownOllamaKey{Key: localPubKey}
 	}
 	// user is associated with the public key, but is not authorized to make the request
 	return nil, errUnauthorized
@@ -1133,9 +1133,7 @@ func makeRequest(ctx context.Context, method string, requestURL *url.URL, header
 		req.ContentLength = contentLength
 	}

-	resp, err := (&http.Client{
-		CheckRedirect: regOpts.CheckRedirect,
-	}).Do(req)
+	resp, err := http.DefaultClient.Do(req)
 	if err != nil {
 		return nil, err
 	}
--- a/server/model.go
+++ b/server/model.go
@@ -263,27 +263,13 @@ func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) {
 			if t, err := template.Named(s); err != nil {
 				slog.Debug("template detection", "error", err)
 			} else {
-				layer, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
+				tmpl, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
 				if err != nil {
 					return nil, err
 				}

-				layer.status = fmt.Sprintf("using autodetected template %s", t.Name)
-				layers = append(layers, &layerGGML{layer, nil})
-
-				if t.Parameters != nil {
-					var b bytes.Buffer
-					if err := json.NewEncoder(&b).Encode(t.Parameters); err != nil {
-						return nil, err
-					}
-
-					layer, err := NewLayer(&b, "application/vnd.ollama.image.params")
-					if err != nil {
-						return nil, err
-					}
-
-					layers = append(layers, &layerGGML{layer, nil})
-				}
+				tmpl.status = fmt.Sprintf("using autodetected template %s", t.Name)
+				layers = append(layers, &layerGGML{tmpl, nil})
 			}
 		}
 	}
--- a/server/routes.go
+++ b/server/routes.go
@@ -609,9 +609,11 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
 		defer cancel()

 		quantization := cmp.Or(r.Quantize, r.Quantization)
-		if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); errors.Is(err, errBadTemplate) {
-			ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
-		} else if err != nil {
+		if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); err != nil {
+			if errors.Is(err, errBadTemplate) {
+				ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
+				return
+			}
 			ch <- gin.H{"error": err.Error()}
 		}
 	}()
--- a/server/routes_create_test.go
+++ b/server/routes_create_test.go
@@ -599,10 +599,9 @@ func TestCreateDetectTemplate(t *testing.T) {
 		}

 		checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
-			filepath.Join(p, "blobs", "sha256-0d79f567714c62c048378f2107fb332dabee0135d080c302d884317da9433cc5"),
 			filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"),
 			filepath.Join(p, "blobs", "sha256-c608dc615584cd20d9d830363dabf8a4783ae5d34245c3d8c115edb3bc7b28e4"),
-			filepath.Join(p, "blobs", "sha256-ea34c57ba5b78b740aafe2aeb74dc6507fc3ad14170b64c26a04fb9e36c88d75"),
+			filepath.Join(p, "blobs", "sha256-f836ee110db21567f826332e4cedd746c06d10664fd5a9ea3659e3683a944510"),
 		})
 	})

--- a/server/sched.go
+++ b/server/sched.go
@@ -132,8 +132,6 @@ func (s *Scheduler) processPending(ctx context.Context) {
 			if len(pending.model.ProjectorPaths) > 0 && numParallel != 1 {
 				numParallel = 1
 				slog.Warn("multimodal models don't support parallel requests yet")
-			} else if strings.Contains(pending.model.Config.ModelFamily, "bert") {
-				numParallel = runtime.NumCPU()
 			}

 			for {
--- a/template/alfred.json
+++ b/template/alfred.json
@@ -1,8 +0,0 @@
-{
-  "stop": [
-    "<start_system>",
-    "<end_message>",
-    "<start_user>",
-    "<start_assistant>"
-  ]
-}
--- a/template/alpaca.json
+++ b/template/alpaca.json
@@ -1,6 +0,0 @@
-{
-  "stop": [
-    "### Instruction:",
-    "### Response"
-  ]
-}
--- a/template/chatml.json
+++ b/template/chatml.json
@@ -1,6 +0,0 @@
-{
-  "stop": [
-    "<|im_start|>",
-    "<|im_end|>"
-  ]
-}
--- a/template/chatqa.json
+++ b/template/chatqa.json
@@ -1,8 +0,0 @@
-{
-  "stop": [
-    "System:",
-    "User:",
-    "Assistant:",
-    "<|begin_of_text|>"
-  ]
-}
--- a/template/codellama-70b-instruct.json
+++ b/template/codellama-70b-instruct.json
@@ -1,7 +0,0 @@
-{
-  "stop": [
-    "Source:",
-    "Destination:",
-    "<step>"
-  ]
-}
--- a/template/falcon-instruct.json
+++ b/template/falcon-instruct.json
@@ -1,6 +0,0 @@
-{
-  "stop": [
-    "User:",
-    "Assistant:"
-  ]
-}
--- a/template/gemma-instruct.json
+++ b/template/gemma-instruct.json
@@ -1,6 +0,0 @@
-{
-  "stop": [
-    "<start_of_turn>",
-    "<end_of_turn>"
-  ]
-}
--- a/template/granite-instruct.json
+++ b/template/granite-instruct.json
@@ -1,7 +0,0 @@
-{
-  "stop": [
-    "System:",
-    "Question:",
-    "Answer:"
-  ]
-}
--- a/template/llama2-chat.json
+++ b/template/llama2-chat.json
@@ -1,8 +0,0 @@
-{
-  "stop": [
-    "[INST]",
-    "[/INST]",
-    "<<SYS>>",
-    "<</SYS>>"
-  ]
-}
--- a/template/llama3-instruct.json
+++ b/template/llama3-instruct.json
@@ -1,7 +0,0 @@
-{
-  "stop": [
-    "<|start_header_id|>",
-    "<|end_header_id|>",
-    "<|eot_id|>"
-  ]
-}
--- a/template/magicoder.json
+++ b/template/magicoder.json
@@ -1,6 +0,0 @@
-{
-  "stop": [
-    "@@ Instruction",
-    "@@ Response"
-  ]
-}
--- a/template/mistral-instruct.json
+++ b/template/mistral-instruct.json
@@ -1,6 +0,0 @@
-{
-  "stop": [
-    "<|im_start|>",
-    "<|im_end|>"
-  ]
-}
--- a/template/openchat.json
+++ b/template/openchat.json
@@ -1,5 +0,0 @@
-{
-  "stop": [
-    "<|end_of_turn|>"
-  ]
-}
--- a/template/phi-3.json
+++ b/template/phi-3.json
@@ -1,8 +0,0 @@
-{
-  "stop": [
-    "<|end|>",
-    "<|system|>",
-    "<|user|>",
-    "<|assistant|>"
-  ]
-}
--- a/template/solar-instruct.json
+++ b/template/solar-instruct.json
@@ -1,7 +0,0 @@
-{
-  "stop": [
-    "### System:",
-    "### User:",
-    "### Assistant"
-  ]
-}
--- a/template/starcoder2-instruct.json
+++ b/template/starcoder2-instruct.json
@@ -1,7 +0,0 @@
-{
-  "stop": [
-    "### Instruction",
-    "### Response",
-    "<|endoftext|>"
-  ]
-}
--- a/template/template.go
+++ b/template/template.go
@@ -23,7 +23,6 @@ import (
 var indexBytes []byte

 //go:embed *.gotmpl
-//go:embed *.json
 var templatesFS embed.FS

 var templatesOnce = sync.OnceValues(func() ([]*named, error) {
@@ -40,15 +39,6 @@ var templatesOnce = sync.OnceValues(func() ([]*named, error) {

 		// normalize line endings
 		t.Bytes = bytes.ReplaceAll(bts, []byte("\r\n"), []byte("\n"))
-
-		params, err := templatesFS.ReadFile(t.Name + ".json")
-		if err != nil {
-			continue
-		}
-
-		if err := json.Unmarshal(params, &t.Parameters); err != nil {
-			return nil, err
-		}
 	}

 	return templates, nil
@@ -58,10 +48,6 @@ type named struct {
 	Name     string `json:"name"`
 	Template string `json:"template"`
 	Bytes    []byte
-
-	Parameters *struct {
-		Stop []string `json:"stop"`
-	}
 }

 func (t named) Reader() io.Reader {
--- a/template/vicuna.json
+++ b/template/vicuna.json
@@ -1,6 +0,0 @@
-{
-  "stop": [
-    "USER:",
-    "ASSISTANT:"
-  ]
-}
--- a/template/zephyr.json
+++ b/template/zephyr.json
@@ -1,8 +0,0 @@
-{
-  "stop": [
-    "<|system|>",
-    "</s>",
-    "<|user|>",
-    "<|assistant|>"
-  ]
-}
Author	SHA1	Message	Date
Josh Yan	0e01da82d6	errorsis	2024-07-22 15:51:31 -07:00
Josh Yan	6b1b85ba3d	hide initialize keypair	2024-07-22 15:41:04 -07:00
Josh Yan	5603441538	test	2024-07-22 13:58:50 -07:00
Josh Yan	76b4dfcc9e	auth	2024-07-22 13:54:02 -07:00