Compare commits

..

4 Commits

Author SHA1 Message Date
Josh Yan
0e01da82d6 errorsis 2024-07-22 15:51:31 -07:00
Josh Yan
6b1b85ba3d hide initialize keypair 2024-07-22 15:41:04 -07:00
Josh Yan
5603441538 test 2024-07-22 13:58:50 -07:00
Josh Yan
76b4dfcc9e auth 2024-07-22 13:54:02 -07:00
37 changed files with 140 additions and 766 deletions

View File

@@ -31,7 +31,7 @@ jobs:
security set-keychain-settings -lut 3600 build.keychain
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: true
- name: Build Darwin
env:
@@ -87,7 +87,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: true
- run: go get ./...
- run: |
@@ -141,7 +141,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: true
- name: 'Install ROCm'
run: |
@@ -218,7 +218,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: true
- name: 'Install CUDA'
run: |
@@ -306,7 +306,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: true
- run: go get
- uses: actions/download-artifact@v4

View File

@@ -63,7 +63,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: true
- run: go get ./...
- run: |
@@ -163,7 +163,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: true
- name: 'Install ROCm'
run: |
@@ -200,7 +200,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: true
- name: 'Install CUDA'
run: |
@@ -255,7 +255,7 @@ jobs:
submodules: recursive
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: false
- run: |
case ${{ matrix.arch }} in
@@ -297,7 +297,7 @@ jobs:
submodules: recursive
- uses: actions/setup-go@v5
with:
go-version: "stable"
go-version-file: go.mod
cache: true
- run: |
case ${{ matrix.arch }} in

View File

@@ -1,4 +1,4 @@
ARG GOLANG_VERSION=1.22.5
ARG GOLANG_VERSION=1.22.1
ARG CMAKE_VERSION=3.22.1
# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
ARG CUDA_VERSION=11.3.1

View File

@@ -64,8 +64,7 @@ Here are some example models that can be downloaded:
| LLaVA | 7B | 4.5GB | `ollama run llava` |
| Solar | 10.7B | 6.1GB | `ollama run solar` |
> [!NOTE]
> You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
## Customize a model
@@ -297,7 +296,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
- [AI Studio](https://github.com/MindWorkAI/AI-Studio)
- [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
- [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
### Terminal

View File

@@ -3,49 +3,68 @@ package auth
import (
"bytes"
"context"
"crypto/ed25519"
"crypto/rand"
"encoding/base64"
"encoding/pem"
"errors"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"strings"
"golang.org/x/crypto/ssh"
)
const defaultPrivateKey = "id_ed25519"
func keyPath() (string, error) {
func privateKey() (ssh.Signer, error) {
home, err := os.UserHomeDir()
if err != nil {
return "", err
return nil, err
}
return filepath.Join(home, ".ollama", defaultPrivateKey), nil
keyPath := filepath.Join(home, ".ollama", defaultPrivateKey)
privateKeyFile, err := os.ReadFile(keyPath)
if errors.Is(err, os.ErrNotExist) {
err := initializeKeypair()
if err != nil {
return nil, err
}
return privateKey()
} else if err != nil {
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
return nil, err
}
return ssh.ParsePrivateKey(privateKeyFile)
}
func GetPublicKey() (string, error) {
keyPath, err := keyPath()
func GetPublicKey() (ssh.PublicKey, error) {
// try to read pubkey first
home, err := os.UserHomeDir()
if err != nil {
return "", err
return nil, err
}
privateKeyFile, err := os.ReadFile(keyPath)
if err != nil {
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
return "", err
pubkeyPath := filepath.Join(home, ".ollama", defaultPrivateKey+".pub")
pubKeyFile, err := os.ReadFile(pubkeyPath)
if errors.Is(err, os.ErrNotExist) {
// try from privateKey
privateKey, err := privateKey()
if err != nil {
return nil, fmt.Errorf("failed to read public key: %w", err)
}
return privateKey.PublicKey(), nil
} else if err != nil {
return nil, fmt.Errorf("failed to read public key: %w", err)
}
privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
if err != nil {
return "", err
}
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
return strings.TrimSpace(string(publicKey)), nil
pubKey, _, _, _, err := ssh.ParseAuthorizedKey(pubKeyFile)
return pubKey, err
}
func NewNonce(r io.Reader, length int) (string, error) {
@@ -58,25 +77,20 @@ func NewNonce(r io.Reader, length int) (string, error) {
}
func Sign(ctx context.Context, bts []byte) (string, error) {
keyPath, err := keyPath()
if err != nil {
return "", err
}
privateKeyFile, err := os.ReadFile(keyPath)
if err != nil {
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
return "", err
}
privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
privateKey, err := privateKey()
if err != nil {
return "", err
}
// get the pubkey, but remove the type
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
parts := bytes.Split(publicKey, []byte(" "))
publicKey, err := GetPublicKey()
if err != nil {
return "", err
}
publicKeyBytes := ssh.MarshalAuthorizedKey(publicKey)
parts := bytes.Split(publicKeyBytes, []byte(" "))
if len(parts) < 2 {
return "", fmt.Errorf("malformed public key")
}
@@ -89,3 +103,49 @@ func Sign(ctx context.Context, bts []byte) (string, error) {
// signature is <pubkey>:<signature>
return fmt.Sprintf("%s:%s", bytes.TrimSpace(parts[1]), base64.StdEncoding.EncodeToString(signedData.Blob)), nil
}
func initializeKeypair() error {
home, err := os.UserHomeDir()
if err != nil {
return err
}
privKeyPath := filepath.Join(home, ".ollama", "id_ed25519")
pubKeyPath := filepath.Join(home, ".ollama", "id_ed25519.pub")
_, err = os.Stat(privKeyPath)
if errors.Is(err, os.ErrNotExist) {
fmt.Printf("Couldn't find '%s'. Generating new private key.\n", privKeyPath)
cryptoPublicKey, cryptoPrivateKey, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
return err
}
privateKeyBytes, err := ssh.MarshalPrivateKey(cryptoPrivateKey, "")
if err != nil {
return err
}
if err := os.MkdirAll(filepath.Dir(privKeyPath), 0o755); err != nil {
return fmt.Errorf("could not create directory %w", err)
}
if err := os.WriteFile(privKeyPath, pem.EncodeToMemory(privateKeyBytes), 0o600); err != nil {
return err
}
sshPublicKey, err := ssh.NewPublicKey(cryptoPublicKey)
if err != nil {
return err
}
publicKeyBytes := ssh.MarshalAuthorizedKey(sshPublicKey)
if err := os.WriteFile(pubKeyPath, publicKeyBytes, 0o644); err != nil {
return err
}
fmt.Printf("Your new public key is: \n\n%s\n", publicKeyBytes)
}
return nil
}

View File

@@ -4,10 +4,7 @@ import (
"archive/zip"
"bytes"
"context"
"crypto/ed25519"
"crypto/rand"
"crypto/sha256"
"encoding/pem"
"errors"
"fmt"
"io"
@@ -379,11 +376,12 @@ func errFromUnknownKey(unknownKeyErr error) error {
if len(matches) > 0 {
serverPubKey := matches[0]
localPubKey, err := auth.GetPublicKey()
publicKey, err := auth.GetPublicKey()
if err != nil {
return unknownKeyErr
}
localPubKey := strings.TrimSpace(string(ssh.MarshalAuthorizedKey(publicKey)))
if runtime.GOOS == "linux" && serverPubKey != localPubKey {
// try the ollama service public key
svcPubKey, err := os.ReadFile("/usr/share/ollama/.ollama/id_ed25519.pub")
@@ -1072,7 +1070,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
}
func RunServer(cmd *cobra.Command, _ []string) error {
if err := initializeKeypair(); err != nil {
if _, err := auth.GetPublicKey(); err != nil {
return err
}
@@ -1089,52 +1087,6 @@ func RunServer(cmd *cobra.Command, _ []string) error {
return err
}
func initializeKeypair() error {
home, err := os.UserHomeDir()
if err != nil {
return err
}
privKeyPath := filepath.Join(home, ".ollama", "id_ed25519")
pubKeyPath := filepath.Join(home, ".ollama", "id_ed25519.pub")
_, err = os.Stat(privKeyPath)
if os.IsNotExist(err) {
fmt.Printf("Couldn't find '%s'. Generating new private key.\n", privKeyPath)
cryptoPublicKey, cryptoPrivateKey, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
return err
}
privateKeyBytes, err := ssh.MarshalPrivateKey(cryptoPrivateKey, "")
if err != nil {
return err
}
if err := os.MkdirAll(filepath.Dir(privKeyPath), 0o755); err != nil {
return fmt.Errorf("could not create directory %w", err)
}
if err := os.WriteFile(privKeyPath, pem.EncodeToMemory(privateKeyBytes), 0o600); err != nil {
return err
}
sshPublicKey, err := ssh.NewPublicKey(cryptoPublicKey)
if err != nil {
return err
}
publicKeyBytes := ssh.MarshalAuthorizedKey(sshPublicKey)
if err := os.WriteFile(pubKeyPath, publicKeyBytes, 0o644); err != nil {
return err
}
fmt.Printf("Your new public key is: \n\n%s\n", publicKeyBytes)
}
return nil
}
func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
client, err := api.ClientFromEnvironment()
if err != nil {

View File

@@ -71,11 +71,6 @@ func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
"tokenizer.ggml.unknown_token_id": uint32(0),
}
if m.Params.HeadDimension > 0 {
kv["llama.attention.key_length"] = uint32(m.Params.HeadDimension)
kv["llama.attention.value_length"] = uint32(m.Params.HeadDimension)
}
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
}

View File

@@ -40,7 +40,6 @@ Generate a response for a given prompt with a provided model. This is a streamin
- `model`: (required) the [model name](#model-names)
- `prompt`: the prompt to generate a response for
- `suffix`: the text after the model response
- `images`: (optional) a list of base64-encoded images (for multimodal models such as `llava`)
Advanced parameters (optional):
@@ -58,8 +57,7 @@ Advanced parameters (optional):
Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as a valid JSON object. See the JSON mode [example](#request-json-mode) below.
> [!IMPORTANT]
> It's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts whitespace.
> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts whitespace.
### Examples
@@ -150,44 +148,8 @@ If `stream` is set to `false`, the response will be a single JSON object:
}
```
#### Request (with suffix)
##### Request
```shell
curl http://localhost:11434/api/generate -d '{
"model": "codellama:code",
"prompt": "def compute_gcd(a, b):",
"suffix": " return result",
"options": {
"temperature": 0
},
"stream": false
}'
```
##### Response
```json
{
"model": "codellama:code",
"created_at": "2024-07-22T20:47:51.147561Z",
"response": "\n if a == 0:\n return b\n else:\n return compute_gcd(b % a, a)\n\ndef compute_lcm(a, b):\n result = (a * b) / compute_gcd(a, b)\n",
"done": true,
"done_reason": "stop",
"context": [...],
"total_duration": 1162761250,
"load_duration": 6683708,
"prompt_eval_count": 17,
"prompt_eval_duration": 201222000,
"eval_count": 63,
"eval_duration": 953997000
}
```
#### Request (JSON mode)
> [!IMPORTANT]
> When `format` is set to `json`, the output will always be a well-formed JSON object. It's important to also instruct the model to respond in JSON.
##### Request
@@ -418,14 +380,12 @@ Generate the next message in a chat with a provided model. This is a streaming e
- `model`: (required) the [model name](#model-names)
- `messages`: the messages of the chat, this can be used to keep a chat memory
- `tools`: tools for the model to use if supported. Requires `stream` to be set to `false`
The `message` object has the following fields:
- `role`: the role of the message, either `system`, `user`, `assistant`, or `tool`
- `role`: the role of the message, either `system`, `user` or `assistant`
- `content`: the content of the message
- `images` (optional): a list of images to include in the message (for multimodal models such as `llava`)
- `tool_calls` (optional): a list of tools the model wants to use
Advanced parameters (optional):
@@ -662,79 +622,6 @@ curl http://localhost:11434/api/chat -d '{
}
```
#### Chat request (with tools)
##### Request
```
curl http://localhost:11434/api/chat -d '{
"model": "mistral",
"messages": [
{
"role": "user",
"content": "What is the weather today in Paris?"
}
],
"stream": false,
"tools": [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather for a location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The location to get the weather for, e.g. San Francisco, CA"
},
"format": {
"type": "string",
"description": "The format to return the weather in, e.g. 'celsius' or 'fahrenheit'",
"enum": ["celsius", "fahrenheit"]
}
},
"required": ["location", "format"]
}
}
}
]
}'
```
##### Response
```json
{
"model": "mistral:7b-instruct-v0.3-q4_K_M",
"created_at": "2024-07-22T20:33:28.123648Z",
"message": {
"role": "assistant",
"content": "",
"tool_calls": [
{
"function": {
"name": "get_current_weather",
"arguments": {
"format": "celsius",
"location": "Paris, FR"
}
}
}
]
},
"done_reason": "stop",
"done": true,
"total_duration": 885095291,
"load_duration": 3753500,
"prompt_eval_count": 122,
"prompt_eval_duration": 328493000,
"eval_count": 33,
"eval_duration": 552222000
}
```
## Create a Model
```shell
@@ -1139,7 +1026,7 @@ If `stream` is set to `false`, then the response is a single JSON object:
## Generate Embeddings
```shell
POST /api/embed
POST /api/embeddings
```
Generate embeddings from a model
@@ -1147,11 +1034,10 @@ Generate embeddings from a model
### Parameters
- `model`: name of model to generate embeddings from
- `input`: text or list of text to generate embeddings for
- `prompt`: text to generate embeddings for
Advanced parameters:
- `truncate`: truncates the end of each input to fit within context length. Returns error if `false` and context length is exceeded. Defaults to `true`
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
@@ -1160,9 +1046,9 @@ Advanced parameters:
#### Request
```shell
curl http://localhost:11434/api/embed -d '{
curl http://localhost:11434/api/embeddings -d '{
"model": "all-minilm",
"input": "Why is the sky blue?"
"prompt": "Here is an article about llamas..."
}'
```
@@ -1170,35 +1056,10 @@ curl http://localhost:11434/api/embed -d '{
```json
{
"model": "all-minilm",
"embeddings": [[
0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
]]
}
```
#### Request (Multiple input)
```shell
curl http://localhost:11434/api/embed -d '{
"model": "all-minilm",
"input": ["Why is the sky blue?", "Why is the grass green?"]
}'
```
#### Response
```json
{
"model": "all-minilm",
"embeddings": [[
0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
],[
-0.0098027075, 0.06042469, 0.025257962, -0.006364387, 0.07272725,
0.017194884, 0.09032035, -0.051705178, 0.09951512, 0.09072481
]]
"embedding": [
0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
]
}
```
@@ -1245,45 +1106,3 @@ A single JSON object will be returned.
]
}
```
## Generate Embedding
> Note: this endpoint has been superseded by `/api/embed`
```shell
POST /api/embeddings
```
Generate embeddings from a model
### Parameters
- `model`: name of model to generate embeddings from
- `prompt`: text to generate embeddings for
Advanced parameters:
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
### Examples
#### Request
```shell
curl http://localhost:11434/api/embeddings -d '{
"model": "all-minilm",
"prompt": "Here is an article about llamas..."
}'
```
#### Response
```json
{
"embedding": [
0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
]
}
```

View File

@@ -1,7 +1,6 @@
# Ollama Model File
> [!NOTE]
> `Modelfile` syntax is in development
> Note: `Modelfile` syntax is in development
A model file is the blueprint to create and share models with Ollama.

View File

@@ -78,8 +78,8 @@ curl http://localhost:11434/v1/chat/completions \
- [x] Streaming
- [x] JSON mode
- [x] Reproducible outputs
- [x] Tools (streaming support coming soon)
- [ ] Vision
- [ ] Function calling
- [ ] Logprobs
#### Supported request fields
@@ -97,9 +97,9 @@ curl http://localhost:11434/v1/chat/completions \
- [x] `temperature`
- [x] `top_p`
- [x] `max_tokens`
- [x] `tools`
- [ ] `tool_choice`
- [ ] `logit_bias`
- [ ] `tools`
- [ ] `tool_choice`
- [ ] `user`
- [ ] `n`

View File

@@ -1,173 +0,0 @@
# Template
Ollama provides a powerful templating engine backed by Go's built-in templating engine to construct prompts for your large language model. This feature is a valuable tool to get the most out of your models.
## Basic Template Structure
A basic Go template consists of three main parts:
* **Layout**: The overall structure of the template.
* **Variables**: Placeholders for dynamic data that will be replaced with actual values when the template is rendered.
* **Functions**: Custom functions or logic that can be used to manipulate the template's content.
Here's an example of a simple chat template:
```gotmpl
{{- range .Messages }}
{{ .Role }}: {{ .Content }}
{{- end }}
```
In this example, we have:
* A basic messages structure (layout)
* Three variables: `Messages`, `Role`, and `Content` (variables)
* A custom function (action) that iterates over an array of items (`range .Messages`) and displays each item
## Adding templates to your model
By default, models imported into Ollama have a default template of `{{ .Prompt }}`, i.e. user inputs are sent verbatim to the LLM. This is appropriate for text or code completion models but lacks essential markers for chat or instruction models.
Omitting a template in these models puts the responsibility of correctly templating input onto the user. Adding a template allows users to easily get the best results from the model.
To add templates in your model, you'll need to add a `TEMPLATE` command to the Modelfile. Here's an example using Meta's Llama 3.
```dockerfile
FROM llama3
TEMPLATE """{{- if .System }}<|start_header_id|>system<|end_header_id|>
{{ .System }}<|eot_id|>
{{- end }}
{{- range .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>
{{ .Content }}<|eot_id|>
{{- end }}<|start_header_id|>assistant<|end_header_id|>
"""
```
## Variables
`System` (string): system prompt
`Prompt` (string): user prompt
`Response` (string): assistant response
`Suffix` (string): text inserted after the assistant's response
`Messages` (list): list of messages
`Messages[].Role` (string): role which can be one of `system`, `user`, `assistant`, or `tool`
`Messages[].Content` (string): message content
`Messages[].ToolCalls` (list): list of tools the model wants to call
`Messages[].ToolCalls[].Function` (object): function to call
`Messages[].ToolCalls[].Function.Name` (string): function name
`Messages[].ToolCalls[].Function.Arguments` (map): mapping of argument name to argument value
`Tools` (list): list of tools the model can access
`Tools[].Type` (string): schema type. `type` is always `function`
`Tools[].Function` (object): function definition
`Tools[].Function.Name` (string): function name
`Tools[].Function.Description` (string): function description
`Tools[].Function.Parameters` (object): function parameters
`Tools[].Function.Parameters.Type` (string): schema type. `type` is always `object`
`Tools[].Function.Parameters.Required` (list): list of required properties
`Tools[].Function.Parameters.Properties` (map): mapping of property name to property definition
`Tools[].Function.Parameters.Properties[].Type` (string): property type
`Tools[].Function.Parameters.Properties[].Description` (string): property description
`Tools[].Function.Parameters.Properties[].Enum` (list): list of valid values
## Tips and Best Practices
Keep the following tips and best practices in mind when working with Go templates:
* **Be mindful of dot**: Control flow structures like `range` and `with` changes the value `.`
* **Out-of-scope variables**: Use `$.` to reference variables not currently in scope, starting from the root
* **Whitespace control**: Use `-` to trim leading (`{{-`) and trailing (`-}}`) whitespace
## Examples
### Example Messages
#### ChatML
ChatML is a popular template format. It can be used for models such as Databrick's DBRX, Intel's Neural Chat, and Microsoft's Orca 2.
```gotmpl
{{- if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}
{{- range .Messages }}<|im_start|>{{ .Role }}
{{ .Content }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ else }}
{{ if .System }}<|im_start|>system
{{ .System }}<|im_end|>
```
### Example Tools
Tools support can be added to a model by adding a `{{ .Tools }}` node to the template. This feature is useful for models trained to call external tools and can a powerful tool for retrieving real-time data or performing complex tasks.
#### Mistral
Mistral v0.3 and Mixtral 8x22B supports tool calling.
```gotmpl
{{- range $index, $_ := .Messages }}
{{- if eq .Role "user" }}
{{- if and (le (len (slice $.Messages $index)) 2) $.Tools }}[AVAILABLE_TOOLS] {{ json $.Tools }}[/AVAILABLE_TOOLS]
{{- end }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}
{{ end }}{{ .Content }}[/INST]
{{- else if eq .Role "assistant" }}
{{- if .Content }} {{ .Content }}</s>
{{- else if .ToolCalls }}[TOOL_CALLS] [
{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ json .Function.Arguments }}}
{{- end }}]</s>
{{- end }}
{{- else if eq .Role "tool" }}[TOOL_RESULTS] {"content": {{ .Content }}}[/TOOL_RESULTS]
{{- end }}
{{- end }}
```
### Example Fill-in-Middle
Fill-in-middle support can be added to a model by adding a `{{ .Suffix }}` node to the template. This feature is useful for models that are trained to generate text in the middle of user input, such as code completion models.
#### CodeLlama
CodeLlama [7B](https://ollama.com/library/codellama:7b-code) and [13B](https://ollama.com/library/codellama:13b-code) code completion models support fill-in-middle.
```gotmpl
<PRE> {{ .Prompt }} <SUF>{{ .Suffix }} <MID>
```
> [!NOTE]
> CodeLlama 34B and 70B code completion and all instruct and Python fine-tuned models do not support fill-in-middle.
#### Codestral
Codestral [22B](https://ollama.com/library/codestral:22b) supports fill-in-middle.
```gotmpl
[SUFFIX]{{ .Suffix }}[PREFIX] {{ .Prompt }}
```

View File

@@ -4,45 +4,12 @@ package integration
import (
"context"
"math"
"testing"
"time"
"github.com/ollama/ollama/api"
)
func floatsEqual32(a, b float32) bool {
return math.Abs(float64(a-b)) <= 1e-4
}
func floatsEqual64(a, b float64) bool {
return math.Abs(a-b) <= 1e-4
}
func TestAllMiniLMEmbeddings(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
req := api.EmbeddingRequest{
Model: "all-minilm",
Prompt: "why is the sky blue?",
}
res, err := embeddingTestHelper(ctx, t, req)
if err != nil {
t.Fatalf("error: %v", err)
}
if len(res.Embedding) != 384 {
t.Fatalf("expected 384 floats, got %d", len(res.Embedding))
}
if !floatsEqual64(res.Embedding[0], 0.06642947345972061) {
t.Fatalf("expected 0.06642947345972061, got %.16f", res.Embedding[0])
}
}
func TestAllMiniLMEmbed(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
@@ -66,8 +33,8 @@ func TestAllMiniLMEmbed(t *testing.T) {
t.Fatalf("expected 384 floats, got %d", len(res.Embeddings[0]))
}
if !floatsEqual32(res.Embeddings[0][0], 0.010071031) {
t.Fatalf("expected 0.010071031, got %.8f", res.Embeddings[0][0])
if res.Embeddings[0][0] != 0.010071031 {
t.Fatalf("expected 0.010071031, got %f", res.Embeddings[0][0])
}
}
@@ -94,12 +61,12 @@ func TestAllMiniLMBatchEmbed(t *testing.T) {
t.Fatalf("expected 384 floats, got %d", len(res.Embeddings[0]))
}
if !floatsEqual32(res.Embeddings[0][0], 0.010071031) || !floatsEqual32(res.Embeddings[1][0], -0.009802706) {
t.Fatalf("expected 0.010071031 and -0.009802706, got %.8f and %.8f", res.Embeddings[0][0], res.Embeddings[1][0])
if res.Embeddings[0][0] != 0.010071031 || res.Embeddings[1][0] != -0.009802706 {
t.Fatalf("expected 0.010071031 and -0.009802706, got %f and %f", res.Embeddings[0][0], res.Embeddings[1][0])
}
}
func TestAllMiniLMEmbedTruncate(t *testing.T) {
func TestAllMiniLmEmbedTruncate(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
@@ -168,22 +135,6 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) {
}
}
func embeddingTestHelper(ctx context.Context, t *testing.T, req api.EmbeddingRequest) (*api.EmbeddingResponse, error) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatalf("failed to pull model %s: %v", req.Model, err)
}
response, err := client.Embeddings(ctx, &req)
if err != nil {
return nil, err
}
return response, nil
}
func embedTestHelper(ctx context.Context, t *testing.T, req api.EmbedRequest) (*api.EmbedResponse, error) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()

View File

@@ -8,7 +8,6 @@ import (
"io"
"log/slog"
"math"
"math/rand/v2"
"net/http"
"net/url"
"os"
@@ -142,32 +141,6 @@ func (b *blobDownload) Run(ctx context.Context, requestURL *url.URL, opts *regis
b.err = b.run(ctx, requestURL, opts)
}
func newBackoff(maxBackoff time.Duration) func(ctx context.Context) error {
var n int
return func(ctx context.Context) error {
if ctx.Err() != nil {
return ctx.Err()
}
n++
// n^2 backoff timer is a little smoother than the
// common choice of 2^n.
d := min(time.Duration(n*n)*10*time.Millisecond, maxBackoff)
// Randomize the delay between 0.5-1.5 x msec, in order
// to prevent accidental "thundering herd" problems.
d = time.Duration(float64(d) * (rand.Float64() + 0.5))
t := time.NewTimer(d)
defer t.Stop()
select {
case <-ctx.Done():
return ctx.Err()
case <-t.C:
return nil
}
}
}
func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *registryOptions) error {
defer blobDownloadManager.Delete(b.Digest)
ctx, b.CancelFunc = context.WithCancel(ctx)
@@ -180,52 +153,6 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
_ = file.Truncate(b.Total)
directURL, err := func() (*url.URL, error) {
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
backoff := newBackoff(10 * time.Second)
for {
// shallow clone opts to be used in the closure
// without affecting the outer opts.
newOpts := new(registryOptions)
*newOpts = *opts
newOpts.CheckRedirect = func(req *http.Request, via []*http.Request) error {
if len(via) > 10 {
return errors.New("maxium redirects exceeded (10) for directURL")
}
// if the hostname is the same, allow the redirect
if req.URL.Hostname() == requestURL.Hostname() {
return nil
}
// stop at the first redirect that is not
// the same hostname as the original
// request.
return http.ErrUseLastResponse
}
resp, err := makeRequestWithRetry(ctx, http.MethodGet, requestURL, nil, nil, newOpts)
if err != nil {
slog.Warn("failed to get direct URL; backing off and retrying", "err", err)
if err := backoff(ctx); err != nil {
return nil, err
}
continue
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusTemporaryRedirect {
return nil, fmt.Errorf("unexpected status code %d", resp.StatusCode)
}
return resp.Location()
}
}()
if err != nil {
return err
}
g, inner := errgroup.WithContext(ctx)
g.SetLimit(numDownloadParts)
for i := range b.Parts {
@@ -238,7 +165,7 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
var err error
for try := 0; try < maxRetries; try++ {
w := io.NewOffsetWriter(file, part.StartsAt())
err = b.downloadChunk(inner, directURL, w, part, opts)
err = b.downloadChunk(inner, requestURL, w, part, opts)
switch {
case errors.Is(err, context.Canceled), errors.Is(err, syscall.ENOSPC):
// return immediately if the context is canceled or the device is out of space

View File

@@ -32,6 +32,7 @@ import (
"github.com/ollama/ollama/types/errtypes"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
"golang.org/x/crypto/ssh"
)
var (
@@ -54,8 +55,6 @@ type registryOptions struct {
Username string
Password string
Token string
CheckRedirect func(req *http.Request, via []*http.Request) error
}
type Model struct {
@@ -1090,11 +1089,12 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
if anonymous {
// no user is associated with the public key, and the request requires non-anonymous access
pubKey, nestedErr := auth.GetPublicKey()
localPubKey := strings.TrimSpace(string(ssh.MarshalAuthorizedKey(pubKey)))
if nestedErr != nil {
slog.Error(fmt.Sprintf("couldn't get public key: %v", nestedErr))
return nil, errUnauthorized
}
return nil, &errtypes.UnknownOllamaKey{Key: pubKey}
return nil, &errtypes.UnknownOllamaKey{Key: localPubKey}
}
// user is associated with the public key, but is not authorized to make the request
return nil, errUnauthorized
@@ -1133,9 +1133,7 @@ func makeRequest(ctx context.Context, method string, requestURL *url.URL, header
req.ContentLength = contentLength
}
resp, err := (&http.Client{
CheckRedirect: regOpts.CheckRedirect,
}).Do(req)
resp, err := http.DefaultClient.Do(req)
if err != nil {
return nil, err
}

View File

@@ -263,27 +263,13 @@ func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) {
if t, err := template.Named(s); err != nil {
slog.Debug("template detection", "error", err)
} else {
layer, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
tmpl, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
if err != nil {
return nil, err
}
layer.status = fmt.Sprintf("using autodetected template %s", t.Name)
layers = append(layers, &layerGGML{layer, nil})
if t.Parameters != nil {
var b bytes.Buffer
if err := json.NewEncoder(&b).Encode(t.Parameters); err != nil {
return nil, err
}
layer, err := NewLayer(&b, "application/vnd.ollama.image.params")
if err != nil {
return nil, err
}
layers = append(layers, &layerGGML{layer, nil})
}
tmpl.status = fmt.Sprintf("using autodetected template %s", t.Name)
layers = append(layers, &layerGGML{tmpl, nil})
}
}
}

View File

@@ -609,9 +609,10 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
defer cancel()
quantization := cmp.Or(r.Quantize, r.Quantization)
if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); errors.Is(err, errBadTemplate) {
ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
} else if err != nil {
if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); err != nil {
if errors.Is(err, errBadTemplate) {
ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
}
ch <- gin.H{"error": err.Error()}
}
}()

View File

@@ -599,10 +599,9 @@ func TestCreateDetectTemplate(t *testing.T) {
}
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
filepath.Join(p, "blobs", "sha256-0d79f567714c62c048378f2107fb332dabee0135d080c302d884317da9433cc5"),
filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"),
filepath.Join(p, "blobs", "sha256-c608dc615584cd20d9d830363dabf8a4783ae5d34245c3d8c115edb3bc7b28e4"),
filepath.Join(p, "blobs", "sha256-ea34c57ba5b78b740aafe2aeb74dc6507fc3ad14170b64c26a04fb9e36c88d75"),
filepath.Join(p, "blobs", "sha256-f836ee110db21567f826332e4cedd746c06d10664fd5a9ea3659e3683a944510"),
})
})

View File

@@ -132,8 +132,6 @@ func (s *Scheduler) processPending(ctx context.Context) {
if len(pending.model.ProjectorPaths) > 0 && numParallel != 1 {
numParallel = 1
slog.Warn("multimodal models don't support parallel requests yet")
} else if strings.Contains(pending.model.Config.ModelFamily, "bert") {
numParallel = runtime.NumCPU()
}
for {

View File

@@ -1,8 +0,0 @@
{
"stop": [
"<start_system>",
"<end_message>",
"<start_user>",
"<start_assistant>"
]
}

View File

@@ -1,6 +0,0 @@
{
"stop": [
"### Instruction:",
"### Response"
]
}

View File

@@ -1,6 +0,0 @@
{
"stop": [
"<|im_start|>",
"<|im_end|>"
]
}

View File

@@ -1,8 +0,0 @@
{
"stop": [
"System:",
"User:",
"Assistant:",
"<|begin_of_text|>"
]
}

View File

@@ -1,7 +0,0 @@
{
"stop": [
"Source:",
"Destination:",
"<step>"
]
}

View File

@@ -1,6 +0,0 @@
{
"stop": [
"User:",
"Assistant:"
]
}

View File

@@ -1,6 +0,0 @@
{
"stop": [
"<start_of_turn>",
"<end_of_turn>"
]
}

View File

@@ -1,7 +0,0 @@
{
"stop": [
"System:",
"Question:",
"Answer:"
]
}

View File

@@ -1,8 +0,0 @@
{
"stop": [
"[INST]",
"[/INST]",
"<<SYS>>",
"<</SYS>>"
]
}

View File

@@ -1,7 +0,0 @@
{
"stop": [
"<|start_header_id|>",
"<|end_header_id|>",
"<|eot_id|>"
]
}

View File

@@ -1,6 +0,0 @@
{
"stop": [
"@@ Instruction",
"@@ Response"
]
}

View File

@@ -1,6 +0,0 @@
{
"stop": [
"<|im_start|>",
"<|im_end|>"
]
}

View File

@@ -1,5 +0,0 @@
{
"stop": [
"<|end_of_turn|>"
]
}

View File

@@ -1,8 +0,0 @@
{
"stop": [
"<|end|>",
"<|system|>",
"<|user|>",
"<|assistant|>"
]
}

View File

@@ -1,7 +0,0 @@
{
"stop": [
"### System:",
"### User:",
"### Assistant"
]
}

View File

@@ -1,7 +0,0 @@
{
"stop": [
"### Instruction",
"### Response",
"<|endoftext|>"
]
}

View File

@@ -23,7 +23,6 @@ import (
var indexBytes []byte
//go:embed *.gotmpl
//go:embed *.json
var templatesFS embed.FS
var templatesOnce = sync.OnceValues(func() ([]*named, error) {
@@ -40,15 +39,6 @@ var templatesOnce = sync.OnceValues(func() ([]*named, error) {
// normalize line endings
t.Bytes = bytes.ReplaceAll(bts, []byte("\r\n"), []byte("\n"))
params, err := templatesFS.ReadFile(t.Name + ".json")
if err != nil {
continue
}
if err := json.Unmarshal(params, &t.Parameters); err != nil {
return nil, err
}
}
return templates, nil
@@ -58,10 +48,6 @@ type named struct {
Name string `json:"name"`
Template string `json:"template"`
Bytes []byte
Parameters *struct {
Stop []string `json:"stop"`
}
}
func (t named) Reader() io.Reader {

View File

@@ -1,6 +0,0 @@
{
"stop": [
"USER:",
"ASSISTANT:"
]
}

View File

@@ -1,8 +0,0 @@
{
"stop": [
"<|system|>",
"</s>",
"<|user|>",
"<|assistant|>"
]
}