Compare commits

..

1 Commits

Author SHA1 Message Date
Blake Mizerany
49c126fde8 build.go: introduce a friendlier way to build Ollama
This commit introduces a friendlier way to build the Ollama dependencies
and the binary without abusing `go generate`, and it removes the
unnecessary extra steps that approach brings with it.

This script also provides nicer feedback to the user about what is
happening during the build process.

At the end, it prints a helpful message to the user about what to do
next (e.g. run the new local Ollama).
2024-04-09 13:52:08 -07:00
64 changed files with 796 additions and 3234 deletions

View File

@@ -30,7 +30,7 @@ jobs:
security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k password build.keychain
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: '1.22'
cache: true
- name: Build Darwin
env:
@@ -86,7 +86,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: '1.22'
cache: true
- run: go get ./...
- run: |
@@ -95,7 +95,8 @@ jobs:
cd $env:GITHUB_WORKSPACE
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
$env:PATH="$gopath;$env:PATH"
go generate -x ./...
$env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
name: go generate
- uses: actions/upload-artifact@v4
with:
@@ -139,7 +140,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: '1.22'
cache: true
- name: 'Install ROCm'
run: |
@@ -214,7 +215,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: '1.22'
cache: true
- name: 'Install CUDA'
run: |
@@ -300,7 +301,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: '1.22'
cache: true
- run: go get
- uses: actions/download-artifact@v4

View File

@@ -1,10 +1,22 @@
name: test
concurrency:
  # For PRs, later CI runs preempt previous ones. e.g. a force push on a PR
  # cancels running CI jobs and starts all new ones.
  #
  # For non-PR pushes, concurrency.group needs to be unique for every distinct
  # CI run we want to have happen. Use run_id, which in practice means all
  # non-PR CI runs will be allowed to run without preempting each other.
  #
  # The PR number is read from the event payload (github.event.pull_request);
  # the github context has no top-level pull_request key, so reading it there
  # always yields an empty value and every run falls back to run_id.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
  cancel-in-progress: true
on:
pull_request:
paths:
- '**/*'
- '!docs/**'
- '!examples/**'
- '!README.md'
jobs:
@@ -50,7 +62,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: '1.22'
cache: true
- run: go get ./...
- run: |
@@ -61,10 +73,12 @@ jobs:
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
$env:PATH="$gopath;$gccpath;$env:PATH"
echo $env:PATH
go generate -x ./...
$env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
if: ${{ startsWith(matrix.os, 'windows-') }}
name: 'Windows Go Generate'
- run: go generate -x ./...
- run: |
GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
if: ${{ ! startsWith(matrix.os, 'windows-') }}
name: 'Unix Go Generate'
- uses: actions/upload-artifact@v4
@@ -92,12 +106,12 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-go@v4
with:
go-version-file: go.mod
go-version: '1.22'
cache: true
- run: go get ./...
- run: |
git config --global --add safe.directory /__w/ollama/ollama
go generate -x ./...
GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
env:
OLLAMA_SKIP_CPU_GENERATE: '1'
- uses: actions/upload-artifact@v4
@@ -123,12 +137,12 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-go@v4
with:
go-version-file: go.mod
go-version: '1.22'
cache: true
- run: go get ./...
- run: |
git config --global --add safe.directory /__w/ollama/ollama
go generate -x ./...
GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
env:
OLLAMA_SKIP_CPU_GENERATE: '1'
- uses: actions/upload-artifact@v4
@@ -145,7 +159,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: '1.22'
cache: true
- name: 'Install ROCm'
run: |
@@ -167,8 +181,9 @@ jobs:
$env:PATH="$gopath;$env:PATH"
$env:OLLAMA_SKIP_CPU_GENERATE="1"
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
go generate -x ./...
name: go generate
$env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
name: go run build.go
env:
OLLAMA_SKIP_CPU_GENERATE: '1'
# TODO - do we need any artifacts?
@@ -182,7 +197,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: '1.22'
cache: true
- name: 'Install CUDA'
run: |
@@ -201,7 +216,7 @@ jobs:
- name: 'Verify CUDA'
run: nvcc -V
- run: go get ./...
- name: go generate
- name: go run build.go
run: |
$gopath=(get-command go).source | split-path -parent
$cudabin=(get-command nvcc).source | split-path
@@ -210,7 +225,8 @@ jobs:
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
$env:PATH="$gopath;$cudabin;$env:PATH"
$env:OLLAMA_SKIP_CPU_GENERATE="1"
go generate -x ./...
$env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
env:
OLLAMA_SKIP_CPU_GENERATE: '1'
# TODO - do we need any artifacts?
@@ -237,7 +253,7 @@ jobs:
submodules: recursive
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: '1.22'
cache: false
- run: |
case ${{ matrix.arch }} in
@@ -282,8 +298,14 @@ jobs:
submodules: recursive
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: '1.22'
cache: true
- run: |
GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
if: ${{ ! startsWith(matrix.os, 'windows-') }}
- run: |
$env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
if: ${{ startsWith(matrix.os, 'windows-') }}
- run: go get
- run: |
case ${{ matrix.arch }} in
@@ -304,9 +326,8 @@ jobs:
touch llm/build/windows/$ARCH/stub/bin/ollama_llama_server
if: ${{ startsWith(matrix.os, 'windows-') }}
shell: bash
- run: go generate ./...
- run: go build
- run: go test -v ./...
- run: |
go test -v ./...
- uses: actions/upload-artifact@v4
with:
name: ${{ matrix.os }}-binaries

View File

@@ -42,7 +42,7 @@ ARG CGO_CFLAGS
ARG AMDGPU_TARGETS
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
RUN mkdir /tmp/scratch && \
for dep in $(zcat /go/src/github.com/ollama/ollama/llm/build/linux/x86_64/rocm*/bin/deps.txt.gz) ; do \
for dep in $(cat /go/src/github.com/ollama/ollama/llm/llama.cpp/build/linux/x86_64/rocm*/lib/deps.txt) ; do \
cp ${dep} /tmp/scratch/ || exit 1 ; \
done && \
(cd /opt/rocm/lib && tar cf - rocblas/library) | (cd /tmp/scratch/ && tar xf - ) && \

View File

@@ -64,7 +64,6 @@ Here are some example models that can be downloaded:
| LLaVA | 7B | 4.5GB | `ollama run llava` |
| Gemma | 2B | 1.4GB | `ollama run gemma:2b` |
| Gemma | 7B | 4.8GB | `ollama run gemma:7b` |
| Solar | 10.7B | 6.1GB | `ollama run solar` |
> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
@@ -202,16 +201,10 @@ Install `cmake` and `go`:
brew install cmake go
```
Then generate dependencies:
```
go generate ./...
```
Then build the binary:
```
go build .
go run build.go
```
More detailed instructions can be found in the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
@@ -317,7 +310,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
### Database
- [MindsDB](https://github.com/mindsdb/mindsdb/blob/staging/mindsdb/integrations/handlers/ollama_handler/README.md) (Connects Ollama models with nearly 200 data platforms and apps)
- [MindsDB](https://github.com/mindsdb/mindsdb/blob/staging/mindsdb/integrations/handlers/ollama_handler/README.md)
- [chromem-go](https://github.com/philippgille/chromem-go/blob/v0.5.0/embed_ollama.go) with [example](https://github.com/philippgille/chromem-go/tree/v0.5.0/examples/rag-wikipedia-ollama)
### Package managers

View File

@@ -1,9 +1,3 @@
// Package api implements the client-side API for code wishing to interact
// with the ollama service. The methods of the [Client] type correspond to
// the ollama REST API as described in https://github.com/ollama/ollama/blob/main/docs/api.md
//
// The ollama command-line client itself uses this package to interact with
// the backend service.
package api
import (
@@ -11,6 +5,7 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net"
@@ -24,8 +19,6 @@ import (
"github.com/ollama/ollama/version"
)
// Client encapsulates client state for interacting with the ollama
// service. Use [ClientFromEnvironment] to create new Clients.
type Client struct {
base *url.URL
http *http.Client
@@ -47,15 +40,6 @@ func checkError(resp *http.Response, body []byte) error {
return apiError
}
// ClientFromEnvironment creates a new [Client] using configuration from the
// environment variable OLLAMA_HOST, which points to the network host and
// port on which the ollama service is listening. The format of this variable
// is:
//
// <scheme>://<host>:<port>
//
// If the variable is not specified, a default ollama host and port will be
// used.
func ClientFromEnvironment() (*Client, error) {
defaultPort := "11434"
@@ -207,14 +191,8 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
return nil
}
// GenerateResponseFunc is a function that [Client.Generate] invokes every time
// a response is received from the service. If this function returns an error,
// [Client.Generate] will stop generating and return this error.
type GenerateResponseFunc func(GenerateResponse) error
// Generate generates a response for a given prompt. The req parameter should
// be populated with prompt details. fn is called for each response (there may
// be multiple responses, e.g. in case streaming is enabled).
func (c *Client) Generate(ctx context.Context, req *GenerateRequest, fn GenerateResponseFunc) error {
return c.stream(ctx, http.MethodPost, "/api/generate", req, func(bts []byte) error {
var resp GenerateResponse
@@ -226,15 +204,8 @@ func (c *Client) Generate(ctx context.Context, req *GenerateRequest, fn Generate
})
}
// ChatResponseFunc is a function that [Client.Chat] invokes every time
// a response is received from the service. If this function returns an error,
// [Client.Chat] will stop generating and return this error.
type ChatResponseFunc func(ChatResponse) error
// Chat generates the next message in a chat. [ChatRequest] may contain a
// sequence of messages which can be used to maintain chat history with a model.
// fn is called for each response (there may be multiple responses, e.g. in case
// streaming is enabled).
func (c *Client) Chat(ctx context.Context, req *ChatRequest, fn ChatResponseFunc) error {
return c.stream(ctx, http.MethodPost, "/api/chat", req, func(bts []byte) error {
var resp ChatResponse
@@ -246,14 +217,8 @@ func (c *Client) Chat(ctx context.Context, req *ChatRequest, fn ChatResponseFunc
})
}
// PullProgressFunc is a function that [Client.Pull] invokes every time there
// is progress with a "pull" request sent to the service. If this function
// returns an error, [Client.Pull] will stop the process and return this error.
type PullProgressFunc func(ProgressResponse) error
// Pull downloads a model from the ollama library. fn is called each time
// progress is made on the request and can be used to display a progress bar,
// etc.
func (c *Client) Pull(ctx context.Context, req *PullRequest, fn PullProgressFunc) error {
return c.stream(ctx, http.MethodPost, "/api/pull", req, func(bts []byte) error {
var resp ProgressResponse
@@ -336,7 +301,18 @@ func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*Embedd
}
func (c *Client) CreateBlob(ctx context.Context, digest string, r io.Reader) error {
return c.do(ctx, http.MethodPost, fmt.Sprintf("/api/blobs/%s", digest), r, nil)
if err := c.do(ctx, http.MethodHead, fmt.Sprintf("/api/blobs/%s", digest), nil, nil); err != nil {
var statusError StatusError
if !errors.As(err, &statusError) || statusError.StatusCode != http.StatusNotFound {
return err
}
if err := c.do(ctx, http.MethodPost, fmt.Sprintf("/api/blobs/%s", digest), r, nil); err != nil {
return err
}
}
return nil
}
func (c *Client) Version(ctx context.Context) (string, error) {

View File

@@ -33,46 +33,18 @@ func (e StatusError) Error() string {
type ImageData []byte
// GenerateRequest describes a request sent by [Client.Generate]. While you
// have to specify the Model and Prompt fields, all the other fields have
// reasonable defaults for basic uses.
type GenerateRequest struct {
// Model is the model name; it should be a name familiar to Ollama from
// the library at https://ollama.com/library
Model string `json:"model"`
Model string `json:"model"`
Prompt string `json:"prompt"`
System string `json:"system"`
Template string `json:"template"`
Context []int `json:"context,omitempty"`
Stream *bool `json:"stream,omitempty"`
Raw bool `json:"raw,omitempty"`
Format string `json:"format"`
KeepAlive *Duration `json:"keep_alive,omitempty"`
Images []ImageData `json:"images,omitempty"`
// Prompt is the textual prompt to send to the model.
Prompt string `json:"prompt"`
// System overrides the model's default system message/prompt.
System string `json:"system"`
// Template overrides the model's default prompt template.
Template string `json:"template"`
// Context is the context parameter returned from a previous call to
// Generate. It can be used to keep a short conversational memory.
Context []int `json:"context,omitempty"`
// Stream specifies whether the response is streaming; it is true by default.
Stream *bool `json:"stream,omitempty"`
// Raw set to true means that no formatting will be applied to the prompt.
Raw bool `json:"raw,omitempty"`
// Format specifies the format to return a response in.
Format string `json:"format"`
// KeepAlive controls how long the model will stay loaded in memory following
// this request.
KeepAlive *Duration `json:"keep_alive,omitempty"`
// Images is an optional list of base64-encoded images accompanying this
// request, for multimodal models.
Images []ImageData `json:"images,omitempty"`
// Options lists model-specific options. For example, temperature can be
// set through this field, if the model supports it.
Options map[string]interface{} `json:"options"`
}
@@ -137,24 +109,19 @@ type Options struct {
// Runner options which must be set when the model is loaded into memory
type Runner struct {
UseNUMA bool `json:"numa,omitempty"`
NumCtx int `json:"num_ctx,omitempty"`
NumBatch int `json:"num_batch,omitempty"`
NumGQA int `json:"num_gqa,omitempty"`
NumGPU int `json:"num_gpu,omitempty"`
MainGPU int `json:"main_gpu,omitempty"`
LowVRAM bool `json:"low_vram,omitempty"`
F16KV bool `json:"f16_kv,omitempty"`
LogitsAll bool `json:"logits_all,omitempty"`
VocabOnly bool `json:"vocab_only,omitempty"`
UseMMap bool `json:"use_mmap,omitempty"`
UseMLock bool `json:"use_mlock,omitempty"`
NumThread int `json:"num_thread,omitempty"`
// Unused: RopeFrequencyBase is ignored. Instead the value in the model will be used
RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"`
// Unused: RopeFrequencyScale is ignored. Instead the value in the model will be used
RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
UseNUMA bool `json:"numa,omitempty"`
NumCtx int `json:"num_ctx,omitempty"`
NumBatch int `json:"num_batch,omitempty"`
NumGQA int `json:"num_gqa,omitempty"`
NumGPU int `json:"num_gpu,omitempty"`
MainGPU int `json:"main_gpu,omitempty"`
LowVRAM bool `json:"low_vram,omitempty"`
F16KV bool `json:"f16_kv,omitempty"`
LogitsAll bool `json:"logits_all,omitempty"`
VocabOnly bool `json:"vocab_only,omitempty"`
UseMMap bool `json:"use_mmap,omitempty"`
UseMLock bool `json:"use_mlock,omitempty"`
NumThread int `json:"num_thread,omitempty"`
}
type EmbeddingRequest struct {
@@ -170,11 +137,10 @@ type EmbeddingResponse struct {
}
type CreateRequest struct {
Model string `json:"model"`
Path string `json:"path"`
Modelfile string `json:"modelfile"`
Stream *bool `json:"stream,omitempty"`
Quantization string `json:"quantization,omitempty"`
Model string `json:"model"`
Path string `json:"path"`
Modelfile string `json:"modelfile"`
Stream *bool `json:"stream,omitempty"`
// Name is deprecated, see Model
Name string `json:"name"`
@@ -414,16 +380,16 @@ func DefaultOptions() Options {
Runner: Runner{
// options set when the model is loaded
NumCtx: 2048,
NumBatch: 512,
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
NumGQA: 1,
NumThread: 0, // let the runtime decide
LowVRAM: false,
F16KV: true,
UseMLock: false,
UseMMap: true,
UseNUMA: false,
NumCtx: 2048,
NumBatch: 512,
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
NumGQA: 1,
NumThread: 0, // let the runtime decide
LowVRAM: false,
F16KV: true,
UseMLock: false,
UseMMap: true,
UseNUMA: false,
},
}
}

View File

@@ -9,6 +9,7 @@ import (
"os"
"os/exec"
"path/filepath"
"syscall"
"time"
"github.com/ollama/ollama/api"
@@ -86,29 +87,19 @@ func SpawnServer(ctx context.Context, command string) (chan int, error) {
// Re-wire context done behavior to attempt a graceful shutdown of the server
cmd.Cancel = func() error {
if cmd.Process != nil {
err := terminate(cmd)
if err != nil {
slog.Warn("error trying to gracefully terminate server", "err", err)
return cmd.Process.Kill()
}
cmd.Process.Signal(os.Interrupt) //nolint:errcheck
tick := time.NewTicker(10 * time.Millisecond)
defer tick.Stop()
for {
select {
case <-tick.C:
exited, err := isProcessExited(cmd.Process.Pid)
if err != nil {
return err
}
if exited {
return nil
// OS agnostic "is it still running"
if proc, err := os.FindProcess(int(cmd.Process.Pid)); err != nil || errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) {
return nil //nolint:nilerr
}
case <-time.After(5 * time.Second):
slog.Warn("graceful server shutdown timeout, killing", "pid", cmd.Process.Pid)
return cmd.Process.Kill()
cmd.Process.Kill() //nolint:errcheck
}
}
}

View File

@@ -4,35 +4,9 @@ package lifecycle
import (
"context"
"errors"
"fmt"
"os"
"os/exec"
"syscall"
)
// getCmd returns a command, bound to ctx through exec.CommandContext, that
// runs the executable named by cmd with the single argument "serve".
func getCmd(ctx context.Context, cmd string) *exec.Cmd {
return exec.CommandContext(ctx, cmd, "serve")
}
// terminate sends os.Interrupt to the process behind cmd and returns the
// error, if any, from delivering the signal.
//
// cmd.Process is dereferenced without a nil check, so callers must make sure
// it is non-nil before calling.
func terminate(cmd *exec.Cmd) error {
return cmd.Process.Signal(os.Interrupt)
}
// isProcessExited reports whether the process identified by pid has exited.
//
// The process is probed by sending it signal 0. A nil result means the signal
// could be delivered, so the process is reported as still running. An error
// matching os.ErrProcessDone or syscall.ESRCH means the process is gone and
// true is returned. Any other failure is returned to the caller, wrapped with
// %w so the underlying error stays reachable through errors.Is / errors.As.
func isProcessExited(pid int) (bool, error) {
	proc, err := os.FindProcess(pid)
	if err != nil {
		return false, fmt.Errorf("failed to find process: %w", err)
	}

	err = proc.Signal(syscall.Signal(0))
	switch {
	case err == nil:
		return false, nil
	case errors.Is(err, os.ErrProcessDone), errors.Is(err, syscall.ESRCH):
		return true, nil
	default:
		return false, fmt.Errorf("error signaling process: %w", err)
	}
}

View File

@@ -2,88 +2,12 @@ package lifecycle
import (
"context"
"fmt"
"os/exec"
"syscall"
"golang.org/x/sys/windows"
)
func getCmd(ctx context.Context, exePath string) *exec.Cmd {
cmd := exec.CommandContext(ctx, exePath, "serve")
cmd.SysProcAttr = &syscall.SysProcAttr{
HideWindow: true,
CreationFlags: windows.CREATE_NEW_PROCESS_GROUP,
}
cmd.SysProcAttr = &syscall.SysProcAttr{HideWindow: true, CreationFlags: 0x08000000}
return cmd
}
// terminate asks the process behind cmd to stop by raising console control
// events for its PID through kernel32.dll.
//
// The steps run in a fixed order: AttachConsole(pid),
// SetConsoleCtrlHandler(0, 1), then GenerateConsoleCtrlEvent twice, first
// with CTRL_BREAK_EVENT and then with CTRL_C_EVENT. The first failing step
// aborts the sequence and its error is returned; nil is returned when every
// call succeeds.
//
// cmd.Process is dereferenced without a nil check, so callers must make sure
// it is non-nil before calling.
func terminate(cmd *exec.Cmd) error {
dll, err := windows.LoadDLL("kernel32.dll")
if err != nil {
return err
}
defer dll.Release() // nolint: errcheck
pid := cmd.Process.Pid
f, err := dll.FindProc("AttachConsole")
if err != nil {
return err
}
// A zero result is tolerated only when the error is ERROR_ACCESS_DENIED.
// NOTE(review): per the Win32 docs that code means the caller is already
// attached to a console — confirm.
r1, _, err := f.Call(uintptr(pid))
if r1 == 0 && err != syscall.ERROR_ACCESS_DENIED {
return err
}
f, err = dll.FindProc("SetConsoleCtrlHandler")
if err != nil {
return err
}
// NOTE(review): a NULL handler with add=TRUE presumably makes this process
// ignore the Ctrl+C it is about to generate — confirm against the Win32 docs.
r1, _, err = f.Call(0, 1)
if r1 == 0 {
return err
}
f, err = dll.FindProc("GenerateConsoleCtrlEvent")
if err != nil {
return err
}
// Raise CTRL_BREAK first, then CTRL_C, both addressed to pid.
r1, _, err = f.Call(windows.CTRL_BREAK_EVENT, uintptr(pid))
if r1 == 0 {
return err
}
r1, _, err = f.Call(windows.CTRL_C_EVENT, uintptr(pid))
if r1 == 0 {
return err
}
return nil
}
// STILL_ACTIVE is the exit-code value that isProcessExited treats as
// "the process has not exited yet".
const STILL_ACTIVE = 259
// isProcessExited reports whether the process identified by pid has exited.
//
// It opens the process with PROCESS_QUERY_INFORMATION, reads its exit code
// with GetExitCodeProcess, and returns false while that code equals
// STILL_ACTIVE. A failure to open the process or to read the exit code is
// returned as an error. Note that a process which really exits with code 259
// is indistinguishable from a running one under this check.
func isProcessExited(pid int) (bool, error) {
hProcess, err := windows.OpenProcess(windows.PROCESS_QUERY_INFORMATION, false, uint32(pid))
if err != nil {
return false, fmt.Errorf("failed to open process: %v", err)
}
defer windows.CloseHandle(hProcess) // nolint: errcheck
var exitCode uint32
err = windows.GetExitCodeProcess(hProcess, &exitCode)
if err != nil {
return false, fmt.Errorf("failed to get exit code: %v", err)
}
if exitCode == STILL_ACTIVE {
return false, nil
}
return true, nil
}

View File

@@ -24,5 +24,10 @@ func NewTray() (commontray.OllamaTray, error) {
return nil, fmt.Errorf("failed to load icon %s: %w", iconName, err)
}
return InitPlatformTray(icon, updateIcon)
tray, err := InitPlatformTray(icon, updateIcon)
if err != nil {
return nil, err
}
return tray, nil
}

192
build.go Normal file
View File

@@ -0,0 +1,192 @@
//go:build ignore
package main
import (
"cmp"
"errors"
"flag"
"log"
"os"
"os/exec"
"path/filepath"
"runtime"
)
// Command-line flags. Each is registered on the default flag set and is only
// meaningful after flag.Parse has run.
var (
// flagForce (-f) forces re-generation of the dependencies.
flagForce = flag.Bool("f", false, "force re-generation of dependencies")
// flagSkipBuild (-d) generates the dependencies only and skips the Go build.
flagSkipBuild = flag.Bool("d", false, "generate dependencies only (e.g. skip 'go build .')")
// flagGOARCH (-target) sets GOARCH explicitly for cross-platform builds,
// e.g., in CI to target a different platform than the build matrix
// default. This allows us to run generate without a separate build
// step for building the script binary for the host ARCH and then
// running the generate script for the target ARCH. Instead, we can
// just run `go run build.go -target=$GOARCH` to generate the
// deps. When empty, runtime.GOARCH is used.
flagGOARCH = flag.String("target", "", "sets GOARCH to use when generating dependencies and building")
)
// buildEnv returns the environment handed to child build commands: the
// current process environment followed by a GOARCH entry. The value comes
// from the -target flag when it is set and from runtime.GOARCH otherwise.
func buildEnv() []string {
	goarch := cmp.Or(*flagGOARCH, runtime.GOARCH)
	env := os.Environ()
	return append(env, "GOARCH="+goarch)
}
// main parses the flags, checks the working directory and the required
// tools, generates the llama.cpp dependencies, and then builds the ollama
// binary (unless -d was given).
//
// Every failure is reported through log.Fatalf, which exits with a non-zero
// status. Stray positional arguments print the usage text and exit with
// status 1.
func main() {
	log.SetFlags(0)
	flag.Usage = func() {
		log.Printf("Usage: go run build.go [flags]")
		log.Println()
		log.Println("Flags:")
		flag.PrintDefaults()
		log.Println()
		log.Println("This script builds the Ollama server binary and generates the llama.cpp")
		log.Println("bindings for the current platform. It assumes that the current working")
		log.Println("directory is the root directory of the Ollama project.")
		log.Println()
		log.Println("If the -d flag is provided, the script will only generate the dependencies")
		log.Println("and skip building the Ollama server binary.")
		log.Println()
		log.Println("If the -f flag is provided, the script will force re-generation of the")
		log.Println("dependencies.")
		log.Println()
		log.Println("If the -target flag is provided, the script will set GOARCH to the value")
		log.Println("of the flag. This is useful for cross-platform builds.")
		log.Println()
		log.Println("The script will check for the required dependencies (cmake, gcc) and")
		log.Println("print their version.")
		log.Println()
		log.Println("The script will also check if it is being run from the root directory of")
		log.Println("the Ollama project.")
		log.Println()
		os.Exit(1)
	}
	flag.Parse()

	// Reject stray positional arguments before any build banner is printed;
	// flag.Usage exits the process.
	if flag.NArg() > 0 {
		flag.Usage()
	}

	log.Printf("=== Building Ollama ===")

	if !inRootDir() {
		log.Fatalf("Please run this script from the root directory of the Ollama project.")
	}
	if err := checkDependencies(); err != nil {
		log.Fatalf("Failed dependency check: %v", err)
	}
	if err := buildLlammaCPP(); err != nil {
		log.Fatalf("Failed to build llama.cpp: %v", err)
	}
	if err := goBuildOllama(); err != nil {
		log.Fatalf("Failed to build ollama Go binary: %v", err)
	}

	// log.Fatalf exits without running deferred calls, so the closing
	// messages are plain statements on the success path.
	log.Printf("=== Done building Ollama ===")
	log.Println()

	// With -d no binary was produced by this run, so do not point the user
	// at ./ollama.
	if *flagSkipBuild {
		return
	}
	log.Println("To run the Ollama server, use:")
	log.Println()
	log.Println(" ./ollama serve")
	log.Println()
}
// checkDependencies probes each external tool the build relies on by running
// it with --version and streaming its output to this process's stdout and
// stderr. Every tool is probed even if an earlier one fails; the failures are
// combined with errors.Join, so the result is nil only when all probes
// succeed.
//
// TODO(bmizerany): Check the actual version of the dependencies? Seems a
// little daunting given diff versions might print diff things. This should
// be good enough for now.
func checkDependencies() error {
	tools := []string{"cmake", "gcc"}

	var failures error
	for _, tool := range tools {
		log.Printf("=== Checking for %s ===", tool)

		probe := exec.Command(tool, "--version")
		probe.Stdout, probe.Stderr = os.Stdout, os.Stderr
		failures = errors.Join(failures, probe.Run())

		log.Printf("=== Done checking for %s ===\n\n", tool)
	}
	return failures
}
// goBuildOllama compiles the ollama binary into ./ollama with `go build`,
// using the environment from buildEnv and streaming the compiler output to
// this process's stdout and stderr. When the -d flag is set it only logs that
// the build is skipped and returns nil.
func goBuildOllama() error {
	log.Println("=== Building Ollama binary ===")
	defer log.Printf("=== Done building Ollama binary ===\n\n")

	if !*flagSkipBuild {
		build := exec.Command("go", "build", "-o", "ollama", ".")
		build.Stdout, build.Stderr = os.Stdout, os.Stderr
		build.Env = buildEnv()
		return build.Run()
	}

	log.Println("Skipping 'go build -o ollama .'")
	return nil
}
// buildLlammaCPP generates the llama.cpp bindings for the current platform by
// running the matching generate script from llm/generate.
//
// It assumes that the current working directory is the root directory of the
// Ollama project.
//
// If llm/build already exists the step is skipped; the -f flag removes that
// directory first so generation always runs. The script is run with the
// environment from buildEnv, so the -target flag decides the GOARCH it sees.
//
// An error is returned when llm/build cannot be removed, the script directory
// cannot be resolved, the host OS has no generate script, or the script fails.
func buildLlammaCPP() error {
	log.Println("=== Generating dependencies ===")
	defer log.Printf("=== Done generating dependencies ===\n\n")

	buildDir := filepath.Join("llm", "build")
	if *flagForce {
		if err := os.RemoveAll(buildDir); err != nil {
			return err
		}
	}
	if isDirectory(buildDir) {
		log.Println("llm/build already exists; skipping. Use -f to force re-generate.")
		return nil
	}

	scriptDir, err := filepath.Abs(filepath.Join("llm", "generate"))
	if err != nil {
		return err
	}

	var cmd *exec.Cmd
	switch runtime.GOOS {
	case "windows":
		script := filepath.Join(scriptDir, "gen_windows.ps1")
		cmd = exec.Command("powershell", "-ExecutionPolicy", "Bypass", "-File", script)
	case "linux":
		script := filepath.Join(scriptDir, "gen_linux.sh")
		cmd = exec.Command("bash", script)
	case "darwin":
		script := filepath.Join(scriptDir, "gen_darwin.sh")
		cmd = exec.Command("bash", script)
	default:
		// Return the failure so the caller decides how to report it,
		// instead of exiting from inside a function that returns error.
		return errors.New("unsupported OS: " + runtime.GOOS)
	}

	// The scripts run from their own directory.
	cmd.Dir = scriptDir
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	cmd.Env = buildEnv()

	// Log the GOARCH the script will actually see (the -target value when
	// set), not the architecture of the host running this program.
	goarch := cmp.Or(*flagGOARCH, runtime.GOARCH)
	log.Printf("Running GOOS=%s GOARCH=%s %s", runtime.GOOS, goarch, cmd.Args)
	return cmd.Run()
}
// isDirectory reports whether path exists and is a directory. Any os.Stat
// failure, including a missing path, yields false.
func isDirectory(path string) bool {
	info, err := os.Stat(path)
	return err == nil && info.IsDir()
}
// inRootDir reports whether the current working directory looks like the
// root of the Ollama project. The only check is that an entry named "go.mod"
// can be stat'ed there.
func inRootDir() bool {
	if _, err := os.Stat("go.mod"); err != nil {
		return false
	}
	return true
}

View File

@@ -105,48 +105,24 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
zf := zip.NewWriter(tf)
files := []string{}
tfiles, err := filepath.Glob(filepath.Join(path, "pytorch_model-*.bin"))
files, err := filepath.Glob(filepath.Join(path, "model-*.safetensors"))
if err != nil {
return err
} else if len(tfiles) == 0 {
tfiles, err = filepath.Glob(filepath.Join(path, "model-*.safetensors"))
if err != nil {
return err
}
}
files = append(files, tfiles...)
if len(files) == 0 {
return fmt.Errorf("no models were found in '%s'", path)
return fmt.Errorf("no safetensors files were found in '%s'", path)
}
// add the safetensor/torch config file + tokenizer
// add the safetensor config file + tokenizer
files = append(files, filepath.Join(path, "config.json"))
files = append(files, filepath.Join(path, "params.json"))
files = append(files, filepath.Join(path, "added_tokens.json"))
files = append(files, filepath.Join(path, "tokenizer.model"))
for _, fn := range files {
f, err := os.Open(fn)
// just skip whatever files aren't there
if os.IsNotExist(err) {
if strings.HasSuffix(fn, "tokenizer.model") {
// try the parent dir before giving up
parentDir := filepath.Dir(path)
newFn := filepath.Join(parentDir, "tokenizer.model")
f, err = os.Open(newFn)
if os.IsNotExist(err) {
continue
} else if err != nil {
return err
}
} else {
continue
}
if os.IsNotExist(err) && strings.HasSuffix(fn, "added_tokens.json") {
continue
} else if err != nil {
return err
}
@@ -218,9 +194,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return nil
}
quantization, _ := cmd.Flags().GetString("quantization")
request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile), Quantization: quantization}
request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile)}
if err := client.Create(cmd.Context(), &request, fn); err != nil {
return err
}
@@ -252,6 +226,14 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, er
}
func RunHandler(cmd *cobra.Command, args []string) error {
if os.Getenv("OLLAMA_MODELS") != "" {
return errors.New("OLLAMA_MODELS must only be set for 'ollama serve'")
}
if err := checkServerHeartbeat(cmd, args); err != nil {
return err
}
client, err := api.ClientFromEnvironment()
if err != nil {
return err
@@ -961,7 +943,6 @@ func NewCLI() *cobra.Command {
}
createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile (default \"Modelfile\")")
createCmd.Flags().StringP("quantization", "q", "", "Quantization level.")
showCmd := &cobra.Command{
Use: "show MODEL",
@@ -978,11 +959,10 @@ func NewCLI() *cobra.Command {
showCmd.Flags().Bool("system", false, "Show system message of a model")
runCmd := &cobra.Command{
Use: "run MODEL [PROMPT]",
Short: "Run a model",
Args: cobra.MinimumNArgs(1),
PreRunE: checkServerHeartbeat,
RunE: RunHandler,
Use: "run MODEL [PROMPT]",
Short: "Run a model",
Args: cobra.MinimumNArgs(1),
RunE: RunHandler,
}
runCmd.Flags().Bool("verbose", false, "Show timings for response")

View File

@@ -1,16 +1,21 @@
package convert
import (
"bytes"
"cmp"
"encoding/binary"
"encoding/json"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"regexp"
"slices"
"strings"
"github.com/d4l3k/go-bfloat16"
"github.com/mitchellh/mapstructure"
"github.com/x448/float16"
"google.golang.org/protobuf/proto"
"github.com/ollama/ollama/convert/sentencepiece"
@@ -40,45 +45,157 @@ type ByteOrder interface {
binary.AppendByteOrder
}
// MetaData is the per-entry record decoded (via mapstructure) from a header
// entry's "dtype", "shape" and "data_offsets" keys.
type MetaData struct {
// Type is the value of the entry's "dtype" key.
Type string `mapstructure:"dtype"`
// Shape is the value of the entry's "shape" key.
Shape []int `mapstructure:"shape"`
// Offsets is the value of the entry's "data_offsets" key.
// NOTE(review): presumably a [start, end) byte range — confirm.
Offsets []int `mapstructure:"data_offsets"`
}
// ModelArch is the set of operations an architecture-specific converter must
// provide. The comments below are read from the method names only.
// NOTE(review): confirm against the implementations.
type ModelArch interface {
// GetTensors presumably loads the model's tensors.
GetTensors() error
// LoadVocab presumably loads the model's vocabulary.
LoadVocab() error
// WriteGGUF presumably writes the converted model and returns a string
// identifying the result (path or name — TODO confirm).
WriteGGUF() (string, error)
}
// ModelFormat is the set of operations a source-format reader must provide.
// The meaning of the string arguments is not visible from this declaration.
// NOTE(review): document the arguments once confirmed against the
// implementations.
type ModelFormat interface {
// GetLayerName presumably maps a source layer name to its converted name.
GetLayerName(string) (string, error)
// GetTensors returns the tensors read using the given string and Params.
GetTensors(string, *Params) ([]llm.Tensor, error)
// GetParams returns the Params read using the given string.
GetParams(string) (*Params, error)
// GetModelArch returns the ModelArch selected for the given arguments.
GetModelArch(string, string, *Params) (ModelArch, error)
}
// ModelData bundles what is known about a model being converted: its path
// and name, its parameters, vocabulary and tensors, and the ModelFormat
// associated with it.
type ModelData struct {
Path string
Name string
Params *Params
Vocab *Vocab
Tensors []llm.Tensor
Format ModelFormat
}
func GetModelFormat(dirname string) (ModelFormat, error) {
files, err := filepath.Glob(filepath.Join(dirname, "*"))
// ReadSafeTensors reads the header of the safetensors file fn and returns one
// llm.Tensor per header entry, in sorted key order, along with the updated
// output offset.
//
// The file starts with a little-endian uint64 giving the byte length of a
// JSON header that follows. Each header entry is decoded into MetaData.
// Entries with an empty shape are skipped as metadata. One-dimensional
// entries get kind 0 and four bytes per element; two-dimensional entries get
// kind 1 and two bytes per element.
//
// offset is the output offset assigned to the first tensor. The returned
// offset has been advanced by the size of every tensor emitted, so it can be
// passed on when reading the next file.
//
// An error is returned when the file cannot be opened or read, the header
// length exceeds the file size, the header is not valid JSON, an entry is
// malformed (not an object, fewer than two data offsets, more than four
// dimensions), or a tensor name cannot be mapped by GetTensorName.
func ReadSafeTensors(fn string, offset uint64, params *Params) ([]llm.Tensor, uint64, error) {
	f, err := os.Open(fn)
	if err != nil {
		return nil, 0, err
	}
	defer f.Close()

	var jsonSize uint64
	if err := binary.Read(f, binary.LittleEndian, &jsonSize); err != nil {
		return nil, 0, err
	}

	// A corrupt length prefix would otherwise trigger a huge allocation (or
	// a runtime panic) in make below; the header can never be longer than
	// what follows the 8-byte prefix.
	if fi, err := f.Stat(); err == nil && fi.Mode().IsRegular() {
		if fi.Size() < 8 || jsonSize > uint64(fi.Size()-8) {
			return nil, 0, fmt.Errorf("%s: header length %d exceeds file size %d", fn, jsonSize, fi.Size())
		}
	}

	buf := make([]byte, jsonSize)
	if _, err := io.ReadFull(f, buf); err != nil {
		return nil, 0, err
	}

	d := json.NewDecoder(bytes.NewBuffer(buf))
	d.UseNumber()
	var parsed map[string]interface{}
	if err := d.Decode(&parsed); err != nil {
		return nil, 0, err
	}

	// Map iteration order is random; sort the keys so tensors and their
	// offsets come out in a deterministic order.
	keys := make([]string, 0, len(parsed))
	for k := range parsed {
		keys = append(keys, k)
	}
	slices.Sort(keys)

	slog.Info("converting layers")

	var tensors []llm.Tensor
	for _, k := range keys {
		vals, ok := parsed[k].(map[string]interface{})
		if !ok {
			return nil, 0, fmt.Errorf("%s: header entry %q is not an object", fn, k)
		}

		var data MetaData
		if err := mapstructure.Decode(vals, &data); err != nil {
			return nil, 0, err
		}

		var size uint64
		var kind uint32
		switch len(data.Shape) {
		case 0:
			// metadata
			continue
		case 1:
			// convert to float32
			kind = 0
			size = uint64(data.Shape[0] * 4)
		case 2:
			// convert to float16
			kind = 1
			size = uint64(data.Shape[0] * data.Shape[1] * 2)
		}
		// NOTE(review): entries with 3 or 4 dimensions fall through with
		// kind 0 and size 0, exactly as before — confirm whether they
		// should be rejected or sized.

		shape := []uint64{0, 0, 0, 0}
		if len(data.Shape) > len(shape) {
			return nil, 0, fmt.Errorf("%s: tensor %q has %d dimensions, at most %d are supported", fn, k, len(data.Shape), len(shape))
		}
		for i := range data.Shape {
			shape[i] = uint64(data.Shape[i])
		}

		if len(data.Offsets) < 2 {
			return nil, 0, fmt.Errorf("%s: tensor %q has %d data offsets, want 2", fn, k, len(data.Offsets))
		}

		ggufName, err := GetTensorName(k)
		if err != nil {
			// The error is returned with context rather than logged
			// here, so it is reported once by the caller.
			return nil, 0, fmt.Errorf("%s: tensor %q: %w", fn, k, err)
		}

		t := llm.Tensor{
			Name:   ggufName,
			Kind:   kind,
			Offset: offset,
			Shape:  shape,
		}

		t.WriterTo = safetensorWriterTo{
			t:        &t,
			params:   params,
			bo:       params.ByteOrder,
			filename: fn,
			start:    uint64(data.Offsets[0]),
			end:      uint64(data.Offsets[1]),
			// Tensor data begins after the 8-byte length prefix and
			// the JSON header.
			padding: 8 + jsonSize,
		}

		slog.Debug(fmt.Sprintf("%v", t))
		tensors = append(tensors, t)
		offset += size
	}
	return tensors, offset, nil
}
func GetSafeTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
var tensors []llm.Tensor
files, err := filepath.Glob(filepath.Join(dirpath, "/model-*.safetensors"))
if err != nil {
return nil, err
}
for _, fn := range files {
slog.Debug(fmt.Sprintf("file = %s", fn))
if strings.HasSuffix(fn, ".safetensors") {
return &SafetensorFormat{}, nil
} else if strings.HasSuffix(fn, ".bin") {
slog.Debug("model is torch")
return &TorchFormat{}, nil
var offset uint64
for _, f := range files {
var t []llm.Tensor
var err error
t, offset, err = ReadSafeTensors(f, offset, params)
if err != nil {
slog.Error("%v", err)
return nil, err
}
tensors = append(tensors, t...)
}
return tensors, nil
}
func GetParams(dirpath string) (*Params, error) {
f, err := os.Open(filepath.Join(dirpath, "config.json"))
if err != nil {
return nil, err
}
defer f.Close()
var params Params
d := json.NewDecoder(f)
err = d.Decode(&params)
if err != nil {
return nil, err
}
return nil, fmt.Errorf("couldn't determine model format")
params.ByteOrder = binary.LittleEndian
return &params, nil
}
// Details on gguf's tokenizer can be found at:
@@ -89,7 +206,7 @@ type Vocab struct {
Types []int32
}
func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
func LoadSentencePieceTokens(dirpath string, vocabSize int) (*Vocab, error) {
slog.Info(fmt.Sprintf("reading vocab from %s", filepath.Join(dirpath, "tokenizer.model")))
in, err := os.ReadFile(filepath.Join(dirpath, "tokenizer.model"))
if err != nil {
@@ -169,8 +286,8 @@ func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
}
slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(v.Tokens)))
if params.VocabSize > len(v.Tokens) {
missingTokens := params.VocabSize - len(v.Tokens)
if vocabSize > len(v.Tokens) {
missingTokens := vocabSize - len(v.Tokens)
slog.Warn(fmt.Sprintf("vocab is missing %d tokens", missingTokens))
for cnt := 0; cnt < missingTokens; cnt++ {
v.Tokens = append(v.Tokens, fmt.Sprintf("<dummy%05d>", cnt+1))
@@ -181,3 +298,136 @@ func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
return v, nil
}
// GetTensorName translates a safetensors tensor name into its GGUF name.
//
// Names without a layer index are looked up directly. Per-layer names of the
// form "model.layers.<N>.<suffix>" are rewritten to "blk.<N>.<gguf suffix>".
// The per-layer patterns are anchored and their dots are escaped, so only an
// exact, complete name is accepted. A name that merely contains a known
// pattern yields an error rather than a partially rewritten string.
//
// An error is returned when n has no known GGUF equivalent.
func GetTensorName(n string) (string, error) {
	directMap := map[string]string{
		"model.embed_tokens.weight": "token_embd.weight",
		"lm_head.weight":            "output.weight",
		"model.norm.weight":         "output_norm.weight",
	}

	layerMap := map[string]string{
		`^model\.layers\.(\d+)\.input_layernorm\.weight$`:          "blk.$1.attn_norm.weight",
		`^model\.layers\.(\d+)\.mlp\.down_proj\.weight$`:           "blk.$1.ffn_down.weight",
		`^model\.layers\.(\d+)\.mlp\.gate_proj\.weight$`:           "blk.$1.ffn_gate.weight",
		`^model\.layers\.(\d+)\.mlp\.up_proj\.weight$`:             "blk.$1.ffn_up.weight",
		`^model\.layers\.(\d+)\.post_attention_layernorm\.weight$`: "blk.$1.ffn_norm.weight",
		`^model\.layers\.(\d+)\.self_attn\.k_proj\.weight$`:        "blk.$1.attn_k.weight",
		`^model\.layers\.(\d+)\.self_attn\.o_proj\.weight$`:        "blk.$1.attn_output.weight",
		`^model\.layers\.(\d+)\.self_attn\.q_proj\.weight$`:        "blk.$1.attn_q.weight",
		`^model\.layers\.(\d+)\.self_attn\.v_proj\.weight$`:        "blk.$1.attn_v.weight",
	}

	if v, ok := directMap[n]; ok {
		return v, nil
	}

	// The anchored patterns are mutually exclusive, so map iteration order
	// cannot change the result.
	for pattern, replacement := range layerMap {
		re := regexp.MustCompile(pattern)
		if re.MatchString(n) {
			return re.ReplaceAllString(n, replacement), nil
		}
	}

	return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
}
// safetensorWriterTo writes the data of a single tensor read from a
// safetensors file. It is installed as the WriterTo of an llm.Tensor.
type safetensorWriterTo struct {
// t is the tensor being written; WriteTo reads its Kind to pick the output type.
t *llm.Tensor
params *Params
// bo is the byte order passed to binary.Write.
bo ByteOrder
// filename is the safetensors file that WriteTo opens.
filename string
// WriteTo seeks to padding+start and reads end-start bytes.
start, end, padding uint64
// handler, when non-nil, is called by WriteTo after the seek and replaces
// the default conversion.
handler func(w io.Writer, r safetensorWriterTo, f *os.File) error
}
// WriteTo opens the tensor's source file, seeks to the start of the tensor's
// data and writes the converted data to w.
//
// If a handler is set, it is given the positioned file and does the writing
// itself. Otherwise the data is read in chunks, decoded as bfloat16 into
// float32, and written either as float32 (Kind 0) or as float16 (Kind 1)
// using the writer's byte order.
//
// The returned byte count is always 0; callers must rely on err only. An
// error is returned if the file ends before end-start bytes were read.
func (r safetensorWriterTo) WriteTo(w io.Writer) (n int64, err error) {
	f, err := os.Open(r.filename)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	if _, err = f.Seek(int64(r.padding+r.start), 0); err != nil {
		return 0, err
	}

	// use the handler if one is present
	if r.handler != nil {
		return 0, r.handler(w, r, f)
	}

	// end-start is unsigned; guard against wrap-around on bad offsets.
	if r.end < r.start {
		return 0, fmt.Errorf("invalid data offsets for '%s': end %d is before start %d", r.t.Name, r.end, r.start)
	}

	remaining := r.end - r.start

	// One read buffer is reused for every chunk.
	buf := make([]byte, min(uint64(10240), remaining))
	for remaining > 0 {
		data := buf[:min(uint64(len(buf)), remaining)]
		if _, err := io.ReadFull(f, data); err != nil {
			// Running out of file with data still owed means the file is
			// truncated; report it instead of emitting a short tensor.
			if err == io.EOF {
				err = io.ErrUnexpectedEOF
			}
			return 0, err
		}
		remaining -= uint64(len(data))

		// convert bfloat16 -> ieee float32
		tDataF32 := bfloat16.DecodeFloat32(data)

		switch r.t.Kind {
		case 0:
			if err := binary.Write(w, r.bo, tDataF32); err != nil {
				return 0, err
			}
		case 1:
			// convert float32 -> float16
			tempBuf := make([]uint16, len(data)/2)
			for cnt, v := range tDataF32 {
				tDataF16 := float16.Fromfloat32(v)
				tempBuf[cnt] = uint16(tDataF16)
			}
			// same byte order as the float32 branch
			if err := binary.Write(w, r.bo, tempBuf); err != nil {
				return 0, err
			}
		}
	}

	return 0, nil
}
// GetModelArchFromParams picks the converter for the single architecture
// listed in params.Architectures and initialises it with name, dirPath and
// params. It fails when no architecture is listed, when more than one is
// listed, or when the architecture is not one of the supported ones.
func GetModelArchFromParams(name, dirPath string, params *Params) (ModelArch, error) {
	archs := params.Architectures
	if len(archs) == 0 {
		return nil, fmt.Errorf("No architecture specified to convert")
	}
	if len(archs) != 1 {
		return nil, fmt.Errorf("Unknown error")
	}

	base := ModelData{
		Name:   name,
		Path:   dirPath,
		Params: params,
	}

	switch arch := archs[0]; arch {
	case "MistralForCausalLM":
		return &MistralModel{base}, nil
	case "GemmaForCausalLM":
		return &GemmaModel{base}, nil
	default:
		return nil, fmt.Errorf("Models based on '%s' are not yet supported", arch)
	}
}

View File

@@ -65,14 +65,13 @@ func addOnes(data []float32, vectorSize int) ([]float32, error) {
}
func (m *GemmaModel) GetTensors() error {
t, err := m.Format.GetTensors(m.Path, m.Params)
t, err := GetSafeTensors(m.Path, m.Params)
if err != nil {
return err
}
slog.Debug(fmt.Sprintf("Total tensors: %d", len(t)))
m.Tensors = []llm.Tensor{}
for _, l := range t {
if strings.HasSuffix(l.Name, "norm.weight") {
wt := l.WriterTo.(safetensorWriterTo)
@@ -86,7 +85,7 @@ func (m *GemmaModel) GetTensors() error {
}
func (m *GemmaModel) LoadVocab() error {
v, err := LoadSentencePieceTokens(m.Path, m.Params)
v, err := LoadSentencePieceTokens(m.Path, m.Params.VocabSize)
if err != nil {
return err
}

View File

@@ -1,176 +0,0 @@
package convert
import (
"encoding/binary"
"fmt"
"io"
"log/slog"
"os"
"regexp"
"strings"
"github.com/nlpodyssey/gopickle/pytorch"
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
"github.com/x448/float16"
"github.com/ollama/ollama/llm"
)
// LlamaModel is the converter for Llama models. It embeds ModelData and adds
// the GetTensors, LoadVocab and WriteGGUF methods.
type LlamaModel struct {
ModelData
}
// llamaLayerHandler writes an attention query or key weight tensor to w after
// repacking it with llamaRepack.
//
// The head count comes from params.AttentionHeads for "attn_q" layers and
// from params.KeyValHeads for "attn_k" layers, falling back to AttentionHeads
// when KeyValHeads is zero. The tensor's half-precision storage is converted
// to float16 bit patterns before repacking.
//
// It returns an error for any other layer name, for storage that is not
// *pytorch.HalfStorage, and for repacking or write failures.
func llamaLayerHandler(w io.Writer, r torchWriterTo) error {
	slog.Debug(fmt.Sprintf("repacking layer '%s'", r.t.Name))

	// A checked assertion reports unexpected storage as an error instead of
	// panicking part-way through a conversion.
	storage, ok := r.storage.(*pytorch.HalfStorage)
	if !ok {
		return fmt.Errorf("unexpected storage type %T for layer '%s'", r.storage, r.t.Name)
	}

	var heads uint32
	switch {
	case strings.Contains(r.t.Name, "attn_q"):
		heads = uint32(r.params.AttentionHeads)
	case strings.Contains(r.t.Name, "attn_k"):
		heads = uint32(r.params.KeyValHeads)
		if heads == 0 {
			heads = uint32(r.params.AttentionHeads)
		}
	default:
		return fmt.Errorf("unknown layer type")
	}

	slog.Debug(fmt.Sprintf("heads = %d", heads))

	tData := make([]uint16, len(storage.Data))
	for cnt, v := range storage.Data {
		tData[cnt] = uint16(float16.Fromfloat32(v))
	}

	tData, err := llamaRepack(tData, int(heads), r.t.Shape)
	if err != nil {
		return err
	}

	return binary.Write(w, r.bo, tData)
}
// llamaRepack rearranges a two-dimensional tensor so that its rows are ordered
// the way gguf expects for attention weights.
//
// data is viewed as a shape[0] x shape[1] matrix, reshaped to
// (heads, 2, shape[0]/heads/2, shape[1]), has axes 1 and 2 swapped, is
// reshaped back and transposed, and is finally flattened row by row.
//
// An error is returned when heads is not positive, when shape has fewer than
// two dimensions, or when any tensor operation fails.
func llamaRepack(data []uint16, heads int, shape []uint64) ([]uint16, error) {
	// heads is used as a divisor below; zero would panic.
	if heads <= 0 {
		return nil, fmt.Errorf("invalid head count %d", heads)
	}
	if len(shape) < 2 {
		return nil, fmt.Errorf("expected a shape with at least 2 dimensions, got %d", len(shape))
	}

	n := tensor.New(tensor.WithShape(int(shape[0]), int(shape[1])), tensor.WithBacking(data))
	origShape := n.Shape().Clone()

	// reshape the tensor and swap axes 1 and 2 to unpack the layer for gguf
	if err := n.Reshape(heads, 2, origShape[0]/heads/2, origShape[1]); err != nil {
		return nil, err
	}

	if err := n.T(0, 2, 1, 3); err != nil {
		return nil, err
	}

	if err := n.Reshape(origShape...); err != nil {
		return nil, err
	}

	if err := n.Transpose(); err != nil {
		return nil, err
	}

	newN, err := native.SelectU16(n, 1)
	if err != nil {
		return nil, err
	}

	// The flattened result holds the same number of elements as the input.
	fullTensor := make([]uint16, 0, len(data))
	for _, v := range newN {
		fullTensor = append(fullTensor, v...)
	}
	return fullTensor, nil
}
// GetTensors loads the model's tensors through m.Format and stores them in
// m.Tensors. Tensors named like "blk.<N>.attn_q.weight" or
// "blk.<N>.attn_k.weight" get llamaLayerHandler installed as their write
// handler.
func (m *LlamaModel) GetTensors() error {
	loaded, err := m.Format.GetTensors(m.Path, m.Params)
	if err != nil {
		return err
	}

	m.Tensors = []llm.Tensor{}

	attnQK, err := regexp.Compile(`^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`)
	if err != nil {
		return err
	}

	for _, lt := range loaded {
		if attnQK.MatchString(lt.Name) {
			slog.Debug(fmt.Sprintf("setting handler for: %s", lt.Name))
			// WriterTo holds a value, so modify a copy and store it back.
			writer := lt.WriterTo.(torchWriterTo)
			writer.handler = llamaLayerHandler
			lt.WriterTo = writer
		}
		m.Tensors = append(m.Tensors, lt)
	}

	return nil
}
// LoadVocab reads the SentencePiece vocabulary from the model directory and
// stores it in m.Vocab.
func (m *LlamaModel) LoadVocab() error {
	slog.Debug("loading vocab")

	vocab, err := LoadSentencePieceTokens(m.Path, m.Params)
	if err != nil {
		return err
	}

	slog.Debug("vocab loaded")
	m.Vocab = vocab
	return nil
}
// WriteGGUF encodes the model's metadata and tensors into a new temporary
// GGUF file and returns the file's path.
//
// The temporary file is removed again if encoding or closing it fails, so a
// returned path always refers to a completely written file. An error is also
// returned when the attention head count is zero, because the rope dimension
// count is derived by dividing by it.
func (m *LlamaModel) WriteGGUF() (string, error) {
	// Integer division by zero panics; fail cleanly instead.
	if m.Params.AttentionHeads == 0 {
		return "", fmt.Errorf("attention head count must not be zero")
	}

	kv := llm.KV{
		"general.architecture":                   "llama",
		"general.name":                           m.Name,
		"llama.vocab_size":                       uint32(len(m.Vocab.Tokens)),
		"llama.context_length":                   uint32(m.Params.ContextSize),
		"llama.embedding_length":                 uint32(m.Params.HiddenSize),
		"llama.block_count":                      uint32(m.Params.HiddenLayers),
		"llama.feed_forward_length":              uint32(m.Params.IntermediateSize),
		"llama.rope.dimension_count":             uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
		"llama.attention.head_count":             uint32(m.Params.AttentionHeads),
		"llama.attention.head_count_kv":          uint32(m.Params.KeyValHeads),
		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
		"general.file_type":                      uint32(1),
		"tokenizer.ggml.model":                   "llama",

		"tokenizer.ggml.tokens":     m.Vocab.Tokens,
		"tokenizer.ggml.scores":     m.Vocab.Scores,
		"tokenizer.ggml.token_type": m.Vocab.Types,

		"tokenizer.ggml.bos_token_id":     uint32(m.Params.BoSTokenID),
		"tokenizer.ggml.eos_token_id":     uint32(m.Params.EoSTokenID),
		"tokenizer.ggml.unknown_token_id": uint32(0),
		"tokenizer.ggml.add_bos_token":    true,
		"tokenizer.ggml.add_eos_token":    false,
	}

	f, err := os.CreateTemp("", "ollama-gguf")
	if err != nil {
		return "", err
	}

	mod := llm.NewGGUFV3(m.Params.ByteOrder)
	if err := mod.Encode(f, kv, m.Tensors); err != nil {
		// Best-effort cleanup; the encode error is the one worth reporting.
		f.Close()
		os.Remove(f.Name())
		return "", err
	}

	// A failed Close can mean the data never reached disk.
	if err := f.Close(); err != nil {
		os.Remove(f.Name())
		return "", err
	}

	slog.Debug(fmt.Sprintf("gguf file = %s", f.Name()))

	return f.Name(), nil
}

View File

@@ -97,7 +97,7 @@ func repack(data []uint16, heads int, shape []uint64) ([]uint16, error) {
}
func (m *MistralModel) GetTensors() error {
t, err := m.Format.GetTensors(m.Path, m.Params)
t, err := GetSafeTensors(m.Path, m.Params)
if err != nil {
return err
}
@@ -124,7 +124,7 @@ func (m *MistralModel) GetTensors() error {
}
func (m *MistralModel) LoadVocab() error {
v, err := LoadSentencePieceTokens(m.Path, m.Params)
v, err := LoadSentencePieceTokens(m.Path, m.Params.VocabSize)
if err != nil {
return err
}

View File

@@ -1,304 +0,0 @@
package convert
import (
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"regexp"
"slices"
"github.com/d4l3k/go-bfloat16"
"github.com/mitchellh/mapstructure"
"github.com/x448/float16"
"github.com/ollama/ollama/llm"
)
// safetensorWriterTo writes the data of a single tensor read from a
// safetensors file. It is installed as the WriterTo of an llm.Tensor.
type safetensorWriterTo struct {
// t is the tensor being written; WriteTo reads its Kind to pick the output type.
t *llm.Tensor
params *Params
// bo is the byte order passed to binary.Write.
bo ByteOrder
// filename is the safetensors file that WriteTo opens.
filename string
// WriteTo seeks to padding+start and reads end-start bytes.
start, end, padding uint64
// handler, when non-nil, is called by WriteTo after the seek and replaces
// the default conversion.
handler func(w io.Writer, r safetensorWriterTo, f *os.File) error
}
// tensorMetaData is one entry of a safetensors JSON header, decoded with
// mapstructure from the "dtype", "shape" and "data_offsets" keys.
type tensorMetaData struct {
Type string `mapstructure:"dtype"`
// Shape is empty for entries that are treated as metadata.
Shape []int `mapstructure:"shape"`
// Offsets holds the start and end of the tensor's data; readTensors reads
// elements 0 and 1.
Offsets []int `mapstructure:"data_offsets"`
}
// SafetensorFormat loads model parameters and tensors from safetensors files.
type SafetensorFormat struct{}
// GetTensors reads every "model-*.safetensors" file in dirpath and returns the
// combined list of tensors. Offsets are assigned continuously across files,
// in the order filepath.Glob returns them.
//
// An error is returned when no matching file exists or when any file cannot
// be read.
func (m *SafetensorFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
	slog.Debug("getting tensor data")

	files, err := filepath.Glob(filepath.Join(dirpath, "/model-*.safetensors"))
	if err != nil {
		return nil, err
	}

	// Glob reports no error for zero matches; without this check the
	// conversion would silently produce a model with no tensors.
	if len(files) == 0 {
		return nil, fmt.Errorf("no model-*.safetensors files found in '%s'", dirpath)
	}

	var tensors []llm.Tensor
	var offset uint64
	for _, f := range files {
		var t []llm.Tensor
		var err error
		t, offset, err = m.readTensors(f, offset, params)
		if err != nil {
			// slog takes a message plus key/value pairs, not a format string.
			slog.Error(err.Error())
			return nil, err
		}
		tensors = append(tensors, t...)
	}

	slog.Debug(fmt.Sprintf("all tensors = %d", len(tensors)))
	return tensors, nil
}
// readTensors reads the header of the safetensors file fn and returns one
// llm.Tensor for every header entry with a one- or two-dimensional shape,
// visiting entries in sorted key order. Entries with an empty shape are
// treated as metadata and skipped.
//
// offset is the position assigned to the first tensor. The returned offset is
// the running total after the last tensor, so it can be fed to the next call.
// One-dimensional tensors are sized as float32 (kind 0) and two-dimensional
// tensors as float16 (kind 1).
//
// An error is returned when the file cannot be read or parsed, when a header
// entry is malformed (not an object, unsupported rank, missing data offsets),
// or when a tensor name has no GGUF equivalent.
func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params) ([]llm.Tensor, uint64, error) {
	f, err := os.Open(fn)
	if err != nil {
		return nil, 0, err
	}
	defer f.Close()

	// The file starts with a little-endian uint64 giving the JSON header size.
	var jsonSize uint64
	if err := binary.Read(f, binary.LittleEndian, &jsonSize); err != nil {
		return nil, 0, err
	}

	buf := make([]byte, jsonSize)
	if _, err = io.ReadFull(f, buf); err != nil {
		return nil, 0, err
	}

	d := json.NewDecoder(bytes.NewBuffer(buf))
	d.UseNumber()

	var parsed map[string]interface{}
	if err = d.Decode(&parsed); err != nil {
		return nil, 0, err
	}

	// Sort the keys so tensors are laid out in a deterministic order.
	var keys []string
	for k := range parsed {
		keys = append(keys, k)
	}
	slices.Sort(keys)

	slog.Info("converting layers")

	var tensors []llm.Tensor
	for _, k := range keys {
		// A checked assertion turns a malformed header into an error
		// instead of a panic.
		vals, ok := parsed[k].(map[string]interface{})
		if !ok {
			return nil, 0, fmt.Errorf("header entry '%s' in '%s' is not an object", k, fn)
		}

		var data tensorMetaData
		if err = mapstructure.Decode(vals, &data); err != nil {
			slog.Error("couldn't decode properly")
			return nil, 0, err
		}

		slog.Debug(fmt.Sprintf("metadata = %#v", data))

		var size uint64
		var kind uint32
		switch len(data.Shape) {
		case 0:
			// metadata
			continue
		case 1:
			// convert to float32
			kind = 0
			size = uint64(data.Shape[0] * 4)
		case 2:
			// convert to float16
			kind = 1
			size = uint64(data.Shape[0] * data.Shape[1] * 2)
		default:
			// Without this case a higher-rank tensor would be recorded
			// with size 0 and corrupt every following offset.
			return nil, 0, fmt.Errorf("tensor '%s' has unsupported rank %d", k, len(data.Shape))
		}

		if len(data.Offsets) != 2 {
			return nil, 0, fmt.Errorf("tensor '%s' has %d data offsets, expected 2", k, len(data.Offsets))
		}

		ggufName, err := m.GetLayerName(k)
		if err != nil {
			// slog takes a message plus key/value pairs, not a format string.
			slog.Error(err.Error())
			return nil, 0, err
		}

		shape := []uint64{0, 0, 0, 0}
		for i := range data.Shape {
			shape[i] = uint64(data.Shape[i])
		}

		t := llm.Tensor{
			Name:   ggufName,
			Kind:   kind,
			Offset: offset,
			Shape:  shape[:],
		}

		t.WriterTo = safetensorWriterTo{
			t:        &t,
			params:   params,
			bo:       params.ByteOrder,
			filename: fn,
			start:    uint64(data.Offsets[0]),
			end:      uint64(data.Offsets[1]),
			// tensor data begins after the 8-byte size prefix and the header
			padding: 8 + jsonSize,
		}

		tensors = append(tensors, t)
		offset += size
	}

	slog.Debug(fmt.Sprintf("total tensors for file = %d", len(tensors)))
	slog.Debug(fmt.Sprintf("offset = %d", offset))

	return tensors, offset, nil
}
// GetParams decodes "config.json" from dirpath into a Params value and sets
// its byte order to little-endian.
func (m *SafetensorFormat) GetParams(dirpath string) (*Params, error) {
	cfg, err := os.Open(filepath.Join(dirpath, "config.json"))
	if err != nil {
		return nil, err
	}
	defer cfg.Close()

	params := new(Params)
	if err := json.NewDecoder(cfg).Decode(params); err != nil {
		return nil, err
	}

	params.ByteOrder = binary.LittleEndian
	return params, nil
}
// GetLayerName translates a safetensors tensor name into its GGUF name.
//
// Names without a layer index are looked up directly. Per-layer names of the
// form "model.layers.<N>.<suffix>" are rewritten to "blk.<N>.<gguf suffix>".
// The per-layer patterns are anchored and their dots are escaped, so only an
// exact, complete name is accepted. A name that merely contains a known
// pattern yields an error rather than a partially rewritten string.
//
// An error is returned when n has no known GGUF equivalent.
func (m *SafetensorFormat) GetLayerName(n string) (string, error) {
	directMap := map[string]string{
		"model.embed_tokens.weight": "token_embd.weight",
		"lm_head.weight":            "output.weight",
		"model.norm.weight":         "output_norm.weight",
	}

	tMap := map[string]string{
		`^model\.layers\.(\d+)\.input_layernorm\.weight$`:          "blk.$1.attn_norm.weight",
		`^model\.layers\.(\d+)\.mlp\.down_proj\.weight$`:           "blk.$1.ffn_down.weight",
		`^model\.layers\.(\d+)\.mlp\.gate_proj\.weight$`:           "blk.$1.ffn_gate.weight",
		`^model\.layers\.(\d+)\.mlp\.up_proj\.weight$`:             "blk.$1.ffn_up.weight",
		`^model\.layers\.(\d+)\.post_attention_layernorm\.weight$`: "blk.$1.ffn_norm.weight",
		`^model\.layers\.(\d+)\.self_attn\.k_proj\.weight$`:        "blk.$1.attn_k.weight",
		`^model\.layers\.(\d+)\.self_attn\.o_proj\.weight$`:        "blk.$1.attn_output.weight",
		`^model\.layers\.(\d+)\.self_attn\.q_proj\.weight$`:        "blk.$1.attn_q.weight",
		`^model\.layers\.(\d+)\.self_attn\.v_proj\.weight$`:        "blk.$1.attn_v.weight",
	}

	if v, ok := directMap[n]; ok {
		return v, nil
	}

	// The anchored patterns are mutually exclusive, so map iteration order
	// cannot change the result.
	for pattern, replacement := range tMap {
		re := regexp.MustCompile(pattern)
		if re.MatchString(n) {
			return re.ReplaceAllString(n, replacement), nil
		}
	}

	return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
}
// WriteTo opens the tensor's source file, seeks to the start of the tensor's
// data and writes the converted data to w.
//
// If a handler is set, it is given the positioned file and does the writing
// itself. Otherwise the data is read in chunks, decoded as bfloat16 into
// float32, and written either as float32 (Kind 0) or as float16 (Kind 1)
// using the writer's byte order.
//
// The returned byte count is always 0; callers must rely on err only. An
// error is returned if the file ends before end-start bytes were read.
func (r safetensorWriterTo) WriteTo(w io.Writer) (n int64, err error) {
	f, err := os.Open(r.filename)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	if _, err = f.Seek(int64(r.padding+r.start), 0); err != nil {
		return 0, err
	}

	// use the handler if one is present
	if r.handler != nil {
		return 0, r.handler(w, r, f)
	}

	// end-start is unsigned; guard against wrap-around on bad offsets.
	if r.end < r.start {
		return 0, fmt.Errorf("invalid data offsets for '%s': end %d is before start %d", r.t.Name, r.end, r.start)
	}

	remaining := r.end - r.start

	// One read buffer is reused for every chunk.
	buf := make([]byte, min(uint64(10240), remaining))
	for remaining > 0 {
		data := buf[:min(uint64(len(buf)), remaining)]
		if _, err := io.ReadFull(f, data); err != nil {
			// Running out of file with data still owed means the file is
			// truncated; report it instead of emitting a short tensor.
			if err == io.EOF {
				err = io.ErrUnexpectedEOF
			}
			return 0, err
		}
		remaining -= uint64(len(data))

		// convert bfloat16 -> ieee float32
		tDataF32 := bfloat16.DecodeFloat32(data)

		switch r.t.Kind {
		case 0:
			if err := binary.Write(w, r.bo, tDataF32); err != nil {
				return 0, err
			}
		case 1:
			// convert float32 -> float16
			tempBuf := make([]uint16, len(data)/2)
			for cnt, v := range tDataF32 {
				tDataF16 := float16.Fromfloat32(v)
				tempBuf[cnt] = uint16(tDataF16)
			}
			if err := binary.Write(w, r.bo, tempBuf); err != nil {
				return 0, err
			}
		}
	}

	return 0, nil
}
// GetModelArch picks the converter for the single architecture listed in
// params.Architectures and initialises it with name, dirPath, params and this
// format. It fails when no architecture is listed, when more than one is
// listed, or when the architecture is not one of the supported ones.
func (m *SafetensorFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
	archs := params.Architectures
	if len(archs) == 0 {
		return nil, fmt.Errorf("No architecture specified to convert")
	}
	if len(archs) != 1 {
		return nil, fmt.Errorf("Unknown error")
	}

	base := ModelData{
		Name:   name,
		Path:   dirPath,
		Params: params,
		Format: m,
	}

	switch arch := archs[0]; arch {
	case "MistralForCausalLM":
		return &MistralModel{base}, nil
	case "GemmaForCausalLM":
		return &GemmaModel{base}, nil
	default:
		return nil, fmt.Errorf("Models based on '%s' are not yet supported", arch)
	}
}

View File

@@ -1,286 +0,0 @@
package convert
import (
"encoding/binary"
"encoding/json"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/nlpodyssey/gopickle/pytorch"
"github.com/nlpodyssey/gopickle/types"
"github.com/x448/float16"
"github.com/ollama/ollama/llm"
)
// torchWriterTo writes the data of a single tensor loaded from a PyTorch
// checkpoint. It is installed as the WriterTo of an llm.Tensor.
type torchWriterTo struct {
// t is the tensor being written; WriteTo reads its Name and Kind.
t *llm.Tensor
params *Params
// bo is the byte order passed to binary.Write.
bo ByteOrder
// storage is the tensor's source storage taken from the unpickled checkpoint.
storage pytorch.StorageInterface
// handler, when non-nil, replaces the default behaviour of WriteTo.
handler func(w io.Writer, r torchWriterTo) error
}
// TorchFormat loads model parameters and tensors from PyTorch checkpoint files.
type TorchFormat struct{}
// GetTensors unpickles every "pytorch_model-*.bin" file in dirpath and returns
// one llm.Tensor per entry with a one- or two-dimensional shape. Entries whose
// name ends in "self_attn.rotary_emb.inv_freq" and entries with an empty shape
// are skipped.
//
// Offsets are assigned continuously across files. One-dimensional tensors are
// sized as float32 (kind 0) and two-dimensional tensors as float16 (kind 1).
//
// An error is returned when no matching file exists, when a file cannot be
// unpickled or does not have the expected structure, when a tensor has an
// unsupported rank, or when a tensor name has no GGUF equivalent.
func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
	slog.Debug("getting torch tensors")

	files, err := filepath.Glob(filepath.Join(dirpath, "pytorch_model-*.bin"))
	if err != nil {
		return nil, err
	}

	// Glob reports no error for zero matches, so check explicitly.
	if len(files) == 0 {
		slog.Error("didn't find any torch files")
		return nil, fmt.Errorf("no pytorch_model-*.bin files found in '%s'", dirpath)
	}

	var offset uint64

	var tensors []llm.Tensor
	for _, fn := range files {
		m, err := pytorch.Load(fn)
		if err != nil {
			slog.Error(fmt.Sprintf("error unpickling: %q", err))
			return []llm.Tensor{}, err
		}

		// Checked assertions report an unexpected checkpoint layout as an
		// error instead of panicking.
		dict, ok := m.(*types.Dict)
		if !ok {
			return nil, fmt.Errorf("unexpected top-level object %T in '%s'", m, fn)
		}

		for _, k := range dict.Keys() {
			name, ok := k.(string)
			if !ok {
				return nil, fmt.Errorf("unexpected key type %T in '%s'", k, fn)
			}

			if strings.HasSuffix(name, "self_attn.rotary_emb.inv_freq") {
				continue
			}

			t, _ := dict.Get(k)
			pt, ok := t.(*pytorch.Tensor)
			if !ok {
				return nil, fmt.Errorf("unexpected value type %T for '%s'", t, name)
			}
			tshape := pt.Size

			var size uint64
			var kind uint32
			switch len(tshape) {
			case 0:
				continue
			case 1:
				// convert to float32
				kind = 0
				size = uint64(tshape[0] * 4)
			case 2:
				// convert to float16
				kind = 1
				size = uint64(tshape[0] * tshape[1] * 2)
			default:
				// Without this case a higher-rank tensor would be recorded
				// with size 0 and corrupt every following offset.
				return nil, fmt.Errorf("tensor '%s' has unsupported rank %d", name, len(tshape))
			}

			ggufName, err := tf.GetLayerName(name)
			if err != nil {
				// slog takes a message plus key/value pairs, not a format string.
				slog.Error(err.Error())
				return nil, err
			}
			slog.Debug(fmt.Sprintf("finding name for '%s' -> '%s'", name, ggufName))

			shape := []uint64{0, 0, 0, 0}
			for i := range tshape {
				shape[i] = uint64(tshape[i])
			}

			tensor := llm.Tensor{
				Name:   ggufName,
				Kind:   kind,
				Offset: offset, // calculate the offset
				Shape:  shape[:],
			}

			tensor.WriterTo = torchWriterTo{
				t:       &tensor,
				params:  params,
				bo:      params.ByteOrder,
				storage: pt.Source,
			}

			tensors = append(tensors, tensor)
			offset += size
		}
	}

	return tensors, nil
}
// getAltParams builds a Params value from "params.json" in dirpath.
//
// The context size is not read from the file. It is set to 16384 when
// rope_theta is 1000000, to 4096 when norm_eps is 1e-06, and to 2048
// otherwise. The byte order is always little-endian.
func getAltParams(dirpath string) (*Params, error) {
	f, err := os.Open(filepath.Join(dirpath, "params.json"))
	if err != nil {
		slog.Error("no params.json")
		return nil, err
	}
	defer f.Close()

	// raw mirrors the keys used in params.json.
	var raw struct {
		HiddenSize     int     `json:"dim"`
		AttentionHeads int     `json:"n_heads"`
		KeyValHeads    int     `json:"n_kv_heads"`
		HiddenLayers   int     `json:"n_layers"`
		RopeTheta      int     `json:"rope_theta"`
		NormEPS        float64 `json:"norm_eps"`
	}

	if err := json.NewDecoder(f).Decode(&raw); err != nil {
		return nil, err
	}

	params := &Params{
		HiddenSize:     raw.HiddenSize,
		AttentionHeads: raw.AttentionHeads,
		KeyValHeads:    raw.KeyValHeads,
		HiddenLayers:   raw.HiddenLayers,
		NormEPS:        raw.NormEPS,
	}

	if raw.RopeTheta == 1000000 {
		// Codellama
		params.ContextSize = 16384
	} else if raw.NormEPS == 1e-06 {
		// llama2
		slog.Debug("Found llama2 - setting context size to 4096")
		params.ContextSize = 4096
	} else {
		params.ContextSize = 2048
	}

	params.ByteOrder = binary.LittleEndian
	return params, nil
}
// GetParams decodes "config.json" from dirpath into a Params value and sets
// its byte order to little-endian. When config.json does not exist, the
// parameters are read from "params.json" via getAltParams instead.
func (m *TorchFormat) GetParams(dirpath string) (*Params, error) {
	f, err := os.Open(filepath.Join(dirpath, "config.json"))
	if err != nil {
		if os.IsNotExist(err) {
			// try params.json instead
			return getAltParams(dirpath)
		}
		return nil, err
	}
	// The file handle was previously never released.
	defer f.Close()

	var params Params

	d := json.NewDecoder(f)
	if err := d.Decode(&params); err != nil {
		return nil, err
	}

	params.ByteOrder = binary.LittleEndian
	return &params, nil
}
// GetLayerName translates a PyTorch checkpoint tensor name into its GGUF name.
//
// Names without a layer index are looked up directly. Per-layer names, in
// either the "layers.<N>.…" or the "model.layers.<N>.…" spelling, are
// rewritten to "blk.<N>.<gguf suffix>". The per-layer patterns are anchored
// and their dots are escaped, so only an exact, complete name is accepted. A
// name that merely contains a known pattern yields an error rather than a
// partially rewritten string.
//
// An error is returned when n has no known GGUF equivalent.
func (m *TorchFormat) GetLayerName(n string) (string, error) {
	directMap := map[string]string{
		"tok_embeddings.weight":     "token_embd.weight",
		"output.weight":             "output.weight",
		"norm.weight":               "output_norm.weight",
		"rope.freqs":                "rope_freqs.weight",
		"model.embed_tokens.weight": "token_embd.weight",
		"lm_head.weight":            "output.weight",
		"model.norm.weight":         "output_norm.weight",
	}

	lMap := map[string]string{
		`^layers\.(\d+)\.attention_norm\.weight$`:                  "blk.$1.attn_norm.weight",
		`^layers\.(\d+)\.attention_output_norm\.weight$`:           "blk.$1.attn_norm.weight",
		`^layers\.(\d+)\.feed_forward\.w2\.weight$`:                "blk.$1.ffn_down.weight",
		`^layers\.(\d+)\.feed_forward\.w1\.weight$`:                "blk.$1.ffn_gate.weight",
		`^layers\.(\d+)\.feed_forward\.w3\.weight$`:                "blk.$1.ffn_up.weight",
		`^layers\.(\d+)\.ffn_norm\.weight$`:                        "blk.$1.ffn_norm.weight",
		`^layers\.(\d+)\.attention\.wk\.weight$`:                   "blk.$1.attn_k.weight",
		`^layers\.(\d+)\.attention\.wo\.weight$`:                   "blk.$1.attn_output.weight",
		`^layers\.(\d+)\.attention\.wq\.weight$`:                   "blk.$1.attn_q.weight",
		`^layers\.(\d+)\.attention\.wv\.weight$`:                   "blk.$1.attn_v.weight",
		`^model\.layers\.(\d+)\.input_layernorm\.weight$`:          "blk.$1.attn_norm.weight",
		`^model\.layers\.(\d+)\.mlp\.down_proj\.weight$`:           "blk.$1.ffn_down.weight",
		`^model\.layers\.(\d+)\.mlp\.gate_proj\.weight$`:           "blk.$1.ffn_gate.weight",
		`^model\.layers\.(\d+)\.mlp\.up_proj\.weight$`:             "blk.$1.ffn_up.weight",
		`^model\.layers\.(\d+)\.post_attention_layernorm\.weight$`: "blk.$1.ffn_norm.weight",
		`^model\.layers\.(\d+)\.self_attn\.k_proj\.weight$`:        "blk.$1.attn_k.weight",
		`^model\.layers\.(\d+)\.self_attn\.o_proj\.weight$`:        "blk.$1.attn_output.weight",
		`^model\.layers\.(\d+)\.self_attn\.q_proj\.weight$`:        "blk.$1.attn_q.weight",
		`^model\.layers\.(\d+)\.self_attn\.v_proj\.weight$`:        "blk.$1.attn_v.weight",
	}

	if v, ok := directMap[n]; ok {
		return v, nil
	}

	// The anchored patterns are mutually exclusive, so map iteration order
	// cannot change the result.
	for pattern, replacement := range lMap {
		re := regexp.MustCompile(pattern)
		if re.MatchString(n) {
			return re.ReplaceAllString(n, replacement), nil
		}
	}

	return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
}
// WriteTo writes the tensor's data to w.
//
// A handler, when set, does all the work. Otherwise half-precision storage is
// written as float32 (Kind 0) or converted to float16 (Kind 1). Float storage
// is skipped with a warning, and any other storage type or Kind writes
// nothing. The returned byte count is always 0.
func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) {
	// use the handler if one is present
	if r.handler != nil {
		return 0, r.handler(w, r)
	}

	switch storage := r.storage.(type) {
	case *pytorch.FloatStorage:
		slog.Warn(fmt.Sprintf("unexpected storage found for layer '%s'; skipping", r.t.Name))
		return 0, nil
	case *pytorch.HalfStorage:
		values := storage.Data
		if r.t.Kind == 0 {
			slog.Debug(fmt.Sprintf("%35s F32 (%d)", r.t.Name, len(values)))
			if err := binary.Write(w, r.bo, values); err != nil {
				return 0, err
			}
		} else if r.t.Kind == 1 {
			halves := make([]uint16, len(values))
			for i := range values {
				halves[i] = uint16(float16.Fromfloat32(values[i]))
			}
			slog.Debug(fmt.Sprintf("%35s F16 (%d)", r.t.Name, len(halves)))
			if err := binary.Write(w, r.bo, halves); err != nil {
				return 0, err
			}
		}
	}

	return 0, nil
}
// GetModelArch picks the converter for the single architecture listed in
// params.Architectures and initialises it with name, dirPath, params and this
// format. Only "LlamaForCausalLM" is handled. It fails when no architecture
// is listed, when more than one is listed, or for any other architecture.
func (m *TorchFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
	archs := params.Architectures
	if len(archs) == 0 {
		return nil, fmt.Errorf("No architecture specified to convert")
	}
	if len(archs) != 1 {
		return nil, fmt.Errorf("Unknown error")
	}

	if arch := archs[0]; arch != "LlamaForCausalLM" {
		return nil, fmt.Errorf("Models based on '%s' are not yet supported", arch)
	}

	return &LlamaModel{
		ModelData{
			Name:   name,
			Path:   dirPath,
			Params: params,
			Format: m,
		},
	}, nil
}

View File

@@ -23,13 +23,7 @@ export OLLAMA_DEBUG=1
Get the required libraries and build the native LLM code:
```bash
go generate ./...
```
Then build ollama:
```bash
go build .
go run build.go
```
Now you can run `ollama`:
@@ -38,6 +32,16 @@ Now you can run `ollama`:
./ollama
```
### Rebuilding the native code
If at any point you need to rebuild the native code, you can run the
build.go script again using the `-f` flag to force a rebuild, and,
optionally, the `-d` flag to skip building the Go binary:
```bash
go run build.go -f -d
```
### Linux
#### Linux CUDA (NVIDIA)
@@ -53,16 +57,10 @@ specifying an environment variable `CUDA_LIB_DIR` to the location of the shared
libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize
the set of target CUDA architectures by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70")
Then generate dependencies:
```
go generate ./...
```
Then build the binary:
```
go build .
go run build.go
```
#### Linux ROCm (AMD)
@@ -78,21 +76,17 @@ install (typically `/opt/rocm`), and `CLBlast_DIR` to the location of the
CLBlast install (typically `/usr/lib/cmake/CLBlast`). You can also customize
the AMD GPU targets by setting AMDGPU_TARGETS (e.g. `AMDGPU_TARGETS="gfx1101;gfx1102"`)
```
go generate ./...
```
Then build the binary:
```
go build .
go run build.go
```
ROCm requires elevated privileges to access the GPU at runtime. On most distros you can add your user account to the `render` group, or run as root.
#### Advanced CPU Settings
By default, running `go generate ./...` will compile a few different variations
By default, running `go run build.go` will compile a few different variations
of the LLM library based on common CPU families and vector math capabilities,
including a lowest-common-denominator which should run on almost any 64 bit CPU
somewhat slowly. At runtime, Ollama will auto-detect the optimal variation to
@@ -102,8 +96,7 @@ like to use. For example, to compile an optimized binary for an Intel i9-9880H,
you might use:
```
OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go generate ./...
go build .
OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go run build.go
```
#### Containerized Linux Build
@@ -124,8 +117,7 @@ Install required tools:
```powershell
$env:CGO_ENABLED="1"
go generate ./...
go build .
go run build.go
```
#### Windows CUDA (NVIDIA)
@@ -142,4 +134,4 @@ In addition to the common Windows development tools described above, install AMD
- [AMD HIP](https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html)
- [Strawberry Perl](https://strawberryperl.com/)
Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`).
Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`).

View File

@@ -139,6 +139,9 @@ PARAMETER <parameter> <parametervalue>
| mirostat_eta | Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. (Default: 0.1) | float | mirostat_eta 0.1 |
| mirostat_tau | Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0) | float | mirostat_tau 5.0 |
| num_ctx | Sets the size of the context window used to generate the next token. (Default: 2048) | int | num_ctx 4096 |
| num_gqa | The number of GQA groups in the transformer layer. Required for some models, for example it is 8 for llama2:70b | int | num_gqa 1 |
| num_gpu | The number of layers to send to the GPU(s). On macOS it defaults to 1 to enable metal support, 0 to disable. | int | num_gpu 50 |
| num_thread | Sets the number of threads to use during computation. By default, Ollama will detect this for optimal performance. It is recommended to set this value to the number of physical CPU cores your system has (as opposed to the logical number of cores). | int | num_thread 8 |
| repeat_last_n | Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx) | int | repeat_last_n 64 |
| repeat_penalty | Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1) | float | repeat_penalty 1.1 |
| temperature | The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8) | float | temperature 0.7 |

View File

@@ -18,7 +18,7 @@ const ollama = new Ollama({
model: "llama2",
});
const answer = await ollama.invoke(`why is the sky blue?`);
const answer = await ollama.call(`why is the sky blue?`);
console.log(answer);
```

View File

@@ -1,51 +0,0 @@
package main
import (
"context"
"fmt"
"log"
"github.com/ollama/ollama/api"
)
// main sends a short conversation (system prompt, two user turns and one
// assistant turn) to the "llama2" model through the chat API and prints the
// message content of every response passed to the callback.
func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	req := &api.ChatRequest{
		Model: "llama2",
		Messages: []api.Message{
			{Role: "system", Content: "Provide very brief, concise responses"},
			{Role: "user", Content: "Name some unusual animals"},
			{Role: "assistant", Content: "Monotreme, platypus, echidna"},
			{Role: "user", Content: "which of these is the most dangerous?"},
		},
	}

	printContent := func(resp api.ChatResponse) error {
		fmt.Print(resp.Message.Content)
		return nil
	}

	if err := client.Chat(context.Background(), req, printContent); err != nil {
		log.Fatal(err)
	}
}

View File

@@ -1,40 +0,0 @@
package main
import (
"context"
"fmt"
"log"
"github.com/ollama/ollama/api"
)
// main asks the "gemma" model a question through the generate API and prints
// the response text as it is delivered to the callback.
func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	// By default, GenerateRequest is streaming.
	req := &api.GenerateRequest{
		Model:  "gemma",
		Prompt: "how many planets are there?",
	}

	// Only the response text is printed; GenerateResponse has a number of
	// other fields that may be worth examining. Streamed responses are
	// partial, so fmt.Print (not Println) is used to avoid introducing
	// spurious newlines. The model inserts its own newlines where it wants.
	printPartial := func(resp api.GenerateResponse) error {
		fmt.Print(resp.Response)
		return nil
	}

	if err := client.Generate(context.Background(), req, printPartial); err != nil {
		log.Fatal(err)
	}
	fmt.Println()
}

View File

@@ -1,37 +0,0 @@
package main
import (
"context"
"fmt"
"log"
"github.com/ollama/ollama/api"
)
// main asks the "gemma" model a question through the generate API with
// streaming turned off and prints the response text.
func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	req := &api.GenerateRequest{
		Model:  "gemma",
		Prompt: "how many planets are there?",

		// set streaming to false
		Stream: new(bool),
	}

	// Only the response text is printed; GenerateResponse has a number of
	// other fields that may be worth examining.
	printResponse := func(resp api.GenerateResponse) error {
		fmt.Println(resp.Response)
		return nil
	}

	if err := client.Generate(context.Background(), req, printResponse); err != nil {
		log.Fatal(err)
	}
}

View File

@@ -1,47 +0,0 @@
package main
import (
"context"
"fmt"
"log"
"os"
"github.com/ollama/ollama/api"
)
// main reads the image file named by the first command-line argument, sends
// it to the "llava" model with a request to describe it, and prints the
// response text as it is delivered to the callback.
func main() {
	if len(os.Args) <= 1 {
		log.Fatal("usage: <image name>")
	}

	image, err := os.ReadFile(os.Args[1])
	if err != nil {
		log.Fatal(err)
	}

	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	req := &api.GenerateRequest{
		Model:  "llava",
		Prompt: "describe this image",
		Images: []api.ImageData{image},
	}

	// Streamed responses are partial, so fmt.Print (not Println) is used to
	// avoid introducing spurious newlines. The model inserts its own
	// newlines where it wants.
	printPartial := func(resp api.GenerateResponse) error {
		fmt.Print(resp.Response)
		return nil
	}

	if err := client.Generate(context.Background(), req, printPartial); err != nil {
		log.Fatal(err)
	}
	fmt.Println()
}

View File

@@ -1,31 +0,0 @@
package main
import (
"context"
"fmt"
"log"
"github.com/ollama/ollama/api"
)
// main pulls the "mistral" model and prints one progress line for every
// progress response passed to the callback.
func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	req := &api.PullRequest{Model: "mistral"}

	report := func(resp api.ProgressResponse) error {
		fmt.Printf("Progress: status=%v, total=%v, completed=%v\n", resp.Status, resp.Total, resp.Completed)
		return nil
	}

	if err := client.Pull(context.Background(), req, report); err != nil {
		log.Fatal(err)
	}
}

View File

@@ -50,7 +50,7 @@ func HumanBytes(b int64) string {
}
}
func HumanBytes2(b uint64) string {
func HumanBytes2(b int64) string {
switch {
case b >= MebiByte:
return fmt.Sprintf("%.1f MiB", float64(b)/MebiByte)

7
go.mod
View File

@@ -19,10 +19,7 @@ require (
golang.org/x/sync v0.3.0
)
require (
github.com/nlpodyssey/gopickle v0.3.0
github.com/pdevine/tensor v0.0.0-20240228013915-64ccaa8d9ca9
)
require github.com/pdevine/tensor v0.0.0-20240228013915-64ccaa8d9ca9
require (
github.com/apache/arrow/go/arrow v0.0.0-20201229220542-30ce2eb5d4dc // indirect
@@ -71,7 +68,7 @@ require (
golang.org/x/net v0.17.0 // indirect
golang.org/x/sys v0.13.0
golang.org/x/term v0.13.0
golang.org/x/text v0.14.0 // indirect
golang.org/x/text v0.13.0 // indirect
google.golang.org/protobuf v1.30.0
gopkg.in/yaml.v3 v3.0.1 // indirect
)

6
go.sum
View File

@@ -122,8 +122,6 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/nlpodyssey/gopickle v0.3.0 h1:BLUE5gxFLyyNOPzlXxt6GoHEMMxD0qhsE4p0CIQyoLw=
github.com/nlpodyssey/gopickle v0.3.0/go.mod h1:f070HJ/yR+eLi5WmM1OXJEGaTpuJEUiib19olXgYha0=
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
github.com/pdevine/tensor v0.0.0-20240228013915-64ccaa8d9ca9 h1:DV4iXjNn6fGeDl1AkZ1I0QB/0DBjrc7kPpxHrmuDzW4=
@@ -238,8 +236,8 @@ golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

View File

@@ -243,7 +243,7 @@ func getCPUMem() (memInfo, error) {
return ret, nil
}
func CheckVRAM() (uint64, error) {
func CheckVRAM() (int64, error) {
userLimit := os.Getenv("OLLAMA_MAX_VRAM")
if userLimit != "" {
avail, err := strconv.ParseInt(userLimit, 10, 64)
@@ -251,11 +251,11 @@ func CheckVRAM() (uint64, error) {
return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
}
slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
return uint64(avail), nil
return avail, nil
}
gpuInfo := GetGPUInfo()
if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
return gpuInfo.FreeMemory, nil
return int64(gpuInfo.FreeMemory), nil
}
return 0, fmt.Errorf("no GPU detected") // TODO - better handling of CPU based memory determiniation

View File

@@ -17,7 +17,7 @@ import (
)
// CheckVRAM returns the free VRAM in bytes on Linux machines with NVIDIA GPUs
func CheckVRAM() (uint64, error) {
func CheckVRAM() (int64, error) {
userLimit := os.Getenv("OLLAMA_MAX_VRAM")
if userLimit != "" {
avail, err := strconv.ParseInt(userLimit, 10, 64)
@@ -25,15 +25,15 @@ func CheckVRAM() (uint64, error) {
return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
}
slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
return uint64(avail), nil
return avail, nil
}
if runtime.GOARCH == "amd64" {
// gpu not supported, this may not be metal
return 0, nil
}
return uint64(C.getRecommendedMaxVRAM()), nil
recommendedMaxVRAM := int64(C.getRecommendedMaxVRAM())
return recommendedMaxVRAM, nil
}
func GetGPUInfo() GpuInfo {
@@ -53,7 +53,7 @@ func GetGPUInfo() GpuInfo {
func getCPUMem() (memInfo, error) {
return memInfo{
TotalMemory: uint64(C.getPhysicalMemory()),
TotalMemory: 0,
FreeMemory: 0,
DeviceCount: 0,
}, nil

View File

@@ -1,4 +1,3 @@
#import <Metal/Metal.h>
#include <stdint.h>
uint64_t getRecommendedMaxVRAM();
uint64_t getPhysicalMemory();

View File

@@ -1,13 +1,11 @@
// go:build darwin
//go:build darwin
#include "gpu_info_darwin.h"
uint64_t getRecommendedMaxVRAM() {
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
uint64_t result = device.recommendedMaxWorkingSetSize;
CFRelease(device);
return result;
uint64_t getRecommendedMaxVRAM()
{
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
uint64_t result = device.recommendedMaxWorkingSetSize;
CFRelease(device);
return result;
}
uint64_t getPhysicalMemory() {
return [[NSProcessInfo processInfo] physicalMemory];
}

View File

@@ -15,7 +15,7 @@ type GpuInfo struct {
Variant string `json:"variant,omitempty"`
// MinimumMemory represents the minimum memory required to use the GPU
MinimumMemory uint64 `json:"-"`
MinimumMemory int64 `json:"-"`
// TODO add other useful attributes about the card here for discovery information
}

View File

@@ -1,6 +1,6 @@
#!/bin/bash
# This script is intended to run inside the go generate
# working directory must be ./llm/generate/
# This script is intended to run inside the `go run build.go` script, which
# sets the working directory to the correct location: ./llm/generate/.
# TODO - add hardening to detect missing tools (cmake, etc.)
@@ -89,10 +89,10 @@ case "${GOARCH}" in
;;
*)
echo "GOARCH must be set"
echo "this script is meant to be run from within go generate"
echo "this script is meant to be run from within 'go run build.go'"
exit 1
;;
esac
cleanup
echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
echo "code generation completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"

View File

@@ -1,6 +1,6 @@
#!/bin/bash
# This script is intended to run inside the go generate
# working directory must be llm/generate/
# This script is intended to run with the `go run build.go` script, which
# sets the working directory to the correct location: ./llm/generate/.
# First we build one or more CPU based LLM libraries
#
@@ -237,4 +237,4 @@ if [ -d "${ROCM_PATH}" ]; then
fi
cleanup
echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
echo "code generation completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"

View File

@@ -288,4 +288,4 @@ if ($null -ne $env:HIP_PATH) {
cleanup
write-host "`ngo generate completed. LLM runners: $(get-childitem -path ${script:SRC_DIR}\llm\build\windows\${script:ARCH})"
write-host "`ncode generation completed. LLM runners: $(get-childitem -path ${script:SRC_DIR}\llm\build\windows\${script:ARCH})"

View File

@@ -1,3 +0,0 @@
package generate
//go:generate bash ./gen_darwin.sh

View File

@@ -1,3 +0,0 @@
package generate
//go:generate bash ./gen_linux.sh

View File

@@ -1,3 +0,0 @@
package generate
//go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1

View File

@@ -49,7 +49,7 @@ func (llm *ggla) KV() KV {
return llm.kv
}
func (llm *ggla) Tensors() Tensors {
func (llm *ggla) Tensors() []*Tensor {
return llm.tensors
}

View File

@@ -13,6 +13,16 @@ type GGML struct {
model
}
// LayerSize adds up size() for every tensor whose Name starts with prefix and
// returns the total.
func (ggml *GGML) LayerSize(prefix string) (n int64) {
	for _, tensor := range ggml.Tensors() {
		if !strings.HasPrefix(tensor.Name, prefix) {
			continue
		}
		n += int64(tensor.size())
	}
	return n
}
const (
fileTypeF32 uint32 = iota
fileTypeF16
@@ -91,7 +101,7 @@ func fileType(fileType uint32) string {
type model interface {
KV() KV
Tensors() Tensors
Tensors() []*Tensor
}
type KV map[string]any
@@ -157,36 +167,6 @@ func (kv KV) ContextLength() uint64 {
return kv.u64(fmt.Sprintf("%s.context_length", kv.Architecture()))
}
// Tensors is a list of tensor descriptors.
type Tensors []*Tensor

// Layers groups the tensors by the first dot-separated component of their
// name, after dropping a leading "blk" component. Inside each group a tensor
// is keyed by the remaining components joined with ".".
func (ts Tensors) Layers() map[string]Layer {
	grouped := make(map[string]Layer)
	for _, tensor := range ts {
		segments := strings.Split(tensor.Name, ".")
		if segments[0] == "blk" {
			segments = segments[1:]
		}

		key := segments[0]
		layer, found := grouped[key]
		if !found {
			layer = make(Layer)
			grouped[key] = layer
		}
		layer[strings.Join(segments[1:], ".")] = tensor
	}
	return grouped
}
// Layer maps a tensor's name within a layer to the tensor itself.
type Layer map[string]*Tensor

// size returns the sum of size() over every tensor in the layer.
func (l Layer) size() (size uint64) {
	for name := range l {
		size += l[name].size()
	}
	return size
}
type Tensor struct {
Name string `json:"name"`
Kind uint32 `json:"kind"`
@@ -324,63 +304,49 @@ func DecodeGGML(rs io.ReadSeeker) (*GGML, int64, error) {
}, offset, nil
}
func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload uint64) {
embedding := llm.KV().EmbeddingLength()
heads := llm.KV().HeadCount()
headsKV := llm.KV().HeadCountKV()
vocab := uint64(len(llm.KV()["tokenizer.ggml.tokens"].([]any)))
func (llm GGML) GraphSize(context, batch int) (int64, bool) {
embeddingLength := llm.KV().EmbeddingLength()
headCount := llm.KV().HeadCount()
headCountKV := llm.KV().HeadCountKV()
vocabLength := len(llm.KV()["tokenizer.ggml.tokens"].([]any))
layers := llm.Tensors().Layers()
switch llm.KV().Architecture() {
case "llama":
fullOffload = 4 * batch * (1 + 4*embedding + context*(1+heads))
partialOffload = 4 * batch * embedding
partialOffload += max(
4*batch*(1+embedding+max(context, embedding))+embedding*embedding*9/16+4*context*(batch*heads+embedding/heads*headsKV),
4*batch*(embedding+vocab)+embedding*vocab*105/128,
)
if ffnGateWeight, ok := layers["0"]["ffn_gate.0.weight"]; ok {
ffnGateWeight1 := ffnGateWeight.Shape[1]
fullOffload = 4 * batch * (2 + 3*embedding + context*(1+heads) + 2*headsKV + ffnGateWeight1)
partialOffload = max(
4*batch*(3+embedding/heads*headsKV+embedding+context*(1+heads)+ffnGateWeight1)+(embedding*embedding+3*embedding*headsKV*ffnGateWeight1)*9/16,
4*batch*(1+2*embedding+context*(1+heads))+embedding*(6*context*headsKV/heads+embedding*9/16),
)
var attnQKVWeight1 uint64 = 0
for _, t := range llm.Tensors() {
if strings.HasSuffix(t.Name, ".attn_qkv.weight") && len(t.Shape) >= 2 {
attnQKVWeight1 = t.Shape[1]
break
}
case "gemma":
fullOffload = 4 * batch * (embedding + vocab)
partialOffload = 4*batch*(2*embedding+vocab+1) + embedding*vocab*105/128
case "command-r":
fullOffload = max(
4*batch*(embedding+vocab),
4*batch*(2+4*embedding+context*(1+heads)),
)
partialOffload = max(
4*batch*(embedding+vocab)+embedding*vocab*105/128,
4*batch*(1+2*embedding+context*(1+heads))+4*embedding*context+embedding*embedding*9/16,
)
case "qwen2":
fullOffload = max(
4*batch*(embedding+vocab),
4*batch*(1+2*embedding+context+context*heads),
)
partialOffload = max(
4*batch*(embedding+vocab)+embedding*vocab*105/128,
4*(batch*(1+2*embedding+context*(1+heads))+embedding*(1+context)),
)
case "phi2":
fullOffload = max(
4*batch*(embedding+vocab),
4*batch*(1+4*embedding+context+context*heads),
)
partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
}
return
var ffnGate1 uint64 = 0
for _, t := range llm.Tensors() {
if strings.Index(t.Name, ".ffn_gate") > 0 && len(t.Shape) >= 2 {
ffnGate1 = t.Shape[1]
break
}
}
switch llm.KV().Architecture() {
case "gemma", "command-r":
return 4 * int64(batch) * int64(embeddingLength+uint64(vocabLength)), true
case "phi2":
return max(
4*int64(batch)*int64(embeddingLength+uint64(vocabLength)),
4*int64(batch)*int64(1+4*embeddingLength+uint64(context)+attnQKVWeight1+uint64(context)*headCount),
), true
case "qwen2":
return max(
4*int64(batch)*int64(embeddingLength+uint64(vocabLength)),
4*int64(batch)*int64(1+2*embeddingLength+uint64(context)+uint64(context)*headCount),
), true
case "llama":
if ffnGate1 > 0 {
// moe
return 4 * int64(batch) * int64(2+3*embeddingLength+uint64(context)+uint64(context)*headCount+2*headCountKV+ffnGate1), true
}
return 4 * int64(batch) * int64(1+4*embeddingLength+uint64(context)+uint64(context)*headCount), true
}
return 0, false
}

View File

@@ -6,8 +6,6 @@ import (
"fmt"
"io"
"strings"
"log/slog"
)
type containerGGUF struct {
@@ -54,7 +52,6 @@ func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
}
model := newGGUF(c)
slog.Debug(fmt.Sprintf("model = %#v", model))
if err := model.Decode(rs); err != nil {
return nil, err
}
@@ -112,7 +109,7 @@ func (llm *gguf) KV() KV {
return llm.kv
}
func (llm *gguf) Tensors() Tensors {
func (llm *gguf) Tensors() []*Tensor {
return llm.tensors
}
@@ -190,8 +187,6 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
llm.kv[k] = v
}
slog.Debug(fmt.Sprintf("general.architecture = %s", llm.kv["general.architecture"]))
// decode tensors
for i := 0; uint64(i) < llm.numTensor(); i++ {
name, err := readGGUFString(llm, rs)
@@ -248,7 +243,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
}
padding := llm.padding(offset, int64(alignment))
if _, err := rs.Seek(padding-offset, io.SeekCurrent); err != nil {
if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
return err
}
@@ -456,7 +451,6 @@ var ggufKVOrder = map[string][]string{
"llama": {
"general.architecture",
"general.name",
"llama.vocab_size",
"llama.context_length",
"llama.embedding_length",
"llama.block_count",
@@ -515,17 +509,11 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
return err
}
kvCheck := make(map[string]bool)
for k := range kv {
kvCheck[k] = false
}
for _, k := range ggufKVOrder["llama"] {
v, ok := kv[k]
if !ok {
continue
}
kvCheck[k] = true
if err := binary.Write(ws, llm.ByteOrder, uint64(len(k))); err != nil {
return err
@@ -579,12 +567,6 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
}
}
for k, v := range kvCheck {
if !v {
return fmt.Errorf("Didn't know how to write kv %s", k)
}
}
for _, tensor := range tensors {
if err := binary.Write(ws, llm.ByteOrder, uint64(len(tensor.Name))); err != nil {
return err

View File

@@ -6,81 +6,10 @@ package llm
// #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++
// #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++
// #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++
// #include <stdlib.h>
// #include "llama.h"
import "C"
import (
"fmt"
"unsafe"
)
// SystemInfo returns the string produced by llama.cpp's
// llama_print_system_info. It is unused and kept as an example of calling
// llama.cpp functions through cgo.
func SystemInfo() string {
	info := C.llama_print_system_info()
	return C.GoString(info)
}
// Quantize calls llama.cpp's llama_model_quantize with infile as the input
// path and outfile as the output path, using the file type named by filetype.
//
// filetype must exactly match one of the names handled in the switch below
// (for example "F16", "Q4_0" or "Q4_K_M"); the comparison is case-sensitive
// and any other value yields an "unknown filetype" error. A non-zero return
// value from llama_model_quantize is also reported as an error.
func Quantize(infile, outfile, filetype string) error {
// The C copies of the path strings are released with C.free when the
// function returns.
cinfile := C.CString(infile)
defer C.free(unsafe.Pointer(cinfile))
coutfile := C.CString(outfile)
defer C.free(unsafe.Pointer(coutfile))
// Start from llama.cpp's default parameters; only nthread and ftype are
// overridden here.
params := C.llama_model_quantize_default_params()
// NOTE(review): -1 presumably lets llama.cpp pick the thread count — confirm
// against llama.h.
params.nthread = -1
// Translate the textual name into the matching fileType* constant.
switch filetype {
case "F32":
params.ftype = fileTypeF32
case "F16":
params.ftype = fileTypeF16
case "Q4_0":
params.ftype = fileTypeQ4_0
case "Q4_1":
params.ftype = fileTypeQ4_1
case "Q4_1_F16":
params.ftype = fileTypeQ4_1_F16
case "Q8_0":
params.ftype = fileTypeQ8_0
case "Q5_0":
params.ftype = fileTypeQ5_0
case "Q5_1":
params.ftype = fileTypeQ5_1
case "Q2_K":
params.ftype = fileTypeQ2_K
case "Q3_K_S":
params.ftype = fileTypeQ3_K_S
case "Q3_K_M":
params.ftype = fileTypeQ3_K_M
case "Q3_K_L":
params.ftype = fileTypeQ3_K_L
case "Q4_K_S":
params.ftype = fileTypeQ4_K_S
case "Q4_K_M":
params.ftype = fileTypeQ4_K_M
case "Q5_K_S":
params.ftype = fileTypeQ5_K_S
case "Q5_K_M":
params.ftype = fileTypeQ5_K_M
case "Q6_K":
params.ftype = fileTypeQ6_K
case "IQ2_XXS":
params.ftype = fileTypeIQ2_XXS
case "IQ2_XS":
params.ftype = fileTypeIQ2_XS
case "Q2_K_S":
params.ftype = fileTypeQ2_K_S
case "Q3_K_XS":
params.ftype = fileTypeQ3_K_XS
case "IQ3_XXS":
params.ftype = fileTypeIQ3_XXS
default:
return fmt.Errorf("unknown filetype: %s", filetype)
}
// A non-zero return value is treated as failure and surfaced with its code.
if retval := C.llama_model_quantize(cinfile, coutfile, &params); retval != 0 {
return fmt.Errorf("llama_model_quantize: %d", retval)
}
return nil
}

View File

@@ -17,6 +17,7 @@ import (
"os/exec"
"path/filepath"
"runtime"
"slices"
"strconv"
"strings"
"time"
@@ -35,7 +36,15 @@ type LlamaServer struct {
options api.Options
}
var cpuOnlyFamilies = []string{
"mamba",
}
func NewLlamaServer(model string, adapters, projectors []string, opts api.Options) (*LlamaServer, error) {
if _, err := os.Stat(model); err != nil {
return nil, err
}
f, err := os.Open(model)
if err != nil {
return nil, err
@@ -56,83 +65,67 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
opts.NumCtx = 4
}
memoryAvailable, _ := gpu.CheckVRAM()
availableMemory, _ := gpu.CheckVRAM()
info := gpu.GetGPUInfo()
memoryMinimum := info.MinimumMemory
usedMemory := info.MinimumMemory
for _, projector := range projectors {
memoryMinimum += projectorMemoryRequirements(projector)
usedMemory += projectorMemoryRequirements(projector)
// multimodal models require at least 2048 context
opts.NumCtx = max(opts.NumCtx, 2048)
}
// fp16 k,v = (1 (k) + 1 (v)) * sizeof(float16) * n_ctx * n_layer * n_embd / n_head * n_head_kv
var kv uint64 = 2 * 2 * uint64(opts.NumCtx) * ggml.KV().BlockCount() * ggml.KV().EmbeddingLength() / ggml.KV().HeadCount() * ggml.KV().HeadCountKV()
kv := 2 * 2 * int64(opts.NumCtx) * int64(ggml.KV().BlockCount()) * int64(ggml.KV().EmbeddingLength()) / int64(ggml.KV().HeadCount()) * int64(ggml.KV().HeadCountKV())
graphPartialOffload, graphFullOffload := ggml.GraphSize(uint64(opts.NumCtx), uint64(min(opts.NumCtx, opts.NumBatch)))
if graphPartialOffload == 0 {
graphPartialOffload = ggml.KV().GQA() * kv / 6
graph, ok := ggml.GraphSize(opts.NumCtx, min(opts.NumCtx, opts.NumBatch))
if !ok {
graph = int64(ggml.KV().GQA()) * kv / 6
}
if graphFullOffload == 0 {
graphFullOffload = graphPartialOffload
usedMemory += graph
if (usedMemory > availableMemory || slices.Contains(cpuOnlyFamilies, ggml.KV().Architecture())) && info.Library != "metal" {
info.Library = "cpu"
}
// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
memoryRequiredTotal := memoryMinimum + graphFullOffload
requiredMemory := usedMemory
// memoryRequiredPartial represents the memory required for partial GPU offloading (n > 0, n < layers)
memoryRequiredPartial := memoryMinimum + graphPartialOffload
if info.Library != "metal" {
if memoryRequiredPartial > memoryAvailable {
info.Library = "cpu"
}
}
var layerCount int
layers := ggml.Tensors().Layers()
var layers int
for i := 0; i < int(ggml.KV().BlockCount()); i++ {
memoryLayer := layers[fmt.Sprintf("%d", i)].size()
layerMemory := ggml.LayerSize(fmt.Sprintf("blk.%d.", i)) + kv/int64(ggml.KV().BlockCount())
requiredMemory += layerMemory
// KV is proportional to the number of layers
memoryLayer += kv / ggml.KV().BlockCount()
memoryRequiredTotal += memoryLayer
if memoryAvailable > memoryRequiredPartial+memoryLayer {
memoryRequiredPartial += memoryLayer
layerCount++
if availableMemory > usedMemory+layerMemory && (opts.NumGPU < 0 || layers < opts.NumGPU) {
usedMemory += layerMemory
layers++
}
}
memoryLayerOutput := layers["output"].size()
memoryRequiredTotal += memoryLayerOutput
memOutputLayer := ggml.LayerSize("output.")
requiredMemory += memOutputLayer
if info.Library == "metal" && memoryRequiredTotal > info.TotalMemory {
// disable partial offloading when model is greater than total system memory
opts.NumGPU = 0
} else if memoryAvailable > memoryRequiredTotal {
layerCount = int(ggml.KV().BlockCount()) + 1
memoryRequiredPartial = memoryRequiredTotal
}
if opts.NumGPU < 0 {
opts.NumGPU = layerCount
// only offload output layer if all repeating layers are offloaded
if layers >= int(ggml.KV().BlockCount()) && availableMemory > usedMemory+memOutputLayer {
usedMemory += memOutputLayer
layers++
}
slog.Info(
"offload to gpu",
"reallayers", opts.NumGPU,
"layers", layerCount,
"required", format.HumanBytes2(memoryRequiredTotal),
"used", format.HumanBytes2(memoryRequiredPartial),
"available", format.HumanBytes2(memoryAvailable),
"layers", layers,
"required", format.HumanBytes2(requiredMemory),
"used", format.HumanBytes2(usedMemory),
"available", format.HumanBytes2(availableMemory),
"kv", format.HumanBytes2(kv),
"fulloffload", format.HumanBytes2(graphFullOffload),
"partialoffload", format.HumanBytes2(graphPartialOffload),
"graph", format.HumanBytes2(graph),
)
if opts.NumGPU < 0 && info.Library != "cpu" {
opts.NumGPU = layers
}
if len(adapters) > 1 {
return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
}
@@ -276,6 +269,12 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
_ = s.cmd.Wait()
}()
if err = s.waitUntilRunning(); err != nil {
slog.Error("error starting llama server", "server", servers[i], "error", err)
s.Close()
finalErr = err
continue
}
return s, nil
}
@@ -283,7 +282,7 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
return nil, finalErr
}
func projectorMemoryRequirements(filename string) uint64 {
func projectorMemoryRequirements(filename string) int64 {
file, err := os.Open(filename)
if err != nil {
return 0
@@ -295,12 +294,18 @@ func projectorMemoryRequirements(filename string) uint64 {
return 0
}
var mem uint64
for _, layer := range ggml.Tensors().Layers() {
mem += layer.size()
prefixes := make(map[string]struct{})
for _, layer := range ggml.Tensors() {
parts := strings.Split(layer.Name, ".")
prefixes[strings.Join(parts[:2], ".")] = struct{}{}
}
return mem
var ask int64
for prefix := range prefixes {
ask += ggml.LayerSize(prefix)
}
return ask
}
type ServerStatus int
@@ -376,10 +381,9 @@ func (s *LlamaServer) Ping(ctx context.Context) error {
return nil
}
func (s *LlamaServer) WaitUntilRunning() error {
func (s *LlamaServer) waitUntilRunning() error {
start := time.Now()
// TODO we need to wire up a better way to detect hangs during model load and startup of the server
expiresAt := time.Now().Add(10 * time.Minute) // be generous with timeout, large models can take a while to load
expiresAt := time.Now().Add(3 * time.Minute) // be generous with timeout, large models can take a while to load
ticker := time.NewTicker(50 * time.Millisecond)
defer ticker.Stop()

View File

@@ -14,7 +14,7 @@ go build .
Then run the desktop app with `npm start`:
```
cd macapp
cd app
npm install
npm start
```

View File

@@ -247,8 +247,7 @@ func (b *blobDownload) downloadChunk(ctx context.Context, requestURL *url.URL, w
}
if !part.lastUpdated.IsZero() && time.Since(part.lastUpdated) > 5*time.Second {
const msg = "%s part %d stalled; retrying. If this persists, press ctrl-c to exit, then 'ollama pull' to find a faster connection."
slog.Info(fmt.Sprintf(msg, b.Digest[7:19], part.N))
slog.Info(fmt.Sprintf("%s part %d stalled; retrying", b.Digest[7:19], part.N))
// reset last updated
part.lastUpdated = time.Time{}
return errPartStalled

View File

@@ -284,7 +284,7 @@ func realpath(mfDir, from string) string {
return abspath
}
func CreateModel(ctx context.Context, name, modelFileDir, quantization string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
func CreateModel(ctx context.Context, name, modelFileDir string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
deleteMap := make(map[string]struct{})
if manifest, _, err := GetManifest(ParseModelPath(name)); err == nil {
for _, layer := range append(manifest.Layers, manifest.Config) {
@@ -322,7 +322,7 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
pathName := realpath(modelFileDir, c.Args)
ggufName, err := convertModel(name, pathName, fn)
ggufName, err := convertSafetensors(name, pathName, fn)
if err != nil {
var pathErr *fs.PathError
switch {
@@ -337,27 +337,8 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
if ggufName != "" {
pathName = ggufName
slog.Debug(fmt.Sprintf("new image layer path: %s", pathName))
defer os.RemoveAll(ggufName)
if quantization != "" {
quantization = strings.ToUpper(quantization)
fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", "F16", quantization)})
tempfile, err := os.CreateTemp(filepath.Dir(ggufName), quantization)
if err != nil {
return err
}
defer os.RemoveAll(tempfile.Name())
if err := llm.Quantize(ggufName, tempfile.Name(), quantization); err != nil {
return err
}
if err := tempfile.Close(); err != nil {
return err
}
pathName = tempfile.Name()
}
}
bin, err := os.Open(pathName)
@@ -633,7 +614,7 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
return nil
}
func convertModel(name, path string, fn func(resp api.ProgressResponse)) (string, error) {
func convertSafetensors(name, path string, fn func(resp api.ProgressResponse)) (string, error) {
r, err := zip.OpenReader(path)
if err != nil {
return "", err
@@ -668,22 +649,17 @@ func convertModel(name, path string, fn func(resp api.ProgressResponse)) (string
rc.Close()
}
mf, err := convert.GetModelFormat(tempDir)
params, err := convert.GetParams(tempDir)
if err != nil {
return "", err
}
params, err := mf.GetParams(tempDir)
mArch, err := convert.GetModelArchFromParams(name, tempDir, params)
if err != nil {
return "", err
}
mArch, err := mf.GetModelArch(name, tempDir, params)
if err != nil {
return "", err
}
fn(api.ProgressResponse{Status: "processing tensors"})
fn(api.ProgressResponse{Status: "processing safetensors"})
if err := mArch.GetTensors(); err != nil {
return "", err
}

View File

@@ -68,18 +68,6 @@ var loaded struct {
var defaultSessionDuration = 5 * time.Minute
// unload closes the currently loaded llama server, if there is one, and then
// clears every field of loaded that describes it.
func unload() {
	if server := loaded.llama; server != nil {
		server.Close()
	}

	loaded.llama = nil
	loaded.model = ""
	loaded.adapters, loaded.projectors = nil, nil
	loaded.Options = nil
}
// load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.Duration) error {
ctx, cancel := context.WithTimeout(c, 10*time.Second)
@@ -95,7 +83,12 @@ func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.D
if needLoad {
if loaded.llama != nil {
slog.Info("changing loaded model")
unload()
loaded.llama.Close()
loaded.llama = nil
loaded.model = ""
loaded.adapters = nil
loaded.projectors = nil
loaded.Options = nil
}
llama, err := llm.NewLlamaServer(model.ModelPath, model.AdapterPaths, model.ProjectorPaths, opts)
@@ -115,19 +108,22 @@ func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.D
loaded.projectors = model.ProjectorPaths
loaded.llama = llama
loaded.Options = &opts
if err = llama.WaitUntilRunning(); err != nil {
slog.Error("error loading llama server", "error", err)
unload()
return err
}
}
if loaded.expireTimer == nil {
loaded.expireTimer = time.AfterFunc(sessionDuration, func() {
loaded.mu.Lock()
defer loaded.mu.Unlock()
unload()
if loaded.llama != nil {
loaded.llama.Close()
}
loaded.llama = nil
loaded.model = ""
loaded.adapters = nil
loaded.projectors = nil
loaded.Options = nil
})
}
@@ -651,7 +647,7 @@ func CreateModelHandler(c *gin.Context) {
ctx, cancel := context.WithCancel(c.Request.Context())
defer cancel()
if err := CreateModel(ctx, model, filepath.Dir(req.Path), req.Quantization, commands, fn); err != nil {
if err := CreateModel(ctx, model, filepath.Dir(req.Path), commands, fn); err != nil {
ch <- gin.H{"error": err.Error()}
}
}()
@@ -917,24 +913,6 @@ func HeadBlobHandler(c *gin.Context) {
}
func CreateBlobHandler(c *gin.Context) {
path, err := GetBlobsPath(c.Param("digest"))
if err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
_, err = os.Stat(path)
switch {
case errors.Is(err, os.ErrNotExist):
// noop
case err != nil:
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
default:
c.Status(http.StatusOK)
return
}
layer, err := NewLayer(c.Request.Body, "")
if err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
@@ -1150,7 +1128,9 @@ func Serve(ln net.Listener) error {
signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
go func() {
<-signals
unload()
if loaded.llama != nil {
loaded.llama.Close()
}
gpu.Cleanup()
os.Exit(0)
}()

View File

@@ -61,7 +61,7 @@ func Test_Routes(t *testing.T) {
fn := func(resp api.ProgressResponse) {
t.Logf("Status: %s", resp.Status)
}
err = CreateModel(context.TODO(), name, "", "", commands, fn)
err = CreateModel(context.TODO(), name, "", commands, fn)
assert.Nil(t, err)
}

View File

@@ -1,83 +0,0 @@
package model
import (
"fmt"
"log/slog"
"strings"
"unicode"
)
// Digest is the digest of a model Manifest. It is an immutable value type
// that can be compared with ==.
//
// The zero Digest is not a valid digest.
type Digest struct {
	s string // "<digest-type>-<digest>", or empty for the zero Digest
}

// Type returns the part of the digest that precedes the first "-".
//
// Example:
//
//	ParseDigest("sha256-1234").Type() // returns "sha256"
func (d Digest) Type() string {
	prefix, _, _ := strings.Cut(d.s, "-")
	return prefix
}

// String returns the digest as "<digest-type>-<digest>". For an invalid
// digest it returns the empty string.
func (d Digest) String() string {
	return d.s
}

// IsValid reports whether the digest is valid, that is, not the zero Digest.
//
// Valid digests are created only by ParseDigest or ParseName(name).Digest().
func (d Digest) IsValid() bool {
	return len(d.s) > 0
}

// LogValue implements slog.LogValuer by logging the digest in its string
// form.
func (d Digest) LogValue() slog.Value {
	return slog.StringValue(d.s)
}
var (
_ slog.LogValuer = Digest{}
)
// ParseDigest turns a string of the form "<digest-type>-<digest>" into a
// Digest. When the string contains no "-", ":" is accepted as the separator
// instead. The result is always stored in the "-" form.
//
// The zero Digest is returned when there is no separator, when the type is
// rejected by isValidDigestType, or when the digest is rejected by
// isValidHex.
func ParseDigest(s string) Digest {
	typ, hex, found := strings.Cut(s, "-")
	if !found {
		typ, hex, found = strings.Cut(s, ":")
	}
	if !found || !isValidDigestType(typ) || !isValidHex(hex) {
		return Digest{}
	}
	return Digest{s: fmt.Sprintf("%s-%s", typ, hex)}
}
// isValidDigestType reports whether s is non-empty and made up only of runes
// that unicode classifies as lowercase letters or digits.
func isValidDigestType(s string) bool {
	if s == "" {
		return false
	}
	for _, r := range s {
		switch {
		case unicode.IsLower(r), unicode.IsDigit(r):
			// acceptable rune; keep scanning
		default:
			return false
		}
	}
	return true
}
// isValidHex reports whether s is non-empty and every byte is a decimal
// digit or a lowercase letter from 'a' to 'f'. Uppercase hex digits are
// rejected.
func isValidHex(s string) bool {
	if s == "" {
		return false
	}
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case '0' <= c && c <= '9':
			// decimal digit
		case 'a' <= c && c <= 'f':
			// lowercase hex letter
		default:
			return false
		}
	}
	return true
}

View File

@@ -1,46 +0,0 @@
package model
import "testing"
// testDigests maps each input string to the Digest that ParseDigest is
// expected to return for it. An empty Digest ({}) marks an input that must be
// rejected.
var testDigests = map[string]Digest{
"": {},
"sha256-1234": {s: "sha256-1234"},
"sha256-5678": {s: "sha256-5678"},
"blake2-9abc": {s: "blake2-9abc"},
"-1234": {}, // empty digest type
"sha256-": {}, // empty hex part
"sha256-1234-5678": {}, // '-' is not a hex digit
"sha256-P": {}, // invalid hex
"sha256-1234P": {}, // trailing non-hex character
"---": {}, // empty digest type
}
// TestDigestParse checks that ParseDigest returns the expected Digest for
// every entry in testDigests.
func TestDigestParse(t *testing.T) {
	for input, expected := range testDigests {
		actual := ParseDigest(input)
		t.Logf("ParseDigest(%q) = %#v", input, actual)
		if actual == expected {
			continue
		}
		t.Errorf("ParseDigest(%q) = %q; want %q", input, actual, expected)
	}
}
// TestDigestString checks, for every entry in testDigests, that String on
// the table's Digest and on a freshly parsed Digest both return the input
// when the Digest is valid and the empty string otherwise.
func TestDigestString(t *testing.T) {
	for input, digest := range testDigests {
		expected := ""
		if digest.IsValid() {
			expected = input
		}

		// String on the Digest stored in the table.
		if actual := digest.String(); actual != expected {
			t.Errorf("ParseDigest(%q).String() = %q; want %q", input, actual, expected)
		}

		// String after parsing the input again.
		if actual := ParseDigest(input).String(); actual != expected {
			t.Errorf("roundtrip ParseDigest(%q).String() = %q; want %q", input, actual, expected)
		}
	}
}

View File

@@ -1,688 +0,0 @@
package model
import (
"cmp"
"errors"
"fmt"
"hash/maphash"
"io"
"log/slog"
"path/filepath"
"slices"
"strings"
"sync"
"github.com/ollama/ollama/types/structs"
)
// Errors
var (
// ErrInvalidName, ErrIncompleteName, and ErrInvalidDigest are not
// used by this package, but are exported so that other packages can
// use them, instead of defining their own errors for them.
ErrInvalidName = errors.New("invalid model name")
ErrIncompleteName = errors.New("incomplete model name")
ErrInvalidDigest = errors.New("invalid digest")
)
// Defaults
const (
// MaskDefault is the default mask used by [Name.DisplayShortest].
MaskDefault = "registry.ollama.ai/library/?:latest"
// MaskNothing is a mask that masks nothing.
MaskNothing = "?/?/?:?"
// FillDefault is the default fill used by [ParseName].
FillDefault = "registry.ollama.ai/library/?:latest+Q4_0"
// FillNothing is a fill that fills nothing.
FillNothing = "?/?/?:?+?"
)
// MaxNamePartLen is presumably the maximum length allowed for a single part
// of a Name; its uses are outside this view — TODO confirm.
const MaxNamePartLen = 128
// PartKind identifies one part of a Name (host, namespace, model, ...).
type PartKind int
// Levels of concreteness
const (
// Each value aligns with its index in the Name.parts array.
PartHost PartKind = iota
PartNamespace
PartModel
PartTag
PartBuild
PartDigest
// NumParts is the number of parts in a Name. In this list, it must
// follow the final part.
NumParts
// PartExtraneous is reported by the parser for text that does not
// belong to any part. It is not a valid index into Name.parts.
PartExtraneous = -1
)
// kindNames holds the display name returned by [PartKind.String] for each
// part kind. Kinds without an entry are reported as "Unknown".
var kindNames = map[PartKind]string{
PartHost: "Host",
PartNamespace: "Namespace",
PartModel: "Name",
PartTag: "Tag",
PartBuild: "Build",
PartDigest: "Digest",
}
// String returns the display name of k, or "Unknown" if k has no non-empty
// entry in kindNames.
func (k PartKind) String() string {
	if name := kindNames[k]; name != "" {
		return name
	}
	return "Unknown"
}
// Name is an opaque reference to a model. It holds the parts of a model
// with the case preserved, but is not directly comparable with other Names
// since model names can be represented with different casing depending on
// the use case. For instance, "Mistral" and "mistral" are the same model
// but each version may have come from different sources (e.g. copied from a
// Web page, or from a file path).
//
// Valid Names can ONLY be constructed by calling [ParseName].
//
// A Name is valid if and only if it has a valid Model part. The other parts
// are optional.
//
// A Name is considered "complete" if it has all parts other than the digest
// present. To check if a Name is complete, use [Name.IsComplete].
//
// To compare two names in a case-insensitive manner, use [Name.EqualFold].
//
// The parts of a Name are:
//
// - Host: the domain of the model (optional)
// - Namespace: the namespace of the model (optional)
// - Model: the name of the model (required)
// - Tag: the tag of the model (optional)
// - Build: the build of the model; usually the quantization or "file type" (optional)
// - Digest: the digest of the model, in the form <digest-type>-<digest> (optional)
//
// Individual parts are available through the accessor methods, such as
// [Name.Host] and [Name.Model].
//
// To check if a Name has at minimum a valid model part, use [Name.IsValid].
type Name struct {
_ structs.Incomparable
parts [NumParts]string // host, namespace, model, tag, build, digest
// TODO(bmizerany): track offsets and hold s (raw string) here? We
// could pack the offsets all into a single uint64 since the first
// parts take less bits since their max offset is less than the max
// offset of the next part. This would save a ton of bytes per Name
// and mean zero allocations for String.
}
// ParseName parses s into a Name and returns the result of filling its
// missing parts from fill. The input must be a string representation of a
// model name in the form:
//
//	[host/][namespace/]<model>[:tag][+build][@<digest-type>-<digest>]
//
// The model part is required unless s consists of a digest only
// ("@<digest-type>-<digest>"); all other parts are optional. If any part is
// invalid, or s contains more parts than the form allows, the zero Name is
// returned.
//
// The case of every part, including the build, is preserved.
//
// Examples of valid names:
//
//	"example.com/library/mistral:7b+x"
//	"example.com/eva/mistral:7b+Q4_0"
//	"mistral:7b+x"
//	"example.com/mike/mistral:latest+Q4_0"
//	"example.com/bruce/mistral:latest"
//	"example.com/pdevine/thisisfine:7b+Q4_0@sha256-1234567890abcdef"
//
// Examples of invalid names:
//
//	"example.com/mistral:7b+"
//	"example.com/mistral:7b+Q4_0+"
//	"x/y/z/z:8n+I"
//	""
//
// # Fills
//
// For any valid s, the fill string is used to fill in missing parts of the
// Name. The fill string must be a valid Name with the exception that any part
// may be the string ("?"), which will not be considered for filling. An
// empty fill selects [FillDefault]. ParseName panics if fill is invalid.
func ParseName(s, fill string) Name {
	var name Name
	parts(s)(func(kind PartKind, part string) bool {
		rejected := false
		switch {
		case kind == PartDigest && !ParseDigest(part).IsValid():
			rejected = true
		case kind == PartExtraneous:
			rejected = true
		case !isValidPart(kind, part):
			rejected = true
		}
		if rejected {
			// One bad part invalidates the whole name; stop parsing.
			name = Name{}
			return false
		}
		name.parts[kind] = part
		return true
	})
	// A digest-only name has no model part but is still accepted.
	if !name.IsValid() && !name.IsResolved() {
		return Name{}
	}
	return fillName(name, fill)
}
// parseMask parses s, a mask or fill pattern, into a Name. A part written
// as "?" is accepted and left empty in the result.
//
// It panics if any other part is invalid for its kind, or if s contains
// more parts than a Name can hold.
func parseMask(s string) Name {
	var r Name
	parts(s)(func(kind PartKind, part string) bool {
		if part == "?" {
			// mask part; treat as empty but valid
			return true
		}
		// PartExtraneous (-1) is not an index into r.parts. Reject it
		// here with a descriptive panic rather than letting the
		// assignment below fail with an index-out-of-range panic.
		if kind == PartExtraneous || !isValidPart(kind, part) {
			panic(fmt.Errorf("invalid mask part %s: %q", kind, part))
		}
		r.parts[kind] = part
		return true
	})
	return r
}
// MustParseName is like [ParseName] but panics if the parsed Name does not
// have a valid model part.
func MustParseName(s, fill string) Name {
	if name := ParseName(s, fill); name.IsValid() {
		return name
	}
	panic("invalid Name: " + s)
}
// fillName returns a copy of r in which every empty part is replaced by the
// corresponding part of fill. Parts of r that are already set are kept.
//
// An empty fill selects [FillDefault]. Parts of fill written as "?" are
// parsed as empty by parseMask and therefore fill nothing.
//
// The returned Name will only be valid if r is valid.
//
// fillName panics if fill is invalid, or if it has no usable part and is not
// [FillNothing].
func fillName(r Name, fill string) Name {
	fill = cmp.Or(fill, FillDefault)
	f := parseMask(fill)
	if fill != FillNothing && f.IsZero() {
		panic("invalid fill")
	}
	for i := range r.parts {
		// f.parts[i] is never "?" here (parseMask drops those), so an
		// empty fill part simply leaves r.parts[i] untouched.
		r.parts[i] = cmp.Or(r.parts[i], f.parts[i])
	}
	return r
}
// WithBuild returns a copy of r whose build part is set to build. The
// string is stored as given; it is not validated.
func (r Name) WithBuild(build string) Name {
	updated := r
	updated.parts[PartBuild] = build
	return updated
}
// WithDigest returns a copy of r whose digest part is set to the string
// form of digest.
func (r Name) WithDigest(digest Digest) Name {
	updated := r
	updated.parts[PartDigest] = digest.String()
	return updated
}
// mapHashSeed is the seed used by every [Name.MapHash] call. It is created
// once at package initialization.
var mapHashSeed = maphash.MakeSeed()
// MapHash returns a case insensitive hash for use in maps and equality
// checks. For a convenient way to compare names, use [Name.EqualFold].
//
// ASCII upper-case letters are folded to lower case before hashing. A zero
// byte is written after each part so that names whose parts differ only in
// where the boundaries fall (for example "a/b" and "ab") do not always
// produce the same hash.
//
// The hash depends on a seed created at package initialization, so values
// must not be persisted or compared across processes.
//
//nolint:errcheck
func (r Name) MapHash() uint64 {
	var h maphash.Hash
	h.SetSeed(mapHashSeed)
	for _, part := range r.parts {
		// Index by byte so that every byte of the part is hashed.
		for i := 0; i < len(part); i++ {
			c := part[i]
			if c >= 'A' && c <= 'Z' {
				c = c - 'A' + 'a'
			}
			h.WriteByte(c)
		}
		// Part delimiter; 0 is not a valid byte in any part.
		h.WriteByte(0)
	}
	return h.Sum64()
}
// slice returns a Name holding only the parts of r from from through to,
// inclusive. All other parts are empty.
func (r Name) slice(from, to PartKind) Name {
	lo, hi := from, to+1
	var out Name
	copy(out.parts[lo:hi], r.parts[lo:hi])
	return out
}
// DisplayShortest returns the shortest possible, masked display string in form:
//
//	[host/][<namespace>/]<model>[:<tag>]
//
// # Masks
//
// The mask is a string that specifies which parts of the name to omit based
// on case-insensitive comparison. [Name.DisplayShortest] omits parts of the
// name that are the same as the mask, moving from left to right until the
// first unequal part is found. It then moves right to left, starting at the
// tag, until the first unequal part is found. The result is the shortest
// possible display string.
//
// Unlike a [Name] the mask can contain "?" parts which are treated as
// wildcards. A "?" will never match a non-empty part of the name, since a
// valid name can never contain a "?" character.
//
// For example: Given a Name ("registry.ollama.ai/library/mistral:latest") masked
// with ("registry.ollama.ai/library/?:latest") will produce the display string
// ("mistral").
//
// If mask is the empty string, then [MaskDefault] is used.
//
// DisplayShortest panics if the mask contains an invalid part, or if r is
// the zero Name and the mask is not [MaskNothing].
//
// # Builds
//
// For now, DisplayShortest does not consider the build or return one in the
// result. The digest is omitted as well. We can lift this restriction when
// needed.
func (r Name) DisplayShortest(mask string) string {
	mask = cmp.Or(mask, MaskDefault)
	m := parseMask(mask)
	if r.IsZero() && mask != MaskNothing {
		panic("invalid Name")
	}
	// Left to right: drop leading parts that match the mask.
	for k := PartHost; k < PartTag; k++ {
		if !strings.EqualFold(r.parts[k], m.parts[k]) {
			break
		}
		r.parts[k] = ""
	}
	// Right to left: drop trailing parts that match the mask.
	for k := PartTag; k >= PartHost; k-- {
		if !strings.EqualFold(r.parts[k], m.parts[k]) {
			break
		}
		r.parts[k] = ""
	}
	shortened := r.slice(PartHost, PartTag)
	return shortened.DisplayLong()
}
// DisplayLongest returns r.DisplayShortest(MaskNothing): host, namespace,
// model and tag with nothing masked out.
func (r Name) DisplayLongest() string {
	const mask = MaskNothing
	return r.DisplayShortest(mask)
}
// seps holds, for each part kind, the separator written between that part
// and the part that follows it. It is indexed by PartKind.
var seps = [...]string{
PartHost: "/",
PartNamespace: "/",
PartModel: ":",
PartTag: "+",
PartBuild: "@",
PartDigest: "",
}
// writeTo writes the fullest possible display string of r to w, in form:
//
//	<host>/<namespace>/<model>:<tag>+<build>@<digest-type>-<digest>
//
// Missing parts and their separators are not written.
//
// The digest is always prefixed with "@". That is if [Name.IsValid]
// reports false and [Name.IsResolved] reports true, then the string is
// written as "@<digest-type>-<digest>".
//
// It returns the first error reported by w, if any.
func (r Name) writeTo(w io.StringWriter) error {
	wroteAny := false
	for i, part := range r.parts {
		if part == "" {
			continue
		}
		// The separator in front of a part is the one that belongs to
		// the preceding part kind.
		if wroteAny || PartKind(i) == PartDigest {
			if _, err := w.WriteString(seps[i-1]); err != nil {
				return err
			}
		}
		if _, err := w.WriteString(part); err != nil {
			return err
		}
		wroteAny = true
	}
	return nil
}
// builderPool recycles the *strings.Builder values used by
// [Name.DisplayLong].
var builderPool = sync.Pool{
	New: func() any {
		return new(strings.Builder)
	},
}
// DisplayLong returns the fullest possible display string in form:
//
//	<host>/<namespace>/<model>:<tag>+<build>@<digest-type>-<digest>
//
// If any part is missing, it is omitted from the display string.
func (r Name) DisplayLong() string {
	sb := builderPool.Get().(*strings.Builder)
	defer builderPool.Put(sb)
	sb.Reset()
	sb.Grow(50) // arbitrarily long enough for most names
	// A strings.Builder never returns a write error, so the result can
	// be ignored.
	_ = r.writeTo(sb)
	return sb.String()
}
// GoString implements fmt.GoStringer. It returns a string suitable for
// debugging and logging. It is like [Name.DisplayLong] except that every
// part is always present, with missing parts shown as ("?").
func (r Name) GoString() string {
	for i, part := range r.parts {
		if part == "" {
			r.parts[i] = "?"
		}
	}
	return r.DisplayLong()
}
// LogValue implements slog.Valuer. The logged value is the string returned
// by [Name.GoString].
func (r Name) LogValue() slog.Value {
	s := r.GoString()
	return slog.StringValue(s)
}
// IsComplete reports whether the Name is fully qualified. That is it has a
// host, namespace, model, tag, and build. The digest is not required.
func (r Name) IsComplete() bool {
	for _, part := range r.parts[:PartDigest] {
		if part == "" {
			return false
		}
	}
	return true
}
// IsCompleteNoBuild is like [Name.IsComplete] but it does not require the
// build part to be present.
func (r Name) IsCompleteNoBuild() bool {
	for _, part := range r.parts[:PartBuild] {
		if part == "" {
			return false
		}
	}
	return true
}
// IsResolved reports true if the Name has a valid digest.
//
// A Name can be valid, or even complete, without being resolved.
func (r Name) IsResolved() bool {
	d := r.Digest()
	return d.IsValid()
}
// Digest returns the digest part of the Name, if any.
//
// The digest part is wrapped as is: it is not parsed or validated again
// here, since [ParseName] only accepts digest parts that are valid.
func (r Name) Digest() Digest {
	return Digest{s: r.parts[PartDigest]}
}
// EqualFold reports whether r and o are equivalent model names, ignoring
// ASCII case. All parts, including the digest, take part in the comparison.
func (r Name) EqualFold(o Name) bool {
	order := r.CompareFold(o)
	return order == 0
}
// CompareFold performs a case-insensitive comparison of r and o, part by
// part from host to digest. It returns the result for the first pair of
// parts that differ, or 0 if every part is equal.
//
// This can be used with [slices.SortFunc].
//
// For simple equality checks, use [Name.EqualFold].
func (r Name) CompareFold(o Name) int {
	for i := range r.parts {
		if c := compareFold(r.parts[i], o.parts[i]); c != 0 {
			return c
		}
	}
	return 0
}
func compareFold(a, b string) int {
return slices.CompareFunc([]rune(a), []rune(b), func(a, b rune) int {
return cmp.Compare(downcase(a), downcase(b))
})
}
// downcase returns the lower-case form of r if r is an ASCII upper-case
// letter. Any other rune is returned unchanged.
func downcase(r rune) rune {
	if 'A' <= r && r <= 'Z' {
		return r + ('a' - 'A')
	}
	return r
}
// Host returns the host part of the Name, or "" if it is not set.
func (r Name) Host() string {
	return r.parts[PartHost]
}
// Namespace returns the namespace part of the Name, or "" if it is not set.
func (r Name) Namespace() string {
	return r.parts[PartNamespace]
}
// Model returns the model part of the Name, or "" if it is not set.
func (r Name) Model() string {
	return r.parts[PartModel]
}
// Build returns the build part of the Name, or "" if it is not set.
func (r Name) Build() string {
	return r.parts[PartBuild]
}
// Tag returns the tag part of the Name, or "" if it is not set.
func (r Name) Tag() string {
	return r.parts[PartTag]
}
// iter_Seq2 is a stand-in for iter.Seq2, defined here to avoid the current
// build restrictions in the go1.22 iter package requiring the
// goexperiment.rangefunc tag to be set via the GOEXPERIMENT=rangefunc flag,
// which we are not yet ready to support.
//
// A value of this type is called with a yield function. It calls yield once
// per pair and stops as soon as yield returns false.
//
// Once we are ready to support rangefunc, this can be removed and replaced
// with the iter.Seq2 type.
type iter_Seq2[A, B any] func(func(A, B) bool)
// parts returns a sequence of the parts of a Name string from most specific
// to least specific. The string is scanned from its last byte to its first,
// so the digest (if any) is yielded first and the host (if any) last.
//
// It normalizes the input string by removing one leading "http://" or
// "https://" only. No other normalizations are performed.
//
// Nothing is yielded if the string is empty or longer than MaxNamePartLen
// bytes after that prefix is removed. Text that cannot belong to any part is
// yielded with kind PartExtraneous, after which the sequence ends. Parts are
// yielded as found; validating their contents is left to the caller.
func parts(s string) iter_Seq2[PartKind, string] {
return func(yield func(PartKind, string) bool) {
// Strip at most one scheme prefix.
if strings.HasPrefix(s, "http://") {
s = strings.TrimPrefix(s, "http://")
} else {
s = strings.TrimPrefix(s, "https://")
}
if len(s) > MaxNamePartLen || len(s) == 0 {
return
}
numConsecutiveDots := 0
partLen := 0
// state is the most specific part kind the bytes being scanned may
// still belong to; j is the exclusive end of the current part.
state, j := PartDigest, len(s)
for i := len(s) - 1; i >= 0; i-- {
if partLen++; partLen > MaxNamePartLen {
// catch a part that is too long early, so
// we don't keep spinning on it, waiting for
// an isValidPart check which would scan
// over it again.
yield(state, s[i+1:j])
return
}
switch s[i] {
case '@':
// '@' ends a digest and is only allowed as the last separator.
switch state {
case PartDigest:
if !yield(PartDigest, s[i+1:j]) {
return
}
if i == 0 {
// This is the form
// "@<digest>" which is valid.
//
// We're done.
return
}
state, j, partLen = PartBuild, i, 0
default:
yield(PartExtraneous, s[i+1:j])
return
}
case '+':
// '+' ends a build.
switch state {
case PartBuild, PartDigest:
if !yield(PartBuild, s[i+1:j]) {
return
}
state, j, partLen = PartTag, i, 0
default:
yield(PartExtraneous, s[i+1:j])
return
}
case ':':
// ':' ends a tag.
switch state {
case PartTag, PartBuild, PartDigest:
if !yield(PartTag, s[i+1:j]) {
return
}
state, j, partLen = PartModel, i, 0
default:
yield(PartExtraneous, s[i+1:j])
return
}
case '/':
// The first '/' seen (from the right) ends the model, the second
// ends the namespace; any further '/' is extraneous.
switch state {
case PartModel, PartTag, PartBuild, PartDigest:
if !yield(PartModel, s[i+1:j]) {
return
}
// NOTE(review): unlike the other transitions, partLen is not
// reset here — confirm whether that is intended.
state, j = PartNamespace, i
case PartNamespace:
if !yield(PartNamespace, s[i+1:j]) {
return
}
state, j, partLen = PartHost, i, 0
default:
yield(PartExtraneous, s[i+1:j])
return
}
default:
// Two dots in a row end the scan with an empty part. The counter
// is only reset by a non-dot byte that reaches this default case;
// separators handled above do not reset it.
if s[i] == '.' {
if numConsecutiveDots++; numConsecutiveDots > 1 {
yield(state, "")
return
}
} else {
numConsecutiveDots = 0
}
}
}
// Whatever is left at the front of the string is the host or the
// namespace if those are still expected, and the model otherwise.
if state <= PartNamespace {
yield(state, s[:j])
} else {
yield(PartModel, s[:j])
}
}
}
// IsZero reports whether every part of r is empty.
func (r Name) IsZero() bool {
	var none [NumParts]string
	return r.parts == none
}
// IsValid reports if a model has at minimum a valid model part.
//
// The contents of the part are not checked again here; parsing only ever
// stores parts that are valid, so it is enough to test for presence.
func (r Name) IsValid() bool {
	return len(r.parts[PartModel]) > 0
}
// ParseNameFromURLPath parses forms of a URL path into a Name. Specifically,
// it trims one leading "/", if present, and then calls [ParseName] with
// fill.
func ParseNameFromURLPath(s, fill string) Name {
	return ParseName(strings.TrimPrefix(s, "/"), fill)
}
// URLPath returns a relative URL path built from the host, namespace, model
// and tag parts of the Name. The build and digest are not included, and
// there is no leading "/".
//
// The parts maintain their original case.
//
// Example:
//
//	ParseName("example.com/namespace/model:tag+build", FillNothing).URLPath() // returns "example.com/namespace/model:tag"
func (r Name) URLPath() string {
	return r.DisplayLongest()
}
// ParseNameFromFilepath parses a file path into a Name. The input string must be a
// valid file path representation of a model name in the form:
//
//	host/namespace/model/tag/build
//
// where the elements are separated by the OS path separator.
//
// The zero value is returned if s does not contain all path elements
// leading up to the model part, if any path element is invalid for its
// corresponding part kind, or if path elements remain after the build.
//
// The fill string is used to fill in missing parts of any constructed Name.
// See [ParseName] for more information on the fill string.
func ParseNameFromFilepath(s, fill string) Name {
	sep := string(filepath.Separator)
	var name Name
	for kind := PartHost; kind <= PartBuild; kind++ {
		var elem string
		elem, s, _ = strings.Cut(s, sep)
		if !isValidPart(kind, elem) {
			return Name{}
		}
		name.parts[kind] = elem
		if s == "" {
			break
		}
	}
	// Anything still left in s is an extra path element.
	if s != "" || !name.IsValid() {
		return Name{}
	}
	return fillName(name, fill)
}
// Filepath returns a canonicalized, relative file path using the host,
// namespace, model, tag and build parts of a complete Name.
//
// Each part is downcased, except for the build part which is upcased.
//
// The digest is never part of the path, so the result keeps the
// host/namespace/model/tag/build form that [ParseNameFromFilepath] accepts.
// Empty parts are skipped by filepath.Join, so the path of an incomplete
// Name has fewer elements.
//
// Example:
//
//	ParseName("example.com/namespace/model:tag+build", FillNothing).Filepath() // returns "example.com/namespace/model/tag/BUILD"
func (r Name) Filepath() string {
	// r is a copy, so the parts can be rewritten in place.
	elems := r.parts[:PartDigest] // host through build
	for i := range elems {
		if PartKind(i) == PartBuild {
			elems[i] = strings.ToUpper(elems[i])
		} else {
			elems[i] = strings.ToLower(elems[i])
		}
	}
	return filepath.Join(elems...)
}
// FilepathNoBuild returns a canonicalized, relative file path using the
// downcased host, namespace, model and tag parts of a complete Name. The
// build and digest are left out.
func (r Name) FilepathNoBuild() string {
	// r is a copy, so the parts can be rewritten in place.
	elems := r.parts[:PartBuild]
	for i, elem := range elems {
		elems[i] = strings.ToLower(elem)
	}
	return filepath.Join(elems...)
}
// isValidPart reports if s is non-empty, contains only bytes that are valid
// for the given part kind, and has no two dots in a row.
func isValidPart(kind PartKind, s string) bool {
	if len(s) == 0 {
		return false
	}
	dotRun := 0
	for i := 0; i < len(s); i++ {
		b := s[i]
		if b == '.' {
			dotRun++
			if dotRun >= 2 {
				return false
			}
		} else {
			dotRun = 0
		}
		if !isValidByteFor(kind, b) {
			return false
		}
	}
	return true
}
// isValidByteFor reports whether c may appear in a part of the given kind.
// ASCII letters, digits, '_', '-' and '.' are allowed, except that '.' is
// not allowed in a namespace.
func isValidByteFor(kind PartKind, c byte) bool {
	switch {
	case c == '.':
		return kind != PartNamespace
	case c == '-', c == '_':
		return true
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
		return true
	default:
		return false
	}
}

View File

@@ -1,708 +0,0 @@
package model
import (
"bytes"
"cmp"
"fmt"
"log/slog"
"path/filepath"
"slices"
"strings"
"testing"
)
// fields is a comparable snapshot of the parts of a Name, used to state
// expected parse results in tests.
type fields struct {
host, namespace, model, tag, build string
digest string
}
// fieldsFromName copies the parts of p into a fields value.
func fieldsFromName(p Name) fields {
	var f fields
	f.host = p.parts[PartHost]
	f.namespace = p.parts[PartNamespace]
	f.model = p.parts[PartModel]
	f.tag = p.parts[PartTag]
	f.build = p.parts[PartBuild]
	f.digest = p.parts[PartDigest]
	return f
}
var testNames = map[string]fields{
"mistral:latest": {model: "mistral", tag: "latest"},
"mistral": {model: "mistral"},
"mistral:30B": {model: "mistral", tag: "30B"},
"mistral:7b": {model: "mistral", tag: "7b"},
"mistral:7b+Q4_0": {model: "mistral", tag: "7b", build: "Q4_0"},
"mistral+KQED": {model: "mistral", build: "KQED"},
"mistral.x-3:7b+Q4_0": {model: "mistral.x-3", tag: "7b", build: "Q4_0"},
"mistral:7b+q4_0": {model: "mistral", tag: "7b", build: "q4_0"},
"llama2": {model: "llama2"},
"user/model": {namespace: "user", model: "model"},
"example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"},
"example.com/ns/mistral:7b+X": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"},
// invalid digest
"mistral:latest@invalid256-": {},
"mistral:latest@-123": {},
"mistral:latest@!-123": {},
"mistral:latest@1-!": {},
"mistral:latest@": {},
// resolved
"x@sha123-1": {model: "x", digest: "sha123-1"},
"@sha456-2": {digest: "sha456-2"},
"@@sha123-1": {},
// preserves case for build
"x+b": {model: "x", build: "b"},
// invalid (includes fuzzing trophies)
" / / : + ": {},
" / : + ": {},
" : + ": {},
" + ": {},
" : ": {},
" / ": {},
" /": {},
"/ ": {},
"/": {},
":": {},
"+": {},
// (".") in namespace is not allowed
"invalid.com/7b+x": {},
"invalid:7b+Q4_0:latest": {},
"in valid": {},
"invalid/y/z/foo": {},
"/0": {},
"0 /0": {},
"0 /": {},
"0/": {},
":/0": {},
"+0/00000": {},
"0+.\xf2\x80\xf6\x9d00000\xe5\x99\xe6\xd900\xd90\xa60\x91\xdc0\xff\xbf\x99\xe800\xb9\xdc\xd6\xc300\x970\xfb\xfd0\xe0\x8a\xe1\xad\xd40\x9700\xa80\x980\xdd0000\xb00\x91000\xfe0\x89\x9b\x90\x93\x9f0\xe60\xf7\x84\xb0\x87\xa5\xff0\xa000\x9a\x85\xf6\x85\xfe\xa9\xf9\xe9\xde00\xf4\xe0\x8f\x81\xad\xde00\xd700\xaa\xe000000\xb1\xee0\x91": {},
"0//0": {},
"m+^^^": {},
"file:///etc/passwd": {},
"file:///etc/passwd:latest": {},
"file:///etc/passwd:latest+u": {},
":x": {},
"+x": {},
"x+": {},
// Disallow ("\.+") in any part to prevent path traversal anywhere
// we convert the name to a path.
"../etc/passwd": {},
".../etc/passwd": {},
"./../passwd": {},
"./0+..": {},
strings.Repeat("a", MaxNamePartLen): {model: strings.Repeat("a", MaxNamePartLen)},
strings.Repeat("a", MaxNamePartLen+1): {},
}
// TestNameConsecutiveDots tests that consecutive dots are not allowed in a
// name, to avoid path traversal. There also are some tests in testNames, but
// this test is more exhaustive and exists to emphasize the importance of
// preventing path traversal.
func TestNameConsecutiveDots(t *testing.T) {
	for n := 1; n < 10; n++ {
		s := strings.Repeat(".", n)
		got := ParseName(s, FillNothing).DisplayLong()
		if n == 1 {
			// A single dot parses and prints back unchanged.
			if got != s {
				t.Errorf("ParseName(%q) = %q; want %q", s, got, s)
			}
			continue
		}
		if got != "" {
			t.Errorf("ParseName(%q) = %q; want empty string", s, got)
		}
	}
}
// TestNameParts checks that Name.parts has exactly NumParts elements.
func TestNameParts(t *testing.T) {
	var name Name
	want := int(NumParts)
	if got := len(name.parts); got != want {
		t.Errorf("Parts() = %d; want %d", got, want)
	}
}
// TestNamePartString checks PartKind.String for an undefined kind and for
// every kind listed in kindNames.
func TestNamePartString(t *testing.T) {
	const unknown = "Unknown"
	if got := PartKind(-2).String(); got != unknown {
		t.Errorf("Unknown part = %q; want %q", got, unknown)
	}
	for kind, want := range kindNames {
		got := kind.String()
		if got != want {
			t.Errorf("%s = %q; want %q", kind, got, want)
		}
	}
}
// TestParseName parses every entry of testNames, with and without an
// http(s) prefix, and checks both the parsed parts and that the display
// string parses back to an equal Name.
func TestParseName(t *testing.T) {
	// We should get the same results with or without the http(s)
	// prefixes.
	prefixes := []string{"", "https://", "http://"}
	for baseName, want := range testNames {
		for _, prefix := range prefixes {
			s := prefix + baseName
			t.Run(s, func(t *testing.T) {
				name := ParseName(s, FillNothing)
				if got := fieldsFromName(name); got != want {
					t.Errorf("ParseName(%q) = %q; want %q", s, got, want)
				}
				// test round-trip
				reparsed := ParseName(name.DisplayLong(), FillNothing)
				if !reparsed.EqualFold(name) {
					t.Errorf("ParseName(%q).String() = %s; want %s", s, name.DisplayLong(), baseName)
				}
			})
		}
	}
}
// TestParseNameFill checks that ParseName fills missing parts from the fill
// string, leaves invalid input as the zero Name, and panics on an invalid
// fill.
func TestParseNameFill(t *testing.T) {
	type fillCase struct {
		in   string
		fill string
		want string
	}
	cases := []fillCase{
		{"mistral", "example.com/library/?:latest+Q4_0", "example.com/library/mistral:latest+Q4_0"},
		{"mistral", "example.com/library/?:latest", "example.com/library/mistral:latest"},
		{"llama2:x", "example.com/library/?:latest+Q4_0", "example.com/library/llama2:x+Q4_0"},

		// Invalid
		{"", "example.com/library/?:latest+Q4_0", ""},
		{"llama2:?", "example.com/library/?:latest+Q4_0", ""},
	}
	for _, tc := range cases {
		t.Run(tc.in, func(t *testing.T) {
			got := ParseName(tc.in, tc.fill).DisplayLong()
			if got != tc.want {
				t.Errorf("ParseName(%q, %q) = %q; want %q", tc.in, tc.fill, got, tc.want)
			}
		})
	}

	t.Run("invalid fill", func(t *testing.T) {
		defer func() {
			if r := recover(); r == nil {
				t.Fatal("expected panic")
			}
		}()
		ParseName("x", "^")
	})
}
// TestParseNameHTTPDoublePrefixStrip checks that only one scheme prefix is
// stripped: a name that starts with two of them must not parse as valid.
func TestParseNameHTTPDoublePrefixStrip(t *testing.T) {
	for _, s := range []string{
		"http://https://valid.com/valid/valid:latest",
		"https://http://valid.com/valid/valid:latest",
	} {
		t.Run(s, func(t *testing.T) {
			if name := ParseName(s, FillNothing); name.IsValid() {
				t.Errorf("expected invalid path; got %#v", name)
			}
		})
	}
}
// TestCompleteWithAndWithoutBuild checks IsComplete and IsCompleteNoBuild
// on a table of names, and that parsing plus IsComplete does not allocate.
func TestCompleteWithAndWithoutBuild(t *testing.T) {
	type completeCase struct {
		in              string
		complete        bool
		completeNoBuild bool
	}
	cases := []completeCase{
		{"", false, false},
		{"incomplete/mistral:7b+x", false, false},
		{"incomplete/mistral:7b+Q4_0", false, false},
		{"incomplete:7b+x", false, false},
		{"complete.com/x/mistral:latest+Q4_0", true, true},
		{"complete.com/x/mistral:latest", false, true},
	}
	for _, tc := range cases {
		t.Run(tc.in, func(t *testing.T) {
			name := ParseName(tc.in, FillNothing)
			t.Logf("ParseName(%q) = %#v", tc.in, name)
			if got := name.IsComplete(); got != tc.complete {
				t.Errorf("Complete(%q) = %v; want %v", tc.in, got, tc.complete)
			}
			if got := name.IsCompleteNoBuild(); got != tc.completeNoBuild {
				t.Errorf("CompleteNoBuild(%q) = %v; want %v", tc.in, got, tc.completeNoBuild)
			}
		})
	}

	// IsComplete inspects a slice of the parts, but that should be
	// inlined, preventing any allocations or escaping to the heap.
	check := func() {
		keep(ParseName("complete.com/x/mistral:latest+Q4_0", FillNothing).IsComplete())
	}
	if allocs := testing.AllocsPerRun(1000, check); allocs > 0 {
		t.Errorf("Complete allocs = %v; want 0", allocs)
	}
}
// TestNameLogValue checks that logging a Name through slog writes its
// GoString form.
func TestNameLogValue(t *testing.T) {
	for _, s := range []string{
		"example.com/library/mistral:latest+Q4_0",
		"mistral:latest",
		"mistral:7b+Q4_0",
	} {
		t.Run(s, func(t *testing.T) {
			var out bytes.Buffer
			logger := slog.New(slog.NewTextHandler(&out, nil))
			name := ParseName(s, FillNothing)
			logger.Info("", "name", name)

			want := fmt.Sprintf("name=%s", name.GoString())
			if got := out.String(); !strings.Contains(got, want) {
				t.Errorf("expected log output to contain %q; got %q", want, got)
			}
		})
	}
}
// TestNameGoString checks the %#v formatting of parsed Names: every part is
// shown, with missing parts replaced by "?".
func TestNameGoString(t *testing.T) {
	type goStringCase struct {
		name         string
		in           string
		wantGoString string // default is in
	}
	cases := []goStringCase{
		{
			name:         "Complete Name",
			in:           "example.com/library/mistral:latest+Q4_0",
			wantGoString: "example.com/library/mistral:latest+Q4_0@?",
		},
		{
			name:         "Short Name",
			in:           "mistral:latest",
			wantGoString: "?/?/mistral:latest+?@?",
		},
		{
			name:         "Long Name",
			in:           "library/mistral:latest",
			wantGoString: "?/library/mistral:latest+?@?",
		},
		{
			name:         "Case Preserved",
			in:           "Library/Mistral:Latest",
			wantGoString: "?/Library/Mistral:Latest+?@?",
		},
		{
			name:         "With digest",
			in:           "Library/Mistral:Latest@sha256-123456",
			wantGoString: "?/Library/Mistral:Latest+?@sha256-123456",
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			parsed := ParseName(tc.in, FillNothing)
			want := cmp.Or(tc.wantGoString, tc.in)
			if got := fmt.Sprintf("%#v", parsed); got != want {
				t.Errorf("GoString() = %q; want %q", got, want)
			}
		})
	}
}
// TestDisplayLongest checks that DisplayLongest prints host, namespace,
// model and tag, and leaves out the build.
func TestDisplayLongest(t *testing.T) {
	const want = "example.com/library/mistral:latest"
	name := ParseName("example.com/library/mistral:latest+Q4_0", FillNothing)
	if got := name.DisplayLongest(); got != want {
		t.Errorf("got = %q; want %q", got, want)
	}
}
// TestDisplayShortest checks the masked display string for a table of
// names and masks, including the cases that are expected to panic.
func TestDisplayShortest(t *testing.T) {
	type shortestCase struct {
		in        string
		mask      string
		want      string
		wantPanic bool
	}
	cases := []shortestCase{
		{"example.com/library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
		{"example.com/library/mistral:latest+Q4_0", "example.com/_/_:latest", "library/mistral", false},
		{"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false},
		{"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false},

		// case-insensitive
		{"Example.com/library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
		{"example.com/Library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
		{"example.com/library/Mistral:latest+Q4_0", "example.com/library/_:latest", "Mistral", false},
		{"example.com/library/mistral:Latest+Q4_0", "example.com/library/_:latest", "mistral", false},
		{"example.com/library/mistral:Latest+q4_0", "example.com/library/_:latest", "mistral", false},

		// zero value
		{"", MaskDefault, "", true},

		// invalid mask
		{"example.com/library/mistral:latest+Q4_0", "example.com/mistral", "", true},

		// DefaultMask
		{"registry.ollama.ai/library/mistral:latest+Q4_0", MaskDefault, "mistral", false},

		// Auto-Fill
		{"x", "example.com/library/_:latest", "x", false},
		{"x", "example.com/library/_:latest+Q4_0", "x", false},
		{"x/y:z", "a.com/library/_:latest+Q4_0", "x/y:z", false},
		{"x/y:z", "a.com/library/_:latest+Q4_0", "x/y:z", false},
	}
	for _, tc := range cases {
		t.Run("", func(t *testing.T) {
			defer func() {
				// Only swallow a panic when one is expected; an
				// unexpected panic must still fail the test run.
				if !tc.wantPanic {
					return
				}
				if recover() == nil {
					t.Errorf("expected panic")
				}
			}()
			name := ParseName(tc.in, FillNothing)
			t.Logf("ParseName(%q) = %#v", tc.in, name)
			if got := name.DisplayShortest(tc.mask); got != tc.want {
				t.Errorf("got = %q; want %q", got, tc.want)
			}
		})
	}
}
// TestParseNameAllocs checks that ParseName does not allocate.
//
// NOTE(review): the input has a "." in what parses as the namespace, which
// testNames lists as invalid (see "invalid.com/7b+x"), so this appears to
// measure the rejection path only — confirm whether a valid name was meant.
func TestParseNameAllocs(t *testing.T) {
	parse := func() {
		keep(ParseName("example.com/mistral:7b+Q4_0", FillNothing))
	}
	if allocs := testing.AllocsPerRun(1000, parse); allocs > 0 {
		t.Errorf("ParseName allocs = %v; want 0", allocs)
	}
}
func BenchmarkParseName(b *testing.B) {
b.ReportAllocs()
for range b.N {
keep(ParseName("example.com/mistral:7b+Q4_0", FillNothing))
}
}
// FuzzParseNameFromFilepath fuzzes ParseNameFromFilepath. A path containing
// ".." must never produce a non-zero Name, and a Name must be valid exactly
// when it is non-zero.
func FuzzParseNameFromFilepath(f *testing.F) {
	for _, seed := range []string{
		"example.com/library/mistral/7b/Q4_0",
		"example.com/../mistral/7b/Q4_0",
		"example.com/x/../7b/Q4_0",
		"example.com/x/../7b",
	} {
		f.Add(seed)
	}
	f.Fuzz(func(t *testing.T, s string) {
		name := ParseNameFromFilepath(s, FillNothing)
		if !name.IsZero() && strings.Contains(s, "..") {
			t.Fatalf("non-zero value for path with '..': %q", s)
		}
		if name.IsValid() == name.IsZero() {
			t.Errorf("expected valid path to be non-zero value; got %#v", name)
		}
	})
}
// FuzzParseName fuzzes ParseName. Inputs containing ".." must be rejected,
// rejected inputs must yield the zero Name, no part may exceed
// MaxNamePartLen, and accepted inputs must round-trip through DisplayLong.
func FuzzParseName(f *testing.F) {
	for _, seed := range []string{
		"example.com/mistral:7b+Q4_0",
		"example.com/mistral:7b+q4_0",
		"example.com/mistral:7b+x",
		"x/y/z:8n+I",
		":x",
		"@sha256-123456",
		"example.com/mistral:latest+Q4_0@sha256-123456",
		":@!@",
		"...",
	} {
		f.Add(seed)
	}
	f.Fuzz(func(t *testing.T, s string) {
		parsed := ParseName(s, FillNothing)
		if !parsed.IsZero() && strings.Contains(s, "..") {
			t.Fatalf("non-zero value for path with '..': %q", s)
		}
		if !parsed.IsValid() && !parsed.IsResolved() {
			if !parsed.EqualFold(Name{}) {
				t.Errorf("expected invalid path to be zero value; got %#v", parsed)
			}
			t.Skipf("invalid path: %q", s)
		}
		for _, part := range parsed.parts {
			if len(part) > MaxNamePartLen {
				t.Errorf("part too long: %q", part)
			}
		}
		if !strings.EqualFold(parsed.DisplayLong(), s) {
			t.Errorf("String() did not round-trip with case insensitivity: %q\ngot = %q\nwant = %q", s, parsed.DisplayLong(), s)
		}
		reparsed := ParseName(parsed.DisplayLong(), FillNothing)
		if !parsed.EqualFold(reparsed) {
			t.Errorf("round-trip mismatch: %+v != %+v", parsed, reparsed)
		}
	})
}
// TestNameStringAllocs checks that DisplayLong performs at most one
// allocation per call.
func TestNameStringAllocs(t *testing.T) {
	name := ParseName("example.com/ns/mistral:latest+Q4_0", FillNothing)
	allocs := testing.AllocsPerRun(1000, func() {
		keep(name.DisplayLong())
	})
	// One allocation is allowed, so the message states that limit.
	if allocs > 1 {
		t.Errorf("String allocs = %v; want <= 1", allocs)
	}
}
// TestNamePath checks URLPath for complete, incomplete and empty names.
func TestNamePath(t *testing.T) {
	type pathCase struct {
		in   string
		want string
	}
	cases := []pathCase{
		{"example.com/library/mistral:latest+Q4_0", "example.com/library/mistral:latest"},

		// incomplete
		{"example.com/library/mistral:latest", "example.com/library/mistral:latest"},
		{"", ""},
	}
	for _, tc := range cases {
		t.Run(tc.in, func(t *testing.T) {
			name := ParseName(tc.in, FillNothing)
			t.Logf("ParseName(%q) = %#v", tc.in, name)
			got := name.URLPath()
			if got != tc.want {
				t.Errorf("got = %q; want %q", got, tc.want)
			}
		})
	}
}
// TestNameFilepath checks Filepath and FilepathNoBuild. Results are
// converted to forward slashes before comparing, so the expectations hold
// on any OS.
func TestNameFilepath(t *testing.T) {
	type filepathCase struct {
		in          string
		want        string
		wantNoBuild string
	}
	cases := []filepathCase{
		{
			in:          "example.com/library/mistral:latest+Q4_0",
			want:        "example.com/library/mistral/latest/Q4_0",
			wantNoBuild: "example.com/library/mistral/latest",
		},
		{
			in:          "Example.Com/Library/Mistral:Latest+Q4_0",
			want:        "example.com/library/mistral/latest/Q4_0",
			wantNoBuild: "example.com/library/mistral/latest",
		},
		{
			in:          "Example.Com/Library/Mistral:Latest+Q4_0",
			want:        "example.com/library/mistral/latest/Q4_0",
			wantNoBuild: "example.com/library/mistral/latest",
		},
		{
			in:          "example.com/library/mistral:latest",
			want:        "example.com/library/mistral/latest",
			wantNoBuild: "example.com/library/mistral/latest",
		},
		{
			in:          "",
			want:        "",
			wantNoBuild: "",
		},
	}
	for _, tc := range cases {
		t.Run(tc.in, func(t *testing.T) {
			name := ParseName(tc.in, FillNothing)
			t.Logf("ParseName(%q) = %#v", tc.in, name)

			if got := filepath.ToSlash(name.Filepath()); got != tc.want {
				t.Errorf("got = %q; want %q", got, tc.want)
			}
			if got := filepath.ToSlash(name.FilepathNoBuild()); got != tc.wantNoBuild {
				t.Errorf("got = %q; want %q", got, tc.wantNoBuild)
			}
		})
	}
}
// TestParseNameFilepath checks ParseNameFromFilepath. Inputs are written
// with "/" and converted to the OS separator before parsing. The expected
// Name is obtained by parsing want with the same fill.
func TestParseNameFilepath(t *testing.T) {
	type filepathCase struct {
		in   string
		fill string // default is FillNothing
		want string
	}
	cases := []filepathCase{
		{
			in:   "example.com/library/mistral/latest/Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "example.com/library/mistral/latest",
			fill: "?/?/?:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "example.com/library/mistral",
			fill: "?/?/?:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "example.com/library",
			want: "",
		},
		{
			in:   "example.com/",
			want: "",
		},
		{
			in:   "example.com/^/mistral/latest/Q4_0",
			want: "",
		},
		{
			in:   "example.com/library/mistral/../Q4_0",
			want: "",
		},
		{
			in:   "example.com/library/mistral/latest/Q4_0/extra",
			want: "",
		},
	}
	sep := string(filepath.Separator)
	for _, tc := range cases {
		t.Run(tc.in, func(t *testing.T) {
			path := strings.ReplaceAll(tc.in, "/", sep)
			fill := cmp.Or(tc.fill, FillNothing)
			want := ParseName(tc.want, fill)
			got := ParseNameFromFilepath(path, fill)
			if !got.EqualFold(want) {
				t.Errorf("got = %q; want %q", got.DisplayLong(), tc.want)
			}
		})
	}
}
// TestParseNameFromPath checks ParseNameFromURLPath with and without a
// leading "/", with and without a fill.
func TestParseNameFromPath(t *testing.T) {
	type urlPathCase struct {
		in   string
		want string
		fill string // default is FillNothing
	}
	cases := []urlPathCase{
		{
			in:   "example.com/library/mistral:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "/example.com/library/mistral:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "/example.com/library/mistral",
			want: "example.com/library/mistral",
		},
		{
			in:   "/example.com/library/mistral",
			fill: "?/?/?:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "/example.com/library",
			want: "",
		},
		{
			in:   "/example.com/",
			want: "",
		},
		{
			in:   "/example.com/^/mistral/latest",
			want: "",
		},
	}
	for _, tc := range cases {
		t.Run(tc.in, func(t *testing.T) {
			fill := cmp.Or(tc.fill, FillNothing)
			got := ParseNameFromURLPath(tc.in, fill)
			if got.DisplayLong() != tc.want {
				t.Errorf("got = %q; want %q", got.DisplayLong(), tc.want)
			}
		})
	}
}
// ExampleName_MapHash stores the MapHash of several parsed names as
// map keys and prints how many distinct keys result.
func ExampleName_MapHash() {
	inputs := []string{
		// key 1
		"mistral:latest+q4",
		"miSTRal:latest+Q4",
		"mistral:LATest+Q4",
		// key 2
		"mistral:LATest",
	}

	seen := make(map[uint64]bool)
	for _, s := range inputs {
		seen[ParseName(s, FillNothing).MapHash()] = true
	}
	fmt.Println(len(seen))
	// Output:
	// 2
}
// ExampleName_CompareFold_sort sorts a slice of names using
// Name.CompareFold as the comparison function and prints each one in
// its DisplayLong form.
func ExampleName_CompareFold_sort() {
	var names []Name
	for _, s := range []string{
		"mistral:latest",
		"mistRal:7b+q4",
		"MIstral:7b",
	} {
		names = append(names, ParseName(s, FillNothing))
	}

	slices.SortFunc(names, Name.CompareFold)

	for i := range names {
		fmt.Println(names[i].DisplayLong())
	}
	// Output:
	// MIstral:7b
	// mistRal:7b+q4
	// mistral:latest
}
// ExampleName_completeAndResolved prints what IsComplete, IsResolved
// and Digest report for three inputs: a full name with a digest, a
// full name without one, and a bare digest.
func ExampleName_completeAndResolved() {
	inputs := []string{
		"x/y/z:latest+q4_0@sha123-1",
		"x/y/z:latest+q4_0",
		"@sha123-1",
	}
	for i := range inputs {
		n := ParseName(inputs[i], FillNothing)
		complete := n.IsComplete()
		resolved := n.IsResolved()
		digest := n.Digest()
		fmt.Printf("complete:%v resolved:%v digest:%s\n", complete, resolved, digest)
	}
	// Output:
	// complete:true resolved:true digest:sha123-1
	// complete:true resolved:false digest:
	// complete:false resolved:true digest:sha123-1
}
// ExampleName_DisplayShortest prints the result of DisplayShortest for
// one name under several masks, then for a second name with an empty
// mask.
func ExampleName_DisplayShortest() {
	custom := ParseName("example.com/jmorganca/mistral:latest+Q4_0", FillNothing)
	masks := []string{
		"example.com/jmorganca/_:latest",
		"example.com/_/_:latest",
		"example.com/_/_:_",
		"_/_/_:_",
	}
	for _, mask := range masks {
		fmt.Println(custom.DisplayShortest(mask))
	}

	// Default
	standard := ParseName("registry.ollama.ai/library/mistral:latest+Q4_0", FillNothing)
	fmt.Println(standard.DisplayShortest(""))

	// Output:
	// mistral
	// jmorganca/mistral
	// jmorganca/mistral:latest
	// example.com/jmorganca/mistral:latest
	// mistral
}
// keep is a generic identity function: it returns its argument
// unchanged.
//
// NOTE(review): presumably used so that a computed value counts as
// "used" at call sites in tests or benchmarks; confirm against callers.
func keep[T any](v T) T {
	return v
}

View File

@@ -1,2 +0,0 @@
go test fuzz v1
string("/0")

View File

@@ -1,2 +0,0 @@
go test fuzz v1
string("0//0")

View File

@@ -1,2 +0,0 @@
go test fuzz v1
string("0 /0")

View File

@@ -1,2 +0,0 @@
go test fuzz v1
string("+0/00000")

View File

@@ -1,2 +0,0 @@
go test fuzz v1
string(":")

View File

@@ -1,2 +0,0 @@
go test fuzz v1
string("0+.\xf2\x80\xf6\x9d00000\xe5\x99\xe6\xd900\xd90\xa60\x91\xdc0\xff\xbf\x99\xe800\xb9\xdc\xd6\xc300\x970\xfb\xfd0\xe0\x8a\xe1\xad\xd40\x9700\xa80\x980\xdd0000\xb00\x91000\xfe0\x89\x9b\x90\x93\x9f0\xe60\xf7\x84\xb0\x87\xa5\xff0\xa000\x9a\x85\xf6\x85\xfe\xa9\xf9\xe9\xde00\xf4\xe0\x8f\x81\xad\xde00\xd700\xaa\xe000000\xb1\xee0\x91")

View File

@@ -1,15 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
// Package structs contains the Incomparable type.
package structs
// Incomparable is a zero-width incomparable type. If added as the
// first field in a struct, it marks that struct as not comparable
// (can't do == or be a map key) and usually doesn't add any width to
// the struct (unless the struct has only small fields).
//
// It works because Go func values are not comparable, so an array of
// them is not comparable either, and neither is any struct that
// contains such an array. The array length of zero means the field
// holds no elements.
//
// By making a struct incomparable, you can prevent misuse (prevent
// people from using ==), but also you can shrink generated binaries,
// as the compiler can omit equality funcs from the binary.
//
// Typical use is as a blank first field:
//
//	type T struct {
//		_ structs.Incomparable
//		// other fields
//	}
type Incomparable [0]func()