Mirror of https://github.com/ollama/ollama.git (synced 2026-01-06 06:31:14 -05:00)

Compare commits: 48 commits
Commit SHAs in this comparison:

2bed62926e, aad8d128a0, ec1acbb867, e4859c4563, 8e30eb26bd, 0b5c589ca2,
65fadddc85, ed5fb088c4, f81f308118, b1390a7b37, 11d83386a5, bb31def011,
41e03ede95, 7fea1ecdf6, 054894271d, 6fef042f0b, 5c0c2d1d09, 37f9c8ad99,
2a80f55e2a, 421c878a2d, 36666c2142, 85801317d1, 2ed0d65948, d459dc4ad1,
40bc4622ef, c0f818a07a, 8671fdeda6, 2619850fb4, 8feb97dc0d, 4e1ff6dcbb,
8589d752ac, de4ded68b0, 9b5a3c5991, 00b0699c75, 993cf8bf55, 7bb7cb8a60,
b123be5b71, ddf5c09a9b, 5f73c08729, f503a848c2, 36a6daccab, ceb0e26e5e,
284e02bed0, 3450a57d4a, 592dae31c8, 2010cbc5fa, ac0801eced, ad66e5b060
.github/workflows/release.yaml (vendored, 13 changes)

@@ -311,29 +311,18 @@ jobs:
- uses: actions/download-artifact@v4
with:
name: generate-windows-cpu
path: |
llm/build
dist/windows-amd64
- uses: actions/download-artifact@v4
with:
name: generate-windows-cuda
path: |
llm/build
dist/windows-amd64
- uses: actions/download-artifact@v4
with:
name: windows-cuda-deps
path: dist/deps
- uses: actions/download-artifact@v4
with:
name: windows-rocm-deps
path: dist/deps
- uses: actions/download-artifact@v4
with:
name: generate-windows-rocm
path: |
llm/build
dist/windows-amd64
- run: dir llm/build
- run: |
$gopath=(get-command go).source | split-path -parent
@@ -342,8 +331,6 @@ jobs:
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
$env:PATH="$gopath;$env:PATH"
$env:OLLAMA_SKIP_GENERATE="1"
$env:NVIDIA_DIR=$(resolve-path ".\dist\deps")
$env:HIP_PATH=$(resolve-path ".\dist\deps")
& .\scripts\build_windows.ps1
- uses: actions/upload-artifact@v4
with:

.github/workflows/test.yaml (vendored, 15 changes)
@@ -1,5 +1,15 @@
name: test

concurrency:
# For PRs, later CI runs preempt previous ones. e.g. a force push on a PR
# cancels running CI jobs and starts all new ones.
#
# For non-PR pushes, concurrency.group needs to be unique for every distinct
# CI run we want to have happen. Use run_id, which in practice means all
# non-PR CI runs will be allowed to run without preempting each other.
group: ${{ github.workflow }}-$${{ github.pull_request.number || github.run_id }}
cancel-in-progress: true

on:
pull_request:
paths:
@@ -21,7 +31,9 @@ jobs:
- id: changes
run: |
changed() {
git diff-tree -r --no-commit-id --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
git diff-tree -r --no-commit-id --name-only \
$(git merge-base ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}) \
${{ github.event.pull_request.head.sha }} \
| xargs python3 -c "import sys; print(any([x.startswith('$1') for x in sys.argv[1:]]))"
}

@@ -283,7 +295,6 @@ jobs:
with:
go-version-file: go.mod
cache: true
- run: go get
- run: |
case ${{ matrix.arch }} in
amd64) echo ARCH=x86_64 ;;

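The rewritten changed() helper above diffs against the merge base of the pull request, so files that are already on the target branch no longer count as changed; the python one-liner at the end only asks whether any modified path starts with a given prefix. A standalone Go sketch of that prefix test (illustrative only, not part of the repository):

```go
package main

import (
	"fmt"
	"strings"
)

// anyHasPrefix mirrors the python one-liner in the workflow: it reports
// whether any changed path starts with the given prefix.
func anyHasPrefix(paths []string, prefix string) bool {
	for _, p := range paths {
		if strings.HasPrefix(p, prefix) {
			return true
		}
	}
	return false
}

func main() {
	changed := []string{"llm/generate/gen_windows.ps1", "docs/windows.md"}
	fmt.Println(anyHasPrefix(changed, "llm/")) // true
}
```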
@@ -396,8 +396,10 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
func DefaultOptions() Options {
return Options{
// options set on request to runner
NumPredict: -1,
NumKeep: 0,
NumPredict: -1,

// set a minimal num_keep to avoid issues on context shifts
NumKeep: 4,
Temperature: 0.8,
TopK: 40,
TopP: 0.9,

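With these defaults, generation stays unbounded (num_predict: -1) but at least four tokens are now retained on a context shift (num_keep: 4) instead of none. Request-level options still win over the defaults; a rough sketch of overriding them through the Go client from this repository (client usage assumed from the api package, values are examples):

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	req := &api.GenerateRequest{
		Model:  "llama3",
		Prompt: "Why is the sky blue?",
		// request-level options override DefaultOptions()
		Options: map[string]interface{}{
			"num_keep":    8,   // keep more tokens across context shifts
			"num_predict": 256, // cap generation instead of the unlimited -1 default
		},
	}

	err = client.Generate(context.Background(), req, func(resp api.GenerateResponse) error {
		fmt.Print(resp.Response)
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}
```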
@@ -92,12 +92,8 @@ Source: "..\dist\windows-amd64\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64
Source: "..\dist\windows-amd64\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
; Assumes v5.7, may need adjustments for v6
#if GetEnv("HIP_PATH") != ""
Source: "{#GetEnv('HIP_PATH')}\bin\hipblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
Source: "{#GetEnv('HIP_PATH')}\bin\rocblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
; amdhip64.dll dependency comes from the driver and must be installed already
Source: "{#GetEnv('HIP_PATH')}\bin\rocblas\library\*"; DestDir: "{app}\rocm\rocblas\library\"; Flags: ignoreversion
#if DirExists("..\dist\windows-amd64\rocm")
Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
#endif

@@ -133,7 +129,7 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi

;FinishedHeadingLabel=Run your first model
;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama2
;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama3
;ClickFinish=%n

[Registry]

cmd/cmd.go (209 changes)
@@ -17,6 +17,7 @@ import (
"os"
"os/signal"
"path/filepath"
"regexp"
"runtime"
"strings"
"syscall"
@@ -53,8 +54,6 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
p := progress.NewProgress(os.Stderr)
defer p.Stop()

bars := make(map[string]*progress.Bar)

modelfile, err := os.ReadFile(filename)
if err != nil {
return err
@@ -95,95 +94,16 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return err
}

// TODO make this work w/ adapters
if fi.IsDir() {
tf, err := os.CreateTemp("", "ollama-tf")
// this is likely a safetensors or pytorch directory
// TODO make this work w/ adapters
tempfile, err := tempZipFiles(path)
if err != nil {
return err
}
defer os.RemoveAll(tf.Name())
defer os.RemoveAll(tempfile)

zf := zip.NewWriter(tf)

files := []string{}

tfiles, err := filepath.Glob(filepath.Join(path, "pytorch_model-*.bin"))
if err != nil {
return err
} else if len(tfiles) == 0 {
tfiles, err = filepath.Glob(filepath.Join(path, "model-*.safetensors"))
if err != nil {
return err
}
}

files = append(files, tfiles...)

if len(files) == 0 {
return fmt.Errorf("no models were found in '%s'", path)
}

// add the safetensor/torch config file + tokenizer
files = append(files, filepath.Join(path, "config.json"))
files = append(files, filepath.Join(path, "params.json"))
files = append(files, filepath.Join(path, "added_tokens.json"))
files = append(files, filepath.Join(path, "tokenizer.model"))

for _, fn := range files {
f, err := os.Open(fn)

// just skip whatever files aren't there
if os.IsNotExist(err) {
if strings.HasSuffix(fn, "tokenizer.model") {
// try the parent dir before giving up
parentDir := filepath.Dir(path)
newFn := filepath.Join(parentDir, "tokenizer.model")
f, err = os.Open(newFn)
if os.IsNotExist(err) {
continue
} else if err != nil {
return err
}
} else {
continue
}
} else if err != nil {
return err
}

fi, err := f.Stat()
if err != nil {
return err
}

h, err := zip.FileInfoHeader(fi)
if err != nil {
return err
}

h.Name = filepath.Base(fn)
h.Method = zip.Store

w, err := zf.CreateHeader(h)
if err != nil {
return err
}

_, err = io.Copy(w, f)
if err != nil {
return err
}

}

if err := zf.Close(); err != nil {
return err
}

if err := tf.Close(); err != nil {
return err
}
path = tf.Name()
path = tempfile
}

digest, err := createBlob(cmd, client, path)
@@ -191,10 +111,17 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return err
}

modelfile = bytes.ReplaceAll(modelfile, []byte(c.Args), []byte("@"+digest))
name := c.Name
if c.Name == "model" {
name = "from"
}

re := regexp.MustCompile(fmt.Sprintf(`(?im)^(%s)\s+%s\s*$`, name, c.Args))
modelfile = re.ReplaceAll(modelfile, []byte("$1 @"+digest))
}
}

bars := make(map[string]*progress.Bar)
fn := func(resp api.ProgressResponse) error {
if resp.Digest != "" {
spinner.Stop()
@@ -228,6 +155,114 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return nil
}

func tempZipFiles(path string) (string, error) {
tempfile, err := os.CreateTemp("", "ollama-tf")
if err != nil {
return "", err
}
defer tempfile.Close()

zipfile := zip.NewWriter(tempfile)
defer zipfile.Close()

detectContentType := func(path string) (string, error) {
f, err := os.Open(path)
if err != nil {
return "", err
}
defer f.Close()

var b bytes.Buffer
b.Grow(512)

if _, err := io.CopyN(&b, f, 512); err != nil && !errors.Is(err, io.EOF) {
return "", err
}

contentType, _, _ := strings.Cut(http.DetectContentType(b.Bytes()), ";")
return contentType, nil
}

glob := func(pattern, contentType string) ([]string, error) {
matches, err := filepath.Glob(pattern)
if err != nil {
return nil, err
}

for _, safetensor := range matches {
if ct, err := detectContentType(safetensor); err != nil {
return nil, err
} else if ct != contentType {
return nil, fmt.Errorf("invalid content type: expected %s for %s", ct, safetensor)
}
}

return matches, nil
}

var files []string
if st, _ := glob(filepath.Join(path, "model*.safetensors"), "application/octet-stream"); len(st) > 0 {
// safetensors files might be unresolved git lfs references; skip if they are
// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
files = append(files, st...)
} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
// pytorch files might also be unresolved git lfs references; skip if they are
// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
files = append(files, pt...)
} else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/octet-stream"); len(pt) > 0 {
// pytorch files might also be unresolved git lfs references; skip if they are
// covers consolidated.x.pth, consolidated.pth
files = append(files, pt...)
} else {
return "", errors.New("no safetensors or torch files found")
}

// add configuration files, json files are detected as text/plain
js, err := glob(filepath.Join(path, "*.json"), "text/plain")
if err != nil {
return "", err
}
files = append(files, js...)

if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
// add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
// tokenizer.model might be a unresolved git lfs reference; error if it is
files = append(files, tks...)
} else if tks, _ := glob(filepath.Join(path, "**/tokenizer.model"), "text/plain"); len(tks) > 0 {
// some times tokenizer.model is in a subdirectory (e.g. meta-llama/Meta-Llama-3-8B)
files = append(files, tks...)
}

for _, file := range files {
f, err := os.Open(file)
if err != nil {
return "", err
}
defer f.Close()

fi, err := f.Stat()
if err != nil {
return "", err
}

zfi, err := zip.FileInfoHeader(fi)
if err != nil {
return "", err
}

zf, err := zipfile.CreateHeader(zfi)
if err != nil {
return "", err
}

if _, err := io.Copy(zf, f); err != nil {
return "", err
}
}

return tempfile.Name(), nil
}

func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
bin, err := os.Open(path)
if err != nil {

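The content-type check in tempZipFiles is what filters out unresolved git-lfs pointers: real pytorch_model*.bin weights sniff as "application/zip" and safetensors as "application/octet-stream", while an un-pulled LFS pointer is a short text file. A standalone sketch of the same sniffing signal (the pointer text follows the usual git-lfs pointer layout; the byte values are illustrative):

```go
package main

import (
	"fmt"
	"net/http"
)

func main() {
	// An unresolved git-lfs pointer is a small plain-text file...
	pointer := []byte("version https://git-lfs.github.com/spec/v1\noid sha256:abcd\nsize 123\n")
	// ...while real pytorch weights start with a zip signature.
	weights := []byte{0x50, 0x4b, 0x03, 0x04, 0x00, 0x00, 0x00, 0x00}

	fmt.Println(http.DetectContentType(pointer)) // text/plain; charset=utf-8
	fmt.Println(http.DetectContentType(weights)) // application/zip
}
```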
@@ -14,7 +14,7 @@ As this is a preview release, you should expect a few bugs here and there. If
you run into a problem you can reach out on
[Discord](https://discord.gg/ollama), or file an
[issue](https://github.com/ollama/ollama/issues).
Logs will often be helpful in dianosing the problem (see
Logs will often be helpful in diagnosing the problem (see
[Troubleshooting](#troubleshooting) below)

## System Requirements

@@ -32,9 +32,25 @@ func PayloadsDir() (string, error) {
slog.Error("failed to lookup executable path", "error", err)
return "", err
}

cwd, err := os.Getwd()
if err != nil {
slog.Error("failed to lookup working directory", "error", err)
return "", err
}

var paths []string
for _, root := range []string{appExe, cwd} {
paths = append(paths,
filepath.Join(root),
filepath.Join(root, "windows-"+runtime.GOARCH),
filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
)
}

// Try a few variations to improve developer experience when building from source in the local tree
for _, d := range []string{".", "windows-" + runtime.GOARCH, "dist\\windows-" + runtime.GOARCH} {
candidate := filepath.Join(filepath.Dir(appExe), d, "ollama_runners")
for _, p := range paths {
candidate := filepath.Join(p, "ollama_runners")
_, err := os.Stat(candidate)
if err == nil {
runnersDir = candidate

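The change above makes the payload lookup consider both the executable directory and the current working directory rather than only the executable path. A condensed sketch of that search order (standalone, names are illustrative, not the repository function):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"runtime"
)

// findRunnersDir returns the first directory containing "ollama_runners",
// checking each root directly, then its windows-<arch> and dist layouts.
func findRunnersDir(roots []string) (string, bool) {
	arch := "windows-" + runtime.GOARCH
	for _, root := range roots {
		for _, sub := range []string{".", arch, filepath.Join("dist", arch)} {
			candidate := filepath.Join(root, sub, "ollama_runners")
			if _, err := os.Stat(candidate); err == nil {
				return candidate, true
			}
		}
	}
	return "", false
}

func main() {
	exe, _ := os.Executable()
	cwd, _ := os.Getwd()
	if dir, ok := findRunnersDir([]string{filepath.Dir(exe), cwd}); ok {
		fmt.Println("runners found at", dir)
	} else {
		fmt.Println("no runners directory found")
	}
}
```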
@@ -21,7 +21,7 @@ init_vars() {
# TODO - add additional optimization flags...
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off ${CMAKE_DEFS}"
fi
case $(uname -s) in
case $(uname -s) in
"Darwin")
LIB_EXT="dylib"
WHOLE_ARCHIVE="-Wl,-force_load"

@@ -165,11 +165,11 @@ if [ -d "${CUDA_LIB_DIR}" ]; then
fi
if [ "${ARCH}" == "arm64" ]; then
echo "ARM CPU detected - disabling unsupported AVX instructions"

# ARM-based CPUs such as M1 and Tegra do not support AVX extensions.
#
# CUDA compute < 6.0 lacks proper FP16 support on ARM.
# Disabling has minimal performance effect while maintaining compatibility.
# CUDA compute < 6.0 lacks proper FP16 support on ARM.
# Disabling has minimal performance effect while maintaining compatibility.
ARM64_DEFS="-DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_CUDA_F16=off"
fi
# Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp

@@ -26,16 +26,25 @@ function amdGPUs {
$GPU_LIST -join ';'
}

function init_vars {
$script:SRC_DIR = $(resolve-path "..\..\")
$script:llamacppDir = "../llama.cpp"
if (!$script:SRC_DIR) {
$script:SRC_DIR = $(resolve-path "..\..\")
}
if (!$script:llamacppDir) {
$script:llamacppDir = "../llama.cpp"
}
if (!$script:cmakeTargets) {
$script:cmakeTargets = @("ollama_llama_server")
}
$script:cmakeDefs = @(
"-DBUILD_SHARED_LIBS=on",
"-DLLAMA_NATIVE=off"
)
$script:cmakeTargets = @("ollama_llama_server")
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
$script:ARCH = "amd64" # arm not yet supported.
$script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_runners"
md "$script:DIST_BASE" -ea 0 > $null
if ($env:CGO_CFLAGS -contains "-g") {
$script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
$script:config = "RelWithDebInfo"
@@ -166,137 +175,191 @@ function cleanup {
|
||||
}
|
||||
}
|
||||
|
||||
init_vars
|
||||
git_module_setup
|
||||
apply_patches
|
||||
|
||||
# -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
|
||||
# -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
|
||||
# -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
|
||||
|
||||
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
|
||||
|
||||
if ($null -eq ${env:OLLAMA_SKIP_CPU_GENERATE}) {
|
||||
function build_static() {
|
||||
if ((-not "${env:OLLAMA_SKIP_STATIC_GENERATE}") -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "static"))) {
|
||||
# GCC build for direct linking into the Go binary
|
||||
init_vars
|
||||
# cmake will silently fallback to msvc compilers if mingw isn't in the path, so detect and fail fast
|
||||
# as we need this to be compiled by gcc for golang to be able to link with itx
|
||||
write-host "Checking for MinGW..."
|
||||
# error action ensures we exit on failure
|
||||
get-command gcc
|
||||
get-command mingw32-make
|
||||
$oldTargets = $script:cmakeTargets
|
||||
$script:cmakeTargets = @("llama", "ggml")
|
||||
$script:cmakeDefs = @(
|
||||
"-G", "MinGW Makefiles"
|
||||
"-DCMAKE_C_COMPILER=gcc.exe",
|
||||
"-DCMAKE_CXX_COMPILER=g++.exe",
|
||||
"-DBUILD_SHARED_LIBS=off",
|
||||
"-DLLAMA_NATIVE=off",
|
||||
"-DLLAMA_AVX=off",
|
||||
"-DLLAMA_AVX2=off",
|
||||
"-DLLAMA_AVX512=off",
|
||||
"-DLLAMA_F16C=off",
|
||||
"-DLLAMA_FMA=off")
|
||||
$script:buildDir="../build/windows/${script:ARCH}_static"
|
||||
write-host "Building static library"
|
||||
build
|
||||
$script:cmakeTargets = $oldTargets
|
||||
} else {
|
||||
write-host "Skipping CPU generation step as requested"
|
||||
}
|
||||
}
|
||||
|
||||
function build_cpu() {
|
||||
if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu"))) {
|
||||
# remaining llama.cpp builds use MSVC
|
||||
init_vars
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||
$script:buildDir="../build/windows/${script:ARCH}/cpu"
|
||||
$script:distDir="$script:DIST_BASE\cpu"
|
||||
write-host "Building LCD CPU"
|
||||
build
|
||||
sign
|
||||
install
|
||||
} else {
|
||||
write-host "Skipping CPU generation step as requested"
|
||||
}
|
||||
}
|
||||
|
||||
function build_cpu_avx() {
|
||||
if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx"))) {
|
||||
init_vars
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||
$script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
|
||||
$script:distDir="$script:DIST_BASE\cpu_avx"
|
||||
write-host "Building AVX CPU"
|
||||
build
|
||||
sign
|
||||
install
|
||||
} else {
|
||||
write-host "Skipping CPU AVX generation step as requested"
|
||||
}
|
||||
}
|
||||
|
||||
function build_cpu_avx2() {
|
||||
if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx2"))) {
|
||||
init_vars
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
|
||||
$script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
|
||||
$script:distDir="$script:DIST_BASE\cpu_avx2"
|
||||
write-host "Building AVX2 CPU"
|
||||
build
|
||||
sign
|
||||
install
|
||||
} else {
|
||||
write-host "Skipping CPU AVX2 generation step as requested"
|
||||
}
|
||||
}
|
||||
|
||||
function build_cuda() {
|
||||
if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${script:CUDA_LIB_DIR}")) {
|
||||
# Then build cuda as a dynamically loaded library
|
||||
$nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
|
||||
$script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
|
||||
if ($null -ne $script:CUDA_VERSION) {
|
||||
$script:CUDA_VARIANT="_"+$script:CUDA_VERSION
|
||||
}
|
||||
init_vars
|
||||
$script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
|
||||
$script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT"
|
||||
$script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUDA=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
|
||||
if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
|
||||
write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
|
||||
$script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
|
||||
write-host "building custom CUDA GPU"
|
||||
}
|
||||
build
|
||||
sign
|
||||
install
|
||||
|
||||
write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\"
|
||||
cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
|
||||
cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
|
||||
cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
|
||||
} else {
|
||||
write-host "Skipping CUDA generation step"
|
||||
}
|
||||
}
|
||||
|
||||
function build_rocm() {
|
||||
if ((-not "${env:OLLAMA_SKIP_ROCM_GENERATE}") -and ("${env:HIP_PATH}")) {
|
||||
$script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
|
||||
if ($null -ne $script:ROCM_VERSION) {
|
||||
$script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
|
||||
}
|
||||
|
||||
init_vars
|
||||
$script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
|
||||
$script:distDir="$script:DIST_BASE\rocm$script:ROCM_VARIANT"
|
||||
$script:cmakeDefs += @(
|
||||
"-G", "Ninja",
|
||||
"-DCMAKE_C_COMPILER=clang.exe",
|
||||
"-DCMAKE_CXX_COMPILER=clang++.exe",
|
||||
"-DLLAMA_HIPBLAS=on",
|
||||
"-DHIP_PLATFORM=amd",
|
||||
"-DLLAMA_AVX=on",
|
||||
"-DLLAMA_AVX2=off",
|
||||
"-DCMAKE_POSITION_INDEPENDENT_CODE=on",
|
||||
"-DAMDGPU_TARGETS=$(amdGPUs)",
|
||||
"-DGPU_TARGETS=$(amdGPUs)"
|
||||
)
|
||||
|
||||
# Make sure the ROCm binary dir is first in the path
|
||||
$env:PATH="$env:HIP_PATH\bin;$env:PATH"
|
||||
|
||||
# We have to clobber the LIB var from the developer shell for clang to work properly
|
||||
$env:LIB=""
|
||||
if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
|
||||
write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
|
||||
$script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
|
||||
write-host "building custom ROCM GPU"
|
||||
}
|
||||
write-host "Building ROCm"
|
||||
build
|
||||
# Ninja doesn't prefix with config name
|
||||
${script:config}=""
|
||||
if ($null -ne $script:DUMPBIN) {
|
||||
& "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
|
||||
}
|
||||
sign
|
||||
install
|
||||
|
||||
# Assumes v5.7, may need adjustments for v6
|
||||
rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
|
||||
md "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" -ea 0 > $null
|
||||
cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
|
||||
cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
|
||||
# amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
|
||||
cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\"
|
||||
} else {
|
||||
write-host "Skipping ROCm generation step"
|
||||
}
|
||||
}
|
||||
|
||||
# GCC build for direct linking into the Go binary
|
||||
init_vars
|
||||
# cmake will silently fallback to msvc compilers if mingw isn't in the path, so detect and fail fast
|
||||
# as we need this to be compiled by gcc for golang to be able to link with itx
|
||||
write-host "Checking for MinGW..."
|
||||
# error action ensures we exit on failure
|
||||
get-command gcc
|
||||
get-command mingw32-make
|
||||
$script:cmakeTargets = @("llama", "ggml")
|
||||
$script:cmakeDefs = @(
|
||||
"-G", "MinGW Makefiles"
|
||||
"-DCMAKE_C_COMPILER=gcc.exe",
|
||||
"-DCMAKE_CXX_COMPILER=g++.exe",
|
||||
"-DBUILD_SHARED_LIBS=off",
|
||||
"-DLLAMA_NATIVE=off",
|
||||
"-DLLAMA_AVX=off",
|
||||
"-DLLAMA_AVX2=off",
|
||||
"-DLLAMA_AVX512=off",
|
||||
"-DLLAMA_F16C=off",
|
||||
"-DLLAMA_FMA=off")
|
||||
$script:buildDir="../build/windows/${script:ARCH}_static"
|
||||
write-host "Building static library"
|
||||
build
|
||||
if ($($args.count) -eq 0) {
|
||||
git_module_setup
|
||||
apply_patches
|
||||
build_static
|
||||
build_cpu
|
||||
build_cpu_avx
|
||||
build_cpu_avx2
|
||||
build_cuda
|
||||
build_rocm
|
||||
|
||||
# remaining llama.cpp builds use MSVC
|
||||
init_vars
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||
$script:buildDir="../build/windows/${script:ARCH}/cpu"
|
||||
$script:distDir="$script:DIST_BASE\cpu"
|
||||
write-host "Building LCD CPU"
|
||||
build
|
||||
sign
|
||||
install
|
||||
|
||||
init_vars
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||
$script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
|
||||
$script:distDir="$script:DIST_BASE\cpu_avx"
|
||||
write-host "Building AVX CPU"
|
||||
build
|
||||
sign
|
||||
install
|
||||
|
||||
init_vars
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
|
||||
$script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
|
||||
$script:distDir="$script:DIST_BASE\cpu_avx2"
|
||||
write-host "Building AVX2 CPU"
|
||||
build
|
||||
sign
|
||||
install
|
||||
cleanup
|
||||
write-host "`ngo generate completed. LLM runners: $(get-childitem -path $script:DIST_BASE)"
|
||||
} else {
|
||||
write-host "Skipping CPU generation step as requested"
|
||||
}
|
||||
|
||||
if ($null -ne $script:CUDA_LIB_DIR) {
|
||||
# Then build cuda as a dynamically loaded library
|
||||
$nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
|
||||
$script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
|
||||
if ($null -ne $script:CUDA_VERSION) {
|
||||
$script:CUDA_VARIANT="_"+$script:CUDA_VERSION
|
||||
}
|
||||
init_vars
|
||||
$script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
|
||||
$script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT"
|
||||
$script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUDA=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
|
||||
if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
|
||||
write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
|
||||
$script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
|
||||
write-host "building custom CUDA GPU"
|
||||
}
|
||||
build
|
||||
sign
|
||||
install
|
||||
}
|
||||
|
||||
if ($null -ne $env:HIP_PATH) {
|
||||
$script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
|
||||
if ($null -ne $script:ROCM_VERSION) {
|
||||
$script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
|
||||
}
|
||||
|
||||
init_vars
|
||||
$script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
|
||||
$script:distDir="$script:DIST_BASE\rocm$script:ROCM_VARIANT"
|
||||
$script:cmakeDefs += @(
|
||||
"-G", "Ninja",
|
||||
"-DCMAKE_C_COMPILER=clang.exe",
|
||||
"-DCMAKE_CXX_COMPILER=clang++.exe",
|
||||
"-DLLAMA_HIPBLAS=on",
|
||||
"-DHIP_PLATFORM=amd",
|
||||
"-DLLAMA_AVX=on",
|
||||
"-DLLAMA_AVX2=off",
|
||||
"-DCMAKE_POSITION_INDEPENDENT_CODE=on",
|
||||
"-DAMDGPU_TARGETS=$(amdGPUs)",
|
||||
"-DGPU_TARGETS=$(amdGPUs)"
|
||||
)
|
||||
|
||||
# Make sure the ROCm binary dir is first in the path
|
||||
$env:PATH="$env:HIP_PATH\bin;$env:PATH"
|
||||
|
||||
# We have to clobber the LIB var from the developer shell for clang to work properly
|
||||
$env:LIB=""
|
||||
if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
|
||||
write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
|
||||
$script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
|
||||
write-host "building custom ROCM GPU"
|
||||
}
|
||||
write-host "Building ROCm"
|
||||
build
|
||||
# Ninja doesn't prefix with config name
|
||||
${script:config}=""
|
||||
if ($null -ne $script:DUMPBIN) {
|
||||
& "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
|
||||
}
|
||||
sign
|
||||
install
|
||||
}
|
||||
|
||||
|
||||
cleanup
|
||||
write-host "`ngo generate completed. LLM runners: $(get-childitem -path $script:DIST_BASE)"
|
||||
for ( $i = 0; $i -lt $args.count; $i++ ) {
|
||||
write-host "performing $($args[$i])"
|
||||
& $($args[$i])
|
||||
}
|
||||
}
|
||||
Submodule llm/llama.cpp updated: 7593639ce3...46e12c4692
@@ -5,7 +5,6 @@ import (
"log/slog"
"os"
"strconv"
"strings"

"github.com/ollama/ollama/api"
"github.com/ollama/ollama/format"
@@ -100,8 +99,26 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
return 0, 0
}

var layerCount int
layers := ggml.Tensors().Layers()

var memoryLayerOutput uint64
if layer, ok := layers["output_norm"]; ok {
memoryLayerOutput += layer.size()
}

if layer, ok := layers["output"]; ok {
memoryLayerOutput += layer.size()
} else if layer, ok := layers["token_embd"]; ok {
memoryLayerOutput += layer.size()
}

if gpus[0].Library == "metal" && opts.UseMMap {
// memory is preallocated for output tensors
memoryRequiredTotal += memoryLayerOutput
memoryRequiredPartial += memoryLayerOutput
}

var layerCount int
for i := 0; i < int(ggml.KV().BlockCount()); i++ {
memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size()

@@ -115,15 +132,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
}
}

var memoryLayerOutput uint64
for k, v := range layers {
if !strings.HasPrefix(k, "blk.") {
memoryLayerOutput += v.size()
}
if gpus[0].Library != "metal" || !opts.UseMMap {
// memory was not preallocated for output tensors
memoryRequiredTotal += memoryLayerOutput
}

memoryRequiredTotal += memoryLayerOutput

if memoryAvailable > memoryRequiredTotal {
layerCount = int(ggml.KV().BlockCount()) + 1
memoryRequiredPartial = memoryRequiredTotal

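These hunks change how EstimateGPULayers accounts for the output and embedding tensors: they are summed once up front, and on metal with mmap they are treated as already preallocated. A much simplified standalone sketch of the overall estimate (greedy fit of per-block sizes plus a one-off output allocation; all numbers and names are made up, this is not the repository function):

```go
package main

import "fmt"

// estimateLayers sketches the idea behind EstimateGPULayers: fit as many
// repeating transformer blocks as possible into the available memory,
// then account for the output/embedding tensors once.
func estimateLayers(available uint64, blockSizes []uint64, outputSize uint64, outputPreallocated bool) (layers int, required uint64) {
	if !outputPreallocated {
		required = outputSize // output tensors load alongside the layers
	}
	for _, sz := range blockSizes {
		if required+sz > available {
			break
		}
		required += sz
		layers++
	}
	if layers == len(blockSizes) {
		layers++ // every block fits, so count the output layer as offloaded too
	}
	return layers, required
}

func main() {
	blocks := make([]uint64, 32)
	for i := range blocks {
		blocks[i] = 200 << 20 // pretend each block needs 200 MiB
	}
	n, req := estimateLayers(6<<30, blocks, 500<<20, false)
	fmt.Printf("offload %d layers, ~%d MiB required\n", n, req>>20)
}
```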
llm/patches/02-clip-log.diff (new file, 12 lines)
@@ -0,0 +1,12 @@
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index e431c7f7..f077e688 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -3,6 +3,7 @@
// I'll gradually clean and extend it
// Note: Even when using identical normalized image inputs (see normalize_image_u8_to_f32()) we have a significant difference in resulting embeddings compared to pytorch
#include "clip.h"
+#include "common.h"
#include "log.h"
#include "ggml.h"
#include "ggml-alloc.h"
llm/patches/04-metal.diff (new file, 45 lines)
@@ -0,0 +1,45 @@
diff --git a/ggml-metal.m b/ggml-metal.m
index 0207b787..b5e9884b 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -1396,27 +1396,23 @@ static enum ggml_status ggml_metal_graph_compute(
// to the matrix-vector kernel
int ne11_mm_min = 1;

-#if 0
// the numbers below are measured on M2 Ultra for 7B and 13B models
// these numbers do not translate to other devices or model sizes
// TODO: need to find a better approach
- if ([ctx->device.name isEqualToString:@"Apple M2 Ultra"]) {
- switch (src0t) {
- case GGML_TYPE_F16: ne11_mm_min = 2; break;
- case GGML_TYPE_Q8_0: ne11_mm_min = 7; break;
- case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
- case GGML_TYPE_Q3_K: ne11_mm_min = 7; break;
- case GGML_TYPE_Q4_0:
- case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
- case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
- case GGML_TYPE_Q5_0: // not tested yet
- case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
- case GGML_TYPE_Q5_K: ne11_mm_min = 7; break;
- case GGML_TYPE_Q6_K: ne11_mm_min = 7; break;
- default: ne11_mm_min = 1; break;
- }
+ switch (src0t) {
+ case GGML_TYPE_F16: ne11_mm_min = 2; break;
+ case GGML_TYPE_Q8_0: ne11_mm_min = 7; break;
+ case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
+ case GGML_TYPE_Q3_K: ne11_mm_min = 7; break;
+ case GGML_TYPE_Q4_0:
+ case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
+ case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
+ case GGML_TYPE_Q5_0: // not tested yet
+ case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
+ case GGML_TYPE_Q5_K: ne11_mm_min = 7; break;
+ case GGML_TYPE_Q6_K: ne11_mm_min = 7; break;
+ default: ne11_mm_min = 1; break;
}
-#endif

// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
// AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
@@ -442,7 +442,7 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
select {
case <-ctx.Done():
slog.Info("context expired before server started")
return fmt.Errorf("timed out waiting for llama runner to start")
return fmt.Errorf("timed out waiting for llama runner to start: %w", ctx.Err())
case err := <-s.done:
msg := ""
if s.status != nil && s.status.LastErrMsg != "" {
@@ -560,6 +560,13 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
return err
}
defer s.sem.Release(1)

// only allow maximum 10 "context shifts" to avoid infinite generation
if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
req.Options.NumPredict = 10 * s.options.NumCtx
slog.Debug("setting token limit to 10x num_ctx", "num_ctx", s.options.NumCtx, "num_predict", req.Options.NumPredict)
}

request := map[string]any{
"prompt": req.Prompt,
"stream": true,

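The Completion change above caps unbounded generation: when num_predict is negative (unlimited) or larger than ten full context windows, it is clamped to 10*num_ctx so a runaway completion cannot shift context forever. The guard in isolation, as a tiny sketch (field names follow the diff, surrounding types simplified):

```go
package main

import "fmt"

// clampNumPredict mirrors the guard added to Completion: allow at most
// ten context shifts worth of generated tokens.
func clampNumPredict(numPredict, numCtx int) int {
	if numPredict < 0 || numPredict > 10*numCtx {
		return 10 * numCtx
	}
	return numPredict
}

func main() {
	fmt.Println(clampNumPredict(-1, 2048))     // 20480: "unlimited" becomes 10x num_ctx
	fmt.Println(clampNumPredict(256, 2048))    // 256: explicit limits are kept
	fmt.Println(clampNumPredict(100000, 2048)) // 20480: oversized limits are clamped
}
```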
@@ -82,7 +82,7 @@ function buildOllama() {
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
New-Item -ItemType Directory -Path .\dist\windows-amd64\ -Force
cp .\ollama.exe .\dist\windows-amd64\ollama-windows-amd64.exe
cp .\ollama.exe .\dist\windows-amd64\
}

function buildApp() {
@@ -109,9 +109,6 @@ function gatherDependencies() {
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\"
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\"

cp "${script:NVIDIA_DIR}\cudart64_*.dll" "${script:DEPS_DIR}\"
cp "${script:NVIDIA_DIR}\cublas64_*.dll" "${script:DEPS_DIR}\"
cp "${script:NVIDIA_DIR}\cublasLt64_*.dll" "${script:DEPS_DIR}\"

cp "${script:SRC_DIR}\app\ollama_welcome.ps1" "${script:SRC_DIR}\dist\"
if ("${env:KEY_CONTAINER}") {
@@ -123,15 +120,6 @@ function gatherDependencies() {
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
}
if ($null -ne $env:HIP_PATH) {
# Assumes v5.7, may need adjustments for v6
rm -ea 0 -recurse -force -path "${script:DEPS_DIR}\rocm\"
md "${script:DEPS_DIR}\rocm\rocblas\library\" -ea 0 > $null
cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:DEPS_DIR}\rocm\"
cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:DEPS_DIR}\rocm\"
# amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:DEPS_DIR}\rocm\rocblas\library\"
}
}

function buildInstaller() {

@@ -29,6 +29,7 @@ import (
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/parser"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
)

@@ -701,36 +702,39 @@ func convertModel(name, path string, fn func(resp api.ProgressResponse)) (string
return path, nil
}

func CopyModel(src, dest string) error {
srcModelPath := ParseModelPath(src)
srcPath, err := srcModelPath.GetManifestPath()
func CopyModel(src, dst model.Name) error {
if !dst.IsFullyQualified() {
return model.Unqualified(dst)
}
if !src.IsFullyQualified() {
return model.Unqualified(src)
}

manifests, err := GetManifestPath()
if err != nil {
return err
}

destModelPath := ParseModelPath(dest)
destPath, err := destModelPath.GetManifestPath()
if err != nil {
return err
}
if err := os.MkdirAll(filepath.Dir(destPath), 0o755); err != nil {
dstpath := filepath.Join(manifests, dst.Filepath())
if err := os.MkdirAll(filepath.Dir(dstpath), 0o755); err != nil {
return err
}

// copy the file
input, err := os.ReadFile(srcPath)
srcpath := filepath.Join(manifests, src.Filepath())
srcfile, err := os.Open(srcpath)
if err != nil {
fmt.Println("Error reading file:", err)
return err
}
defer srcfile.Close()

err = os.WriteFile(destPath, input, 0o644)
dstfile, err := os.Create(dstpath)
if err != nil {
fmt.Println("Error reading file:", err)
return err
}
defer dstfile.Close()

return nil
_, err = io.Copy(dstfile, srcfile)
return err
}

func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{}, dryRun bool) error {

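CopyModel now takes parsed model.Name values and streams the manifest with io.Copy instead of reading the whole file into memory. A sketch of how a caller drives the new signature, assuming the server and types/model packages shown in this diff are importable from an external module (the model names are examples):

```go
package main

import (
	"fmt"
	"log"

	"github.com/ollama/ollama/server"
	"github.com/ollama/ollama/types/model"
)

func main() {
	// Parse and validate first; CopyModel itself rejects names that are not
	// fully qualified (missing host, namespace, model, or tag).
	src := model.ParseName("registry.ollama.ai/library/llama3:latest")
	dst := model.ParseName("registry.ollama.ai/library/llama3:backup")
	if !src.IsValid() || !dst.IsValid() {
		log.Fatal("invalid model name")
	}
	if err := server.CopyModel(src, dst); err != nil {
		log.Fatal(err)
	}
	fmt.Println("copied")
}
```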
@@ -29,6 +29,7 @@ import (
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/openai"
"github.com/ollama/ollama/parser"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
)

@@ -145,6 +146,11 @@ func (s *Server) GenerateHandler(c *gin.Context) {
select {
case runner = <-rCh:
case err = <-eCh:
if errors.Is(err, context.Canceled) {
c.JSON(499, gin.H{"error": "request canceled"})
return
}

c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
@@ -388,6 +394,11 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
select {
case runner = <-rCh:
case err = <-eCh:
if errors.Is(err, context.Canceled) {
c.JSON(499, gin.H{"error": "request canceled"})
return
}

c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
@@ -788,34 +799,34 @@ func (s *Server) ListModelsHandler(c *gin.Context) {
}

func (s *Server) CopyModelHandler(c *gin.Context) {
var req api.CopyRequest
err := c.ShouldBindJSON(&req)
switch {
case errors.Is(err, io.EOF):
var r api.CopyRequest
if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
return
case err != nil:
} else if err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}

if req.Source == "" || req.Destination == "" {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "source add destination are required"})
src := model.ParseName(r.Source)
if !src.IsValid() {
_ = c.Error(fmt.Errorf("source %q is invalid", r.Source))
}

dst := model.ParseName(r.Destination)
if !dst.IsValid() {
_ = c.Error(fmt.Errorf("destination %q is invalid", r.Destination))
}

if len(c.Errors) > 0 {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": c.Errors.Errors()})
return
}

if err := ParseModelPath(req.Destination).Validate(); err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}

if err := CopyModel(req.Source, req.Destination); err != nil {
if os.IsNotExist(err) {
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
} else {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
}
return
if err := CopyModel(src, dst); errors.Is(err, os.ErrNotExist) {
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found", r.Source)})
} else if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
}
}

@@ -1215,6 +1226,11 @@ func (s *Server) ChatHandler(c *gin.Context) {
select {
case runner = <-rCh:
case err = <-eCh:
if errors.Is(err, context.Canceled) {
c.JSON(499, gin.H{"error": "request canceled"})
return
}

c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}

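All three handlers gain the same guard: when the scheduler wait ends with context.Canceled because the client disconnected while the model was loading, the server answers 499 (client closed request) rather than 500. The shared branch, pulled out as a standalone sketch (helper name is hypothetical, not in the repository):

```go
package sketch

import (
	"context"
	"errors"
	"net/http"

	"github.com/gin-gonic/gin"
)

// respondRunnerError sketches the error branch the diff adds to
// GenerateHandler, EmbeddingsHandler and ChatHandler: a wait that ended
// because the client went away maps to 499 instead of 500.
func respondRunnerError(c *gin.Context, err error) {
	if errors.Is(err, context.Canceled) {
		// 499: the client closed the request before a runner was ready
		c.JSON(499, gin.H{"error": "request canceled"})
		return
	}
	c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
}
```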
@@ -23,7 +23,6 @@ import (
|
||||
type LlmRequest struct {
|
||||
ctx context.Context //nolint:containedctx
|
||||
model *Model
|
||||
ggml *llm.GGML // TODO - how large is this, and do we need to free it after we've finished loading?
|
||||
opts api.Options
|
||||
sessionDuration time.Duration
|
||||
successCh chan *runnerRef
|
||||
@@ -39,7 +38,7 @@ type Scheduler struct {
|
||||
loaded map[string]*runnerRef
|
||||
loadedMu sync.Mutex
|
||||
|
||||
loadFn func(req *LlmRequest, gpus gpu.GpuInfoList)
|
||||
loadFn func(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList)
|
||||
newServerFn func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error)
|
||||
getGpuFn func() gpu.GpuInfoList
|
||||
}
|
||||
@@ -47,6 +46,7 @@ type Scheduler struct {
|
||||
// TODO set this to zero after a release or two, to enable multiple models by default
|
||||
var loadedMax = 1 // Maximum runners; < 1 maps to as many as will fit in VRAM (unlimited for CPU runners)
|
||||
var maxQueuedRequests = 10 // TODO configurable
|
||||
var numParallel = 1
|
||||
|
||||
func InitScheduler(ctx context.Context) *Scheduler {
|
||||
maxRunners := os.Getenv("OLLAMA_MAX_LOADED_MODELS")
|
||||
@@ -58,6 +58,14 @@ func InitScheduler(ctx context.Context) *Scheduler {
|
||||
loadedMax = m
|
||||
}
|
||||
}
|
||||
if onp := os.Getenv("OLLAMA_NUM_PARALLEL"); onp != "" {
|
||||
p, err := strconv.Atoi(onp)
|
||||
if err != nil || p <= 0 {
|
||||
slog.Error("invalid parallel setting, must be greater than zero", "OLLAMA_NUM_PARALLEL", onp, "error", err)
|
||||
} else {
|
||||
numParallel = p
|
||||
}
|
||||
}
|
||||
|
||||
sched := &Scheduler{
|
||||
pendingReqCh: make(chan *LlmRequest, maxQueuedRequests),
|
||||
@@ -74,20 +82,16 @@ func InitScheduler(ctx context.Context) *Scheduler {
|
||||
|
||||
// context must be canceled to decrement ref count and release the runner
|
||||
func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options, sessionDuration time.Duration) (chan *runnerRef, chan error) {
|
||||
ggml, err := llm.LoadModel(model.ModelPath)
|
||||
req := &LlmRequest{
|
||||
ctx: c,
|
||||
model: model,
|
||||
ggml: ggml,
|
||||
opts: opts,
|
||||
sessionDuration: sessionDuration,
|
||||
successCh: make(chan *runnerRef),
|
||||
errCh: make(chan error, 1),
|
||||
}
|
||||
if err != nil {
|
||||
req.errCh <- err
|
||||
return req.successCh, req.errCh
|
||||
}
|
||||
// context split across parallel threads
|
||||
opts.NumCtx = opts.NumCtx * numParallel
|
||||
select {
|
||||
case s.pendingReqCh <- req:
|
||||
default:
|
||||
@@ -130,28 +134,39 @@ func (s *Scheduler) processPending(ctx context.Context) {
|
||||
pending.useLoadedRunner(runner, s.finishedReqCh)
|
||||
break
|
||||
}
|
||||
} else if loadedCount == 0 {
|
||||
slog.Debug("loading first model", "model", pending.model.ModelPath)
|
||||
gpus := s.getGpuFn()
|
||||
g := pickBestFitGPUs(pending, gpus)
|
||||
if g != nil {
|
||||
gpus = g
|
||||
}
|
||||
s.loadFn(pending, gpus)
|
||||
break
|
||||
} else if loadedMax > 0 && loadedCount >= loadedMax {
|
||||
slog.Debug("max runners achieved, unloading one to make room", "runner_count", loadedCount)
|
||||
runnerToExpire = s.findRunnerToUnload(pending)
|
||||
} else {
|
||||
// More than one loaded model, so we have to see if the new one fits
|
||||
// Either no models are loaded or below loadedMax
|
||||
// Get a refreshed GPU list
|
||||
gpus := s.getGpuFn()
|
||||
|
||||
// Load model for fitting
|
||||
ggml, err := llm.LoadModel(pending.model.ModelPath)
|
||||
if err != nil {
|
||||
pending.errCh <- err
|
||||
break
|
||||
}
|
||||
|
||||
// No models loaded. Load the model but prefer the best fit.
|
||||
if loadedCount == 0 {
|
||||
slog.Debug("loading first model", "model", pending.model.ModelPath)
|
||||
g := pickBestFitGPUs(pending, ggml, gpus)
|
||||
if g != nil {
|
||||
gpus = g
|
||||
}
|
||||
s.loadFn(pending, ggml, gpus)
|
||||
break
|
||||
}
|
||||
|
||||
// More than one loaded model, so we have to see if the new one fits
|
||||
// Update free memory from currently loaded models
|
||||
s.updateFreeSpace(gpus)
|
||||
gpus = pickBestFitGPUs(pending, gpus)
|
||||
gpus = pickBestFitGPUs(pending, ggml, gpus)
|
||||
if gpus != nil {
|
||||
slog.Debug("new model fits with existing models, loading")
|
||||
s.loadFn(pending, gpus)
|
||||
s.loadFn(pending, ggml, gpus)
|
||||
break
|
||||
}
|
||||
runnerToExpire = s.findRunnerToUnload(pending)
|
||||
@@ -282,8 +297,8 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm
|
||||
}()
|
||||
}
|
||||
|
||||
func (s *Scheduler) load(req *LlmRequest, gpus gpu.GpuInfoList) {
|
||||
llama, err := s.newServerFn(gpus, req.model.ModelPath, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts)
|
||||
func (s *Scheduler) load(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) {
|
||||
llama, err := s.newServerFn(gpus, req.model.ModelPath, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts)
|
||||
if err != nil {
|
||||
// some older models are not compatible with newer versions of llama.cpp
|
||||
// show a generalized compatibility error until there is a better way to
|
||||
@@ -417,16 +432,21 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
|
||||
slog.Debug("evaluating already loaded", "model", req.model.ModelPath)
|
||||
runner.refMu.Lock()
|
||||
defer runner.refMu.Unlock()
|
||||
// Ignore the NumGPU settings for comparison
|
||||
optsExisting := runner.Options.Runner
|
||||
optsExisting.NumGPU = -1
|
||||
optsNew := req.opts.Runner
|
||||
optsNew.NumGPU = -1
|
||||
|
||||
timeout := 10 * time.Second
|
||||
if runner.loading {
|
||||
timeout = 2 * time.Minute // Initial load can take a long time for big models on slow systems...
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(ctx, timeout) // BUG -
|
||||
|
||||
// Don't reload runner if num_gpu=-1 was provided
|
||||
optsExisting := runner.Options.Runner
|
||||
optsNew := req.opts.Runner
|
||||
if optsNew.NumGPU < 0 {
|
||||
optsExisting.NumGPU = -1
|
||||
optsNew.NumGPU = -1
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
if !reflect.DeepEqual(runner.adapters, req.model.AdapterPaths) || // have the adapters changed?
|
||||
!reflect.DeepEqual(runner.projectors, req.model.ProjectorPaths) || // have the projectors changed?
|
||||
@@ -434,6 +454,7 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
|
||||
runner.llama.Ping(ctx) != nil {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -454,7 +475,7 @@ func (a ByDuration) Less(i, j int) bool {
|
||||
|
||||
// pickBestFitGPUs will try to find the optimal placement of the model in the available GPUs where the model fully fits
|
||||
// If the model can not be fit fully within the available GPU(s) nil is returned
|
||||
func pickBestFitGPUs(req *LlmRequest, gpus gpu.GpuInfoList) gpu.GpuInfoList {
|
||||
func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu.GpuInfoList {
|
||||
var estimatedVRAM uint64
|
||||
for _, gl := range gpus.ByLibrary() {
|
||||
var ok bool
|
||||
@@ -466,7 +487,7 @@ func pickBestFitGPUs(req *LlmRequest, gpus gpu.GpuInfoList) gpu.GpuInfoList {
|
||||
|
||||
// First attempt to fit the model into a single GPU
|
||||
for _, g := range sgl {
|
||||
if ok, estimatedVRAM = llm.PredictServerFit([]gpu.GpuInfo{g}, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
|
||||
if ok, estimatedVRAM = llm.PredictServerFit([]gpu.GpuInfo{g}, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
|
||||
slog.Debug("new model will fit in available VRAM in single GPU, loading", "model", req.model.ModelPath, "gpu", g.ID, "available", g.FreeMemory, "required", format.HumanBytes2(estimatedVRAM))
|
||||
return []gpu.GpuInfo{g}
|
||||
}
|
||||
@@ -477,7 +498,7 @@ func pickBestFitGPUs(req *LlmRequest, gpus gpu.GpuInfoList) gpu.GpuInfoList {
|
||||
// - try subsets of GPUs instead of just falling back to 1 or all in a family
|
||||
|
||||
// Now try all the GPUs
|
||||
if ok, estimatedVRAM = llm.PredictServerFit(gl, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
|
||||
if ok, estimatedVRAM = llm.PredictServerFit(gl, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
|
||||
slog.Debug("new model will fit in available VRAM, loading", "model", req.model.ModelPath, "library", gl[0].Library, "required", format.HumanBytes2(estimatedVRAM))
|
||||
return gl
|
||||
}
|
||||
|
||||
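pickBestFitGPUs (above) now receives the loaded GGML directly instead of carrying it on the request, but its placement strategy is unchanged: per GPU library, try to fit the whole model on a single GPU, and only then consider spreading it across every GPU of that library. A simplified standalone sketch of that strategy (the types and the fit check are stand-ins, not the repository's):

```go
package main

import "fmt"

// gpuInfo is a stand-in for gpu.GpuInfo, just enough to express the idea.
type gpuInfo struct {
	ID   string
	Free uint64
}

// pickBestFit mirrors pickBestFitGPUs: prefer a single GPU that can hold the
// entire model, otherwise try all GPUs together, and return nil when the
// model cannot be fully offloaded. fits stands in for llm.PredictServerFit.
func pickBestFit(gpus []gpuInfo, fits func([]gpuInfo) bool) []gpuInfo {
	for _, g := range gpus {
		if fits([]gpuInfo{g}) {
			return []gpuInfo{g}
		}
	}
	if fits(gpus) {
		return gpus
	}
	return nil
}

func main() {
	need := uint64(14 << 30) // pretend the model needs 14 GiB of VRAM
	gpus := []gpuInfo{{"0", 8 << 30}, {"1", 8 << 30}}
	fits := func(gs []gpuInfo) bool {
		var free uint64
		for _, g := range gs {
			free += g.Free
		}
		return free >= need
	}
	fmt.Println(pickBestFit(gpus, fits)) // spreads across both GPUs
}
```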
@@ -47,6 +47,7 @@ func TestLoad(t *testing.T) {
|
||||
ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
||||
defer done()
|
||||
s := InitScheduler(ctx)
|
||||
var ggml *llm.GGML // value not used in tests
|
||||
req := &LlmRequest{
|
||||
ctx: ctx,
|
||||
model: &Model{ModelPath: "foo"},
|
||||
@@ -59,7 +60,7 @@ func TestLoad(t *testing.T) {
|
||||
return nil, fmt.Errorf("something failed to load model blah")
|
||||
}
|
||||
gpus := gpu.GpuInfoList{}
|
||||
s.load(req, gpus)
|
||||
s.load(req, ggml, gpus)
|
||||
require.Len(t, req.successCh, 0)
|
||||
require.Len(t, req.errCh, 1)
|
||||
require.Len(t, s.loaded, 0)
|
||||
@@ -70,7 +71,7 @@ func TestLoad(t *testing.T) {
|
||||
s.newServerFn = func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error) {
|
||||
return server, nil
|
||||
}
|
||||
s.load(req, gpus)
|
||||
s.load(req, ggml, gpus)
|
||||
select {
|
||||
case err := <-req.errCh:
|
||||
require.NoError(t, err)
|
||||
@@ -82,7 +83,7 @@ func TestLoad(t *testing.T) {
|
||||
|
||||
req.model.ModelPath = "dummy_model_path"
|
||||
server.waitResp = fmt.Errorf("wait failure")
|
||||
s.load(req, gpus)
|
||||
s.load(req, ggml, gpus)
|
||||
select {
|
||||
case err := <-req.errCh:
|
||||
require.Contains(t, err.Error(), "wait failure")
|
||||
@@ -101,6 +102,7 @@ type bundle struct {
|
||||
ctxDone func()
|
||||
srv *mockLlm
|
||||
req *LlmRequest
|
||||
ggml *llm.GGML
|
||||
}
|
||||
|
||||
func (scenario *bundle) newServer(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error) {
|
||||
@@ -132,14 +134,15 @@ func newScenario(t *testing.T, ctx context.Context, modelName string, estimatedV
|
||||
{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: &bytes.Reader{}},
|
||||
})
|
||||
assert.Nil(t, err)
|
||||
|
||||
fname := f.Name()
|
||||
model := &Model{Name: modelName, ModelPath: fname}
|
||||
ggml, err := llm.LoadModel(model.ModelPath)
|
||||
scenario.ggml, err = llm.LoadModel(model.ModelPath)
|
||||
require.NoError(t, err)
|
||||
|
||||
scenario.req = &LlmRequest{
|
||||
ctx: scenario.ctx,
|
||||
model: model,
|
||||
ggml: ggml,
|
||||
sessionDuration: 5 * time.Millisecond,
|
||||
successCh: make(chan *runnerRef, 1),
|
||||
errCh: make(chan error, 1),
|
||||
@@ -157,13 +160,13 @@ func TestRequests(t *testing.T) {
scenario1a.req.sessionDuration = 0
scenario1b := newScenario(t, ctx, "ollama-model-1", 11)
scenario1b.req.model = scenario1a.req.model
scenario1b.req.ggml = scenario1a.req.ggml
scenario1b.ggml = scenario1a.ggml
scenario1b.req.sessionDuration = 0

// simple reload of same model
scenario2a := newScenario(t, ctx, "ollama-model-1", 20)
scenario2a.req.model = scenario1a.req.model
scenario2a.req.ggml = scenario1a.req.ggml
scenario2a.ggml = scenario1a.ggml

// Multiple loaded models
scenario3a := newScenario(t, ctx, "ollama-model-3a", 1*format.GigaByte)
@@ -322,13 +325,14 @@ func TestGetRunner(t *testing.T) {
successCh1c, errCh1c := s.GetRunner(scenario1c.ctx, scenario1c.req.model, scenario1c.req.opts, scenario1c.req.sessionDuration)
require.Len(t, s.pendingReqCh, 0)
require.Len(t, successCh1c, 0)
require.Len(t, errCh1c, 0)

time.Sleep(5 * time.Millisecond)
require.Len(t, s.loaded, 0)
require.Len(t, errCh1c, 1)
err = <-errCh1c
require.Contains(t, err.Error(), "bad path")
scenario1b.ctxDone()

time.Sleep(5 * time.Millisecond)
require.Len(t, s.loaded, 0)
}

// TODO - add one scenario that triggers the bogus finished event with positive ref count
@@ -366,7 +370,9 @@ func TestPrematureExpired(t *testing.T) {
require.LessOrEqual(t, len(s.finishedReqCh), 1)
time.Sleep(10 * time.Millisecond)
require.Len(t, s.finishedReqCh, 0)
s.loadedMu.Lock()
require.Len(t, s.loaded, 0)
s.loadedMu.Unlock()

// also shouldn't happen in real life
s.finishedReqCh <- scenario1a.req
@@ -426,7 +432,6 @@ func TestUpdateFreeSpace(t *testing.T) {
s.updateFreeSpace(gpus)
require.Equal(t, uint64(850), gpus[0].FreeMemory)
require.Equal(t, uint64(1850), gpus[1].FreeMemory)

}

func TestFindRunnerToUnload(t *testing.T) {
@@ -485,6 +490,9 @@ func TestNeedsReload(t *testing.T) {
require.False(t, resp)
req.opts.NumGPU = 99
resp = runner.needsReload(ctx, req)
require.True(t, resp)
req.opts.NumGPU = -1
resp = runner.needsReload(ctx, req)
require.False(t, resp)
}

@@ -1,87 +0,0 @@
|
||||
package model
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// Digest represents a digest of a model Manifest. It is a comparable value
|
||||
// type and is immutable.
|
||||
//
|
||||
// The zero Digest is not a valid digest.
|
||||
type Digest struct {
|
||||
s string
|
||||
}
|
||||
|
||||
// Split returns the digest type and the digest value.
|
||||
func (d Digest) Split() (typ, digest string) {
|
||||
typ, digest, _ = strings.Cut(d.s, "-")
|
||||
return
|
||||
}
|
||||
|
||||
// String returns the digest in the form of "<digest-type>-<digest>", or the
|
||||
// empty string if the digest is invalid.
|
||||
func (d Digest) String() string { return d.s }
|
||||
|
||||
// IsValid returns true if the digest is valid (not zero).
|
||||
//
|
||||
// A valid digest may be created only by ParseDigest, or
|
||||
// ParseName(name).Digest().
|
||||
func (d Digest) IsValid() bool { return d.s != "" }
|
||||
|
||||
// LogValue implements slog.Value.
|
||||
func (d Digest) LogValue() slog.Value {
|
||||
return slog.StringValue(d.String())
|
||||
}
|
||||
|
||||
var (
|
||||
_ slog.LogValuer = Digest{}
|
||||
)
|
||||
|
||||
// ParseDigest parses a string in the form of "<digest-type>-<digest>" into a
|
||||
// Digest.
|
||||
func ParseDigest(s string) Digest {
|
||||
typ, digest, ok := strings.Cut(s, "-")
|
||||
if !ok {
|
||||
typ, digest, ok = strings.Cut(s, ":")
|
||||
}
|
||||
if ok && isValidDigestType(typ) && isValidHex(digest) && len(digest) >= 2 {
|
||||
return Digest{s: fmt.Sprintf("%s-%s", typ, digest)}
|
||||
}
|
||||
return Digest{}
|
||||
}
|
||||
|
||||
func MustParseDigest(s string) Digest {
|
||||
d := ParseDigest(s)
|
||||
if !d.IsValid() {
|
||||
panic(fmt.Sprintf("invalid digest: %q", s))
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
func isValidDigestType(s string) bool {
|
||||
if len(s) == 0 {
|
||||
return false
|
||||
}
|
||||
for _, r := range s {
|
||||
if !unicode.IsLower(r) && !unicode.IsDigit(r) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func isValidHex(s string) bool {
|
||||
if len(s) == 0 {
|
||||
return false
|
||||
}
|
||||
for i := range s {
|
||||
c := s[i]
|
||||
if c < '0' || c > '9' && c < 'a' || c > 'f' {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
@@ -1,46 +0,0 @@
|
||||
package model
|
||||
|
||||
import "testing"
|
||||
|
||||
var testDigests = map[string]Digest{
|
||||
"": {},
|
||||
"sha256-1234": {s: "sha256-1234"},
|
||||
"sha256-5678": {s: "sha256-5678"},
|
||||
"blake2-9abc": {s: "blake2-9abc"},
|
||||
"-1234": {},
|
||||
"sha256-": {},
|
||||
"sha256-1234-5678": {},
|
||||
"sha256-P": {}, // invalid hex
|
||||
"sha256-1234P": {},
|
||||
"---": {},
|
||||
}
|
||||
|
||||
func TestDigestParse(t *testing.T) {
|
||||
// Test cases.
|
||||
for s, want := range testDigests {
|
||||
got := ParseDigest(s)
|
||||
t.Logf("ParseDigest(%q) = %#v", s, got)
|
||||
if got != want {
|
||||
t.Errorf("ParseDigest(%q) = %q; want %q", s, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDigestString(t *testing.T) {
|
||||
// Test cases.
|
||||
for s, d := range testDigests {
|
||||
want := s
|
||||
if !d.IsValid() {
|
||||
want = ""
|
||||
}
|
||||
got := d.String()
|
||||
if got != want {
|
||||
t.Errorf("ParseDigest(%q).String() = %q; want %q", s, got, want)
|
||||
}
|
||||
|
||||
got = ParseDigest(s).String()
|
||||
if got != want {
|
||||
t.Errorf("roundtrip ParseDigest(%q).String() = %q; want %q", s, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,715 +1,313 @@
|
||||
// Package model contains types and utilities for parsing, validating, and
|
||||
// working with model names and digests.
|
||||
package model
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash/maphash"
|
||||
"io"
|
||||
"log/slog"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/ollama/ollama/types/structs"
|
||||
)
|
||||
|
||||
// Errors
|
||||
var (
|
||||
// ErrInvalidName, ErrIncompleteName, and ErrInvalidDigest are not
|
||||
// used by this package, but are exported so that other packages can
|
||||
// use them, instead of defining their own errors for them.
|
||||
ErrInvalidName = errors.New("invalid model name")
|
||||
ErrIncompleteName = errors.New("incomplete model name")
|
||||
ErrInvalidDigest = errors.New("invalid digest")
|
||||
// ErrUnqualifiedName represents an error where a name is not fully
|
||||
// qualified. It is not used directly in this package, but is here
|
||||
// to avoid other packages inventing their own error type.
|
||||
// Additionally, it can be conveniently used via [Unqualified].
|
||||
ErrUnqualifiedName = errors.New("unqualified name")
|
||||
)
|
||||
|
||||
// Defaults
|
||||
const (
|
||||
// MaskDefault is the default mask used by [Name.DisplayShortest].
|
||||
MaskDefault = "registry.ollama.ai/library/?:latest"
|
||||
|
||||
// MaskNothing is a mask that masks nothing.
|
||||
MaskNothing = "?/?/?:?"
|
||||
|
||||
// DefaultFill is the default fill used by [ParseName].
|
||||
FillDefault = "registry.ollama.ai/library/?:latest+Q4_0"
|
||||
|
||||
// FillNothing is a fill that fills nothing.
|
||||
FillNothing = "?/?/?:?+?"
|
||||
)
|
||||
|
||||
const MaxNamePartLen = 128
|
||||
|
||||
type PartKind int
|
||||
|
||||
// Levels of concreteness
|
||||
const (
|
||||
// Each value aligns with its index in the Name.parts array.
|
||||
|
||||
PartHost PartKind = iota
|
||||
PartNamespace
|
||||
PartModel
|
||||
PartTag
|
||||
PartBuild
|
||||
PartDigest
|
||||
|
||||
// NumParts is the number of parts in a Name. In this list, it must
|
||||
// follow the final part.
|
||||
NumParts
|
||||
|
||||
PartExtraneous = -1
|
||||
)
|
||||
|
||||
var kindNames = map[PartKind]string{
|
||||
PartHost: "Host",
|
||||
PartNamespace: "Namespace",
|
||||
PartModel: "Name",
|
||||
PartTag: "Tag",
|
||||
PartBuild: "Build",
|
||||
PartDigest: "Digest",
|
||||
// Unqualified is a helper function that returns an error with
|
||||
// ErrUnqualifiedName as the cause and the name as the message.
|
||||
func Unqualified(n Name) error {
|
||||
return fmt.Errorf("%w: %s", ErrUnqualifiedName, n)
|
||||
}
|
||||
|
||||
func (k PartKind) String() string {
|
||||
return cmp.Or(kindNames[k], "Unknown")
|
||||
// MissingPart is used to indicate any part of a name that was "promised" by
|
||||
// the presence of a separator, but is missing.
|
||||
//
|
||||
// The value was chosen because it is deemed unlikely to be set by a user,
|
||||
// not a valid part name when checked by [Name.IsValid], and easy to
|
||||
// spot in logs.
|
||||
const MissingPart = "!MISSING!"
|
||||
|
||||
// DefaultName returns a name with the default values for the host, namespace,
|
||||
// and tag parts. The model and digest parts are empty.
|
||||
//
|
||||
// - The default host is ("registry.ollama.ai")
|
||||
// - The default namespace is ("library")
|
||||
// - The default tag is ("latest")
|
||||
func DefaultName() Name {
|
||||
return Name{
|
||||
Host: "registry.ollama.ai",
|
||||
Namespace: "library",
|
||||
Tag: "latest",
|
||||
}
|
||||
}
|
||||
|
||||
// Name is an opaque reference to a model. It holds the parts of a model
|
||||
// with the case preserved, but is not directly comparable with other Names
|
||||
// since model names can be represented with different casing depending on
|
||||
// the use case. For instance, "Mistral" and "mistral" are the same model
|
||||
// but each version may have come from different sources (e.g. copied from a
|
||||
// Web page, or from a file path).
|
||||
type partKind int
|
||||
|
||||
const (
|
||||
kindHost partKind = iota
|
||||
kindNamespace
|
||||
kindModel
|
||||
kindTag
|
||||
kindDigest
|
||||
)
|
||||
|
||||
func (k partKind) String() string {
|
||||
switch k {
|
||||
case kindHost:
|
||||
return "host"
|
||||
case kindNamespace:
|
||||
return "namespace"
|
||||
case kindModel:
|
||||
return "model"
|
||||
case kindTag:
|
||||
return "tag"
|
||||
case kindDigest:
|
||||
return "digest"
|
||||
default:
|
||||
return "unknown"
|
||||
}
|
||||
}
|
||||
|
||||
// Name is a structured representation of a model name string, as defined by
|
||||
// [ParseNameNoDefaults].
|
||||
//
|
||||
// Valid Names can ONLY be constructed by calling [ParseName].
|
||||
// It is not guaranteed to be valid. Use [Name.IsValid] to check if the name
|
||||
// is valid.
|
||||
//
|
||||
// A Name is valid if and only if it has a valid Model part. The other parts
|
||||
// are optional.
|
||||
//
|
||||
// A Name is considered "complete" if it has all parts present. To check if a
|
||||
// Name is complete, use [Name.IsComplete].
|
||||
//
|
||||
// To compare two names in a case-insensitive manner, use [Name.EqualFold].
|
||||
//
|
||||
// The parts of a Name are:
|
||||
//
|
||||
// - Host: the domain of the model (optional)
|
||||
// - Namespace: the namespace of the model (optional)
|
||||
// - Model: the name of the model (required)
|
||||
// - Tag: the tag of the model (optional)
|
||||
// - Build: the build of the model; usually the quantization or "file type" (optional)
|
||||
//
|
||||
// The parts can be obtained in their original form by calling [Name.Parts].
|
||||
//
|
||||
// To check if a Name has at minimum a valid model part, use [Name.IsValid].
|
||||
// It is not directly comparable with other Names. Use [Name.Equal] and
|
||||
// [Name.MapHash] for determining equality and using as a map key.
|
||||
type Name struct {
|
||||
_ structs.Incomparable
|
||||
parts [NumParts]string // host, namespace, model, tag, build, digest
|
||||
|
||||
// TODO(bmizerany): track offsets and hold s (raw string) here? We
|
||||
// could pack the offsets all into a single uint64 since the first
|
||||
// parts take less bits since their max offset is less than the max
|
||||
// offset of the next part. This would save a ton of bytes per Name
|
||||
// and mean zero allocations for String.
|
||||
Host string
|
||||
Namespace string
|
||||
Model string
|
||||
Tag string
|
||||
RawDigest string
|
||||
}
|
||||
|
||||
// ParseName parses s into a Name, and returns the result of filling it with
|
||||
// defaults. The input string must be a valid string
|
||||
// representation of a model name in the form:
|
||||
// ParseName parses and assembles a Name from a name string. The
|
||||
// format of a valid name string is:
|
||||
//
|
||||
// [host/][namespace/]<model>[:tag][+build][@<digest-type>-<digest>]
|
||||
// s:
|
||||
// { host } "/" { namespace } "/" { model } ":" { tag } "@" { digest }
|
||||
// { host } "/" { namespace } "/" { model } ":" { tag }
|
||||
// { host } "/" { namespace } "/" { model } "@" { digest }
|
||||
// { host } "/" { namespace } "/" { model }
|
||||
// { namespace } "/" { model } ":" { tag } "@" { digest }
|
||||
// { namespace } "/" { model } ":" { tag }
|
||||
// { namespace } "/" { model } "@" { digest }
|
||||
// { namespace } "/" { model }
|
||||
// { model } ":" { tag } "@" { digest }
|
||||
// { model } ":" { tag }
|
||||
// { model } "@" { digest }
|
||||
// { model }
|
||||
// "@" { digest }
|
||||
// host:
|
||||
// pattern: alphanum { alphanum | "-" | "_" | "." | ":" }*
|
||||
// length: [1, 350]
|
||||
// namespace:
|
||||
// pattern: alphanum { alphanum | "-" | "_" }*
|
||||
// length: [2, 80]
|
||||
// model:
|
||||
// pattern: alphanum { alphanum | "-" | "_" | "." }*
|
||||
// length: [2, 80]
|
||||
// tag:
|
||||
// pattern: alphanum { alphanum | "-" | "_" | "." }*
|
||||
// length: [1, 80]
|
||||
// digest:
|
||||
// pattern: alphanum { alphanum | "-" | ":" }*
|
||||
// length: [2, 80]
|
||||
//
|
||||
// The name part is required, all others are optional. If a part is missing,
|
||||
// it is left empty in the returned Name. If a part is invalid, the zero Ref
|
||||
// value is returned.
|
||||
// Most users should use [ParseName] instead, unless they need to support
|
||||
// different defaults than DefaultName.
|
||||
//
|
||||
// The build part is normalized to uppercase.
|
||||
//
|
||||
// Examples of valid paths:
|
||||
//
|
||||
// "example.com/library/mistral:7b+x"
|
||||
// "example.com/eva/mistral:7b+Q4_0"
|
||||
// "mistral:7b+x"
|
||||
// "example.com/mike/mistral:latest+Q4_0"
|
||||
// "example.com/bruce/mistral:latest"
|
||||
// "example.com/pdevine/thisisfine:7b+Q4_0@sha256-1234567890abcdef"
|
||||
//
|
||||
// Examples of invalid paths:
|
||||
//
|
||||
// "example.com/mistral:7b+"
|
||||
// "example.com/mistral:7b+Q4_0+"
|
||||
// "x/y/z/z:8n+I"
|
||||
// ""
|
||||
//
|
||||
// It returns the zero value if any part is invalid.
|
||||
//
|
||||
// # Fills
|
||||
//
|
||||
// For any valid s, the fill string is used to fill in missing parts of the
|
||||
// Name. The fill string must be a valid Name with the exception that any part
|
||||
// may be the string ("?"), which will not be considered for filling.
|
||||
func ParseNameFill(s, fill string) Name {
|
||||
var r Name
|
||||
parts(s)(func(kind PartKind, part string) bool {
|
||||
if kind == PartDigest && !ParseDigest(part).IsValid() {
|
||||
r = Name{}
|
||||
return false
|
||||
}
|
||||
if kind == PartExtraneous || !IsValidNamePart(kind, part) {
|
||||
r = Name{}
|
||||
return false
|
||||
}
|
||||
r.parts[kind] = part
|
||||
return true
|
||||
})
|
||||
if r.IsValid() || r.IsResolved() {
|
||||
return fillName(r, fill)
|
||||
}
|
||||
return Name{}
|
||||
}
|
||||
|
||||
// ParseName parses s into a Name, and returns the result of filling it
// with FillDefault. The input string must be a valid string representation
// of a model
// The name returned is not guaranteed to be valid. If it is not valid, the
// field values are left in an undefined state. Use [Name.IsValid] to check
// if the name is valid.
func ParseName(s string) Name {
return ParseNameFill(s, "")
return Merge(ParseNameBare(s), DefaultName())
}
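Aside (not part of the diff): a minimal sketch of how the reworked entry points behave, assuming the package is imported as github.com/ollama/ollama/types/model (an assumed import path; only the types/structs import is visible above). The defaulted output matches TestNameparseNameDefault in the test diff further down.

```go
package main

import (
	"fmt"

	"github.com/ollama/ollama/types/model" // assumed import path
)

func main() {
	// ParseNameBare keeps only what the string itself provides.
	bare := model.ParseNameBare("mistral:7b")
	fmt.Println(bare.Model, bare.Tag) // mistral 7b

	// ParseName merges the bare result with DefaultName, filling in the
	// host, namespace, and tag when they are missing.
	n := model.ParseName("mistral")
	fmt.Println(n.String()) // registry.ollama.ai/library/mistral:latest
}
```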
|
||||
|
||||
func parseMask(s string) Name {
|
||||
var r Name
|
||||
parts(s)(func(kind PartKind, part string) bool {
|
||||
if part == "?" {
|
||||
// mask part; treat as empty but valid
|
||||
return true
|
||||
}
|
||||
if !IsValidNamePart(kind, part) {
|
||||
panic(fmt.Errorf("invalid mask part %s: %q", kind, part))
|
||||
}
|
||||
r.parts[kind] = part
|
||||
return true
|
||||
})
|
||||
return r
|
||||
}
|
||||
// ParseNameBare parses s as a name string and returns a Name. No merge with
|
||||
// [DefaultName] is performed.
|
||||
func ParseNameBare(s string) Name {
|
||||
var n Name
|
||||
var promised bool
|
||||
|
||||
func MustParseName(s, fill string) Name {
|
||||
r := ParseNameFill(s, fill)
|
||||
if !r.IsValid() {
|
||||
panic("invalid Name: " + s)
|
||||
s, n.RawDigest, promised = cutLast(s, "@")
|
||||
if promised && n.RawDigest == "" {
|
||||
n.RawDigest = MissingPart
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// fillName fills in the missing parts of dst with the parts of src.
|
||||
//
|
||||
// The returned Name will only be valid if dst is valid.
|
||||
//
|
||||
// It skips fill parts that are "?".
|
||||
func fillName(r Name, fill string) Name {
|
||||
fill = cmp.Or(fill, FillDefault)
|
||||
f := parseMask(fill)
|
||||
if fill != FillNothing && f.IsZero() {
|
||||
panic("invalid fill")
|
||||
s, n.Tag, _ = cutPromised(s, ":")
|
||||
s, n.Model, promised = cutPromised(s, "/")
|
||||
if !promised {
|
||||
n.Model = s
|
||||
return n
|
||||
}
|
||||
for i := range r.parts {
|
||||
if f.parts[i] == "?" {
|
||||
continue
|
||||
}
|
||||
r.parts[i] = cmp.Or(r.parts[i], f.parts[i])
|
||||
s, n.Namespace, promised = cutPromised(s, "/")
|
||||
if !promised {
|
||||
n.Namespace = s
|
||||
return n
|
||||
}
|
||||
return r
|
||||
n.Host = s
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
// WithBuild returns a copy of r with the build set to the given string.
|
||||
func (r Name) WithBuild(build string) Name {
|
||||
r.parts[PartBuild] = build
|
||||
return r
|
||||
// Merge merges the host, namespace, and tag parts of the two names,
// preferring the non-empty parts of a.
func Merge(a, b Name) Name {
a.Host = cmp.Or(a.Host, b.Host)
a.Namespace = cmp.Or(a.Namespace, b.Namespace)
a.Tag = cmp.Or(a.Tag, b.Tag)
return a
}
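Aside (not part of the diff): Merge only fills the host, namespace, and tag from b; the model and digest are left exactly as a provided them. A hedged sketch, reusing the assumed types/model import from the earlier example:

```go
package main

import (
	"fmt"

	"github.com/ollama/ollama/types/model" // assumed import path
)

func main() {
	a := model.ParseNameBare("example.com/ns/mistral") // no tag
	merged := model.Merge(a, model.DefaultName())

	fmt.Println(merged.Host)      // example.com (non-empty part of a wins)
	fmt.Println(merged.Namespace) // ns
	fmt.Println(merged.Tag)       // latest (filled from DefaultName)
	fmt.Println(merged.Model)     // mistral (Model and RawDigest are never merged)
}
```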
|
||||
|
||||
func (r Name) WithDigest(digest Digest) Name {
|
||||
r.parts[PartDigest] = digest.String()
|
||||
return r
|
||||
}
|
||||
|
||||
var mapHashSeed = maphash.MakeSeed()
|
||||
|
||||
// MapHash returns a case insensitive hash for use in maps and equality
|
||||
// checks. For a convenient way to compare names, use [Name.EqualFold].
|
||||
//
|
||||
//nolint:errcheck
|
||||
func (r Name) MapHash() uint64 {
|
||||
// correctly hash the parts with case insensitive comparison
|
||||
var h maphash.Hash
|
||||
h.SetSeed(mapHashSeed)
|
||||
for _, part := range r.parts {
|
||||
// downcase the part for hashing
|
||||
for i := range part {
|
||||
c := part[i]
|
||||
if c >= 'A' && c <= 'Z' {
|
||||
c = c - 'A' + 'a'
|
||||
}
|
||||
h.WriteByte(c)
|
||||
}
|
||||
// String returns the name string, in the format that [ParseNameNoDefaults]
|
||||
// accepts as valid, if [Name.IsValid] reports true; otherwise the empty
|
||||
// string is returned.
|
||||
func (n Name) String() string {
|
||||
var b strings.Builder
|
||||
if n.Host != "" {
|
||||
b.WriteString(n.Host)
|
||||
b.WriteByte('/')
|
||||
}
|
||||
return h.Sum64()
|
||||
}
|
||||
|
||||
func (r Name) slice(from, to PartKind) Name {
|
||||
var v Name
|
||||
copy(v.parts[from:to+1], r.parts[from:to+1])
|
||||
return v
|
||||
}
|
||||
|
||||
// DisplayShortest returns the shortest possible, masked display string in form:
|
||||
//
|
||||
// [host/][<namespace>/]<model>[:<tag>]
|
||||
//
|
||||
// # Masks
|
||||
//
|
||||
// The mask is a string that specifies which parts of the name to omit based
|
||||
// on case-insensitive comparison. [Name.DisplayShortest] omits parts of the name
|
||||
// that are the same as the mask, moving from left to right until the first
|
||||
// unequal part is found. It then moves right to left until the first unequal
|
||||
// part is found. The result is the shortest possible display string.
|
||||
//
|
||||
// Unlike a [Name] the mask can contain "?" characters which are treated as
|
||||
// wildcards. A "?" will never match a part of the name, since a valid name
|
||||
// can never contain a "?" character.
|
||||
//
|
||||
// For example: Given a Name ("registry.ollama.ai/library/mistral:latest") masked
|
||||
// with ("registry.ollama.ai/library/?:latest") will produce the display string
|
||||
// ("mistral").
|
||||
//
|
||||
// If mask is the empty string, then [MaskDefault] is used.
|
||||
//
|
||||
// DisplayShortest panics if the mask is not the empty string, MaskNothing, and
|
||||
// invalid.
|
||||
//
|
||||
// # Builds
|
||||
//
|
||||
// For now, DisplayShortest does not consider the build or return one in the
|
||||
// result. We can lift this restriction when needed.
|
||||
func (r Name) DisplayShortest(mask string) string {
|
||||
mask = cmp.Or(mask, MaskDefault)
|
||||
d := parseMask(mask)
|
||||
if mask != MaskNothing && r.IsZero() {
|
||||
panic("invalid Name")
|
||||
if n.Namespace != "" {
|
||||
b.WriteString(n.Namespace)
|
||||
b.WriteByte('/')
|
||||
}
|
||||
for i := range PartTag {
|
||||
if !strings.EqualFold(r.parts[i], d.parts[i]) {
|
||||
break
|
||||
}
|
||||
r.parts[i] = ""
|
||||
b.WriteString(n.Model)
|
||||
if n.Tag != "" {
|
||||
b.WriteByte(':')
|
||||
b.WriteString(n.Tag)
|
||||
}
|
||||
for i := PartTag; i >= 0; i-- {
|
||||
if !strings.EqualFold(r.parts[i], d.parts[i]) {
|
||||
break
|
||||
}
|
||||
r.parts[i] = ""
|
||||
if n.RawDigest != "" {
|
||||
b.WriteByte('@')
|
||||
b.WriteString(n.RawDigest)
|
||||
}
|
||||
return r.slice(PartHost, PartTag).DisplayLong()
|
||||
}
|
||||
|
||||
// DisplayLongest returns the result of r.DisplayShortest(MaskNothing).
|
||||
func (r Name) DisplayLongest() string {
|
||||
return r.DisplayShortest(MaskNothing)
|
||||
}
|
||||
|
||||
var seps = [...]string{
|
||||
PartHost: "/",
|
||||
PartNamespace: "/",
|
||||
PartModel: ":",
|
||||
PartTag: "+",
|
||||
PartBuild: "@",
|
||||
PartDigest: "",
|
||||
}
|
||||
|
||||
// WriteTo implements io.WriterTo. It writes the fullest possible display
|
||||
// string in form:
|
||||
//
|
||||
// <host>/<namespace>/<model>:<tag>+<build>@<digest-type>-<digest>
|
||||
//
|
||||
// Missing parts and their separators are not written.
|
||||
//
|
||||
// The full digest is always prefixed with "@". That is if [Name.IsValid]
|
||||
// reports false and [Name.IsResolved] reports true, then the string is
|
||||
// returned as "@<digest-type>-<digest>".
|
||||
func (r Name) writeTo(w io.StringWriter) error {
|
||||
var partsWritten int
|
||||
for i := range r.parts {
|
||||
if r.parts[i] == "" {
|
||||
continue
|
||||
}
|
||||
if partsWritten > 0 || i == int(PartDigest) {
|
||||
if _, err := w.WriteString(seps[i-1]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := w.WriteString(r.parts[i]); err != nil {
|
||||
return err
|
||||
}
|
||||
partsWritten++
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var builderPool = sync.Pool{
|
||||
New: func() interface{} {
|
||||
return &strings.Builder{}
|
||||
},
|
||||
}
|
||||
|
||||
// DisplayLong returns the fullest possible display string in form:
|
||||
//
|
||||
// <host>/<namespace>/<model>:<tag>+<build>
|
||||
//
|
||||
// If any part is missing, it is omitted from the display string.
|
||||
func (r Name) DisplayLong() string {
|
||||
b := builderPool.Get().(*strings.Builder)
|
||||
defer builderPool.Put(b)
|
||||
b.Reset()
|
||||
b.Grow(50) // arbitrarily long enough for most names
|
||||
_ = r.writeTo(b)
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// GoString implements fmt.GoStringer. It returns a string suitable for
|
||||
// debugging and logging. It is similar to [Name.DisplayLong] but it always
|
||||
// returns a string that includes all parts of the Name, with missing parts
|
||||
// replaced with a ("?").
|
||||
func (r Name) GoString() string {
|
||||
for i := range r.parts {
|
||||
r.parts[i] = cmp.Or(r.parts[i], "?")
|
||||
}
|
||||
return r.DisplayLong()
|
||||
}
|
||||
|
||||
// LogValue implements slog.Valuer.
|
||||
func (r Name) LogValue() slog.Value {
|
||||
return slog.StringValue(r.GoString())
|
||||
}
|
||||
|
||||
// IsComplete reports whether the Name is fully qualified. That is it has a
|
||||
// domain, namespace, name, tag, and build.
|
||||
func (r Name) IsComplete() bool {
|
||||
return !slices.Contains(r.parts[:PartDigest], "")
|
||||
}
|
||||
|
||||
// IsCompleteNoBuild is like [Name.IsComplete] but it does not require the
|
||||
// build part to be present.
|
||||
func (r Name) IsCompleteNoBuild() bool {
|
||||
return !slices.Contains(r.parts[:PartBuild], "")
|
||||
}
|
||||
|
||||
// IsResolved reports true if the Name has a valid digest.
|
||||
//
|
||||
// It is possible to have a valid Name, or a complete Name that is not
|
||||
// resolved.
|
||||
func (r Name) IsResolved() bool {
|
||||
return r.Digest().IsValid()
|
||||
}
|
||||
|
||||
// Digest returns the digest part of the Name, if any.
|
||||
//
|
||||
// If Digest returns a non-empty string, then [Name.IsResolved] will return
|
||||
// true, and digest is considered valid.
|
||||
func (r Name) Digest() Digest {
|
||||
// This was already validated by ParseName, so we can just return it.
|
||||
return Digest{r.parts[PartDigest]}
|
||||
}
|
||||
|
||||
// EqualFold reports whether r and o are equivalent model names, ignoring
|
||||
// case.
|
||||
func (r Name) EqualFold(o Name) bool {
|
||||
return r.CompareFold(o) == 0
|
||||
}
|
||||
|
||||
// CompareFold performs a case-insensitive cmp.Compare on r and o.
|
||||
//
|
||||
// This can be used with [slices.SortFunc].
|
||||
//
|
||||
// For simple equality checks, use [Name.EqualFold].
|
||||
func (r Name) CompareFold(o Name) int {
|
||||
return slices.CompareFunc(r.parts[:], o.parts[:], compareFold)
|
||||
}
|
||||
|
||||
func compareFold(a, b string) int {
|
||||
return slices.CompareFunc([]rune(a), []rune(b), func(a, b rune) int {
|
||||
return cmp.Compare(downcase(a), downcase(b))
|
||||
})
|
||||
}
|
||||
|
||||
func downcase(r rune) rune {
|
||||
if r >= 'A' && r <= 'Z' {
|
||||
return r - 'A' + 'a'
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
func (r Name) Host() string { return r.parts[PartHost] }
|
||||
func (r Name) Namespace() string { return r.parts[PartNamespace] }
|
||||
func (r Name) Model() string { return r.parts[PartModel] }
|
||||
func (r Name) Build() string { return r.parts[PartBuild] }
|
||||
func (r Name) Tag() string { return r.parts[PartTag] }
|
||||
|
||||
// iter_Seq2 is an iter.Seq2 defined here to avoid the current build
|
||||
// restrictions in the go1.22 iter package requiring the
|
||||
// goexperiment.rangefunc tag to be set via the GOEXPERIMENT=rangefunc flag,
|
||||
// which we are not yet ready to support.
|
||||
//
|
||||
// Once we are ready to support rangefunc, this can be removed and replaced
|
||||
// with the iter.Seq2 type.
|
||||
type iter_Seq2[A, B any] func(func(A, B) bool)
|
||||
|
||||
// Parts returns a sequence of the parts of a Name string from most specific
|
||||
// to least specific.
|
||||
//
|
||||
// It normalizes the input string by removing "http://" and "https://" only.
|
||||
// No other normalizations are performed.
|
||||
func parts(s string) iter_Seq2[PartKind, string] {
|
||||
return func(yield func(PartKind, string) bool) {
|
||||
if strings.HasPrefix(s, "http://") {
|
||||
s = strings.TrimPrefix(s, "http://")
|
||||
} else {
|
||||
s = strings.TrimPrefix(s, "https://")
|
||||
}
|
||||
|
||||
if len(s) > MaxNamePartLen || len(s) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
partLen := 0
|
||||
state, j := PartDigest, len(s)
|
||||
for i := len(s) - 1; i >= 0; i-- {
|
||||
if partLen++; partLen > MaxNamePartLen {
|
||||
// catch a part that is too long early, so
|
||||
// we don't keep spinning on it, waiting for
|
||||
// an isInValidPart check which would scan
|
||||
// over it again.
|
||||
yield(state, s[i+1:j])
|
||||
return
|
||||
}
|
||||
|
||||
switch s[i] {
|
||||
case '@':
|
||||
switch state {
|
||||
case PartDigest:
|
||||
if !yield(PartDigest, s[i+1:j]) {
|
||||
return
|
||||
}
|
||||
if i == 0 {
|
||||
// This is the form
|
||||
// "@<digest>" which is valid.
|
||||
//
|
||||
// We're done.
|
||||
return
|
||||
}
|
||||
state, j, partLen = PartBuild, i, 0
|
||||
default:
|
||||
yield(PartExtraneous, s[i+1:j])
|
||||
return
|
||||
}
|
||||
case '+':
|
||||
switch state {
|
||||
case PartBuild, PartDigest:
|
||||
if !yield(PartBuild, s[i+1:j]) {
|
||||
return
|
||||
}
|
||||
state, j, partLen = PartTag, i, 0
|
||||
default:
|
||||
yield(PartExtraneous, s[i+1:j])
|
||||
return
|
||||
}
|
||||
case ':':
|
||||
switch state {
|
||||
case PartTag, PartBuild, PartDigest:
|
||||
if !yield(PartTag, s[i+1:j]) {
|
||||
return
|
||||
}
|
||||
state, j, partLen = PartModel, i, 0
|
||||
case PartHost:
|
||||
// noop: support for host:port
|
||||
default:
|
||||
yield(PartExtraneous, s[i+1:j])
|
||||
return
|
||||
}
|
||||
case '/':
|
||||
switch state {
|
||||
case PartModel, PartTag, PartBuild, PartDigest:
|
||||
if !yield(PartModel, s[i+1:j]) {
|
||||
return
|
||||
}
|
||||
state, j = PartNamespace, i
|
||||
case PartNamespace:
|
||||
if !yield(PartNamespace, s[i+1:j]) {
|
||||
return
|
||||
}
|
||||
state, j, partLen = PartHost, i, 0
|
||||
default:
|
||||
yield(PartExtraneous, s[i+1:j])
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if state <= PartNamespace {
|
||||
yield(state, s[:j])
|
||||
} else {
|
||||
yield(PartModel, s[:j])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (r Name) IsZero() bool {
|
||||
return r.parts == [NumParts]string{}
|
||||
}
|
||||
|
||||
// IsValid reports if a model has at minimum a valid model part.
|
||||
func (r Name) IsValid() bool {
|
||||
// Parts ensures we only have valid parts, so no need to validate
|
||||
// them here, only check if we have a name or not.
|
||||
return r.parts[PartModel] != ""
|
||||
}
|
||||
|
||||
// ParseNameFromURLPath parses forms of a URL path into a Name. Specifically,
|
||||
// it trims any leading "/" and then calls [ParseName] with fill.
|
||||
func ParseNameFromURLPath(s, fill string) Name {
|
||||
s = strings.TrimPrefix(s, "/")
|
||||
return ParseNameFill(s, fill)
|
||||
}
|
||||
|
||||
func ParseNameFromURLPathFill(s, fill string) Name {
|
||||
return ParseNameFill(s, fill)
|
||||
}
|
||||
|
||||
// URLPath returns a complete, canonicalized, relative URL path using the parts of a
|
||||
// complete Name.
|
||||
//
|
||||
// The parts maintain their original case.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// ParseName("example.com/namespace/model:tag+build").URLPath() // returns "/example.com/namespace/model:tag"
|
||||
func (r Name) DisplayURLPath() string {
|
||||
return r.DisplayShortest(MaskNothing)
|
||||
}
|
||||
|
||||
// URLPath returns a complete, canonicalized, relative URL path using the parts of a
|
||||
// complete Name in the form:
|
||||
//
|
||||
// <host>/<namespace>/<model>/<tag>
|
||||
//
|
||||
// The parts are downcased.
|
||||
func (r Name) URLPath() string {
|
||||
return strings.ToLower(path.Join(r.parts[:PartBuild]...))
|
||||
}
|
||||
|
||||
// ParseNameFromFilepath parses a file path into a Name. The input string must be a
|
||||
// valid file path representation of a model name in the form:
|
||||
//
|
||||
// host/namespace/model/tag/build
|
||||
//
|
||||
// The zero value is returned if s does not contain all path elements
|
||||
// leading up to the model part, or if any path element is an invalid part
|
||||
// for its corresponding part kind.
|
||||
//
|
||||
// The fill string is used to fill in missing parts of any constructed Name.
|
||||
// See [ParseName] for more information on the fill string.
|
||||
func ParseNameFromFilepath(s, fill string) Name {
|
||||
var r Name
|
||||
for i := range PartBuild + 1 {
|
||||
part, rest, _ := strings.Cut(s, string(filepath.Separator))
|
||||
if !IsValidNamePart(i, part) {
|
||||
return Name{}
|
||||
}
|
||||
r.parts[i] = part
|
||||
s = rest
|
||||
if s == "" {
|
||||
break
|
||||
}
|
||||
}
|
||||
if s != "" {
|
||||
return Name{}
|
||||
}
|
||||
if !r.IsValid() {
|
||||
return Name{}
|
||||
}
|
||||
return fillName(r, fill)
|
||||
}
|
||||
|
||||
// Filepath returns a complete, canonicalized, relative file path using the
|
||||
// parts of a complete Name.
|
||||
//
|
||||
// Each part is downcased, except for the build part, which is upcased.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// ParseName("example.com/namespace/model:tag+build").Filepath() // returns "example.com/namespace/model/tag/BUILD"
|
||||
func (r Name) Filepath() string {
|
||||
for i := range r.parts {
|
||||
if PartKind(i) == PartBuild {
|
||||
r.parts[i] = strings.ToUpper(r.parts[i])
|
||||
} else {
|
||||
r.parts[i] = strings.ToLower(r.parts[i])
|
||||
}
|
||||
}
|
||||
return filepath.Join(r.parts[:]...)
|
||||
}
|
||||
|
||||
// FilepathNoBuild returns a complete, canonicalized, relative file path using
|
||||
// the parts of a complete Name, but without the build part.
|
||||
func (r Name) FilepathNoBuild() string {
|
||||
for i := range PartBuild {
|
||||
r.parts[i] = strings.ToLower(r.parts[i])
|
||||
}
|
||||
return filepath.Join(r.parts[:PartBuild]...)
|
||||
}
|
||||
|
||||
// IsValidNamePart reports if s contains all valid characters for the given
|
||||
// part kind and is under MaxNamePartLen bytes.
|
||||
func IsValidNamePart(kind PartKind, s string) bool {
|
||||
if len(s) > MaxNamePartLen {
|
||||
// IsValid reports whether all parts of the name are present and valid. The
|
||||
// digest is a special case, and is checked for validity only if present.
|
||||
func (n Name) IsValid() bool {
|
||||
if n.RawDigest != "" && !isValidPart(kindDigest, n.RawDigest) {
|
||||
return false
|
||||
}
|
||||
if s == "" {
|
||||
return false
|
||||
return n.IsFullyQualified()
|
||||
}
|
||||
|
||||
// IsFullyQualified returns true if all parts of the name are present and
|
||||
// valid without the digest.
|
||||
func (n Name) IsFullyQualified() bool {
|
||||
var parts = []string{
|
||||
n.Host,
|
||||
n.Namespace,
|
||||
n.Model,
|
||||
n.Tag,
|
||||
}
|
||||
var consecutiveDots int
|
||||
for i, c := range []byte(s) {
|
||||
if i == 0 && !isAlphaNumeric(c) {
|
||||
return false
|
||||
}
|
||||
if c == '.' {
|
||||
if consecutiveDots++; consecutiveDots >= 2 {
|
||||
return false
|
||||
}
|
||||
} else {
|
||||
consecutiveDots = 0
|
||||
}
|
||||
if !isValidByteFor(kind, c) {
|
||||
for i, part := range parts {
|
||||
if !isValidPart(partKind(i), part) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
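Aside (not part of the diff): with the new rules, IsValid effectively means "fully qualified, plus a well-formed digest if one is present". A sketch under the same assumed types/model import path, with outcomes taken from the test table below:

```go
package main

import (
	"fmt"

	"github.com/ollama/ollama/types/model" // assumed import path
)

func main() {
	// All four parts present and well formed: valid.
	fmt.Println(model.ParseNameBare("host/namespace/model:tag").IsValid()) // true

	// Host and namespace are missing, so the bare name is not fully qualified.
	fmt.Println(model.ParseNameBare("namespace/model").IsValid()) // false

	// ParseName merges in the defaults first, so a short name becomes valid.
	fmt.Println(model.ParseName("mistral").IsValid()) // true
}
```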
|
||||
|
||||
func isAlphaNumeric(c byte) bool {
|
||||
return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9'
|
||||
// Filepath returns a canonical filepath that represents the name with each part from
// host to tag as a directory in the form:
//
//	{host}/{namespace}/{model}/{tag}
//
// It uses the system's filepath separator and ensures the path is clean.
//
// It panics if the name is not fully qualified. Use [Name.IsFullyQualified]
// to check if the name is fully qualified.
func (n Name) Filepath() string {
if !n.IsFullyQualified() {
panic("illegal attempt to get filepath of invalid name")
}
return filepath.Join(
strings.ToLower(n.Host),
strings.ToLower(n.Namespace),
strings.ToLower(n.Model),
strings.ToLower(n.Tag),
)
}
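Aside (not part of the diff): a sketch of the new Filepath contract under the same assumed types/model import path. Note the guard: calling Filepath on a name that is not fully qualified panics.

```go
package main

import (
	"fmt"

	"github.com/ollama/ollama/types/model" // assumed import path
)

func main() {
	n := model.ParseName("Example.Com/Library/Mistral:Latest")
	if n.IsFullyQualified() {
		// Each directory level is the lowercased part, joined with the
		// OS path separator: example.com/library/mistral/latest
		fmt.Println(n.Filepath())
	}
}
```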
|
||||
|
||||
func isValidByteFor(kind PartKind, c byte) bool {
|
||||
if kind == PartNamespace && c == '.' {
|
||||
// LogValue returns a slog.Value that represents the name as a string.
|
||||
func (n Name) LogValue() slog.Value {
|
||||
return slog.StringValue(n.String())
|
||||
}
|
||||
|
||||
func isValidLen(kind partKind, s string) bool {
|
||||
switch kind {
|
||||
case kindHost:
|
||||
return len(s) >= 1 && len(s) <= 350
|
||||
case kindTag:
|
||||
return len(s) >= 1 && len(s) <= 80
|
||||
default:
|
||||
return len(s) >= 2 && len(s) <= 80
|
||||
}
|
||||
}
|
||||
|
||||
func isValidPart(kind partKind, s string) bool {
|
||||
if !isValidLen(kind, s) {
|
||||
return false
|
||||
}
|
||||
if kind == PartHost && c == ':' {
|
||||
return true
|
||||
for i := range s {
|
||||
if i == 0 {
|
||||
if !isAlphanumeric(s[i]) {
|
||||
return false
|
||||
}
|
||||
continue
|
||||
}
|
||||
switch s[i] {
|
||||
case '_', '-':
|
||||
case '.':
|
||||
if kind == kindNamespace {
|
||||
return false
|
||||
}
|
||||
case ':':
|
||||
if kind != kindHost && kind != kindDigest {
|
||||
return false
|
||||
}
|
||||
default:
|
||||
if !isAlphanumeric(s[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
if c == '.' || c == '-' {
|
||||
return true
|
||||
}
|
||||
if c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '_' {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
return true
|
||||
}
|
||||
|
||||
func isAlphanumeric(c byte) bool {
|
||||
return c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c >= '0' && c <= '9'
|
||||
}
|
||||
|
||||
func cutLast(s, sep string) (before, after string, ok bool) {
i := strings.LastIndex(s, sep)
if i >= 0 {
return s[:i], s[i+len(sep):], true
}
return s, "", false
}

// cutPromised cuts the last part of s at the last occurrence of sep. If sep is
// found, the part before and after sep are returned as-is unless empty, in
// which case they are returned as MissingPart, which will cause
// [Name.IsValid] to return false.
func cutPromised(s, sep string) (before, after string, ok bool) {
before, after, ok = cutLast(s, sep)
if !ok {
return before, after, false
}
return cmp.Or(before, MissingPart), cmp.Or(after, MissingPart), true
}
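Aside (not part of the diff): cutLast and cutPromised are unexported, but their effect is visible through ParseNameBare: a trailing separator "promises" a part that never arrives, so that part is recorded as MissingPart ("!MISSING!") and the name fails IsValid. A sketch with the same assumed types/model import path:

```go
package main

import (
	"fmt"

	"github.com/ollama/ollama/types/model" // assumed import path
)

func main() {
	// ":" with no tag after it.
	n := model.ParseNameBare("mm:")
	fmt.Println(n.Tag, n.IsValid()) // !MISSING! false

	// "@" with no digest after it.
	n = model.ParseNameBare("model@")
	fmt.Println(n.RawDigest, n.IsValid()) // !MISSING! false
}
```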
|
||||
|
||||
@@ -1,717 +1,237 @@
|
||||
package model
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"cmp"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
type fields struct {
|
||||
host, namespace, model, tag, build string
|
||||
digest string
|
||||
}
|
||||
const (
|
||||
part80 = "88888888888888888888888888888888888888888888888888888888888888888888888888888888"
|
||||
part350 = "33333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333"
|
||||
)
|
||||
|
||||
func fieldsFromName(p Name) fields {
|
||||
return fields{
|
||||
host: p.parts[PartHost],
|
||||
namespace: p.parts[PartNamespace],
|
||||
model: p.parts[PartModel],
|
||||
tag: p.parts[PartTag],
|
||||
build: p.parts[PartBuild],
|
||||
digest: p.parts[PartDigest],
|
||||
}
|
||||
}
|
||||
|
||||
var testNames = map[string]fields{
|
||||
"mistral:latest": {model: "mistral", tag: "latest"},
|
||||
"mistral": {model: "mistral"},
|
||||
"mistral:30B": {model: "mistral", tag: "30B"},
|
||||
"mistral:7b": {model: "mistral", tag: "7b"},
|
||||
"mistral:7b+Q4_0": {model: "mistral", tag: "7b", build: "Q4_0"},
|
||||
"mistral+KQED": {model: "mistral", build: "KQED"},
|
||||
"mistral.x-3:7b+Q4_0": {model: "mistral.x-3", tag: "7b", build: "Q4_0"},
|
||||
"mistral:7b+q4_0": {model: "mistral", tag: "7b", build: "q4_0"},
|
||||
"llama2": {model: "llama2"},
|
||||
"user/model": {namespace: "user", model: "model"},
|
||||
"example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"},
|
||||
"example.com/ns/mistral:7b+X": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"},
|
||||
"localhost:5000/ns/mistral": {host: "localhost:5000", namespace: "ns", model: "mistral"},
|
||||
|
||||
// invalid digest
|
||||
"mistral:latest@invalid256-": {},
|
||||
"mistral:latest@-123": {},
|
||||
"mistral:latest@!-123": {},
|
||||
"mistral:latest@1-!": {},
|
||||
"mistral:latest@": {},
|
||||
|
||||
// resolved
|
||||
"x@sha123-12": {model: "x", digest: "sha123-12"},
|
||||
"@sha456-22": {digest: "sha456-22"},
|
||||
"@sha456-1": {},
|
||||
"@@sha123-22": {},
|
||||
|
||||
// preserves case for build
|
||||
"x+b": {model: "x", build: "b"},
|
||||
|
||||
// invalid (includes fuzzing trophies)
|
||||
" / / : + ": {},
|
||||
" / : + ": {},
|
||||
" : + ": {},
|
||||
" + ": {},
|
||||
" : ": {},
|
||||
" / ": {},
|
||||
" /": {},
|
||||
"/ ": {},
|
||||
"/": {},
|
||||
":": {},
|
||||
"+": {},
|
||||
|
||||
// (".") in namepsace is not allowed
|
||||
"invalid.com/7b+x": {},
|
||||
|
||||
"invalid:7b+Q4_0:latest": {},
|
||||
"in valid": {},
|
||||
"invalid/y/z/foo": {},
|
||||
"/0": {},
|
||||
"0 /0": {},
|
||||
"0 /": {},
|
||||
"0/": {},
|
||||
":/0": {},
|
||||
"+0/00000": {},
|
||||
"0+.\xf2\x80\xf6\x9d00000\xe5\x99\xe6\xd900\xd90\xa60\x91\xdc0\xff\xbf\x99\xe800\xb9\xdc\xd6\xc300\x970\xfb\xfd0\xe0\x8a\xe1\xad\xd40\x9700\xa80\x980\xdd0000\xb00\x91000\xfe0\x89\x9b\x90\x93\x9f0\xe60\xf7\x84\xb0\x87\xa5\xff0\xa000\x9a\x85\xf6\x85\xfe\xa9\xf9\xe9\xde00\xf4\xe0\x8f\x81\xad\xde00\xd700\xaa\xe000000\xb1\xee0\x91": {},
|
||||
"0//0": {},
|
||||
"m+^^^": {},
|
||||
"file:///etc/passwd": {},
|
||||
"file:///etc/passwd:latest": {},
|
||||
"file:///etc/passwd:latest+u": {},
|
||||
|
||||
":x": {},
|
||||
"+x": {},
|
||||
"x+": {},
|
||||
|
||||
// Disallow ("\.+") in any part to prevent path traversal anywhere
|
||||
// we convert the name to a path.
|
||||
"../etc/passwd": {},
|
||||
".../etc/passwd": {},
|
||||
"./../passwd": {},
|
||||
"./0+..": {},
|
||||
|
||||
"-h": {},
|
||||
|
||||
strings.Repeat("a", MaxNamePartLen): {model: strings.Repeat("a", MaxNamePartLen)},
|
||||
strings.Repeat("a", MaxNamePartLen+1): {},
|
||||
}
|
||||
|
||||
func TestIsValidNameLen(t *testing.T) {
|
||||
if IsValidNamePart(PartNamespace, strings.Repeat("a", MaxNamePartLen+1)) {
|
||||
t.Errorf("unexpectedly valid long name")
|
||||
}
|
||||
}
|
||||
|
||||
// TestConsecutiveDots tests that consecutive dots are not allowed in any
|
||||
// part, to avoid path traversal. There also are some tests in testNames, but
|
||||
// this test is more exhaustive and exists to emphasize the importance of
|
||||
// preventing path traversal.
|
||||
func TestNameConsecutiveDots(t *testing.T) {
|
||||
for i := 1; i < 10; i++ {
|
||||
s := "a" + strings.Repeat(".", i)
|
||||
if i > 1 {
|
||||
if g := ParseNameFill(s, FillNothing).DisplayLong(); g != "" {
|
||||
t.Errorf("ParseName(%q) = %q; want empty string", s, g)
|
||||
}
|
||||
} else {
|
||||
if g := ParseNameFill(s, FillNothing).DisplayLong(); g != s {
|
||||
t.Errorf("ParseName(%q) = %q; want %q", s, g, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNameParts(t *testing.T) {
|
||||
var p Name
|
||||
if w, g := int(NumParts), len(p.parts); w != g {
|
||||
t.Errorf("Parts() = %d; want %d", g, w)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNamePartString(t *testing.T) {
|
||||
if g := PartKind(-2).String(); g != "Unknown" {
|
||||
t.Errorf("Unknown part = %q; want %q", g, "Unknown")
|
||||
}
|
||||
for kind, name := range kindNames {
|
||||
if g := kind.String(); g != name {
|
||||
t.Errorf("%s = %q; want %q", kind, g, name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseName(t *testing.T) {
|
||||
for baseName, want := range testNames {
|
||||
for _, prefix := range []string{"", "https://", "http://"} {
|
||||
// We should get the same results with or without the
|
||||
// http(s) prefixes
|
||||
s := prefix + baseName
|
||||
|
||||
t.Run(s, func(t *testing.T) {
|
||||
name := ParseNameFill(s, FillNothing)
|
||||
got := fieldsFromName(name)
|
||||
if got != want {
|
||||
t.Errorf("ParseName(%q) = %q; want %q", s, got, want)
|
||||
}
|
||||
|
||||
// test round-trip
|
||||
if !ParseNameFill(name.DisplayLong(), FillNothing).EqualFold(name) {
|
||||
t.Errorf("ParseName(%q).String() = %s; want %s", s, name.DisplayLong(), baseName)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseNameFill(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
fill string
|
||||
want string
|
||||
}{
|
||||
{"mistral", "example.com/library/?:latest+Q4_0", "example.com/library/mistral:latest+Q4_0"},
|
||||
{"mistral", "example.com/library/?:latest", "example.com/library/mistral:latest"},
|
||||
{"llama2:x", "example.com/library/?:latest+Q4_0", "example.com/library/llama2:x+Q4_0"},
|
||||
|
||||
// Invalid
|
||||
{"", "example.com/library/?:latest+Q4_0", ""},
|
||||
{"llama2:?", "example.com/library/?:latest+Q4_0", ""},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
t.Run(tt.in, func(t *testing.T) {
|
||||
name := ParseNameFill(tt.in, tt.fill)
|
||||
if g := name.DisplayLong(); g != tt.want {
|
||||
t.Errorf("ParseName(%q, %q) = %q; want %q", tt.in, tt.fill, g, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
t.Run("invalid fill", func(t *testing.T) {
|
||||
defer func() {
|
||||
if recover() == nil {
|
||||
t.Fatal("expected panic")
|
||||
}
|
||||
}()
|
||||
ParseNameFill("x", "^")
|
||||
})
|
||||
}
|
||||
|
||||
func TestParseNameHTTPDoublePrefixStrip(t *testing.T) {
|
||||
cases := []string{
|
||||
"http://https://valid.com/valid/valid:latest",
|
||||
"https://http://valid.com/valid/valid:latest",
|
||||
}
|
||||
for _, s := range cases {
|
||||
t.Run(s, func(t *testing.T) {
|
||||
name := ParseNameFill(s, FillNothing)
|
||||
if name.IsValid() {
|
||||
t.Errorf("expected invalid path; got %#v", name)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestCompleteWithAndWithoutBuild(t *testing.T) {
|
||||
func TestParseNameParts(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
complete bool
|
||||
completeNoBuild bool
|
||||
want Name
|
||||
wantValidDigest bool
|
||||
}{
|
||||
{"", false, false},
|
||||
{"incomplete/mistral:7b+x", false, false},
|
||||
{"incomplete/mistral:7b+Q4_0", false, false},
|
||||
{"incomplete:7b+x", false, false},
|
||||
{"complete.com/x/mistral:latest+Q4_0", true, true},
|
||||
{"complete.com/x/mistral:latest", false, true},
|
||||
{
|
||||
in: "host/namespace/model:tag",
|
||||
want: Name{
|
||||
Host: "host",
|
||||
Namespace: "namespace",
|
||||
Model: "model",
|
||||
Tag: "tag",
|
||||
},
|
||||
},
|
||||
{
|
||||
in: "host/namespace/model",
|
||||
want: Name{
|
||||
Host: "host",
|
||||
Namespace: "namespace",
|
||||
Model: "model",
|
||||
},
|
||||
},
|
||||
{
|
||||
in: "namespace/model",
|
||||
want: Name{
|
||||
Namespace: "namespace",
|
||||
Model: "model",
|
||||
},
|
||||
},
|
||||
{
|
||||
in: "model",
|
||||
want: Name{
|
||||
Model: "model",
|
||||
},
|
||||
},
|
||||
{
|
||||
in: "h/nn/mm:t",
|
||||
want: Name{
|
||||
Host: "h",
|
||||
Namespace: "nn",
|
||||
Model: "mm",
|
||||
Tag: "t",
|
||||
},
|
||||
},
|
||||
{
|
||||
in: part80 + "/" + part80 + "/" + part80 + ":" + part80,
|
||||
want: Name{
|
||||
Host: part80,
|
||||
Namespace: part80,
|
||||
Model: part80,
|
||||
Tag: part80,
|
||||
},
|
||||
},
|
||||
{
|
||||
in: part350 + "/" + part80 + "/" + part80 + ":" + part80,
|
||||
want: Name{
|
||||
Host: part350,
|
||||
Namespace: part80,
|
||||
Model: part80,
|
||||
Tag: part80,
|
||||
},
|
||||
},
|
||||
{
|
||||
in: "@digest",
|
||||
want: Name{
|
||||
RawDigest: "digest",
|
||||
},
|
||||
wantValidDigest: false,
|
||||
},
|
||||
{
|
||||
in: "model@sha256:123",
|
||||
want: Name{
|
||||
Model: "model",
|
||||
RawDigest: "sha256:123",
|
||||
},
|
||||
wantValidDigest: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
t.Run(tt.in, func(t *testing.T) {
|
||||
p := ParseNameFill(tt.in, FillNothing)
|
||||
t.Logf("ParseName(%q) = %#v", tt.in, p)
|
||||
if g := p.IsComplete(); g != tt.complete {
|
||||
t.Errorf("Complete(%q) = %v; want %v", tt.in, g, tt.complete)
|
||||
}
|
||||
if g := p.IsCompleteNoBuild(); g != tt.completeNoBuild {
|
||||
t.Errorf("CompleteNoBuild(%q) = %v; want %v", tt.in, g, tt.completeNoBuild)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Complete uses Parts which returns a slice, but it should be
|
||||
// inlined when used in Complete, preventing any allocations or
|
||||
// escaping to the heap.
|
||||
allocs := testing.AllocsPerRun(1000, func() {
|
||||
keep(ParseNameFill("complete.com/x/mistral:latest+Q4_0", FillNothing).IsComplete())
|
||||
})
|
||||
if allocs > 0 {
|
||||
t.Errorf("Complete allocs = %v; want 0", allocs)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNameLogValue(t *testing.T) {
|
||||
cases := []string{
|
||||
"example.com/library/mistral:latest+Q4_0",
|
||||
"mistral:latest",
|
||||
"mistral:7b+Q4_0",
|
||||
}
|
||||
for _, s := range cases {
|
||||
t.Run(s, func(t *testing.T) {
|
||||
var b bytes.Buffer
|
||||
log := slog.New(slog.NewTextHandler(&b, nil))
|
||||
name := ParseNameFill(s, FillNothing)
|
||||
log.Info("", "name", name)
|
||||
want := fmt.Sprintf("name=%s", name.GoString())
|
||||
got := b.String()
|
||||
if !strings.Contains(got, want) {
|
||||
t.Errorf("expected log output to contain %q; got %q", want, got)
|
||||
got := ParseNameBare(tt.in)
|
||||
if !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("parseName(%q) = %v; want %v", tt.in, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNameGoString(t *testing.T) {
|
||||
var testCases = map[string]bool{ // name -> valid
|
||||
"host/namespace/model:tag": true,
|
||||
"host/namespace/model": false,
|
||||
"namespace/model": false,
|
||||
"model": false,
|
||||
"@sha256-1000000000000000000000000000000000000000000000000000000000000000": false,
|
||||
"model@sha256-1000000000000000000000000000000000000000000000000000000000000000": false,
|
||||
"model@sha256:1000000000000000000000000000000000000000000000000000000000000000": false,
|
||||
|
||||
// long (but valid)
|
||||
part80 + "/" + part80 + "/" + part80 + ":" + part80: true,
|
||||
part350 + "/" + part80 + "/" + part80 + ":" + part80: true,
|
||||
|
||||
"h/nn/mm:t@sha256-1000000000000000000000000000000000000000000000000000000000000000": true, // bare minimum part sizes
|
||||
"h/nn/mm:t@sha256:1000000000000000000000000000000000000000000000000000000000000000": true, // bare minimum part sizes
|
||||
|
||||
"m": false, // model too short
|
||||
"n/mm:": false, // namespace too short
|
||||
"h/n/mm:t": false, // namespace too short
|
||||
"@t": false, // digest too short
|
||||
"mm@d": false, // digest too short
|
||||
|
||||
// invalids
|
||||
"^": false,
|
||||
"mm:": false,
|
||||
"/nn/mm": false,
|
||||
"//": false,
|
||||
"//mm": false,
|
||||
"hh//": false,
|
||||
"//mm:@": false,
|
||||
"00@": false,
|
||||
"@": false,
|
||||
|
||||
// not starting with alphanum
|
||||
"-hh/nn/mm:tt@dd": false,
|
||||
"hh/-nn/mm:tt@dd": false,
|
||||
"hh/nn/-mm:tt@dd": false,
|
||||
"hh/nn/mm:-tt@dd": false,
|
||||
"hh/nn/mm:tt@-dd": false,
|
||||
|
||||
"": false,
|
||||
|
||||
// hosts
|
||||
"host:https/namespace/model:tag": true,
|
||||
|
||||
// colon in non-host part before tag
|
||||
"host/name:space/model:tag": false,
|
||||
}
|
||||
|
||||
func TestNameparseNameDefault(t *testing.T) {
|
||||
const name = "xx"
|
||||
n := ParseName(name)
|
||||
got := n.String()
|
||||
want := "registry.ollama.ai/library/xx:latest"
|
||||
if got != want {
|
||||
t.Errorf("parseName(%q).String() = %q; want %q", name, got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNameIsValid(t *testing.T) {
|
||||
var numStringTests int
|
||||
for s, want := range testCases {
|
||||
n := ParseNameBare(s)
|
||||
t.Logf("n: %#v", n)
|
||||
got := n.IsValid()
|
||||
if got != want {
|
||||
t.Errorf("parseName(%q).IsValid() = %v; want %v", s, got, want)
|
||||
}
|
||||
|
||||
// Test roundtrip with String
|
||||
if got {
|
||||
got := ParseNameBare(s).String()
|
||||
if got != s {
|
||||
t.Errorf("parseName(%q).String() = %q; want %q", s, got, s)
|
||||
}
|
||||
numStringTests++
|
||||
}
|
||||
}
|
||||
|
||||
if numStringTests == 0 {
|
||||
t.Errorf("no tests for Name.String")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNameIsValidPart(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
in string
|
||||
wantString string
|
||||
wantGoString string // default is tt.in
|
||||
kind partKind
|
||||
s string
|
||||
want bool
|
||||
}{
|
||||
{
|
||||
name: "Complete Name",
|
||||
in: "example.com/library/mistral:latest+Q4_0",
|
||||
wantGoString: "example.com/library/mistral:latest+Q4_0@?",
|
||||
},
|
||||
{
|
||||
name: "Short Name",
|
||||
in: "mistral:latest",
|
||||
wantGoString: "?/?/mistral:latest+?@?",
|
||||
},
|
||||
{
|
||||
name: "Long Name",
|
||||
in: "library/mistral:latest",
|
||||
wantGoString: "?/library/mistral:latest+?@?",
|
||||
},
|
||||
{
|
||||
name: "Case Preserved",
|
||||
in: "Library/Mistral:Latest",
|
||||
wantGoString: "?/Library/Mistral:Latest+?@?",
|
||||
},
|
||||
{
|
||||
name: "With digest",
|
||||
in: "Library/Mistral:Latest@sha256-123456",
|
||||
wantGoString: "?/Library/Mistral:Latest+?@sha256-123456",
|
||||
},
|
||||
{kind: kindHost, s: "", want: false},
|
||||
{kind: kindHost, s: "a", want: true},
|
||||
{kind: kindHost, s: "a.", want: true},
|
||||
{kind: kindHost, s: "a.b", want: true},
|
||||
{kind: kindHost, s: "a:123", want: true},
|
||||
{kind: kindHost, s: "a:123/aa/bb", want: false},
|
||||
{kind: kindNamespace, s: "bb", want: true},
|
||||
{kind: kindNamespace, s: "a.", want: false},
|
||||
{kind: kindModel, s: "-h", want: false},
|
||||
{kind: kindDigest, s: "sha256-1000000000000000000000000000000000000000000000000000000000000000", want: true},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
p := ParseNameFill(tt.in, FillNothing)
|
||||
tt.wantGoString = cmp.Or(tt.wantGoString, tt.in)
|
||||
if g := fmt.Sprintf("%#v", p); g != tt.wantGoString {
|
||||
t.Errorf("GoString() = %q; want %q", g, tt.wantGoString)
|
||||
t.Run(tt.s, func(t *testing.T) {
|
||||
got := isValidPart(tt.kind, tt.s)
|
||||
if got != tt.want {
|
||||
t.Errorf("isValidPart(%s, %q) = %v; want %v", tt.kind, tt.s, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestDisplayLongest(t *testing.T) {
|
||||
g := ParseNameFill("example.com/library/mistral:latest+Q4_0", FillNothing).DisplayLongest()
|
||||
if g != "example.com/library/mistral:latest" {
|
||||
t.Errorf("got = %q; want %q", g, "example.com/library/mistral:latest")
|
||||
func FuzzName(f *testing.F) {
|
||||
for s := range testCases {
|
||||
f.Add(s)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDisplayShortest(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
mask string
|
||||
want string
|
||||
wantPanic bool
|
||||
}{
|
||||
{"example.com/library/mistral:latest+Q4_0", "example.com/library/?:latest", "mistral", false},
|
||||
{"example.com/library/mistral:latest+Q4_0", "example.com/?/?:latest", "library/mistral", false},
|
||||
{"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false},
|
||||
{"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false},
|
||||
|
||||
// case-insensitive
|
||||
{"Example.com/library/mistral:latest+Q4_0", "example.com/library/?:latest", "mistral", false},
|
||||
{"example.com/Library/mistral:latest+Q4_0", "example.com/library/?:latest", "mistral", false},
|
||||
{"example.com/library/Mistral:latest+Q4_0", "example.com/library/?:latest", "Mistral", false},
|
||||
{"example.com/library/mistral:Latest+Q4_0", "example.com/library/?:latest", "mistral", false},
|
||||
{"example.com/library/mistral:Latest+q4_0", "example.com/library/?:latest", "mistral", false},
|
||||
|
||||
// zero value
|
||||
{"", MaskDefault, "", true},
|
||||
|
||||
// invalid mask
|
||||
{"example.com/library/mistral:latest+Q4_0", "example.com/mistral", "", true},
|
||||
|
||||
// DefaultMask
|
||||
{"registry.ollama.ai/library/mistral:latest+Q4_0", MaskDefault, "mistral", false},
|
||||
|
||||
// Auto-Fill
|
||||
{"x", "example.com/library/?:latest", "x", false},
|
||||
{"x", "example.com/library/?:latest+Q4_0", "x", false},
|
||||
{"x/y:z", "a.com/library/?:latest+Q4_0", "x/y:z", false},
|
||||
{"x/y:z", "a.com/library/?:latest+Q4_0", "x/y:z", false},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
t.Run("", func(t *testing.T) {
|
||||
defer func() {
|
||||
if tt.wantPanic {
|
||||
if recover() == nil {
|
||||
t.Errorf("expected panic")
|
||||
}
|
||||
f.Fuzz(func(t *testing.T, s string) {
|
||||
n := ParseNameBare(s)
|
||||
if n.IsValid() {
|
||||
parts := [...]string{n.Host, n.Namespace, n.Model, n.Tag, n.RawDigest}
|
||||
for _, part := range parts {
|
||||
if part == ".." {
|
||||
t.Errorf("unexpected .. as valid part")
|
||||
}
|
||||
if len(part) > 350 {
|
||||
t.Errorf("part too long: %q", part)
|
||||
}
|
||||
}()
|
||||
|
||||
p := ParseNameFill(tt.in, FillNothing)
|
||||
t.Logf("ParseName(%q) = %#v", tt.in, p)
|
||||
if g := p.DisplayShortest(tt.mask); g != tt.want {
|
||||
t.Errorf("got = %q; want %q", g, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseNameAllocs(t *testing.T) {
|
||||
allocs := testing.AllocsPerRun(1000, func() {
|
||||
keep(ParseNameFill("example.com/mistral:7b+Q4_0", FillNothing))
|
||||
})
|
||||
if allocs > 0 {
|
||||
t.Errorf("ParseName allocs = %v; want 0", allocs)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkParseName(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
|
||||
for range b.N {
|
||||
keep(ParseNameFill("example.com/mistral:7b+Q4_0", FillNothing))
|
||||
}
|
||||
}
|
||||
|
||||
func FuzzParseNameFromFilepath(f *testing.F) {
|
||||
f.Add("example.com/library/mistral/7b/Q4_0")
|
||||
f.Add("example.com/../mistral/7b/Q4_0")
|
||||
f.Add("example.com/x/../7b/Q4_0")
|
||||
f.Add("example.com/x/../7b")
|
||||
f.Fuzz(func(t *testing.T, s string) {
|
||||
name := ParseNameFromFilepath(s, FillNothing)
|
||||
if strings.Contains(s, "..") && !name.IsZero() {
|
||||
t.Fatalf("non-zero value for path with '..': %q", s)
|
||||
}
|
||||
if name.IsValid() == name.IsZero() {
|
||||
t.Errorf("expected valid path to be non-zero value; got %#v", name)
|
||||
if n.String() != s {
|
||||
t.Errorf("String() = %q; want %q", n.String(), s)
|
||||
}
|
||||
}
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
func FuzzParseName(f *testing.F) {
	f.Add("example.com/mistral:7b+Q4_0")
	f.Add("example.com/mistral:7b+q4_0")
	f.Add("example.com/mistral:7b+x")
	f.Add("x/y/z:8n+I")
	f.Add(":x")
	f.Add("@sha256-123456")
	f.Add("example.com/mistral:latest+Q4_0@sha256-123456")
	f.Add(":@!@")
	f.Add("...")
	f.Fuzz(func(t *testing.T, s string) {
		r0 := ParseNameFill(s, FillNothing)

		if strings.Contains(s, "..") && !r0.IsZero() {
			t.Fatalf("non-zero value for path with '..': %q", s)
		}

		if !r0.IsValid() && !r0.IsResolved() {
			if !r0.EqualFold(Name{}) {
				t.Errorf("expected invalid path to be zero value; got %#v", r0)
			}
			t.Skipf("invalid path: %q", s)
		}

		for _, p := range r0.parts {
			if len(p) > MaxNamePartLen {
				t.Errorf("part too long: %q", p)
			}
		}

		if !strings.EqualFold(r0.DisplayLong(), s) {
			t.Errorf("String() did not round-trip with case insensitivity: %q\ngot = %q\nwant = %q", s, r0.DisplayLong(), s)
		}

		r1 := ParseNameFill(r0.DisplayLong(), FillNothing)
		if !r0.EqualFold(r1) {
			t.Errorf("round-trip mismatch: %+v != %+v", r0, r1)
		}
	})
}

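// TestNameStringAllocs asserts that rendering a parsed name with
// DisplayLong stays within a single allocation.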
func TestNameStringAllocs(t *testing.T) {
	name := ParseNameFill("example.com/ns/mistral:latest+Q4_0", FillNothing)
	allocs := testing.AllocsPerRun(1000, func() {
		keep(name.DisplayLong())
	})
	if allocs > 1 {
		t.Errorf("String allocs = %v; want 0", allocs)
	}
}

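// TestNamePath checks DisplayURLPath, which renders the name without its
// build suffix (e.g. "+Q4_0").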
func TestNamePath(t *testing.T) {
	cases := []struct {
		in   string
		want string
	}{
		{"example.com/library/mistral:latest+Q4_0", "example.com/library/mistral:latest"},

		// incomplete
		{"example.com/library/mistral:latest", "example.com/library/mistral:latest"},
		{"", ""},
	}
	for _, tt := range cases {
		t.Run(tt.in, func(t *testing.T) {
			p := ParseNameFill(tt.in, FillNothing)
			t.Logf("ParseName(%q) = %#v", tt.in, p)
			if g := p.DisplayURLPath(); g != tt.want {
				t.Errorf("got = %q; want %q", g, tt.want)
			}
		})
	}
}

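// TestNameFilepath covers Filepath and FilepathNoBuild: both lowercase the
// name and join its parts with the OS path separator (normalized to "/"
// here via filepath.ToSlash); FilepathNoBuild additionally drops the build.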
func TestNameFilepath(t *testing.T) {
	cases := []struct {
		in          string
		want        string
		wantNoBuild string
	}{
		{
			in:          "example.com/library/mistral:latest+Q4_0",
			want:        "example.com/library/mistral/latest/Q4_0",
			wantNoBuild: "example.com/library/mistral/latest",
		},
		{
			in:          "Example.Com/Library/Mistral:Latest+Q4_0",
			want:        "example.com/library/mistral/latest/Q4_0",
			wantNoBuild: "example.com/library/mistral/latest",
		},
		{
			in:          "Example.Com/Library/Mistral:Latest+Q4_0",
			want:        "example.com/library/mistral/latest/Q4_0",
			wantNoBuild: "example.com/library/mistral/latest",
		},
		{
			in:          "example.com/library/mistral:latest",
			want:        "example.com/library/mistral/latest",
			wantNoBuild: "example.com/library/mistral/latest",
		},
		{
			in:          "",
			want:        "",
			wantNoBuild: "",
		},
	}
	for _, tt := range cases {
		t.Run(tt.in, func(t *testing.T) {
			p := ParseNameFill(tt.in, FillNothing)
			t.Logf("ParseName(%q) = %#v", tt.in, p)
			g := p.Filepath()
			g = filepath.ToSlash(g)
			if g != tt.want {
				t.Errorf("got = %q; want %q", g, tt.want)
			}
			g = p.FilepathNoBuild()
			g = filepath.ToSlash(g)
			if g != tt.wantNoBuild {
				t.Errorf("got = %q; want %q", g, tt.wantNoBuild)
			}
		})
	}
}

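// TestParseNameFilepath goes the other way: it parses on-disk layouts back
// into names with ParseNameFromFilepath, optionally filling missing parts
// from a fill mask, and expects the zero value for incomplete or invalid
// paths.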
func TestParseNameFilepath(t *testing.T) {
	cases := []struct {
		in   string
		fill string // default is FillNothing
		want string
	}{
		{
			in:   "example.com/library/mistral/latest/Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "example.com/library/mistral/latest",
			fill: "?/?/?:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "example.com/library/mistral",
			fill: "?/?/?:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "example.com/library",
			want: "",
		},
		{
			in:   "example.com/",
			want: "",
		},
		{
			in:   "example.com/^/mistral/latest/Q4_0",
			want: "",
		},
		{
			in:   "example.com/library/mistral/../Q4_0",
			want: "",
		},
		{
			in:   "example.com/library/mistral/latest/Q4_0/extra",
			want: "",
		},
	}
	for _, tt := range cases {
		t.Run(tt.in, func(t *testing.T) {
			in := strings.ReplaceAll(tt.in, "/", string(filepath.Separator))
			fill := cmp.Or(tt.fill, FillNothing)
			want := ParseNameFill(tt.want, fill)
			if g := ParseNameFromFilepath(in, fill); !g.EqualFold(want) {
				t.Errorf("got = %q; want %q", g.DisplayLong(), tt.want)
			}
		})
	}
}

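// TestParseNameFromPath parses URL-style paths (with an optional leading
// slash) using ParseNameFromURLPath; incomplete or invalid paths render as
// the empty string.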
func TestParseNameFromPath(t *testing.T) {
	cases := []struct {
		in   string
		want string
		fill string // default is FillNothing
	}{
		{
			in:   "example.com/library/mistral:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "/example.com/library/mistral:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "/example.com/library/mistral",
			want: "example.com/library/mistral",
		},
		{
			in:   "/example.com/library/mistral",
			fill: "?/?/?:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "/example.com/library",
			want: "",
		},
		{
			in:   "/example.com/",
			want: "",
		},
		{
			in:   "/example.com/^/mistral/latest",
			want: "",
		},
	}
	for _, tt := range cases {
		t.Run(tt.in, func(t *testing.T) {
			fill := cmp.Or(tt.fill, FillNothing)
			if g := ParseNameFromURLPath(tt.in, fill); g.DisplayLong() != tt.want {
				t.Errorf("got = %q; want %q", g.DisplayLong(), tt.want)
			}
		})
	}
}

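// ExampleName_MapHash shows that MapHash is case-insensitive: all three
// spellings of "mistral:latest+q4" hash to the same key, while the name
// without a build hashes to a second key.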
func ExampleName_MapHash() {
	m := map[uint64]bool{}

	// key 1
	m[ParseNameFill("mistral:latest+q4", FillNothing).MapHash()] = true
	m[ParseNameFill("miSTRal:latest+Q4", FillNothing).MapHash()] = true
	m[ParseNameFill("mistral:LATest+Q4", FillNothing).MapHash()] = true

	// key 2
	m[ParseNameFill("mistral:LATest", FillNothing).MapHash()] = true

	fmt.Println(len(m))
	// Output:
	// 2
}

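// ExampleName_CompareFold_sort sorts a slice of names case-insensitively
// using slices.SortFunc with Name.CompareFold.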
func ExampleName_CompareFold_sort() {
	names := []Name{
		ParseNameFill("mistral:latest", FillNothing),
		ParseNameFill("mistRal:7b+q4", FillNothing),
		ParseNameFill("MIstral:7b", FillNothing),
	}

	slices.SortFunc(names, Name.CompareFold)

	for _, n := range names {
		fmt.Println(n.DisplayLong())
	}

	// Output:
	// MIstral:7b
	// mistRal:7b+q4
	// mistral:latest
}

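// ExampleName_completeAndResolved contrasts IsComplete (every name part is
// present) with IsResolved (a digest is attached); a bare digest is resolved
// but not complete.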
func ExampleName_completeAndResolved() {
	for _, s := range []string{
		"x/y/z:latest+q4_0@sha123-abc",
		"x/y/z:latest+q4_0",
		"@sha123-abc",
	} {
		name := ParseNameFill(s, FillNothing)
		fmt.Printf("complete:%v resolved:%v digest:%s\n", name.IsComplete(), name.IsResolved(), name.Digest())
	}

	// Output:
	// complete:true resolved:true digest:sha123-abc
	// complete:true resolved:false digest:
	// complete:false resolved:true digest:sha123-abc
}

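// ExampleName_DisplayShortest shows how DisplayShortest elides the parts of
// a name already covered by the mask, and how the empty mask falls back to
// the default registry.ollama.ai/library mask.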
func ExampleName_DisplayShortest() {
	name := ParseNameFill("example.com/jmorganca/mistral:latest+Q4_0", FillNothing)

	fmt.Println(name.DisplayShortest("example.com/jmorganca/?:latest"))
	fmt.Println(name.DisplayShortest("example.com/?/?:latest"))
	fmt.Println(name.DisplayShortest("example.com/?/?:?"))
	fmt.Println(name.DisplayShortest("?/?/?:?"))

	// Default
	name = ParseNameFill("registry.ollama.ai/library/mistral:latest+Q4_0", FillNothing)
	fmt.Println(name.DisplayShortest(""))

	// Output:
	// mistral
	// jmorganca/mistral
	// jmorganca/mistral:latest
	// example.com/jmorganca/mistral:latest
	// mistral
}

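// keep returns its argument unchanged; routing results through it keeps the
// compiler from optimizing away the calls measured in the allocation tests
// and benchmarks above.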
func keep[T any](v T) T { return v }

@@ -1,2 +1,2 @@
go test fuzz v1
string(":")
string("00@")
@@ -1,2 +0,0 @@
go test fuzz v1
string("/0")
@@ -1,2 +0,0 @@
go test fuzz v1
string("0//0")
@@ -1,2 +0,0 @@
go test fuzz v1
string("0 /0")
@@ -1,2 +0,0 @@
go test fuzz v1
string("+0/00000")
@@ -1,2 +0,0 @@
go test fuzz v1
string("0+.\xf2\x80\xf6\x9d00000\xe5\x99\xe6\xd900\xd90\xa60\x91\xdc0\xff\xbf\x99\xe800\xb9\xdc\xd6\xc300\x970\xfb\xfd0\xe0\x8a\xe1\xad\xd40\x9700\xa80\x980\xdd0000\xb00\x91000\xfe0\x89\x9b\x90\x93\x9f0\xe60\xf7\x84\xb0\x87\xa5\xff0\xa000\x9a\x85\xf6\x85\xfe\xa9\xf9\xe9\xde00\xf4\xe0\x8f\x81\xad\xde00\xd700\xaa\xe000000\xb1\xee0\x91")