Mirror of https://github.com/ollama/ollama.git (synced 2026-01-06 06:31:14 -05:00)

Compare commits: 48 commits
Commit SHAs in this comparison:

2bed62926e, aad8d128a0, ec1acbb867, e4859c4563, 8e30eb26bd, 0b5c589ca2,
65fadddc85, ed5fb088c4, f81f308118, b1390a7b37, 11d83386a5, bb31def011,
41e03ede95, 7fea1ecdf6, 054894271d, 6fef042f0b, 5c0c2d1d09, 37f9c8ad99,
2a80f55e2a, 421c878a2d, 36666c2142, 85801317d1, 2ed0d65948, d459dc4ad1,
40bc4622ef, c0f818a07a, 8671fdeda6, 2619850fb4, 8feb97dc0d, 4e1ff6dcbb,
8589d752ac, de4ded68b0, 9b5a3c5991, 00b0699c75, 993cf8bf55, 7bb7cb8a60,
b123be5b71, ddf5c09a9b, 5f73c08729, f503a848c2, 36a6daccab, ceb0e26e5e,
284e02bed0, 3450a57d4a, 592dae31c8, 2010cbc5fa, ac0801eced, ad66e5b060
.github/workflows/release.yaml (vendored, 13 changes)

@@ -311,29 +311,18 @@ jobs:
- uses: actions/download-artifact@v4
with:
name: generate-windows-cpu
path: |
llm/build
dist/windows-amd64
- uses: actions/download-artifact@v4
with:
name: generate-windows-cuda
path: |
llm/build
dist/windows-amd64
- uses: actions/download-artifact@v4
with:
name: windows-cuda-deps
path: dist/deps
- uses: actions/download-artifact@v4
with:
name: windows-rocm-deps
path: dist/deps
- uses: actions/download-artifact@v4
with:
name: generate-windows-rocm
path: |
llm/build
dist/windows-amd64
- run: dir llm/build
- run: |
$gopath=(get-command go).source | split-path -parent
@@ -342,8 +331,6 @@ jobs:
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
$env:PATH="$gopath;$env:PATH"
$env:OLLAMA_SKIP_GENERATE="1"
$env:NVIDIA_DIR=$(resolve-path ".\dist\deps")
$env:HIP_PATH=$(resolve-path ".\dist\deps")
& .\scripts\build_windows.ps1
- uses: actions/upload-artifact@v4
with:

.github/workflows/test.yaml (vendored, 15 changes)
@@ -1,5 +1,15 @@
name: test

concurrency:
# For PRs, later CI runs preempt previous ones. e.g. a force push on a PR
# cancels running CI jobs and starts all new ones.
#
# For non-PR pushes, concurrency.group needs to be unique for every distinct
# CI run we want to have happen. Use run_id, which in practice means all
# non-PR CI runs will be allowed to run without preempting each other.
group: ${{ github.workflow }}-$${{ github.pull_request.number || github.run_id }}
cancel-in-progress: true

on:
pull_request:
paths:
@@ -21,7 +31,9 @@ jobs:
- id: changes
run: |
changed() {
git diff-tree -r --no-commit-id --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
git diff-tree -r --no-commit-id --name-only \
$(git merge-base ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}) \
${{ github.event.pull_request.head.sha }} \
| xargs python3 -c "import sys; print(any([x.startswith('$1') for x in sys.argv[1:]]))"
}

@@ -283,7 +295,6 @@ jobs:
with:
go-version-file: go.mod
cache: true
- run: go get
- run: |
case ${{ matrix.arch }} in
amd64) echo ARCH=x86_64 ;;

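The rewritten changed() helper above diffs against the merge base of the pull request, so files that are already on the target branch no longer count as changed; the python one-liner at the end only asks whether any modified path starts with a given prefix. A standalone Go sketch of that prefix test (illustrative only, not part of the repository):

```go
package main

import (
	"fmt"
	"strings"
)

// anyHasPrefix mirrors the python one-liner in the workflow: it reports
// whether any changed path starts with the given prefix.
func anyHasPrefix(paths []string, prefix string) bool {
	for _, p := range paths {
		if strings.HasPrefix(p, prefix) {
			return true
		}
	}
	return false
}

func main() {
	changed := []string{"llm/generate/gen_windows.ps1", "docs/windows.md"}
	fmt.Println(anyHasPrefix(changed, "llm/")) // true
}
```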
@@ -396,8 +396,10 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
func DefaultOptions() Options {
return Options{
// options set on request to runner
NumPredict: -1,
NumKeep: 0,
NumPredict: -1,

// set a minimal num_keep to avoid issues on context shifts
NumKeep: 4,
Temperature: 0.8,
TopK: 40,
TopP: 0.9,

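With these defaults, generation stays unbounded (num_predict: -1) but at least four tokens are now retained on a context shift (num_keep: 4) instead of none. Request-level options still win over the defaults; a rough sketch of overriding them through the Go client from this repository (client usage assumed from the api package, values are examples):

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	req := &api.GenerateRequest{
		Model:  "llama3",
		Prompt: "Why is the sky blue?",
		// request-level options override DefaultOptions()
		Options: map[string]interface{}{
			"num_keep":    8,   // keep more tokens across context shifts
			"num_predict": 256, // cap generation instead of the unlimited -1 default
		},
	}

	err = client.Generate(context.Background(), req, func(resp api.GenerateResponse) error {
		fmt.Print(resp.Response)
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}
```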
@@ -92,12 +92,8 @@ Source: "..\dist\windows-amd64\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64
Source: "..\dist\windows-amd64\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
; Assumes v5.7, may need adjustments for v6
#if GetEnv("HIP_PATH") != ""
Source: "{#GetEnv('HIP_PATH')}\bin\hipblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
Source: "{#GetEnv('HIP_PATH')}\bin\rocblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
; amdhip64.dll dependency comes from the driver and must be installed already
Source: "{#GetEnv('HIP_PATH')}\bin\rocblas\library\*"; DestDir: "{app}\rocm\rocblas\library\"; Flags: ignoreversion
#if DirExists("..\dist\windows-amd64\rocm")
Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
#endif

@@ -133,7 +129,7 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi

;FinishedHeadingLabel=Run your first model
;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama2
;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama3
;ClickFinish=%n

[Registry]

cmd/cmd.go (209 changes)
@@ -17,6 +17,7 @@ import (
"os"
"os/signal"
"path/filepath"
"regexp"
"runtime"
"strings"
"syscall"
@@ -53,8 +54,6 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
p := progress.NewProgress(os.Stderr)
defer p.Stop()

bars := make(map[string]*progress.Bar)

modelfile, err := os.ReadFile(filename)
if err != nil {
return err
@@ -95,95 +94,16 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return err
}

// TODO make this work w/ adapters
if fi.IsDir() {
tf, err := os.CreateTemp("", "ollama-tf")
// this is likely a safetensors or pytorch directory
// TODO make this work w/ adapters
tempfile, err := tempZipFiles(path)
if err != nil {
return err
}
defer os.RemoveAll(tf.Name())
defer os.RemoveAll(tempfile)

zf := zip.NewWriter(tf)

files := []string{}

tfiles, err := filepath.Glob(filepath.Join(path, "pytorch_model-*.bin"))
if err != nil {
return err
} else if len(tfiles) == 0 {
tfiles, err = filepath.Glob(filepath.Join(path, "model-*.safetensors"))
if err != nil {
return err
}
}

files = append(files, tfiles...)

if len(files) == 0 {
return fmt.Errorf("no models were found in '%s'", path)
}

// add the safetensor/torch config file + tokenizer
files = append(files, filepath.Join(path, "config.json"))
files = append(files, filepath.Join(path, "params.json"))
files = append(files, filepath.Join(path, "added_tokens.json"))
files = append(files, filepath.Join(path, "tokenizer.model"))

for _, fn := range files {
f, err := os.Open(fn)

// just skip whatever files aren't there
if os.IsNotExist(err) {
if strings.HasSuffix(fn, "tokenizer.model") {
// try the parent dir before giving up
parentDir := filepath.Dir(path)
newFn := filepath.Join(parentDir, "tokenizer.model")
f, err = os.Open(newFn)
if os.IsNotExist(err) {
continue
} else if err != nil {
return err
}
} else {
continue
}
} else if err != nil {
return err
}

fi, err := f.Stat()
if err != nil {
return err
}

h, err := zip.FileInfoHeader(fi)
if err != nil {
return err
}

h.Name = filepath.Base(fn)
h.Method = zip.Store

w, err := zf.CreateHeader(h)
if err != nil {
return err
}

_, err = io.Copy(w, f)
if err != nil {
return err
}

}

if err := zf.Close(); err != nil {
return err
}

if err := tf.Close(); err != nil {
return err
}
path = tf.Name()
path = tempfile
}

digest, err := createBlob(cmd, client, path)
@@ -191,10 +111,17 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return err
}

modelfile = bytes.ReplaceAll(modelfile, []byte(c.Args), []byte("@"+digest))
name := c.Name
if c.Name == "model" {
name = "from"
}

re := regexp.MustCompile(fmt.Sprintf(`(?im)^(%s)\s+%s\s*$`, name, c.Args))
modelfile = re.ReplaceAll(modelfile, []byte("$1 @"+digest))
}
}

bars := make(map[string]*progress.Bar)
fn := func(resp api.ProgressResponse) error {
if resp.Digest != "" {
spinner.Stop()
@@ -228,6 +155,114 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return nil
}

func tempZipFiles(path string) (string, error) {
tempfile, err := os.CreateTemp("", "ollama-tf")
if err != nil {
return "", err
}
defer tempfile.Close()

zipfile := zip.NewWriter(tempfile)
defer zipfile.Close()

detectContentType := func(path string) (string, error) {
f, err := os.Open(path)
if err != nil {
return "", err
}
defer f.Close()

var b bytes.Buffer
b.Grow(512)

if _, err := io.CopyN(&b, f, 512); err != nil && !errors.Is(err, io.EOF) {
return "", err
}

contentType, _, _ := strings.Cut(http.DetectContentType(b.Bytes()), ";")
return contentType, nil
}

glob := func(pattern, contentType string) ([]string, error) {
matches, err := filepath.Glob(pattern)
if err != nil {
return nil, err
}

for _, safetensor := range matches {
if ct, err := detectContentType(safetensor); err != nil {
return nil, err
} else if ct != contentType {
return nil, fmt.Errorf("invalid content type: expected %s for %s", ct, safetensor)
}
}

return matches, nil
}

var files []string
if st, _ := glob(filepath.Join(path, "model*.safetensors"), "application/octet-stream"); len(st) > 0 {
// safetensors files might be unresolved git lfs references; skip if they are
// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
files = append(files, st...)
} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
// pytorch files might also be unresolved git lfs references; skip if they are
// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
files = append(files, pt...)
} else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/octet-stream"); len(pt) > 0 {
// pytorch files might also be unresolved git lfs references; skip if they are
// covers consolidated.x.pth, consolidated.pth
files = append(files, pt...)
} else {
return "", errors.New("no safetensors or torch files found")
}

// add configuration files, json files are detected as text/plain
js, err := glob(filepath.Join(path, "*.json"), "text/plain")
if err != nil {
return "", err
}
files = append(files, js...)

if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
// add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
// tokenizer.model might be a unresolved git lfs reference; error if it is
files = append(files, tks...)
} else if tks, _ := glob(filepath.Join(path, "**/tokenizer.model"), "text/plain"); len(tks) > 0 {
// some times tokenizer.model is in a subdirectory (e.g. meta-llama/Meta-Llama-3-8B)
files = append(files, tks...)
}

for _, file := range files {
f, err := os.Open(file)
if err != nil {
return "", err
}
defer f.Close()

fi, err := f.Stat()
if err != nil {
return "", err
}

zfi, err := zip.FileInfoHeader(fi)
if err != nil {
return "", err
}

zf, err := zipfile.CreateHeader(zfi)
if err != nil {
return "", err
}

if _, err := io.Copy(zf, f); err != nil {
return "", err
}
}

return tempfile.Name(), nil
}

func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
bin, err := os.Open(path)
if err != nil {

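The content-type check in tempZipFiles is what filters out unresolved git-lfs pointers: real pytorch_model*.bin weights sniff as "application/zip" and safetensors as "application/octet-stream", while an un-pulled LFS pointer is a short text file. A standalone sketch of the same sniffing signal (the pointer text follows the usual git-lfs pointer layout; the byte values are illustrative):

```go
package main

import (
	"fmt"
	"net/http"
)

func main() {
	// An unresolved git-lfs pointer is a small plain-text file...
	pointer := []byte("version https://git-lfs.github.com/spec/v1\noid sha256:abcd\nsize 123\n")
	// ...while real pytorch weights start with a zip signature.
	weights := []byte{0x50, 0x4b, 0x03, 0x04, 0x00, 0x00, 0x00, 0x00}

	fmt.Println(http.DetectContentType(pointer)) // text/plain; charset=utf-8
	fmt.Println(http.DetectContentType(weights)) // application/zip
}
```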
@@ -14,7 +14,7 @@ As this is a preview release, you should expect a few bugs here and there. If
you run into a problem you can reach out on
[Discord](https://discord.gg/ollama), or file an
[issue](https://github.com/ollama/ollama/issues).
Logs will often be helpful in dianosing the problem (see
Logs will often be helpful in diagnosing the problem (see
[Troubleshooting](#troubleshooting) below)

## System Requirements

@@ -32,9 +32,25 @@ func PayloadsDir() (string, error) {
slog.Error("failed to lookup executable path", "error", err)
return "", err
}

cwd, err := os.Getwd()
if err != nil {
slog.Error("failed to lookup working directory", "error", err)
return "", err
}

var paths []string
for _, root := range []string{appExe, cwd} {
paths = append(paths,
filepath.Join(root),
filepath.Join(root, "windows-"+runtime.GOARCH),
filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
)
}

// Try a few variations to improve developer experience when building from source in the local tree
for _, d := range []string{".", "windows-" + runtime.GOARCH, "dist\\windows-" + runtime.GOARCH} {
candidate := filepath.Join(filepath.Dir(appExe), d, "ollama_runners")
for _, p := range paths {
candidate := filepath.Join(p, "ollama_runners")
_, err := os.Stat(candidate)
if err == nil {
runnersDir = candidate

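The change above makes the payload lookup consider both the executable directory and the current working directory rather than only the executable path. A condensed sketch of that search order (standalone, names are illustrative, not the repository function):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"runtime"
)

// findRunnersDir returns the first directory containing "ollama_runners",
// checking each root directly, then its windows-<arch> and dist layouts.
func findRunnersDir(roots []string) (string, bool) {
	arch := "windows-" + runtime.GOARCH
	for _, root := range roots {
		for _, sub := range []string{".", arch, filepath.Join("dist", arch)} {
			candidate := filepath.Join(root, sub, "ollama_runners")
			if _, err := os.Stat(candidate); err == nil {
				return candidate, true
			}
		}
	}
	return "", false
}

func main() {
	exe, _ := os.Executable()
	cwd, _ := os.Getwd()
	if dir, ok := findRunnersDir([]string{filepath.Dir(exe), cwd}); ok {
		fmt.Println("runners found at", dir)
	} else {
		fmt.Println("no runners directory found")
	}
}
```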
@@ -21,7 +21,7 @@ init_vars() {
# TODO - add additional optimization flags...
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off ${CMAKE_DEFS}"
fi
case $(uname -s) in
case $(uname -s) in
"Darwin")
LIB_EXT="dylib"
WHOLE_ARCHIVE="-Wl,-force_load"

@@ -165,11 +165,11 @@ if [ -d "${CUDA_LIB_DIR}" ]; then
fi
if [ "${ARCH}" == "arm64" ]; then
echo "ARM CPU detected - disabling unsupported AVX instructions"

# ARM-based CPUs such as M1 and Tegra do not support AVX extensions.
#
# CUDA compute < 6.0 lacks proper FP16 support on ARM.
# Disabling has minimal performance effect while maintaining compatibility.
# CUDA compute < 6.0 lacks proper FP16 support on ARM.
# Disabling has minimal performance effect while maintaining compatibility.
ARM64_DEFS="-DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_CUDA_F16=off"
fi
# Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp

@@ -26,16 +26,25 @@ function amdGPUs {
$GPU_LIST -join ';'
}

function init_vars {
$script:SRC_DIR = $(resolve-path "..\..\")
$script:llamacppDir = "../llama.cpp"
if (!$script:SRC_DIR) {
$script:SRC_DIR = $(resolve-path "..\..\")
}
if (!$script:llamacppDir) {
$script:llamacppDir = "../llama.cpp"
}
if (!$script:cmakeTargets) {
$script:cmakeTargets = @("ollama_llama_server")
}
$script:cmakeDefs = @(
"-DBUILD_SHARED_LIBS=on",
"-DLLAMA_NATIVE=off"
)
$script:cmakeTargets = @("ollama_llama_server")
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
$script:ARCH = "amd64" # arm not yet supported.
$script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_runners"
md "$script:DIST_BASE" -ea 0 > $null
if ($env:CGO_CFLAGS -contains "-g") {
$script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
$script:config = "RelWithDebInfo"
@@ -166,137 +175,191 @@ function cleanup {
|
||||
}
|
||||
}
|
||||
|
||||
init_vars
|
||||
git_module_setup
|
||||
apply_patches
|
||||
|
||||
# -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
|
||||
# -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
|
||||
# -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
|
||||
|
||||
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
|
||||
|
||||
if ($null -eq ${env:OLLAMA_SKIP_CPU_GENERATE}) {
|
||||
function build_static() {
|
||||
if ((-not "${env:OLLAMA_SKIP_STATIC_GENERATE}") -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "static"))) {
|
||||
# GCC build for direct linking into the Go binary
|
||||
init_vars
|
||||
# cmake will silently fallback to msvc compilers if mingw isn't in the path, so detect and fail fast
|
||||
# as we need this to be compiled by gcc for golang to be able to link with itx
|
||||
write-host "Checking for MinGW..."
|
||||
# error action ensures we exit on failure
|
||||
get-command gcc
|
||||
get-command mingw32-make
|
||||
$oldTargets = $script:cmakeTargets
|
||||
$script:cmakeTargets = @("llama", "ggml")
|
||||
$script:cmakeDefs = @(
|
||||
"-G", "MinGW Makefiles"
|
||||
"-DCMAKE_C_COMPILER=gcc.exe",
|
||||
"-DCMAKE_CXX_COMPILER=g++.exe",
|
||||
"-DBUILD_SHARED_LIBS=off",
|
||||
"-DLLAMA_NATIVE=off",
|
||||
"-DLLAMA_AVX=off",
|
||||
"-DLLAMA_AVX2=off",
|
||||
"-DLLAMA_AVX512=off",
|
||||
"-DLLAMA_F16C=off",
|
||||
"-DLLAMA_FMA=off")
|
||||
$script:buildDir="../build/windows/${script:ARCH}_static"
|
||||
write-host "Building static library"
|
||||
build
|
||||
$script:cmakeTargets = $oldTargets
|
||||
} else {
|
||||
write-host "Skipping CPU generation step as requested"
|
||||
}
|
||||
}
|
||||
|
||||
function build_cpu() {
|
||||
if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu"))) {
|
||||
# remaining llama.cpp builds use MSVC
|
||||
init_vars
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||
$script:buildDir="../build/windows/${script:ARCH}/cpu"
|
||||
$script:distDir="$script:DIST_BASE\cpu"
|
||||
write-host "Building LCD CPU"
|
||||
build
|
||||
sign
|
||||
install
|
||||
} else {
|
||||
write-host "Skipping CPU generation step as requested"
|
||||
}
|
||||
}
|
||||
|
||||
function build_cpu_avx() {
|
||||
if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx"))) {
|
||||
init_vars
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||
$script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
|
||||
$script:distDir="$script:DIST_BASE\cpu_avx"
|
||||
write-host "Building AVX CPU"
|
||||
build
|
||||
sign
|
||||
install
|
||||
} else {
|
||||
write-host "Skipping CPU AVX generation step as requested"
|
||||
}
|
||||
}
|
||||
|
||||
function build_cpu_avx2() {
|
||||
if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx2"))) {
|
||||
init_vars
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
|
||||
$script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
|
||||
$script:distDir="$script:DIST_BASE\cpu_avx2"
|
||||
write-host "Building AVX2 CPU"
|
||||
build
|
||||
sign
|
||||
install
|
||||
} else {
|
||||
write-host "Skipping CPU AVX2 generation step as requested"
|
||||
}
|
||||
}
|
||||
|
||||
function build_cuda() {
|
||||
if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${script:CUDA_LIB_DIR}")) {
|
||||
# Then build cuda as a dynamically loaded library
|
||||
$nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
|
||||
$script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
|
||||
if ($null -ne $script:CUDA_VERSION) {
|
||||
$script:CUDA_VARIANT="_"+$script:CUDA_VERSION
|
||||
}
|
||||
init_vars
|
||||
$script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
|
||||
$script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT"
|
||||
$script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUDA=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
|
||||
if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
|
||||
write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
|
||||
$script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
|
||||
write-host "building custom CUDA GPU"
|
||||
}
|
||||
build
|
||||
sign
|
||||
install
|
||||
|
||||
write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\"
|
||||
cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
|
||||
cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
|
||||
cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
|
||||
} else {
|
||||
write-host "Skipping CUDA generation step"
|
||||
}
|
||||
}
|
||||
|
||||
function build_rocm() {
|
||||
if ((-not "${env:OLLAMA_SKIP_ROCM_GENERATE}") -and ("${env:HIP_PATH}")) {
|
||||
$script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
|
||||
if ($null -ne $script:ROCM_VERSION) {
|
||||
$script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
|
||||
}
|
||||
|
||||
init_vars
|
||||
$script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
|
||||
$script:distDir="$script:DIST_BASE\rocm$script:ROCM_VARIANT"
|
||||
$script:cmakeDefs += @(
|
||||
"-G", "Ninja",
|
||||
"-DCMAKE_C_COMPILER=clang.exe",
|
||||
"-DCMAKE_CXX_COMPILER=clang++.exe",
|
||||
"-DLLAMA_HIPBLAS=on",
|
||||
"-DHIP_PLATFORM=amd",
|
||||
"-DLLAMA_AVX=on",
|
||||
"-DLLAMA_AVX2=off",
|
||||
"-DCMAKE_POSITION_INDEPENDENT_CODE=on",
|
||||
"-DAMDGPU_TARGETS=$(amdGPUs)",
|
||||
"-DGPU_TARGETS=$(amdGPUs)"
|
||||
)
|
||||
|
||||
# Make sure the ROCm binary dir is first in the path
|
||||
$env:PATH="$env:HIP_PATH\bin;$env:PATH"
|
||||
|
||||
# We have to clobber the LIB var from the developer shell for clang to work properly
|
||||
$env:LIB=""
|
||||
if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
|
||||
write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
|
||||
$script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
|
||||
write-host "building custom ROCM GPU"
|
||||
}
|
||||
write-host "Building ROCm"
|
||||
build
|
||||
# Ninja doesn't prefix with config name
|
||||
${script:config}=""
|
||||
if ($null -ne $script:DUMPBIN) {
|
||||
& "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
|
||||
}
|
||||
sign
|
||||
install
|
||||
|
||||
# Assumes v5.7, may need adjustments for v6
|
||||
rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
|
||||
md "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" -ea 0 > $null
|
||||
cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
|
||||
cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
|
||||
# amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
|
||||
cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\"
|
||||
} else {
|
||||
write-host "Skipping ROCm generation step"
|
||||
}
|
||||
}
|
||||
|
||||
# GCC build for direct linking into the Go binary
|
||||
init_vars
|
||||
# cmake will silently fallback to msvc compilers if mingw isn't in the path, so detect and fail fast
|
||||
# as we need this to be compiled by gcc for golang to be able to link with itx
|
||||
write-host "Checking for MinGW..."
|
||||
# error action ensures we exit on failure
|
||||
get-command gcc
|
||||
get-command mingw32-make
|
||||
$script:cmakeTargets = @("llama", "ggml")
|
||||
$script:cmakeDefs = @(
|
||||
"-G", "MinGW Makefiles"
|
||||
"-DCMAKE_C_COMPILER=gcc.exe",
|
||||
"-DCMAKE_CXX_COMPILER=g++.exe",
|
||||
"-DBUILD_SHARED_LIBS=off",
|
||||
"-DLLAMA_NATIVE=off",
|
||||
"-DLLAMA_AVX=off",
|
||||
"-DLLAMA_AVX2=off",
|
||||
"-DLLAMA_AVX512=off",
|
||||
"-DLLAMA_F16C=off",
|
||||
"-DLLAMA_FMA=off")
|
||||
$script:buildDir="../build/windows/${script:ARCH}_static"
|
||||
write-host "Building static library"
|
||||
build
|
||||
if ($($args.count) -eq 0) {
|
||||
git_module_setup
|
||||
apply_patches
|
||||
build_static
|
||||
build_cpu
|
||||
build_cpu_avx
|
||||
build_cpu_avx2
|
||||
build_cuda
|
||||
build_rocm
|
||||
|
||||
# remaining llama.cpp builds use MSVC
|
||||
init_vars
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||
$script:buildDir="../build/windows/${script:ARCH}/cpu"
|
||||
$script:distDir="$script:DIST_BASE\cpu"
|
||||
write-host "Building LCD CPU"
|
||||
build
|
||||
sign
|
||||
install
|
||||
|
||||
init_vars
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||
$script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
|
||||
$script:distDir="$script:DIST_BASE\cpu_avx"
|
||||
write-host "Building AVX CPU"
|
||||
build
|
||||
sign
|
||||
install
|
||||
|
||||
init_vars
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
|
||||
$script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
|
||||
$script:distDir="$script:DIST_BASE\cpu_avx2"
|
||||
write-host "Building AVX2 CPU"
|
||||
build
|
||||
sign
|
||||
install
|
||||
cleanup
|
||||
write-host "`ngo generate completed. LLM runners: $(get-childitem -path $script:DIST_BASE)"
|
||||
} else {
|
||||
write-host "Skipping CPU generation step as requested"
|
||||
}
|
||||
|
||||
if ($null -ne $script:CUDA_LIB_DIR) {
|
||||
# Then build cuda as a dynamically loaded library
|
||||
$nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
|
||||
$script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
|
||||
if ($null -ne $script:CUDA_VERSION) {
|
||||
$script:CUDA_VARIANT="_"+$script:CUDA_VERSION
|
||||
}
|
||||
init_vars
|
||||
$script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
|
||||
$script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT"
|
||||
$script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUDA=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
|
||||
if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
|
||||
write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
|
||||
$script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
|
||||
write-host "building custom CUDA GPU"
|
||||
}
|
||||
build
|
||||
sign
|
||||
install
|
||||
}
|
||||
|
||||
if ($null -ne $env:HIP_PATH) {
|
||||
$script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
|
||||
if ($null -ne $script:ROCM_VERSION) {
|
||||
$script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
|
||||
}
|
||||
|
||||
init_vars
|
||||
$script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
|
||||
$script:distDir="$script:DIST_BASE\rocm$script:ROCM_VARIANT"
|
||||
$script:cmakeDefs += @(
|
||||
"-G", "Ninja",
|
||||
"-DCMAKE_C_COMPILER=clang.exe",
|
||||
"-DCMAKE_CXX_COMPILER=clang++.exe",
|
||||
"-DLLAMA_HIPBLAS=on",
|
||||
"-DHIP_PLATFORM=amd",
|
||||
"-DLLAMA_AVX=on",
|
||||
"-DLLAMA_AVX2=off",
|
||||
"-DCMAKE_POSITION_INDEPENDENT_CODE=on",
|
||||
"-DAMDGPU_TARGETS=$(amdGPUs)",
|
||||
"-DGPU_TARGETS=$(amdGPUs)"
|
||||
)
|
||||
|
||||
# Make sure the ROCm binary dir is first in the path
|
||||
$env:PATH="$env:HIP_PATH\bin;$env:PATH"
|
||||
|
||||
# We have to clobber the LIB var from the developer shell for clang to work properly
|
||||
$env:LIB=""
|
||||
if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
|
||||
write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
|
||||
$script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
|
||||
write-host "building custom ROCM GPU"
|
||||
}
|
||||
write-host "Building ROCm"
|
||||
build
|
||||
# Ninja doesn't prefix with config name
|
||||
${script:config}=""
|
||||
if ($null -ne $script:DUMPBIN) {
|
||||
& "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
|
||||
}
|
||||
sign
|
||||
install
|
||||
}
|
||||
|
||||
|
||||
cleanup
|
||||
write-host "`ngo generate completed. LLM runners: $(get-childitem -path $script:DIST_BASE)"
|
||||
for ( $i = 0; $i -lt $args.count; $i++ ) {
|
||||
write-host "performing $($args[$i])"
|
||||
& $($args[$i])
|
||||
}
|
||||
}
|
||||
Submodule llm/llama.cpp updated: 7593639ce3...46e12c4692
@@ -5,7 +5,6 @@ import (
"log/slog"
"os"
"strconv"
"strings"

"github.com/ollama/ollama/api"
"github.com/ollama/ollama/format"
@@ -100,8 +99,26 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
return 0, 0
}

var layerCount int
layers := ggml.Tensors().Layers()

var memoryLayerOutput uint64
if layer, ok := layers["output_norm"]; ok {
memoryLayerOutput += layer.size()
}

if layer, ok := layers["output"]; ok {
memoryLayerOutput += layer.size()
} else if layer, ok := layers["token_embd"]; ok {
memoryLayerOutput += layer.size()
}

if gpus[0].Library == "metal" && opts.UseMMap {
// memory is preallocated for output tensors
memoryRequiredTotal += memoryLayerOutput
memoryRequiredPartial += memoryLayerOutput
}

var layerCount int
for i := 0; i < int(ggml.KV().BlockCount()); i++ {
memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size()

@@ -115,15 +132,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
}
}

var memoryLayerOutput uint64
for k, v := range layers {
if !strings.HasPrefix(k, "blk.") {
memoryLayerOutput += v.size()
}
if gpus[0].Library != "metal" || !opts.UseMMap {
// memory was not preallocated for output tensors
memoryRequiredTotal += memoryLayerOutput
}

memoryRequiredTotal += memoryLayerOutput

if memoryAvailable > memoryRequiredTotal {
layerCount = int(ggml.KV().BlockCount()) + 1
memoryRequiredPartial = memoryRequiredTotal

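These hunks change how EstimateGPULayers accounts for the output and embedding tensors: they are summed once up front, and on metal with mmap they are treated as already preallocated. A much simplified standalone sketch of the overall estimate (greedy fit of per-block sizes plus a one-off output allocation; all numbers and names are made up, this is not the repository function):

```go
package main

import "fmt"

// estimateLayers sketches the idea behind EstimateGPULayers: fit as many
// repeating transformer blocks as possible into the available memory,
// then account for the output/embedding tensors once.
func estimateLayers(available uint64, blockSizes []uint64, outputSize uint64, outputPreallocated bool) (layers int, required uint64) {
	if !outputPreallocated {
		required = outputSize // output tensors load alongside the layers
	}
	for _, sz := range blockSizes {
		if required+sz > available {
			break
		}
		required += sz
		layers++
	}
	if layers == len(blockSizes) {
		layers++ // every block fits, so count the output layer as offloaded too
	}
	return layers, required
}

func main() {
	blocks := make([]uint64, 32)
	for i := range blocks {
		blocks[i] = 200 << 20 // pretend each block needs 200 MiB
	}
	n, req := estimateLayers(6<<30, blocks, 500<<20, false)
	fmt.Printf("offload %d layers, ~%d MiB required\n", n, req>>20)
}
```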
llm/patches/02-clip-log.diff (new file, 12 lines)
@@ -0,0 +1,12 @@
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index e431c7f7..f077e688 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -3,6 +3,7 @@
// I'll gradually clean and extend it
// Note: Even when using identical normalized image inputs (see normalize_image_u8_to_f32()) we have a significant difference in resulting embeddings compared to pytorch
#include "clip.h"
+#include "common.h"
#include "log.h"
#include "ggml.h"
#include "ggml-alloc.h"
llm/patches/04-metal.diff (new file, 45 lines)
@@ -0,0 +1,45 @@
diff --git a/ggml-metal.m b/ggml-metal.m
index 0207b787..b5e9884b 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -1396,27 +1396,23 @@ static enum ggml_status ggml_metal_graph_compute(
// to the matrix-vector kernel
int ne11_mm_min = 1;

-#if 0
// the numbers below are measured on M2 Ultra for 7B and 13B models
// these numbers do not translate to other devices or model sizes
// TODO: need to find a better approach
- if ([ctx->device.name isEqualToString:@"Apple M2 Ultra"]) {
- switch (src0t) {
- case GGML_TYPE_F16: ne11_mm_min = 2; break;
- case GGML_TYPE_Q8_0: ne11_mm_min = 7; break;
- case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
- case GGML_TYPE_Q3_K: ne11_mm_min = 7; break;
- case GGML_TYPE_Q4_0:
- case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
- case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
- case GGML_TYPE_Q5_0: // not tested yet
- case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
- case GGML_TYPE_Q5_K: ne11_mm_min = 7; break;
- case GGML_TYPE_Q6_K: ne11_mm_min = 7; break;
- default: ne11_mm_min = 1; break;
- }
+ switch (src0t) {
+ case GGML_TYPE_F16: ne11_mm_min = 2; break;
+ case GGML_TYPE_Q8_0: ne11_mm_min = 7; break;
+ case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
+ case GGML_TYPE_Q3_K: ne11_mm_min = 7; break;
+ case GGML_TYPE_Q4_0:
+ case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
+ case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
+ case GGML_TYPE_Q5_0: // not tested yet
+ case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
+ case GGML_TYPE_Q5_K: ne11_mm_min = 7; break;
+ case GGML_TYPE_Q6_K: ne11_mm_min = 7; break;
+ default: ne11_mm_min = 1; break;
}
-#endif

// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
// AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
@@ -442,7 +442,7 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
select {
case <-ctx.Done():
slog.Info("context expired before server started")
return fmt.Errorf("timed out waiting for llama runner to start")
return fmt.Errorf("timed out waiting for llama runner to start: %w", ctx.Err())
case err := <-s.done:
msg := ""
if s.status != nil && s.status.LastErrMsg != "" {
@@ -560,6 +560,13 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
return err
}
defer s.sem.Release(1)

// only allow maximum 10 "context shifts" to avoid infinite generation
if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
req.Options.NumPredict = 10 * s.options.NumCtx
slog.Debug("setting token limit to 10x num_ctx", "num_ctx", s.options.NumCtx, "num_predict", req.Options.NumPredict)
}

request := map[string]any{
"prompt": req.Prompt,
"stream": true,

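The Completion change above caps unbounded generation: when num_predict is negative (unlimited) or larger than ten full context windows, it is clamped to 10*num_ctx so a runaway completion cannot shift context forever. The guard in isolation, as a tiny sketch (field names follow the diff, surrounding types simplified):

```go
package main

import "fmt"

// clampNumPredict mirrors the guard added to Completion: allow at most
// ten context shifts worth of generated tokens.
func clampNumPredict(numPredict, numCtx int) int {
	if numPredict < 0 || numPredict > 10*numCtx {
		return 10 * numCtx
	}
	return numPredict
}

func main() {
	fmt.Println(clampNumPredict(-1, 2048))     // 20480: "unlimited" becomes 10x num_ctx
	fmt.Println(clampNumPredict(256, 2048))    // 256: explicit limits are kept
	fmt.Println(clampNumPredict(100000, 2048)) // 20480: oversized limits are clamped
}
```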
@@ -82,7 +82,7 @@ function buildOllama() {
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
New-Item -ItemType Directory -Path .\dist\windows-amd64\ -Force
cp .\ollama.exe .\dist\windows-amd64\ollama-windows-amd64.exe
cp .\ollama.exe .\dist\windows-amd64\
}

function buildApp() {
@@ -109,9 +109,6 @@ function gatherDependencies() {
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\"
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\"

cp "${script:NVIDIA_DIR}\cudart64_*.dll" "${script:DEPS_DIR}\"
cp "${script:NVIDIA_DIR}\cublas64_*.dll" "${script:DEPS_DIR}\"
cp "${script:NVIDIA_DIR}\cublasLt64_*.dll" "${script:DEPS_DIR}\"

cp "${script:SRC_DIR}\app\ollama_welcome.ps1" "${script:SRC_DIR}\dist\"
if ("${env:KEY_CONTAINER}") {
@@ -123,15 +120,6 @@ function gatherDependencies() {
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
}
if ($null -ne $env:HIP_PATH) {
# Assumes v5.7, may need adjustments for v6
rm -ea 0 -recurse -force -path "${script:DEPS_DIR}\rocm\"
md "${script:DEPS_DIR}\rocm\rocblas\library\" -ea 0 > $null
cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:DEPS_DIR}\rocm\"
cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:DEPS_DIR}\rocm\"
# amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:DEPS_DIR}\rocm\rocblas\library\"
}
}

function buildInstaller() {

@@ -29,6 +29,7 @@ import (
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/parser"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
)

@@ -701,36 +702,39 @@ func convertModel(name, path string, fn func(resp api.ProgressResponse)) (string
return path, nil
}

func CopyModel(src, dest string) error {
srcModelPath := ParseModelPath(src)
srcPath, err := srcModelPath.GetManifestPath()
func CopyModel(src, dst model.Name) error {
if !dst.IsFullyQualified() {
return model.Unqualified(dst)
}
if !src.IsFullyQualified() {
return model.Unqualified(src)
}

manifests, err := GetManifestPath()
if err != nil {
return err
}

destModelPath := ParseModelPath(dest)
destPath, err := destModelPath.GetManifestPath()
if err != nil {
return err
}
if err := os.MkdirAll(filepath.Dir(destPath), 0o755); err != nil {
dstpath := filepath.Join(manifests, dst.Filepath())
if err := os.MkdirAll(filepath.Dir(dstpath), 0o755); err != nil {
return err
}

// copy the file
input, err := os.ReadFile(srcPath)
srcpath := filepath.Join(manifests, src.Filepath())
srcfile, err := os.Open(srcpath)
if err != nil {
fmt.Println("Error reading file:", err)
return err
}
defer srcfile.Close()

err = os.WriteFile(destPath, input, 0o644)
dstfile, err := os.Create(dstpath)
if err != nil {
fmt.Println("Error reading file:", err)
return err
}
defer dstfile.Close()

return nil
_, err = io.Copy(dstfile, srcfile)
return err
}

func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{}, dryRun bool) error {

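CopyModel now takes parsed model.Name values and streams the manifest with io.Copy instead of reading the whole file into memory. A sketch of how a caller drives the new signature, assuming the server and types/model packages shown in this diff are importable from an external module (the model names are examples):

```go
package main

import (
	"fmt"
	"log"

	"github.com/ollama/ollama/server"
	"github.com/ollama/ollama/types/model"
)

func main() {
	// Parse and validate first; CopyModel itself rejects names that are not
	// fully qualified (missing host, namespace, model, or tag).
	src := model.ParseName("registry.ollama.ai/library/llama3:latest")
	dst := model.ParseName("registry.ollama.ai/library/llama3:backup")
	if !src.IsValid() || !dst.IsValid() {
		log.Fatal("invalid model name")
	}
	if err := server.CopyModel(src, dst); err != nil {
		log.Fatal(err)
	}
	fmt.Println("copied")
}
```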
@@ -29,6 +29,7 @@ import (
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/openai"
"github.com/ollama/ollama/parser"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
)

@@ -145,6 +146,11 @@ func (s *Server) GenerateHandler(c *gin.Context) {
select {
case runner = <-rCh:
case err = <-eCh:
if errors.Is(err, context.Canceled) {
c.JSON(499, gin.H{"error": "request canceled"})
return
}

c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
@@ -388,6 +394,11 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
select {
case runner = <-rCh:
case err = <-eCh:
if errors.Is(err, context.Canceled) {
c.JSON(499, gin.H{"error": "request canceled"})
return
}

c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
@@ -788,34 +799,34 @@ func (s *Server) ListModelsHandler(c *gin.Context) {
}

func (s *Server) CopyModelHandler(c *gin.Context) {
var req api.CopyRequest
err := c.ShouldBindJSON(&req)
switch {
case errors.Is(err, io.EOF):
var r api.CopyRequest
if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
return
case err != nil:
} else if err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}

if req.Source == "" || req.Destination == "" {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "source add destination are required"})
src := model.ParseName(r.Source)
if !src.IsValid() {
_ = c.Error(fmt.Errorf("source %q is invalid", r.Source))
}

dst := model.ParseName(r.Destination)
if !dst.IsValid() {
_ = c.Error(fmt.Errorf("destination %q is invalid", r.Destination))
}

if len(c.Errors) > 0 {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": c.Errors.Errors()})
return
}

if err := ParseModelPath(req.Destination).Validate(); err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}

if err := CopyModel(req.Source, req.Destination); err != nil {
if os.IsNotExist(err) {
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
} else {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
}
return
if err := CopyModel(src, dst); errors.Is(err, os.ErrNotExist) {
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found", r.Source)})
} else if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
}
}

@@ -1215,6 +1226,11 @@ func (s *Server) ChatHandler(c *gin.Context) {
select {
case runner = <-rCh:
case err = <-eCh:
if errors.Is(err, context.Canceled) {
c.JSON(499, gin.H{"error": "request canceled"})
return
}

c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}

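All three handlers gain the same guard: when the scheduler wait ends with context.Canceled because the client disconnected while the model was loading, the server answers 499 (client closed request) rather than 500. The shared branch, pulled out as a standalone sketch (helper name is hypothetical, not in the repository):

```go
package sketch

import (
	"context"
	"errors"
	"net/http"

	"github.com/gin-gonic/gin"
)

// respondRunnerError sketches the error branch the diff adds to
// GenerateHandler, EmbeddingsHandler and ChatHandler: a wait that ended
// because the client went away maps to 499 instead of 500.
func respondRunnerError(c *gin.Context, err error) {
	if errors.Is(err, context.Canceled) {
		// 499: the client closed the request before a runner was ready
		c.JSON(499, gin.H{"error": "request canceled"})
		return
	}
	c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
}
```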
@@ -23,7 +23,6 @@ import (
|
||||
type LlmRequest struct {
|
||||
ctx context.Context //nolint:containedctx
|
||||
model *Model
|
||||
ggml *llm.GGML // TODO - how large is this, and do we need to free it after we've finished loading?
|
||||
opts api.Options
|
||||
sessionDuration time.Duration
|
||||
successCh chan *runnerRef
|
||||
@@ -39,7 +38,7 @@ type Scheduler struct {
|
||||
loaded map[string]*runnerRef
|
||||
loadedMu sync.Mutex
|
||||
|
||||
loadFn func(req *LlmRequest, gpus gpu.GpuInfoList)
|
||||
loadFn func(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList)
|
||||
newServerFn func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error)
|
||||
getGpuFn func() gpu.GpuInfoList
|
||||
}
|
||||
@@ -47,6 +46,7 @@ type Scheduler struct {
|
||||
// TODO set this to zero after a release or two, to enable multiple models by default
|
||||
var loadedMax = 1 // Maximum runners; < 1 maps to as many as will fit in VRAM (unlimited for CPU runners)
|
||||
var maxQueuedRequests = 10 // TODO configurable
|
||||
var numParallel = 1
|
||||
|
||||
func InitScheduler(ctx context.Context) *Scheduler {
|
||||
maxRunners := os.Getenv("OLLAMA_MAX_LOADED_MODELS")
|
||||
@@ -58,6 +58,14 @@ func InitScheduler(ctx context.Context) *Scheduler {
|
||||
loadedMax = m
|
||||
}
|
||||
}
|
||||
if onp := os.Getenv("OLLAMA_NUM_PARALLEL"); onp != "" {
|
||||
p, err := strconv.Atoi(onp)
|
||||
if err != nil || p <= 0 {
|
||||
slog.Error("invalid parallel setting, must be greater than zero", "OLLAMA_NUM_PARALLEL", onp, "error", err)
|
||||
} else {
|
||||
numParallel = p
|
||||
}
|
||||
}
|
||||
|
||||
sched := &Scheduler{
|
||||
pendingReqCh: make(chan *LlmRequest, maxQueuedRequests),
|
||||
@@ -74,20 +82,16 @@ func InitScheduler(ctx context.Context) *Scheduler {
|
||||
|
||||
// context must be canceled to decrement ref count and release the runner
|
||||
func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options, sessionDuration time.Duration) (chan *runnerRef, chan error) {
|
||||
ggml, err := llm.LoadModel(model.ModelPath)
|
||||
req := &LlmRequest{
|
||||
ctx: c,
|
||||
model: model,
|
||||
ggml: ggml,
|
||||
opts: opts,
|
||||
sessionDuration: sessionDuration,
|
||||
successCh: make(chan *runnerRef),
|
||||
errCh: make(chan error, 1),
|
||||
}
|
||||
if err != nil {
|
||||
req.errCh <- err
|
||||
return req.successCh, req.errCh
|
||||
}
|
||||
// context split across parallel threads
|
||||
opts.NumCtx = opts.NumCtx * numParallel
|
||||
select {
|
||||
case s.pendingReqCh <- req:
|
||||
default:
|
||||
@@ -130,28 +134,39 @@ func (s *Scheduler) processPending(ctx context.Context) {
|
||||
pending.useLoadedRunner(runner, s.finishedReqCh)
|
||||
break
|
||||
}
|
||||
} else if loadedCount == 0 {
|
||||
slog.Debug("loading first model", "model", pending.model.ModelPath)
|
||||
gpus := s.getGpuFn()
|
||||
g := pickBestFitGPUs(pending, gpus)
|
||||
if g != nil {
|
||||
gpus = g
|
||||
}
|
||||
s.loadFn(pending, gpus)
|
||||
break
|
||||
} else if loadedMax > 0 && loadedCount >= loadedMax {
|
||||
slog.Debug("max runners achieved, unloading one to make room", "runner_count", loadedCount)
|
||||
runnerToExpire = s.findRunnerToUnload(pending)
|
||||
} else {
|
||||
// More than one loaded model, so we have to see if the new one fits
|
||||
// Either no models are loaded or below loadedMax
|
||||
// Get a refreshed GPU list
|
||||
gpus := s.getGpuFn()
|
||||
|
||||
// Load model for fitting
|
||||
ggml, err := llm.LoadModel(pending.model.ModelPath)
|
||||
if err != nil {
|
||||
pending.errCh <- err
|
||||
break
|
||||
}
|
||||
|
||||
// No models loaded. Load the model but prefer the best fit.
|
||||
if loadedCount == 0 {
|
||||
slog.Debug("loading first model", "model", pending.model.ModelPath)
|
||||
g := pickBestFitGPUs(pending, ggml, gpus)
|
||||
if g != nil {
|
||||
gpus = g
|
||||
}
|
||||
s.loadFn(pending, ggml, gpus)
|
||||
break
|
||||
}
|
||||
|
||||
// More than one loaded model, so we have to see if the new one fits
|
||||
// Update free memory from currently loaded models
|
||||
s.updateFreeSpace(gpus)
|
||||
gpus = pickBestFitGPUs(pending, gpus)
|
||||
gpus = pickBestFitGPUs(pending, ggml, gpus)
|
||||
if gpus != nil {
|
||||
slog.Debug("new model fits with existing models, loading")
|
||||
s.loadFn(pending, gpus)
|
||||
s.loadFn(pending, ggml, gpus)
|
||||
break
|
||||
}
|
||||
runnerToExpire = s.findRunnerToUnload(pending)
|
||||
@@ -282,8 +297,8 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm
|
||||
}()
|
||||
}
|
||||
|
||||
func (s *Scheduler) load(req *LlmRequest, gpus gpu.GpuInfoList) {
|
||||
llama, err := s.newServerFn(gpus, req.model.ModelPath, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts)
|
||||
func (s *Scheduler) load(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) {
|
||||
llama, err := s.newServerFn(gpus, req.model.ModelPath, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts)
|
||||
if err != nil {
|
||||
// some older models are not compatible with newer versions of llama.cpp
|
||||
// show a generalized compatibility error until there is a better way to
|
||||
@@ -417,16 +432,21 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
|
||||
slog.Debug("evaluating already loaded", "model", req.model.ModelPath)
|
||||
runner.refMu.Lock()
|
||||
defer runner.refMu.Unlock()
|
||||
// Ignore the NumGPU settings for comparison
|
||||
optsExisting := runner.Options.Runner
|
||||
optsExisting.NumGPU = -1
|
||||
optsNew := req.opts.Runner
|
||||
optsNew.NumGPU = -1
|
||||
|
||||
timeout := 10 * time.Second
|
||||
if runner.loading {
|
||||
timeout = 2 * time.Minute // Initial load can take a long time for big models on slow systems...
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(ctx, timeout) // BUG -
|
||||
|
||||
// Don't reload runner if num_gpu=-1 was provided
|
||||
optsExisting := runner.Options.Runner
|
||||
optsNew := req.opts.Runner
|
||||
if optsNew.NumGPU < 0 {
|
||||
optsExisting.NumGPU = -1
|
||||
optsNew.NumGPU = -1
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
if !reflect.DeepEqual(runner.adapters, req.model.AdapterPaths) || // have the adapters changed?
|
||||
!reflect.DeepEqual(runner.projectors, req.model.ProjectorPaths) || // have the projectors changed?
|
||||
@@ -434,6 +454,7 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
|
||||
runner.llama.Ping(ctx) != nil {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -454,7 +475,7 @@ func (a ByDuration) Less(i, j int) bool {
|
||||
|
||||
// pickBestFitGPUs will try to find the optimal placement of the model in the available GPUs where the model fully fits
|
||||
// If the model can not be fit fully within the available GPU(s) nil is returned
|
||||
func pickBestFitGPUs(req *LlmRequest, gpus gpu.GpuInfoList) gpu.GpuInfoList {
|
||||
func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu.GpuInfoList {
|
||||
var estimatedVRAM uint64
|
||||
for _, gl := range gpus.ByLibrary() {
|
||||
var ok bool
|
||||
@@ -466,7 +487,7 @@ func pickBestFitGPUs(req *LlmRequest, gpus gpu.GpuInfoList) gpu.GpuInfoList {
|
||||
|
||||
// First attempt to fit the model into a single GPU
|
||||
for _, g := range sgl {
|
||||
if ok, estimatedVRAM = llm.PredictServerFit([]gpu.GpuInfo{g}, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
|
||||
if ok, estimatedVRAM = llm.PredictServerFit([]gpu.GpuInfo{g}, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
|
||||
slog.Debug("new model will fit in available VRAM in single GPU, loading", "model", req.model.ModelPath, "gpu", g.ID, "available", g.FreeMemory, "required", format.HumanBytes2(estimatedVRAM))
|
||||
return []gpu.GpuInfo{g}
|
||||
}
|
||||
@@ -477,7 +498,7 @@ func pickBestFitGPUs(req *LlmRequest, gpus gpu.GpuInfoList) gpu.GpuInfoList {
|
||||
// - try subsets of GPUs instead of just falling back to 1 or all in a family
|
||||
|
||||
// Now try all the GPUs
|
||||
if ok, estimatedVRAM = llm.PredictServerFit(gl, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
|
||||
if ok, estimatedVRAM = llm.PredictServerFit(gl, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
|
||||
slog.Debug("new model will fit in available VRAM, loading", "model", req.model.ModelPath, "library", gl[0].Library, "required", format.HumanBytes2(estimatedVRAM))
|
||||
return gl
|
||||
}
|
||||
|
||||
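pickBestFitGPUs (above) now receives the loaded GGML directly instead of carrying it on the request, but its placement strategy is unchanged: per GPU library, try to fit the whole model on a single GPU, and only then consider spreading it across every GPU of that library. A simplified standalone sketch of that strategy (the types and the fit check are stand-ins, not the repository's):

```go
package main

import "fmt"

// gpuInfo is a stand-in for gpu.GpuInfo, just enough to express the idea.
type gpuInfo struct {
	ID   string
	Free uint64
}

// pickBestFit mirrors pickBestFitGPUs: prefer a single GPU that can hold the
// entire model, otherwise try all GPUs together, and return nil when the
// model cannot be fully offloaded. fits stands in for llm.PredictServerFit.
func pickBestFit(gpus []gpuInfo, fits func([]gpuInfo) bool) []gpuInfo {
	for _, g := range gpus {
		if fits([]gpuInfo{g}) {
			return []gpuInfo{g}
		}
	}
	if fits(gpus) {
		return gpus
	}
	return nil
}

func main() {
	need := uint64(14 << 30) // pretend the model needs 14 GiB of VRAM
	gpus := []gpuInfo{{"0", 8 << 30}, {"1", 8 << 30}}
	fits := func(gs []gpuInfo) bool {
		var free uint64
		for _, g := range gs {
			free += g.Free
		}
		return free >= need
	}
	fmt.Println(pickBestFit(gpus, fits)) // spreads across both GPUs
}
```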
@@ -47,6 +47,7 @@ func TestLoad(t *testing.T) {
|
||||
ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
|
||||
defer done()
|
||||
s := InitScheduler(ctx)
|
||||
var ggml *llm.GGML // value not used in tests
|
||||
req := &LlmRequest{
|
||||
ctx: ctx,
|
||||
model: &Model{ModelPath: "foo"},
|
||||
@@ -59,7 +60,7 @@ func TestLoad(t *testing.T) {
|
||||
return nil, fmt.Errorf("something failed to load model blah")
|
||||
}
|
||||
gpus := gpu.GpuInfoList{}
|
||||
s.load(req, gpus)
|
||||
s.load(req, ggml, gpus)
|
||||
require.Len(t, req.successCh, 0)
|
||||
require.Len(t, req.errCh, 1)
|
||||
require.Len(t, s.loaded, 0)
|
||||
@@ -70,7 +71,7 @@ func TestLoad(t *testing.T) {
|
||||
s.newServerFn = func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error) {
|
||||
return server, nil
|
||||
}
|
||||
s.load(req, gpus)
|
||||
s.load(req, ggml, gpus)
|
||||
select {
|
||||
case err := <-req.errCh:
|
||||
require.NoError(t, err)
|
||||
@@ -82,7 +83,7 @@ func TestLoad(t *testing.T) {
|
||||
|
||||
req.model.ModelPath = "dummy_model_path"
|
||||
server.waitResp = fmt.Errorf("wait failure")
|
||||
s.load(req, gpus)
|
||||
s.load(req, ggml, gpus)
|
||||
select {
|
||||
case err := <-req.errCh:
|
||||
require.Contains(t, err.Error(), "wait failure")
|
||||
@@ -101,6 +102,7 @@ type bundle struct {
|
||||
ctxDone func()
|
||||
srv *mockLlm
|
||||
req *LlmRequest
|
||||
ggml *llm.GGML
|
||||
}
|
||||
|
||||
func (scenario *bundle) newServer(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error) {
|
||||
@@ -132,14 +134,15 @@ func newScenario(t *testing.T, ctx context.Context, modelName string, estimatedV
|
||||
{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: &bytes.Reader{}},
|
||||
})
|
||||
assert.Nil(t, err)
|
||||
|
||||
fname := f.Name()
|
||||
model := &Model{Name: modelName, ModelPath: fname}
|
||||
ggml, err := llm.LoadModel(model.ModelPath)
|
||||
scenario.ggml, err = llm.LoadModel(model.ModelPath)
|
||||
require.NoError(t, err)
|
||||
|
||||
scenario.req = &LlmRequest{
|
||||
ctx: scenario.ctx,
|
||||
model: model,
|
||||
ggml: ggml,
|
||||
sessionDuration: 5 * time.Millisecond,
|
||||
successCh: make(chan *runnerRef, 1),
|
||||
errCh: make(chan error, 1),
|
||||
@@ -157,13 +160,13 @@ func TestRequests(t *testing.T) {
scenario1a.req.sessionDuration = 0
scenario1b := newScenario(t, ctx, "ollama-model-1", 11)
scenario1b.req.model = scenario1a.req.model
scenario1b.req.ggml = scenario1a.req.ggml
scenario1b.ggml = scenario1a.ggml
scenario1b.req.sessionDuration = 0

// simple reload of same model
scenario2a := newScenario(t, ctx, "ollama-model-1", 20)
scenario2a.req.model = scenario1a.req.model
scenario2a.req.ggml = scenario1a.req.ggml
scenario2a.ggml = scenario1a.ggml

// Multiple loaded models
scenario3a := newScenario(t, ctx, "ollama-model-3a", 1*format.GigaByte)
@@ -322,13 +325,14 @@ func TestGetRunner(t *testing.T) {
successCh1c, errCh1c := s.GetRunner(scenario1c.ctx, scenario1c.req.model, scenario1c.req.opts, scenario1c.req.sessionDuration)
require.Len(t, s.pendingReqCh, 0)
require.Len(t, successCh1c, 0)
require.Len(t, errCh1c, 0)

time.Sleep(5 * time.Millisecond)
require.Len(t, s.loaded, 0)
require.Len(t, errCh1c, 1)
err = <-errCh1c
require.Contains(t, err.Error(), "bad path")
scenario1b.ctxDone()

time.Sleep(5 * time.Millisecond)
require.Len(t, s.loaded, 0)
}

// TODO - add one scenario that triggers the bogus finished event with positive ref count
@@ -366,7 +370,9 @@ func TestPrematureExpired(t *testing.T) {
require.LessOrEqual(t, len(s.finishedReqCh), 1)
time.Sleep(10 * time.Millisecond)
require.Len(t, s.finishedReqCh, 0)
s.loadedMu.Lock()
require.Len(t, s.loaded, 0)
s.loadedMu.Unlock()

// also shouldn't happen in real life
s.finishedReqCh <- scenario1a.req
@@ -426,7 +432,6 @@ func TestUpdateFreeSpace(t *testing.T) {
s.updateFreeSpace(gpus)
require.Equal(t, uint64(850), gpus[0].FreeMemory)
require.Equal(t, uint64(1850), gpus[1].FreeMemory)

}

func TestFindRunnerToUnload(t *testing.T) {
@@ -485,6 +490,9 @@ func TestNeedsReload(t *testing.T) {
require.False(t, resp)
req.opts.NumGPU = 99
resp = runner.needsReload(ctx, req)
require.True(t, resp)
req.opts.NumGPU = -1
resp = runner.needsReload(ctx, req)
require.False(t, resp)
}

@@ -1,87 +0,0 @@
|
||||
package model
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// Digest represents a digest of a model Manifest. It is a comparable value
|
||||
// type and is immutable.
|
||||
//
|
||||
// The zero Digest is not a valid digest.
|
||||
type Digest struct {
|
||||
s string
|
||||
}
|
||||
|
||||
// Split returns the digest type and the digest value.
|
||||
func (d Digest) Split() (typ, digest string) {
|
||||
typ, digest, _ = strings.Cut(d.s, "-")
|
||||
return
|
||||
}
|
||||
|
||||
// String returns the digest in the form of "<digest-type>-<digest>", or the
|
||||
// empty string if the digest is invalid.
|
||||
func (d Digest) String() string { return d.s }
|
||||
|
||||
// IsValid returns true if the digest is valid (not zero).
|
||||
//
|
||||
// A valid digest may be created only by ParseDigest, or
|
||||
// ParseName(name).Digest().
|
||||
func (d Digest) IsValid() bool { return d.s != "" }
|
||||
|
||||
// LogValue implements slog.Value.
|
||||
func (d Digest) LogValue() slog.Value {
|
||||
return slog.StringValue(d.String())
|
||||
}
|
||||
|
||||
var (
|
||||
_ slog.LogValuer = Digest{}
|
||||
)
|
||||
|
||||
// ParseDigest parses a string in the form of "<digest-type>-<digest>" into a
|
||||
// Digest.
|
||||
func ParseDigest(s string) Digest {
|
||||
typ, digest, ok := strings.Cut(s, "-")
|
||||
if !ok {
|
||||
typ, digest, ok = strings.Cut(s, ":")
|
||||
}
|
||||
if ok && isValidDigestType(typ) && isValidHex(digest) && len(digest) >= 2 {
|
||||
return Digest{s: fmt.Sprintf("%s-%s", typ, digest)}
|
||||
}
|
||||
return Digest{}
|
||||
}
|
||||
|
||||
func MustParseDigest(s string) Digest {
|
||||
d := ParseDigest(s)
|
||||
if !d.IsValid() {
|
||||
panic(fmt.Sprintf("invalid digest: %q", s))
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
func isValidDigestType(s string) bool {
|
||||
if len(s) == 0 {
|
||||
return false
|
||||
}
|
||||
for _, r := range s {
|
||||
if !unicode.IsLower(r) && !unicode.IsDigit(r) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func isValidHex(s string) bool {
|
||||
if len(s) == 0 {
|
||||
return false
|
||||
}
|
||||
for i := range s {
|
||||
c := s[i]
|
||||
if c < '0' || c > '9' && c < 'a' || c > 'f' {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
@@ -1,46 +0,0 @@
|
||||
package model
|
||||
|
||||
import "testing"
|
||||
|
||||
var testDigests = map[string]Digest{
|
||||
"": {},
|
||||
"sha256-1234": {s: "sha256-1234"},
|
||||
"sha256-5678": {s: "sha256-5678"},
|
||||
"blake2-9abc": {s: "blake2-9abc"},
|
||||
"-1234": {},
|
||||
"sha256-": {},
|
||||
"sha256-1234-5678": {},
|
||||
"sha256-P": {}, // invalid hex
|
||||
"sha256-1234P": {},
|
||||
"---": {},
|
||||
}
|
||||
|
||||
func TestDigestParse(t *testing.T) {
|
||||
// Test cases.
|
||||
for s, want := range testDigests {
|
||||
got := ParseDigest(s)
|
||||
t.Logf("ParseDigest(%q) = %#v", s, got)
|
||||
if got != want {
|
||||
t.Errorf("ParseDigest(%q) = %q; want %q", s, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDigestString(t *testing.T) {
|
||||
// Test cases.
|
||||
for s, d := range testDigests {
|
||||
want := s
|
||||
if !d.IsValid() {
|
||||
want = ""
|
||||
}
|
||||
got := d.String()
|
||||
if got != want {
|
||||
t.Errorf("ParseDigest(%q).String() = %q; want %q", s, got, want)
|
||||
}
|
||||
|
||||
got = ParseDigest(s).String()
|
||||
if got != want {
|
||||
t.Errorf("roundtrip ParseDigest(%q).String() = %q; want %q", s, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,715 +1,313 @@
|
||||
// Package model contains types and utilities for parsing, validating, and
|
||||
// working with model names and digests.
|
||||
package model
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash/maphash"
|
||||
"io"
|
||||
"log/slog"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/ollama/ollama/types/structs"
|
||||
)
|
||||
|
||||
// Errors
|
||||
var (
|
||||
// ErrInvalidName, ErrIncompleteName, and ErrInvalidDigest are not
|
||||
// used by this package, but are exported so that other packages can
|
||||
// use them, instead of defining their own errors for them.
|
||||
ErrInvalidName = errors.New("invalid model name")
|
||||
ErrIncompleteName = errors.New("incomplete model name")
|
||||
ErrInvalidDigest = errors.New("invalid digest")
|
||||
// ErrUnqualifiedName represents an error where a name is not fully
|
||||
// qualified. It is not used directly in this package, but is here
|
||||
// to avoid other packages inventing their own error type.
|
||||
// Additionally, it can be conveniently used via [Unqualified].
|
||||
ErrUnqualifiedName = errors.New("unqualified name")
|
||||
)
|
||||
|
||||
// Defaults
|
||||
const (
|
||||
// MaskDefault is the default mask used by [Name.DisplayShortest].
|
||||
MaskDefault = "registry.ollama.ai/library/?:latest"
|
||||
|
||||
// MaskNothing is a mask that masks nothing.
|
||||
MaskNothing = "?/?/?:?"
|
||||
|
||||
// DefaultFill is the default fill used by [ParseName].
|
||||
FillDefault = "registry.ollama.ai/library/?:latest+Q4_0"
|
||||
|
||||
// FillNothing is a fill that fills nothing.
|
||||
FillNothing = "?/?/?:?+?"
|
||||
)
|
||||
|
||||
const MaxNamePartLen = 128
|
||||
|
||||
type PartKind int
|
||||
|
||||
// Levels of concreteness
|
||||
const (
|
||||
// Each value aligns with its index in the Name.parts array.
|
||||
|
||||
PartHost PartKind = iota
|
||||
PartNamespace
|
||||
PartModel
|
||||
PartTag
|
||||
PartBuild
|
||||
PartDigest
|
||||
|
||||
// NumParts is the number of parts in a Name. In this list, it must
|
||||
// follow the final part.
|
||||
NumParts
|
||||
|
||||
PartExtraneous = -1
|
||||
)
|
||||
|
||||
var kindNames = map[PartKind]string{
|
||||
PartHost: "Host",
|
||||
PartNamespace: "Namespace",
|
||||
PartModel: "Name",
|
||||
PartTag: "Tag",
|
||||
PartBuild: "Build",
|
||||
PartDigest: "Digest",
|
||||
// Unqualified is a helper function that returns an error with
|
||||
// ErrUnqualifiedName as the cause and the name as the message.
|
||||
func Unqualified(n Name) error {
|
||||
return fmt.Errorf("%w: %s", ErrUnqualifiedName, n)
|
||||
}
|
||||
|
||||
func (k PartKind) String() string {
|
||||
return cmp.Or(kindNames[k], "Unknown")
|
||||
// MissingPart is used to indicate any part of a name that was "promised" by
|
||||
// the presence of a separator, but is missing.
|
||||
//
|
||||
// The value was chosen because it is deemed unlikely to be set by a user,
|
||||
// not a valid part name when checked by [Name.IsValid], and easy to
|
||||
// spot in logs.
|
||||
const MissingPart = "!MISSING!"
|
||||
|
||||
// DefaultName returns a name with the default values for the host, namespace,
|
||||
// and tag parts. The model and digest parts are empty.
|
||||
//
|
||||
// - The default host is ("registry.ollama.ai")
|
||||
// - The default namespace is ("library")
|
||||
// - The default tag is ("latest")
|
||||
func DefaultName() Name {
|
||||
return Name{
|
||||
Host: "registry.ollama.ai",
|
||||
Namespace: "library",
|
||||
Tag: "latest",
|
||||
}
|
||||
}
|
||||
|
||||
// Name is an opaque reference to a model. It holds the parts of a model
|
||||
// with the case preserved, but is not directly comparable with other Names
|
||||
// since model names can be represented with different casing depending on
|
||||
// the use case. For instance, "Mistral" and "mistral" are the same model
|
||||
// but each version may have come from different sources (e.g. copied from a
|
||||
// Web page, or from a file path).
|
||||
type partKind int
|
||||
|
||||
const (
|
||||
kindHost partKind = iota
|
||||
kindNamespace
|
||||
kindModel
|
||||
kindTag
|
||||
kindDigest
|
||||
)
|
||||
|
||||
func (k partKind) String() string {
|
||||
switch k {
|
||||
case kindHost:
|
||||
return "host"
|
||||
case kindNamespace:
|
||||
return "namespace"
|
||||
case kindModel:
|
||||
return "model"
|
||||
case kindTag:
|
||||
return "tag"
|
||||
case kindDigest:
|
||||
return "digest"
|
||||
default:
|
||||
return "unknown"
|
||||
}
|
||||
}
|
||||
|
||||
// Name is a structured representation of a model name string, as defined by
|
||||
// [ParseNameNoDefaults].
|
||||
//
|
||||
// Valid Names can ONLY be constructed by calling [ParseName].
|
||||
// It is not guaranteed to be valid. Use [Name.IsValid] to check if the name
|
||||
// is valid.
|
||||
//
|
||||
// A Name is valid if and only if it has a valid Model part. The other parts
|
||||
// are optional.
|
||||
//
|
||||
// A Name is considered "complete" if it has all parts present. To check if a
|
||||
// Name is complete, use [Name.IsComplete].
|
||||
//
|
||||
// To compare two names in a case-insensitive manner, use [Name.EqualFold].
|
||||
//
|
||||
// The parts of a Name are:
|
||||
//
|
||||
// - Host: the domain of the model (optional)
|
||||
// - Namespace: the namespace of the model (optional)
|
||||
// - Model: the name of the model (required)
|
||||
// - Tag: the tag of the model (optional)
|
||||
// - Build: the build of the model; usually the quantization or "file type" (optional)
|
||||
//
|
||||
// The parts can be obtained in their original form by calling [Name.Parts].
|
||||
//
|
||||
// To check if a Name has at minimum a valid model part, use [Name.IsValid].
|
||||
// It is not directly comparable with other Names. Use [Name.Equal] and
|
||||
// [Name.MapHash] for determining equality and using as a map key.
|
||||
type Name struct {
|
||||
_ structs.Incomparable
|
||||
parts [NumParts]string // host, namespace, model, tag, build, digest
|
||||
|
||||
// TODO(bmizerany): track offsets and hold s (raw string) here? We
|
||||
// could pack the offsets all into a single uint64 since the first
|
||||
// parts take less bits since their max offset is less than the max
|
||||
// offset of the next part. This would save a ton of bytes per Name
|
||||
// and mean zero allocations for String.
|
||||
Host string
|
||||
Namespace string
|
||||
Model string
|
||||
Tag string
|
||||
RawDigest string
|
||||
}
|
||||
|
||||
// ParseName parses s into a Name, and returns the result of filling it with
|
||||
// defaults. The input string must be a valid string
|
||||
// representation of a model name in the form:
|
||||
// ParseName parses and assembles a Name from a name string. The
|
||||
// format of a valid name string is:
|
||||
//
|
||||
// [host/][namespace/]<model>[:tag][+build][@<digest-type>-<digest>]
|
||||
// s:
|
||||
// { host } "/" { namespace } "/" { model } ":" { tag } "@" { digest }
|
||||
// { host } "/" { namespace } "/" { model } ":" { tag }
|
||||
// { host } "/" { namespace } "/" { model } "@" { digest }
|
||||
// { host } "/" { namespace } "/" { model }
|
||||
// { namespace } "/" { model } ":" { tag } "@" { digest }
|
||||
// { namespace } "/" { model } ":" { tag }
|
||||
// { namespace } "/" { model } "@" { digest }
|
||||
// { namespace } "/" { model }
|
||||
// { model } ":" { tag } "@" { digest }
|
||||
// { model } ":" { tag }
|
||||
// { model } "@" { digest }
|
||||
// { model }
|
||||
// "@" { digest }
|
||||
// host:
|
||||
// pattern: alphanum { alphanum | "-" | "_" | "." | ":" }*
|
||||
// length: [1, 350]
|
||||
// namespace:
|
||||
// pattern: alphanum { alphanum | "-" | "_" }*
|
||||
// length: [2, 80]
|
||||
// model:
|
||||
// pattern: alphanum { alphanum | "-" | "_" | "." }*
|
||||
// length: [2, 80]
|
||||
// tag:
|
||||
// pattern: alphanum { alphanum | "-" | "_" | "." }*
|
||||
// length: [1, 80]
|
||||
// digest:
|
||||
// pattern: alphanum { alphanum | "-" | ":" }*
|
||||
// length: [2, 80]
|
||||
//
|
||||
// The name part is required, all others are optional. If a part is missing,
|
||||
// it is left empty in the returned Name. If a part is invalid, the zero Ref
|
||||
// value is returned.
|
||||
// Most users should use [ParseName] instead, unless they need to support
|
||||
// different defaults than DefaultName.
|
||||
//
|
||||
// The build part is normalized to uppercase.
|
||||
//
|
||||
// Examples of valid paths:
|
||||
//
|
||||
// "example.com/library/mistral:7b+x"
|
||||
// "example.com/eva/mistral:7b+Q4_0"
|
||||
// "mistral:7b+x"
|
||||
// "example.com/mike/mistral:latest+Q4_0"
|
||||
// "example.com/bruce/mistral:latest"
|
||||
// "example.com/pdevine/thisisfine:7b+Q4_0@sha256-1234567890abcdef"
|
||||
//
|
||||
// Examples of invalid paths:
|
||||
//
|
||||
// "example.com/mistral:7b+"
|
||||
// "example.com/mistral:7b+Q4_0+"
|
||||
// "x/y/z/z:8n+I"
|
||||
// ""
|
||||
//
|
||||
// It returns the zero value if any part is invalid.
|
||||
//
|
||||
// # Fills
|
||||
//
|
||||
// For any valid s, the fill string is used to fill in missing parts of the
|
||||
// Name. The fill string must be a valid Name with the exception that any part
|
||||
// may be the string ("?"), which will not be considered for filling.
|
||||
func ParseNameFill(s, fill string) Name {
|
||||
var r Name
|
||||
parts(s)(func(kind PartKind, part string) bool {
|
||||
if kind == PartDigest && !ParseDigest(part).IsValid() {
|
||||
r = Name{}
|
||||
return false
|
||||
}
|
||||
if kind == PartExtraneous || !IsValidNamePart(kind, part) {
|
||||
r = Name{}
|
||||
return false
|
||||
}
|
||||
r.parts[kind] = part
|
||||
return true
|
||||
})
|
||||
if r.IsValid() || r.IsResolved() {
|
||||
return fillName(r, fill)
|
||||
}
|
||||
return Name{}
|
||||
}
|
||||
|
||||
// ParseName parses s into a Name, and returns the result of filling it
// with FillDefault. The input string must be a valid string representation
// of a model
// The name returned is not guaranteed to be valid. If it is not valid, the
// field values are left in an undefined state. Use [Name.IsValid] to check
// if the name is valid.
func ParseName(s string) Name {
return ParseNameFill(s, "")
return Merge(ParseNameBare(s), DefaultName())
}
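Aside (not part of the diff): a minimal sketch of how the reworked entry points behave, assuming the package is imported as github.com/ollama/ollama/types/model (an assumed import path; only the types/structs import is visible above). The defaulted output matches TestNameparseNameDefault in the test diff further down.

```go
package main

import (
	"fmt"

	"github.com/ollama/ollama/types/model" // assumed import path
)

func main() {
	// ParseNameBare keeps only what the string itself provides.
	bare := model.ParseNameBare("mistral:7b")
	fmt.Println(bare.Model, bare.Tag) // mistral 7b

	// ParseName merges the bare result with DefaultName, filling in the
	// host, namespace, and tag when they are missing.
	n := model.ParseName("mistral")
	fmt.Println(n.String()) // registry.ollama.ai/library/mistral:latest
}
```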
|
||||
|
||||
func parseMask(s string) Name {
|
||||
var r Name
|
||||
parts(s)(func(kind PartKind, part string) bool {
|
||||
if part == "?" {
|
||||
// mask part; treat as empty but valid
|
||||
return true
|
||||
}
|
||||
if !IsValidNamePart(kind, part) {
|
||||
panic(fmt.Errorf("invalid mask part %s: %q", kind, part))
|
||||
}
|
||||
r.parts[kind] = part
|
||||
return true
|
||||
})
|
||||
return r
|
||||
}
|
||||
// ParseNameBare parses s as a name string and returns a Name. No merge with
|
||||
// [DefaultName] is performed.
|
||||
func ParseNameBare(s string) Name {
|
||||
var n Name
|
||||
var promised bool
|
||||
|
||||
func MustParseName(s, fill string) Name {
|
||||
r := ParseNameFill(s, fill)
|
||||
if !r.IsValid() {
|
||||
panic("invalid Name: " + s)
|
||||
s, n.RawDigest, promised = cutLast(s, "@")
|
||||
if promised && n.RawDigest == "" {
|
||||
n.RawDigest = MissingPart
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// fillName fills in the missing parts of dst with the parts of src.
|
||||
//
|
||||
// The returned Name will only be valid if dst is valid.
|
||||
//
|
||||
// It skips fill parts that are "?".
|
||||
func fillName(r Name, fill string) Name {
|
||||
fill = cmp.Or(fill, FillDefault)
|
||||
f := parseMask(fill)
|
||||
if fill != FillNothing && f.IsZero() {
|
||||
panic("invalid fill")
|
||||
s, n.Tag, _ = cutPromised(s, ":")
|
||||
s, n.Model, promised = cutPromised(s, "/")
|
||||
if !promised {
|
||||
n.Model = s
|
||||
return n
|
||||
}
|
||||
for i := range r.parts {
|
||||
if f.parts[i] == "?" {
|
||||
continue
|
||||
}
|
||||
r.parts[i] = cmp.Or(r.parts[i], f.parts[i])
|
||||
s, n.Namespace, promised = cutPromised(s, "/")
|
||||
if !promised {
|
||||
n.Namespace = s
|
||||
return n
|
||||
}
|
||||
return r
|
||||
n.Host = s
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
// WithBuild returns a copy of r with the build set to the given string.
|
||||
func (r Name) WithBuild(build string) Name {
|
||||
r.parts[PartBuild] = build
|
||||
return r
|
||||
// Merge merges the host, namespace, and tag parts of the two names,
// preferring the non-empty parts of a.
func Merge(a, b Name) Name {
a.Host = cmp.Or(a.Host, b.Host)
a.Namespace = cmp.Or(a.Namespace, b.Namespace)
a.Tag = cmp.Or(a.Tag, b.Tag)
return a
}
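Aside (not part of the diff): Merge only fills the host, namespace, and tag from b; the model and digest are left exactly as a provided them. A hedged sketch, reusing the assumed types/model import from the earlier example:

```go
package main

import (
	"fmt"

	"github.com/ollama/ollama/types/model" // assumed import path
)

func main() {
	a := model.ParseNameBare("example.com/ns/mistral") // no tag
	merged := model.Merge(a, model.DefaultName())

	fmt.Println(merged.Host)      // example.com (non-empty part of a wins)
	fmt.Println(merged.Namespace) // ns
	fmt.Println(merged.Tag)       // latest (filled from DefaultName)
	fmt.Println(merged.Model)     // mistral (Model and RawDigest are never merged)
}
```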
|
||||
|
||||
func (r Name) WithDigest(digest Digest) Name {
|
||||
r.parts[PartDigest] = digest.String()
|
||||
return r
|
||||
}
|
||||
|
||||
var mapHashSeed = maphash.MakeSeed()
|
||||
|
||||
// MapHash returns a case insensitive hash for use in maps and equality
|
||||
// checks. For a convenient way to compare names, use [Name.EqualFold].
|
||||
//
|
||||
//nolint:errcheck
|
||||
func (r Name) MapHash() uint64 {
|
||||
// correctly hash the parts with case insensitive comparison
|
||||
var h maphash.Hash
|
||||
h.SetSeed(mapHashSeed)
|
||||
for _, part := range r.parts {
|
||||
// downcase the part for hashing
|
||||
for i := range part {
|
||||
c := part[i]
|
||||
if c >= 'A' && c <= 'Z' {
|
||||
c = c - 'A' + 'a'
|
||||
}
|
||||
h.WriteByte(c)
|
||||
}
|
||||
// String returns the name string, in the format that [ParseNameNoDefaults]
|
||||
// accepts as valid, if [Name.IsValid] reports true; otherwise the empty
|
||||
// string is returned.
|
||||
func (n Name) String() string {
|
||||
var b strings.Builder
|
||||
if n.Host != "" {
|
||||
b.WriteString(n.Host)
|
||||
b.WriteByte('/')
|
||||
}
|
||||
return h.Sum64()
|
||||
}
|
||||
|
||||
func (r Name) slice(from, to PartKind) Name {
|
||||
var v Name
|
||||
copy(v.parts[from:to+1], r.parts[from:to+1])
|
||||
return v
|
||||
}
|
||||
|
||||
// DisplayShortest returns the shortest possible, masked display string in form:
|
||||
//
|
||||
// [host/][<namespace>/]<model>[:<tag>]
|
||||
//
|
||||
// # Masks
|
||||
//
|
||||
// The mask is a string that specifies which parts of the name to omit based
|
||||
// on case-insensitive comparison. [Name.DisplayShortest] omits parts of the name
|
||||
// that are the same as the mask, moving from left to right until the first
|
||||
// unequal part is found. It then moves right to left until the first unequal
|
||||
// part is found. The result is the shortest possible display string.
|
||||
//
|
||||
// Unlike a [Name] the mask can contain "?" characters which are treated as
|
||||
// wildcards. A "?" will never match a part of the name, since a valid name
|
||||
// can never contain a "?" character.
|
||||
//
|
||||
// For example: Given a Name ("registry.ollama.ai/library/mistral:latest") masked
|
||||
// with ("registry.ollama.ai/library/?:latest") will produce the display string
|
||||
// ("mistral").
|
||||
//
|
||||
// If mask is the empty string, then [MaskDefault] is used.
|
||||
//
|
||||
// DisplayShortest panics if the mask is not the empty string, MaskNothing, and
|
||||
// invalid.
|
||||
//
|
||||
// # Builds
|
||||
//
|
||||
// For now, DisplayShortest does not consider the build or return one in the
|
||||
// result. We can lift this restriction when needed.
|
||||
func (r Name) DisplayShortest(mask string) string {
|
||||
mask = cmp.Or(mask, MaskDefault)
|
||||
d := parseMask(mask)
|
||||
if mask != MaskNothing && r.IsZero() {
|
||||
panic("invalid Name")
|
||||
if n.Namespace != "" {
|
||||
b.WriteString(n.Namespace)
|
||||
b.WriteByte('/')
|
||||
}
|
||||
for i := range PartTag {
|
||||
if !strings.EqualFold(r.parts[i], d.parts[i]) {
|
||||
break
|
||||
}
|
||||
r.parts[i] = ""
|
||||
b.WriteString(n.Model)
|
||||
if n.Tag != "" {
|
||||
b.WriteByte(':')
|
||||
b.WriteString(n.Tag)
|
||||
}
|
||||
for i := PartTag; i >= 0; i-- {
|
||||
if !strings.EqualFold(r.parts[i], d.parts[i]) {
|
||||
break
|
||||
}
|
||||
r.parts[i] = ""
|
||||
if n.RawDigest != "" {
|
||||
b.WriteByte('@')
|
||||
b.WriteString(n.RawDigest)
|
||||
}
|
||||
return r.slice(PartHost, PartTag).DisplayLong()
|
||||
}
|
||||
|
||||
// DisplayLongest returns the result of r.DisplayShortest(MaskNothing).
|
||||
func (r Name) DisplayLongest() string {
|
||||
return r.DisplayShortest(MaskNothing)
|
||||
}
|
||||
|
||||
var seps = [...]string{
|
||||
PartHost: "/",
|
||||
PartNamespace: "/",
|
||||
PartModel: ":",
|
||||
PartTag: "+",
|
||||
PartBuild: "@",
|
||||
PartDigest: "",
|
||||
}
|
||||
|
||||
// WriteTo implements io.WriterTo. It writes the fullest possible display
|
||||
// string in form:
|
||||
//
|
||||
// <host>/<namespace>/<model>:<tag>+<build>@<digest-type>-<digest>
|
||||
//
|
||||
// Missing parts and their separators are not written.
|
||||
//
|
||||
// The full digest is always prefixed with "@". That is if [Name.IsValid]
|
||||
// reports false and [Name.IsResolved] reports true, then the string is
|
||||
// returned as "@<digest-type>-<digest>".
|
||||
func (r Name) writeTo(w io.StringWriter) error {
|
||||
var partsWritten int
|
||||
for i := range r.parts {
|
||||
if r.parts[i] == "" {
|
||||
continue
|
||||
}
|
||||
if partsWritten > 0 || i == int(PartDigest) {
|
||||
if _, err := w.WriteString(seps[i-1]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := w.WriteString(r.parts[i]); err != nil {
|
||||
return err
|
||||
}
|
||||
partsWritten++
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var builderPool = sync.Pool{
|
||||
New: func() interface{} {
|
||||
return &strings.Builder{}
|
||||
},
|
||||
}
|
||||
|
||||
// DisplayLong returns the fullest possible display string in form:
|
||||
//
|
||||
// <host>/<namespace>/<model>:<tag>+<build>
|
||||
//
|
||||
// If any part is missing, it is omitted from the display string.
|
||||
func (r Name) DisplayLong() string {
|
||||
b := builderPool.Get().(*strings.Builder)
|
||||
defer builderPool.Put(b)
|
||||
b.Reset()
|
||||
b.Grow(50) // arbitrarily long enough for most names
|
||||
_ = r.writeTo(b)
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// GoString implements fmt.GoStringer. It returns a string suitable for
|
||||
// debugging and logging. It is similar to [Name.DisplayLong] but it always
|
||||
// returns a string that includes all parts of the Name, with missing parts
|
||||
// replaced with a ("?").
|
||||
func (r Name) GoString() string {
|
||||
for i := range r.parts {
|
||||
r.parts[i] = cmp.Or(r.parts[i], "?")
|
||||
}
|
||||
return r.DisplayLong()
|
||||
}
|
||||
|
||||
// LogValue implements slog.Valuer.
|
||||
func (r Name) LogValue() slog.Value {
|
||||
return slog.StringValue(r.GoString())
|
||||
}
|
||||
|
||||
// IsComplete reports whether the Name is fully qualified. That is it has a
|
||||
// domain, namespace, name, tag, and build.
|
||||
func (r Name) IsComplete() bool {
|
||||
return !slices.Contains(r.parts[:PartDigest], "")
|
||||
}
|
||||
|
||||
// IsCompleteNoBuild is like [Name.IsComplete] but it does not require the
|
||||
// build part to be present.
|
||||
func (r Name) IsCompleteNoBuild() bool {
|
||||
return !slices.Contains(r.parts[:PartBuild], "")
|
||||
}
|
||||
|
||||
// IsResolved reports true if the Name has a valid digest.
|
||||
//
|
||||
// It is possible to have a valid Name, or a complete Name that is not
|
||||
// resolved.
|
||||
func (r Name) IsResolved() bool {
|
||||
return r.Digest().IsValid()
|
||||
}
|
||||
|
||||
// Digest returns the digest part of the Name, if any.
|
||||
//
|
||||
// If Digest returns a non-empty string, then [Name.IsResolved] will return
|
||||
// true, and digest is considered valid.
|
||||
func (r Name) Digest() Digest {
|
||||
// This was already validated by ParseName, so we can just return it.
|
||||
return Digest{r.parts[PartDigest]}
|
||||
}
|
||||
|
||||
// EqualFold reports whether r and o are equivalent model names, ignoring
|
||||
// case.
|
||||
func (r Name) EqualFold(o Name) bool {
|
||||
return r.CompareFold(o) == 0
|
||||
}
|
||||
|
||||
// CompareFold performs a case-insensitive cmp.Compare on r and o.
|
||||
//
|
||||
// This can be used with [slices.SortFunc].
|
||||
//
|
||||
// For simple equality checks, use [Name.EqualFold].
|
||||
func (r Name) CompareFold(o Name) int {
|
||||
return slices.CompareFunc(r.parts[:], o.parts[:], compareFold)
|
||||
}
|
||||
|
||||
func compareFold(a, b string) int {
|
||||
return slices.CompareFunc([]rune(a), []rune(b), func(a, b rune) int {
|
||||
return cmp.Compare(downcase(a), downcase(b))
|
||||
})
|
||||
}
|
||||
|
||||
func downcase(r rune) rune {
|
||||
if r >= 'A' && r <= 'Z' {
|
||||
return r - 'A' + 'a'
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
func (r Name) Host() string { return r.parts[PartHost] }
|
||||
func (r Name) Namespace() string { return r.parts[PartNamespace] }
|
||||
func (r Name) Model() string { return r.parts[PartModel] }
|
||||
func (r Name) Build() string { return r.parts[PartBuild] }
|
||||
func (r Name) Tag() string { return r.parts[PartTag] }
|
||||
|
||||
// iter_Seq2 is an iter.Seq2 defined here to avoid the current build
|
||||
// restrictions in the go1.22 iter package requiring the
|
||||
// goexperiment.rangefunc tag to be set via the GOEXPERIMENT=rangefunc flag,
|
||||
// which we are not yet ready to support.
|
||||
//
|
||||
// Once we are ready to support rangefunc, this can be removed and replaced
|
||||
// with the iter.Seq2 type.
|
||||
type iter_Seq2[A, B any] func(func(A, B) bool)
|
||||
|
||||
// Parts returns a sequence of the parts of a Name string from most specific
|
||||
// to least specific.
|
||||
//
|
||||
// It normalizes the input string by removing "http://" and "https://" only.
|
||||
// No other normalizations are performed.
|
||||
func parts(s string) iter_Seq2[PartKind, string] {
|
||||
return func(yield func(PartKind, string) bool) {
|
||||
if strings.HasPrefix(s, "http://") {
|
||||
s = strings.TrimPrefix(s, "http://")
|
||||
} else {
|
||||
s = strings.TrimPrefix(s, "https://")
|
||||
}
|
||||
|
||||
if len(s) > MaxNamePartLen || len(s) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
partLen := 0
|
||||
state, j := PartDigest, len(s)
|
||||
for i := len(s) - 1; i >= 0; i-- {
|
||||
if partLen++; partLen > MaxNamePartLen {
|
||||
// catch a part that is too long early, so
|
||||
// we don't keep spinning on it, waiting for
|
||||
// an isInValidPart check which would scan
|
||||
// over it again.
|
||||
yield(state, s[i+1:j])
|
||||
return
|
||||
}
|
||||
|
||||
switch s[i] {
|
||||
case '@':
|
||||
switch state {
|
||||
case PartDigest:
|
||||
if !yield(PartDigest, s[i+1:j]) {
|
||||
return
|
||||
}
|
||||
if i == 0 {
|
||||
// This is the form
|
||||
// "@<digest>" which is valid.
|
||||
//
|
||||
// We're done.
|
||||
return
|
||||
}
|
||||
state, j, partLen = PartBuild, i, 0
|
||||
default:
|
||||
yield(PartExtraneous, s[i+1:j])
|
||||
return
|
||||
}
|
||||
case '+':
|
||||
switch state {
|
||||
case PartBuild, PartDigest:
|
||||
if !yield(PartBuild, s[i+1:j]) {
|
||||
return
|
||||
}
|
||||
state, j, partLen = PartTag, i, 0
|
||||
default:
|
||||
yield(PartExtraneous, s[i+1:j])
|
||||
return
|
||||
}
|
||||
case ':':
|
||||
switch state {
|
||||
case PartTag, PartBuild, PartDigest:
|
||||
if !yield(PartTag, s[i+1:j]) {
|
||||
return
|
||||
}
|
||||
state, j, partLen = PartModel, i, 0
|
||||
case PartHost:
|
||||
// noop: support for host:port
|
||||
default:
|
||||
yield(PartExtraneous, s[i+1:j])
|
||||
return
|
||||
}
|
||||
case '/':
|
||||
switch state {
|
||||
case PartModel, PartTag, PartBuild, PartDigest:
|
||||
if !yield(PartModel, s[i+1:j]) {
|
||||
return
|
||||
}
|
||||
state, j = PartNamespace, i
|
||||
case PartNamespace:
|
||||
if !yield(PartNamespace, s[i+1:j]) {
|
||||
return
|
||||
}
|
||||
state, j, partLen = PartHost, i, 0
|
||||
default:
|
||||
yield(PartExtraneous, s[i+1:j])
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if state <= PartNamespace {
|
||||
yield(state, s[:j])
|
||||
} else {
|
||||
yield(PartModel, s[:j])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (r Name) IsZero() bool {
|
||||
return r.parts == [NumParts]string{}
|
||||
}
|
||||
|
||||
// IsValid reports if a model has at minimum a valid model part.
|
||||
func (r Name) IsValid() bool {
|
||||
// Parts ensures we only have valid parts, so no need to validate
|
||||
// them here, only check if we have a name or not.
|
||||
return r.parts[PartModel] != ""
|
||||
}
|
||||
|
||||
// ParseNameFromURLPath parses forms of a URL path into a Name. Specifically,
|
||||
// it trims any leading "/" and then calls [ParseName] with fill.
|
||||
func ParseNameFromURLPath(s, fill string) Name {
|
||||
s = strings.TrimPrefix(s, "/")
|
||||
return ParseNameFill(s, fill)
|
||||
}
|
||||
|
||||
func ParseNameFromURLPathFill(s, fill string) Name {
|
||||
return ParseNameFill(s, fill)
|
||||
}
|
||||
|
||||
// URLPath returns a complete, canonicalized, relative URL path using the parts of a
|
||||
// complete Name.
|
||||
//
|
||||
// The parts maintain their original case.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// ParseName("example.com/namespace/model:tag+build").URLPath() // returns "/example.com/namespace/model:tag"
|
||||
func (r Name) DisplayURLPath() string {
|
||||
return r.DisplayShortest(MaskNothing)
|
||||
}
|
||||
|
||||
// URLPath returns a complete, canonicalized, relative URL path using the parts of a
|
||||
// complete Name in the form:
|
||||
//
|
||||
// <host>/<namespace>/<model>/<tag>
|
||||
//
|
||||
// The parts are downcased.
|
||||
func (r Name) URLPath() string {
|
||||
return strings.ToLower(path.Join(r.parts[:PartBuild]...))
|
||||
}
|
||||
|
||||
// ParseNameFromFilepath parses a file path into a Name. The input string must be a
|
||||
// valid file path representation of a model name in the form:
|
||||
//
|
||||
// host/namespace/model/tag/build
|
||||
//
|
||||
// The zero value is returned if s does not contain all path elements
|
||||
// leading up to the model part, or if any path element is an invalid part
|
||||
// for its corresponding part kind.
|
||||
//
|
||||
// The fill string is used to fill in missing parts of any constructed Name.
|
||||
// See [ParseName] for more information on the fill string.
|
||||
func ParseNameFromFilepath(s, fill string) Name {
|
||||
var r Name
|
||||
for i := range PartBuild + 1 {
|
||||
part, rest, _ := strings.Cut(s, string(filepath.Separator))
|
||||
if !IsValidNamePart(i, part) {
|
||||
return Name{}
|
||||
}
|
||||
r.parts[i] = part
|
||||
s = rest
|
||||
if s == "" {
|
||||
break
|
||||
}
|
||||
}
|
||||
if s != "" {
|
||||
return Name{}
|
||||
}
|
||||
if !r.IsValid() {
|
||||
return Name{}
|
||||
}
|
||||
return fillName(r, fill)
|
||||
}
|
||||
|
||||
// Filepath returns a complete, canonicalized, relative file path using the
|
||||
// parts of a complete Name.
|
||||
//
|
||||
// Each part is downcased, except for the build part, which is upcased.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// ParseName("example.com/namespace/model:tag+build").Filepath() // returns "example.com/namespace/model/tag/BUILD"
|
||||
func (r Name) Filepath() string {
|
||||
for i := range r.parts {
|
||||
if PartKind(i) == PartBuild {
|
||||
r.parts[i] = strings.ToUpper(r.parts[i])
|
||||
} else {
|
||||
r.parts[i] = strings.ToLower(r.parts[i])
|
||||
}
|
||||
}
|
||||
return filepath.Join(r.parts[:]...)
|
||||
}
|
||||
|
||||
// FilepathNoBuild returns a complete, canonicalized, relative file path using
|
||||
// the parts of a complete Name, but without the build part.
|
||||
func (r Name) FilepathNoBuild() string {
|
||||
for i := range PartBuild {
|
||||
r.parts[i] = strings.ToLower(r.parts[i])
|
||||
}
|
||||
return filepath.Join(r.parts[:PartBuild]...)
|
||||
}
|
||||
|
||||
// IsValidNamePart reports if s contains all valid characters for the given
|
||||
// part kind and is under MaxNamePartLen bytes.
|
||||
func IsValidNamePart(kind PartKind, s string) bool {
|
||||
if len(s) > MaxNamePartLen {
|
||||
// IsValid reports whether all parts of the name are present and valid. The
|
||||
// digest is a special case, and is checked for validity only if present.
|
||||
func (n Name) IsValid() bool {
|
||||
if n.RawDigest != "" && !isValidPart(kindDigest, n.RawDigest) {
|
||||
return false
|
||||
}
|
||||
if s == "" {
|
||||
return false
|
||||
return n.IsFullyQualified()
|
||||
}
|
||||
|
||||
// IsFullyQualified returns true if all parts of the name are present and
|
||||
// valid without the digest.
|
||||
func (n Name) IsFullyQualified() bool {
|
||||
var parts = []string{
|
||||
n.Host,
|
||||
n.Namespace,
|
||||
n.Model,
|
||||
n.Tag,
|
||||
}
|
||||
var consecutiveDots int
|
||||
for i, c := range []byte(s) {
|
||||
if i == 0 && !isAlphaNumeric(c) {
|
||||
return false
|
||||
}
|
||||
if c == '.' {
|
||||
if consecutiveDots++; consecutiveDots >= 2 {
|
||||
return false
|
||||
}
|
||||
} else {
|
||||
consecutiveDots = 0
|
||||
}
|
||||
if !isValidByteFor(kind, c) {
|
||||
for i, part := range parts {
|
||||
if !isValidPart(partKind(i), part) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
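Aside (not part of the diff): with the new rules, IsValid effectively means "fully qualified, plus a well-formed digest if one is present". A sketch under the same assumed types/model import path, with outcomes taken from the test table below:

```go
package main

import (
	"fmt"

	"github.com/ollama/ollama/types/model" // assumed import path
)

func main() {
	// All four parts present and well formed: valid.
	fmt.Println(model.ParseNameBare("host/namespace/model:tag").IsValid()) // true

	// Host and namespace are missing, so the bare name is not fully qualified.
	fmt.Println(model.ParseNameBare("namespace/model").IsValid()) // false

	// ParseName merges in the defaults first, so a short name becomes valid.
	fmt.Println(model.ParseName("mistral").IsValid()) // true
}
```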
|
||||
|
||||
func isAlphaNumeric(c byte) bool {
|
||||
return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9'
|
||||
// Filepath returns a canonical filepath that represents the name with each part from
// host to tag as a directory in the form:
//
//	{host}/{namespace}/{model}/{tag}
//
// It uses the system's filepath separator and ensures the path is clean.
//
// It panics if the name is not fully qualified. Use [Name.IsFullyQualified]
// to check if the name is fully qualified.
func (n Name) Filepath() string {
if !n.IsFullyQualified() {
panic("illegal attempt to get filepath of invalid name")
}
return filepath.Join(
strings.ToLower(n.Host),
strings.ToLower(n.Namespace),
strings.ToLower(n.Model),
strings.ToLower(n.Tag),
)
}
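Aside (not part of the diff): a sketch of the new Filepath contract under the same assumed types/model import path. Note the guard: calling Filepath on a name that is not fully qualified panics.

```go
package main

import (
	"fmt"

	"github.com/ollama/ollama/types/model" // assumed import path
)

func main() {
	n := model.ParseName("Example.Com/Library/Mistral:Latest")
	if n.IsFullyQualified() {
		// Each directory level is the lowercased part, joined with the
		// OS path separator: example.com/library/mistral/latest
		fmt.Println(n.Filepath())
	}
}
```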
|
||||
|
||||
func isValidByteFor(kind PartKind, c byte) bool {
|
||||
if kind == PartNamespace && c == '.' {
|
||||
// LogValue returns a slog.Value that represents the name as a string.
|
||||
func (n Name) LogValue() slog.Value {
|
||||
return slog.StringValue(n.String())
|
||||
}
|
||||
|
||||
func isValidLen(kind partKind, s string) bool {
|
||||
switch kind {
|
||||
case kindHost:
|
||||
return len(s) >= 1 && len(s) <= 350
|
||||
case kindTag:
|
||||
return len(s) >= 1 && len(s) <= 80
|
||||
default:
|
||||
return len(s) >= 2 && len(s) <= 80
|
||||
}
|
||||
}
|
||||
|
||||
func isValidPart(kind partKind, s string) bool {
|
||||
if !isValidLen(kind, s) {
|
||||
return false
|
||||
}
|
||||
if kind == PartHost && c == ':' {
|
||||
return true
|
||||
for i := range s {
|
||||
if i == 0 {
|
||||
if !isAlphanumeric(s[i]) {
|
||||
return false
|
||||
}
|
||||
continue
|
||||
}
|
||||
switch s[i] {
|
||||
case '_', '-':
|
||||
case '.':
|
||||
if kind == kindNamespace {
|
||||
return false
|
||||
}
|
||||
case ':':
|
||||
if kind != kindHost && kind != kindDigest {
|
||||
return false
|
||||
}
|
||||
default:
|
||||
if !isAlphanumeric(s[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
if c == '.' || c == '-' {
|
||||
return true
|
||||
}
|
||||
if c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '_' {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
return true
|
||||
}
|
||||
|
||||
func isAlphanumeric(c byte) bool {
|
||||
return c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c >= '0' && c <= '9'
|
||||
}
|
||||
|
||||
func cutLast(s, sep string) (before, after string, ok bool) {
i := strings.LastIndex(s, sep)
if i >= 0 {
return s[:i], s[i+len(sep):], true
}
return s, "", false
}

// cutPromised cuts the last part of s at the last occurrence of sep. If sep is
// found, the part before and after sep are returned as-is unless empty, in
// which case they are returned as MissingPart, which will cause
// [Name.IsValid] to return false.
func cutPromised(s, sep string) (before, after string, ok bool) {
before, after, ok = cutLast(s, sep)
if !ok {
return before, after, false
}
return cmp.Or(before, MissingPart), cmp.Or(after, MissingPart), true
}
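Aside (not part of the diff): cutLast and cutPromised are unexported, but their effect is visible through ParseNameBare: a trailing separator "promises" a part that never arrives, so that part is recorded as MissingPart ("!MISSING!") and the name fails IsValid. A sketch with the same assumed types/model import path:

```go
package main

import (
	"fmt"

	"github.com/ollama/ollama/types/model" // assumed import path
)

func main() {
	// ":" with no tag after it.
	n := model.ParseNameBare("mm:")
	fmt.Println(n.Tag, n.IsValid()) // !MISSING! false

	// "@" with no digest after it.
	n = model.ParseNameBare("model@")
	fmt.Println(n.RawDigest, n.IsValid()) // !MISSING! false
}
```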
|
||||
|
||||
@@ -1,717 +1,237 @@
|
||||
package model
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"cmp"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
type fields struct {
|
||||
host, namespace, model, tag, build string
|
||||
digest string
|
||||
}
|
||||
const (
|
||||
part80 = "88888888888888888888888888888888888888888888888888888888888888888888888888888888"
|
||||
part350 = "33333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333"
|
||||
)
|
||||
|
||||
func fieldsFromName(p Name) fields {
|
||||
return fields{
|
||||
host: p.parts[PartHost],
|
||||
namespace: p.parts[PartNamespace],
|
||||
model: p.parts[PartModel],
|
||||
tag: p.parts[PartTag],
|
||||
build: p.parts[PartBuild],
|
||||
digest: p.parts[PartDigest],
|
||||
}
|
||||
}
|
||||
|
||||
var testNames = map[string]fields{
|
||||
"mistral:latest": {model: "mistral", tag: "latest"},
|
||||
"mistral": {model: "mistral"},
|
||||
"mistral:30B": {model: "mistral", tag: "30B"},
|
||||
"mistral:7b": {model: "mistral", tag: "7b"},
|
||||
"mistral:7b+Q4_0": {model: "mistral", tag: "7b", build: "Q4_0"},
|
||||
"mistral+KQED": {model: "mistral", build: "KQED"},
|
||||
"mistral.x-3:7b+Q4_0": {model: "mistral.x-3", tag: "7b", build: "Q4_0"},
|
||||
"mistral:7b+q4_0": {model: "mistral", tag: "7b", build: "q4_0"},
|
||||
"llama2": {model: "llama2"},
|
||||
"user/model": {namespace: "user", model: "model"},
|
||||
"example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"},
|
||||
"example.com/ns/mistral:7b+X": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"},
|
||||
"localhost:5000/ns/mistral": {host: "localhost:5000", namespace: "ns", model: "mistral"},
|
||||
|
||||
// invalid digest
|
||||
"mistral:latest@invalid256-": {},
|
||||
"mistral:latest@-123": {},
|
||||
"mistral:latest@!-123": {},
|
||||
"mistral:latest@1-!": {},
|
||||
"mistral:latest@": {},
|
||||
|
||||
// resolved
|
||||
"x@sha123-12": {model: "x", digest: "sha123-12"},
|
||||
"@sha456-22": {digest: "sha456-22"},
|
||||
"@sha456-1": {},
|
||||
"@@sha123-22": {},
|
||||
|
||||
// preserves case for build
|
||||
"x+b": {model: "x", build: "b"},
|
||||
|
||||
// invalid (includes fuzzing trophies)
|
||||
" / / : + ": {},
|
||||
" / : + ": {},
|
||||
" : + ": {},
|
||||
" + ": {},
|
||||
" : ": {},
|
||||
" / ": {},
|
||||
" /": {},
|
||||
"/ ": {},
|
||||
"/": {},
|
||||
":": {},
|
||||
"+": {},
|
||||
|
||||
// (".") in namepsace is not allowed
|
||||
"invalid.com/7b+x": {},
|
||||
|
||||
"invalid:7b+Q4_0:latest": {},
|
||||
"in valid": {},
|
||||
"invalid/y/z/foo": {},
|
||||
"/0": {},
|
||||
"0 /0": {},
|
||||
"0 /": {},
|
||||
"0/": {},
|
||||
":/0": {},
|
||||
"+0/00000": {},
|
||||
"0+.\xf2\x80\xf6\x9d00000\xe5\x99\xe6\xd900\xd90\xa60\x91\xdc0\xff\xbf\x99\xe800\xb9\xdc\xd6\xc300\x970\xfb\xfd0\xe0\x8a\xe1\xad\xd40\x9700\xa80\x980\xdd0000\xb00\x91000\xfe0\x89\x9b\x90\x93\x9f0\xe60\xf7\x84\xb0\x87\xa5\xff0\xa000\x9a\x85\xf6\x85\xfe\xa9\xf9\xe9\xde00\xf4\xe0\x8f\x81\xad\xde00\xd700\xaa\xe000000\xb1\xee0\x91": {},
|
||||
"0//0": {},
|
||||
"m+^^^": {},
|
||||
"file:///etc/passwd": {},
|
||||
"file:///etc/passwd:latest": {},
|
||||
"file:///etc/passwd:latest+u": {},
|
||||
|
||||
":x": {},
|
||||
"+x": {},
|
||||
"x+": {},
|
||||
|
||||
// Disallow ("\.+") in any part to prevent path traversal anywhere
|
||||
// we convert the name to a path.
|
||||
"../etc/passwd": {},
|
||||
".../etc/passwd": {},
|
||||
"./../passwd": {},
|
||||
"./0+..": {},
|
||||
|
||||
"-h": {},
|
||||
|
||||
strings.Repeat("a", MaxNamePartLen): {model: strings.Repeat("a", MaxNamePartLen)},
|
||||
strings.Repeat("a", MaxNamePartLen+1): {},
|
||||
}
|
||||
|
||||
func TestIsValidNameLen(t *testing.T) {
|
||||
if IsValidNamePart(PartNamespace, strings.Repeat("a", MaxNamePartLen+1)) {
|
||||
t.Errorf("unexpectedly valid long name")
|
||||
}
|
||||
}
|
||||
|
||||
// TestConsecutiveDots tests that consecutive dots are not allowed in any
|
||||
// part, to avoid path traversal. There also are some tests in testNames, but
|
||||
// this test is more exhaustive and exists to emphasize the importance of
|
||||
// preventing path traversal.
|
||||
func TestNameConsecutiveDots(t *testing.T) {
|
||||
for i := 1; i < 10; i++ {
|
||||
s := "a" + strings.Repeat(".", i)
|
||||
if i > 1 {
|
||||
if g := ParseNameFill(s, FillNothing).DisplayLong(); g != "" {
|
||||
t.Errorf("ParseName(%q) = %q; want empty string", s, g)
|
||||
}
|
||||
} else {
|
||||
if g := ParseNameFill(s, FillNothing).DisplayLong(); g != s {
|
||||
t.Errorf("ParseName(%q) = %q; want %q", s, g, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNameParts(t *testing.T) {
|
||||
var p Name
|
||||
if w, g := int(NumParts), len(p.parts); w != g {
|
||||
t.Errorf("Parts() = %d; want %d", g, w)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNamePartString(t *testing.T) {
|
||||
if g := PartKind(-2).String(); g != "Unknown" {
|
||||
t.Errorf("Unknown part = %q; want %q", g, "Unknown")
|
||||
}
|
||||
for kind, name := range kindNames {
|
||||
if g := kind.String(); g != name {
|
||||
t.Errorf("%s = %q; want %q", kind, g, name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseName(t *testing.T) {
|
||||
for baseName, want := range testNames {
|
||||
for _, prefix := range []string{"", "https://", "http://"} {
|
||||
// We should get the same results with or without the
|
||||
// http(s) prefixes
|
||||
s := prefix + baseName
|
||||
|
||||
t.Run(s, func(t *testing.T) {
|
||||
name := ParseNameFill(s, FillNothing)
|
||||
got := fieldsFromName(name)
|
||||
if got != want {
|
||||
t.Errorf("ParseName(%q) = %q; want %q", s, got, want)
|
||||
}
|
||||
|
||||
// test round-trip
|
||||
if !ParseNameFill(name.DisplayLong(), FillNothing).EqualFold(name) {
|
||||
t.Errorf("ParseName(%q).String() = %s; want %s", s, name.DisplayLong(), baseName)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseNameFill(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
fill string
|
||||
want string
|
||||
}{
|
||||
{"mistral", "example.com/library/?:latest+Q4_0", "example.com/library/mistral:latest+Q4_0"},
|
||||
{"mistral", "example.com/library/?:latest", "example.com/library/mistral:latest"},
|
||||
{"llama2:x", "example.com/library/?:latest+Q4_0", "example.com/library/llama2:x+Q4_0"},
|
||||
|
||||
// Invalid
|
||||
{"", "example.com/library/?:latest+Q4_0", ""},
|
||||
{"llama2:?", "example.com/library/?:latest+Q4_0", ""},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
t.Run(tt.in, func(t *testing.T) {
|
||||
name := ParseNameFill(tt.in, tt.fill)
|
||||
if g := name.DisplayLong(); g != tt.want {
|
||||
t.Errorf("ParseName(%q, %q) = %q; want %q", tt.in, tt.fill, g, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
t.Run("invalid fill", func(t *testing.T) {
|
||||
defer func() {
|
||||
if recover() == nil {
|
||||
t.Fatal("expected panic")
|
||||
}
|
||||
}()
|
||||
ParseNameFill("x", "^")
|
||||
})
|
||||
}
|
||||
|
||||
func TestParseNameHTTPDoublePrefixStrip(t *testing.T) {
|
||||
cases := []string{
|
||||
"http://https://valid.com/valid/valid:latest",
|
||||
"https://http://valid.com/valid/valid:latest",
|
||||
}
|
||||
for _, s := range cases {
|
||||
t.Run(s, func(t *testing.T) {
|
||||
name := ParseNameFill(s, FillNothing)
|
||||
if name.IsValid() {
|
||||
t.Errorf("expected invalid path; got %#v", name)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestCompleteWithAndWithoutBuild(t *testing.T) {
|
||||
func TestParseNameParts(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
complete bool
|
||||
completeNoBuild bool
|
||||
want Name
|
||||
wantValidDigest bool
|
||||
}{
|
||||
{"", false, false},
|
||||
{"incomplete/mistral:7b+x", false, false},
|
||||
{"incomplete/mistral:7b+Q4_0", false, false},
|
||||
{"incomplete:7b+x", false, false},
|
||||
{"complete.com/x/mistral:latest+Q4_0", true, true},
|
||||
{"complete.com/x/mistral:latest", false, true},
|
||||
{
|
||||
in: "host/namespace/model:tag",
|
||||
want: Name{
|
||||
Host: "host",
|
||||
Namespace: "namespace",
|
||||
Model: "model",
|
||||
Tag: "tag",
|
||||
},
|
||||
},
|
||||
{
|
||||
in: "host/namespace/model",
|
||||
want: Name{
|
||||
Host: "host",
|
||||
Namespace: "namespace",
|
||||
Model: "model",
|
||||
},
|
||||
},
|
||||
{
|
||||
in: "namespace/model",
|
||||
want: Name{
|
||||
Namespace: "namespace",
|
||||
Model: "model",
|
||||
},
|
||||
},
|
||||
{
|
||||
in: "model",
|
||||
want: Name{
|
||||
Model: "model",
|
||||
},
|
||||
},
|
||||
{
|
||||
in: "h/nn/mm:t",
|
||||
want: Name{
|
||||
Host: "h",
|
||||
Namespace: "nn",
|
||||
Model: "mm",
|
||||
Tag: "t",
|
||||
},
|
||||
},
|
||||
{
|
||||
in: part80 + "/" + part80 + "/" + part80 + ":" + part80,
|
||||
want: Name{
|
||||
Host: part80,
|
||||
Namespace: part80,
|
||||
Model: part80,
|
||||
Tag: part80,
|
||||
},
|
||||
},
|
||||
{
|
||||
in: part350 + "/" + part80 + "/" + part80 + ":" + part80,
|
||||
want: Name{
|
||||
Host: part350,
|
||||
Namespace: part80,
|
||||
Model: part80,
|
||||
Tag: part80,
|
||||
},
|
||||
},
|
||||
{
|
||||
in: "@digest",
|
||||
want: Name{
|
||||
RawDigest: "digest",
|
||||
},
|
||||
wantValidDigest: false,
|
||||
},
|
||||
{
|
||||
in: "model@sha256:123",
|
||||
want: Name{
|
||||
Model: "model",
|
||||
RawDigest: "sha256:123",
|
||||
},
|
||||
wantValidDigest: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
t.Run(tt.in, func(t *testing.T) {
|
||||
p := ParseNameFill(tt.in, FillNothing)
|
||||
t.Logf("ParseName(%q) = %#v", tt.in, p)
|
||||
if g := p.IsComplete(); g != tt.complete {
|
||||
t.Errorf("Complete(%q) = %v; want %v", tt.in, g, tt.complete)
|
||||
}
|
||||
if g := p.IsCompleteNoBuild(); g != tt.completeNoBuild {
|
||||
t.Errorf("CompleteNoBuild(%q) = %v; want %v", tt.in, g, tt.completeNoBuild)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Complete uses Parts which returns a slice, but it should be
|
||||
// inlined when used in Complete, preventing any allocations or
|
||||
// escaping to the heap.
|
||||
allocs := testing.AllocsPerRun(1000, func() {
|
||||
keep(ParseNameFill("complete.com/x/mistral:latest+Q4_0", FillNothing).IsComplete())
|
||||
})
|
||||
if allocs > 0 {
|
||||
t.Errorf("Complete allocs = %v; want 0", allocs)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNameLogValue(t *testing.T) {
|
||||
cases := []string{
|
||||
"example.com/library/mistral:latest+Q4_0",
|
||||
"mistral:latest",
|
||||
"mistral:7b+Q4_0",
|
||||
}
|
||||
for _, s := range cases {
|
||||
t.Run(s, func(t *testing.T) {
|
||||
var b bytes.Buffer
|
||||
log := slog.New(slog.NewTextHandler(&b, nil))
|
||||
name := ParseNameFill(s, FillNothing)
|
||||
log.Info("", "name", name)
|
||||
want := fmt.Sprintf("name=%s", name.GoString())
|
||||
got := b.String()
|
||||
if !strings.Contains(got, want) {
|
||||
t.Errorf("expected log output to contain %q; got %q", want, got)
|
||||
got := ParseNameBare(tt.in)
|
||||
if !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("parseName(%q) = %v; want %v", tt.in, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNameGoString(t *testing.T) {
|
||||
var testCases = map[string]bool{ // name -> valid
|
||||
"host/namespace/model:tag": true,
|
||||
"host/namespace/model": false,
|
||||
"namespace/model": false,
|
||||
"model": false,
|
||||
"@sha256-1000000000000000000000000000000000000000000000000000000000000000": false,
|
||||
"model@sha256-1000000000000000000000000000000000000000000000000000000000000000": false,
|
||||
"model@sha256:1000000000000000000000000000000000000000000000000000000000000000": false,
|
||||
|
||||
// long (but valid)
|
||||
part80 + "/" + part80 + "/" + part80 + ":" + part80: true,
|
||||
part350 + "/" + part80 + "/" + part80 + ":" + part80: true,
|
||||
|
||||
"h/nn/mm:t@sha256-1000000000000000000000000000000000000000000000000000000000000000": true, // bare minimum part sizes
|
||||
"h/nn/mm:t@sha256:1000000000000000000000000000000000000000000000000000000000000000": true, // bare minimum part sizes
|
||||
|
||||
"m": false, // model too short
|
||||
"n/mm:": false, // namespace too short
|
||||
"h/n/mm:t": false, // namespace too short
|
||||
"@t": false, // digest too short
|
||||
"mm@d": false, // digest too short
|
||||
|
||||
// invalids
|
||||
"^": false,
|
||||
"mm:": false,
|
||||
"/nn/mm": false,
|
||||
"//": false,
|
||||
"//mm": false,
|
||||
"hh//": false,
|
||||
"//mm:@": false,
|
||||
"00@": false,
|
||||
"@": false,
|
||||
|
||||
// not starting with alphanum
|
||||
"-hh/nn/mm:tt@dd": false,
|
||||
"hh/-nn/mm:tt@dd": false,
|
||||
"hh/nn/-mm:tt@dd": false,
|
||||
"hh/nn/mm:-tt@dd": false,
|
||||
"hh/nn/mm:tt@-dd": false,
|
||||
|
||||
"": false,
|
||||
|
||||
// hosts
|
||||
"host:https/namespace/model:tag": true,
|
||||
|
||||
// colon in non-host part before tag
|
||||
"host/name:space/model:tag": false,
|
||||
}
|
||||
|
||||
func TestNameparseNameDefault(t *testing.T) {
|
||||
const name = "xx"
|
||||
n := ParseName(name)
|
||||
got := n.String()
|
||||
want := "registry.ollama.ai/library/xx:latest"
|
||||
if got != want {
|
||||
t.Errorf("parseName(%q).String() = %q; want %q", name, got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNameIsValid(t *testing.T) {
|
||||
var numStringTests int
|
||||
for s, want := range testCases {
|
||||
n := ParseNameBare(s)
|
||||
t.Logf("n: %#v", n)
|
||||
got := n.IsValid()
|
||||
if got != want {
|
||||
t.Errorf("parseName(%q).IsValid() = %v; want %v", s, got, want)
|
||||
}
|
||||
|
||||
// Test roundtrip with String
|
||||
if got {
|
||||
got := ParseNameBare(s).String()
|
||||
if got != s {
|
||||
t.Errorf("parseName(%q).String() = %q; want %q", s, got, s)
|
||||
}
|
||||
numStringTests++
|
||||
}
|
||||
}
|
||||
|
||||
if numStringTests == 0 {
|
||||
t.Errorf("no tests for Name.String")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNameIsValidPart(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
in string
|
||||
wantString string
|
||||
wantGoString string // default is tt.in
|
||||
kind partKind
|
||||
s string
|
||||
want bool
|
||||
}{
|
||||
{
|
||||
name: "Complete Name",
|
||||
in: "example.com/library/mistral:latest+Q4_0",
|
||||
wantGoString: "example.com/library/mistral:latest+Q4_0@?",
|
||||
},
|
||||
{
|
||||
name: "Short Name",
|
||||
in: "mistral:latest",
|
||||
wantGoString: "?/?/mistral:latest+?@?",
|
||||
},
|
||||
{
|
||||
name: "Long Name",
|
||||
in: "library/mistral:latest",
|
||||
wantGoString: "?/library/mistral:latest+?@?",
|
||||
},
|
||||
{
|
||||
name: "Case Preserved",
|
||||
in: "Library/Mistral:Latest",
|
||||
wantGoString: "?/Library/Mistral:Latest+?@?",
|
||||
},
|
||||
{
|
||||
name: "With digest",
|
||||
in: "Library/Mistral:Latest@sha256-123456",
|
||||
wantGoString: "?/Library/Mistral:Latest+?@sha256-123456",
|
||||
},
|
||||
{kind: kindHost, s: "", want: false},
|
||||
{kind: kindHost, s: "a", want: true},
|
||||
{kind: kindHost, s: "a.", want: true},
|
||||
{kind: kindHost, s: "a.b", want: true},
|
||||
{kind: kindHost, s: "a:123", want: true},
|
||||
{kind: kindHost, s: "a:123/aa/bb", want: false},
|
||||
{kind: kindNamespace, s: "bb", want: true},
|
||||
{kind: kindNamespace, s: "a.", want: false},
|
||||
{kind: kindModel, s: "-h", want: false},
|
||||
{kind: kindDigest, s: "sha256-1000000000000000000000000000000000000000000000000000000000000000", want: true},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
p := ParseNameFill(tt.in, FillNothing)
|
||||
tt.wantGoString = cmp.Or(tt.wantGoString, tt.in)
|
||||
if g := fmt.Sprintf("%#v", p); g != tt.wantGoString {
|
||||
t.Errorf("GoString() = %q; want %q", g, tt.wantGoString)
|
||||
t.Run(tt.s, func(t *testing.T) {
|
||||
got := isValidPart(tt.kind, tt.s)
|
||||
if got != tt.want {
|
||||
t.Errorf("isValidPart(%s, %q) = %v; want %v", tt.kind, tt.s, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestDisplayLongest(t *testing.T) {
|
||||
g := ParseNameFill("example.com/library/mistral:latest+Q4_0", FillNothing).DisplayLongest()
|
||||
if g != "example.com/library/mistral:latest" {
|
||||
t.Errorf("got = %q; want %q", g, "example.com/library/mistral:latest")
|
||||
func FuzzName(f *testing.F) {
|
||||
for s := range testCases {
|
||||
f.Add(s)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDisplayShortest(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
mask string
|
||||
want string
|
||||
wantPanic bool
|
||||
}{
|
||||
{"example.com/library/mistral:latest+Q4_0", "example.com/library/?:latest", "mistral", false},
|
||||
{"example.com/library/mistral:latest+Q4_0", "example.com/?/?:latest", "library/mistral", false},
|
||||
{"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false},
|
||||
{"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false},
|
||||
|
||||
// case-insensitive
|
||||
{"Example.com/library/mistral:latest+Q4_0", "example.com/library/?:latest", "mistral", false},
|
||||
{"example.com/Library/mistral:latest+Q4_0", "example.com/library/?:latest", "mistral", false},
|
||||
{"example.com/library/Mistral:latest+Q4_0", "example.com/library/?:latest", "Mistral", false},
|
||||
{"example.com/library/mistral:Latest+Q4_0", "example.com/library/?:latest", "mistral", false},
|
||||
{"example.com/library/mistral:Latest+q4_0", "example.com/library/?:latest", "mistral", false},
|
||||
|
||||
// zero value
|
||||
{"", MaskDefault, "", true},
|
||||
|
||||
// invalid mask
|
||||
{"example.com/library/mistral:latest+Q4_0", "example.com/mistral", "", true},
|
||||
|
||||
// DefaultMask
|
||||
{"registry.ollama.ai/library/mistral:latest+Q4_0", MaskDefault, "mistral", false},
|
||||
|
||||
// Auto-Fill
|
||||
{"x", "example.com/library/?:latest", "x", false},
|
||||
{"x", "example.com/library/?:latest+Q4_0", "x", false},
|
||||
{"x/y:z", "a.com/library/?:latest+Q4_0", "x/y:z", false},
|
||||
{"x/y:z", "a.com/library/?:latest+Q4_0", "x/y:z", false},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
t.Run("", func(t *testing.T) {
|
||||
defer func() {
|
||||
if tt.wantPanic {
|
||||
if recover() == nil {
|
||||
t.Errorf("expected panic")
|
||||
}
|
||||
f.Fuzz(func(t *testing.T, s string) {
|
||||
n := ParseNameBare(s)
|
||||
if n.IsValid() {
|
||||
parts := [...]string{n.Host, n.Namespace, n.Model, n.Tag, n.RawDigest}
|
||||
for _, part := range parts {
|
||||
if part == ".." {
|
||||
t.Errorf("unexpected .. as valid part")
|
||||
}
|
||||
if len(part) > 350 {
|
||||
t.Errorf("part too long: %q", part)
|
||||
}
|
||||
}()
|
||||
|
||||
p := ParseNameFill(tt.in, FillNothing)
|
||||
t.Logf("ParseName(%q) = %#v", tt.in, p)
|
||||
if g := p.DisplayShortest(tt.mask); g != tt.want {
|
||||
t.Errorf("got = %q; want %q", g, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseNameAllocs(t *testing.T) {
|
||||
allocs := testing.AllocsPerRun(1000, func() {
|
||||
keep(ParseNameFill("example.com/mistral:7b+Q4_0", FillNothing))
|
||||
})
|
||||
if allocs > 0 {
|
||||
t.Errorf("ParseName allocs = %v; want 0", allocs)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkParseName(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
|
||||
for range b.N {
|
||||
keep(ParseNameFill("example.com/mistral:7b+Q4_0", FillNothing))
|
||||
}
|
||||
}
|
||||
|
||||
func FuzzParseNameFromFilepath(f *testing.F) {
|
||||
f.Add("example.com/library/mistral/7b/Q4_0")
|
||||
f.Add("example.com/../mistral/7b/Q4_0")
|
||||
f.Add("example.com/x/../7b/Q4_0")
|
||||
f.Add("example.com/x/../7b")
|
||||
f.Fuzz(func(t *testing.T, s string) {
|
||||
name := ParseNameFromFilepath(s, FillNothing)
|
||||
if strings.Contains(s, "..") && !name.IsZero() {
|
||||
t.Fatalf("non-zero value for path with '..': %q", s)
|
||||
}
|
||||
if name.IsValid() == name.IsZero() {
|
||||
t.Errorf("expected valid path to be non-zero value; got %#v", name)
|
||||
if n.String() != s {
|
||||
t.Errorf("String() = %q; want %q", n.String(), s)
|
||||
}
|
||||
}
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
func FuzzParseName(f *testing.F) {
	f.Add("example.com/mistral:7b+Q4_0")
	f.Add("example.com/mistral:7b+q4_0")
	f.Add("example.com/mistral:7b+x")
	f.Add("x/y/z:8n+I")
	f.Add(":x")
	f.Add("@sha256-123456")
	f.Add("example.com/mistral:latest+Q4_0@sha256-123456")
	f.Add(":@!@")
	f.Add("...")
	f.Fuzz(func(t *testing.T, s string) {
		r0 := ParseNameFill(s, FillNothing)

		if strings.Contains(s, "..") && !r0.IsZero() {
			t.Fatalf("non-zero value for path with '..': %q", s)
		}

		if !r0.IsValid() && !r0.IsResolved() {
			if !r0.EqualFold(Name{}) {
				t.Errorf("expected invalid path to be zero value; got %#v", r0)
			}
			t.Skipf("invalid path: %q", s)
		}

		for _, p := range r0.parts {
			if len(p) > MaxNamePartLen {
				t.Errorf("part too long: %q", p)
			}
		}

		if !strings.EqualFold(r0.DisplayLong(), s) {
			t.Errorf("String() did not round-trip with case insensitivity: %q\ngot = %q\nwant = %q", s, r0.DisplayLong(), s)
		}

		r1 := ParseNameFill(r0.DisplayLong(), FillNothing)
		if !r0.EqualFold(r1) {
			t.Errorf("round-trip mismatch: %+v != %+v", r0, r1)
		}
	})
}

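// TestNameStringAllocs asserts that rendering a parsed name with
// DisplayLong stays within a single allocation.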
func TestNameStringAllocs(t *testing.T) {
	name := ParseNameFill("example.com/ns/mistral:latest+Q4_0", FillNothing)
	allocs := testing.AllocsPerRun(1000, func() {
		keep(name.DisplayLong())
	})
	if allocs > 1 {
		t.Errorf("String allocs = %v; want 0", allocs)
	}
}

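// TestNamePath checks DisplayURLPath, which renders the name without its
// build suffix (e.g. "+Q4_0").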
func TestNamePath(t *testing.T) {
	cases := []struct {
		in   string
		want string
	}{
		{"example.com/library/mistral:latest+Q4_0", "example.com/library/mistral:latest"},

		// incomplete
		{"example.com/library/mistral:latest", "example.com/library/mistral:latest"},
		{"", ""},
	}
	for _, tt := range cases {
		t.Run(tt.in, func(t *testing.T) {
			p := ParseNameFill(tt.in, FillNothing)
			t.Logf("ParseName(%q) = %#v", tt.in, p)
			if g := p.DisplayURLPath(); g != tt.want {
				t.Errorf("got = %q; want %q", g, tt.want)
			}
		})
	}
}

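// TestNameFilepath covers Filepath and FilepathNoBuild: both lowercase the
// name and join its parts with the OS path separator (normalized to "/"
// here via filepath.ToSlash); FilepathNoBuild additionally drops the build.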
func TestNameFilepath(t *testing.T) {
	cases := []struct {
		in          string
		want        string
		wantNoBuild string
	}{
		{
			in:          "example.com/library/mistral:latest+Q4_0",
			want:        "example.com/library/mistral/latest/Q4_0",
			wantNoBuild: "example.com/library/mistral/latest",
		},
		{
			in:          "Example.Com/Library/Mistral:Latest+Q4_0",
			want:        "example.com/library/mistral/latest/Q4_0",
			wantNoBuild: "example.com/library/mistral/latest",
		},
		{
			in:          "Example.Com/Library/Mistral:Latest+Q4_0",
			want:        "example.com/library/mistral/latest/Q4_0",
			wantNoBuild: "example.com/library/mistral/latest",
		},
		{
			in:          "example.com/library/mistral:latest",
			want:        "example.com/library/mistral/latest",
			wantNoBuild: "example.com/library/mistral/latest",
		},
		{
			in:          "",
			want:        "",
			wantNoBuild: "",
		},
	}
	for _, tt := range cases {
		t.Run(tt.in, func(t *testing.T) {
			p := ParseNameFill(tt.in, FillNothing)
			t.Logf("ParseName(%q) = %#v", tt.in, p)
			g := p.Filepath()
			g = filepath.ToSlash(g)
			if g != tt.want {
				t.Errorf("got = %q; want %q", g, tt.want)
			}
			g = p.FilepathNoBuild()
			g = filepath.ToSlash(g)
			if g != tt.wantNoBuild {
				t.Errorf("got = %q; want %q", g, tt.wantNoBuild)
			}
		})
	}
}

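// TestParseNameFilepath goes the other way: it parses on-disk layouts back
// into names with ParseNameFromFilepath, optionally filling missing parts
// from a fill mask, and expects the zero value for incomplete or invalid
// paths.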
func TestParseNameFilepath(t *testing.T) {
	cases := []struct {
		in   string
		fill string // default is FillNothing
		want string
	}{
		{
			in:   "example.com/library/mistral/latest/Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "example.com/library/mistral/latest",
			fill: "?/?/?:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "example.com/library/mistral",
			fill: "?/?/?:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "example.com/library",
			want: "",
		},
		{
			in:   "example.com/",
			want: "",
		},
		{
			in:   "example.com/^/mistral/latest/Q4_0",
			want: "",
		},
		{
			in:   "example.com/library/mistral/../Q4_0",
			want: "",
		},
		{
			in:   "example.com/library/mistral/latest/Q4_0/extra",
			want: "",
		},
	}
	for _, tt := range cases {
		t.Run(tt.in, func(t *testing.T) {
			in := strings.ReplaceAll(tt.in, "/", string(filepath.Separator))
			fill := cmp.Or(tt.fill, FillNothing)
			want := ParseNameFill(tt.want, fill)
			if g := ParseNameFromFilepath(in, fill); !g.EqualFold(want) {
				t.Errorf("got = %q; want %q", g.DisplayLong(), tt.want)
			}
		})
	}
}

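// TestParseNameFromPath parses URL-style paths (with an optional leading
// slash) using ParseNameFromURLPath; incomplete or invalid paths render as
// the empty string.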
func TestParseNameFromPath(t *testing.T) {
	cases := []struct {
		in   string
		want string
		fill string // default is FillNothing
	}{
		{
			in:   "example.com/library/mistral:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "/example.com/library/mistral:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "/example.com/library/mistral",
			want: "example.com/library/mistral",
		},
		{
			in:   "/example.com/library/mistral",
			fill: "?/?/?:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "/example.com/library",
			want: "",
		},
		{
			in:   "/example.com/",
			want: "",
		},
		{
			in:   "/example.com/^/mistral/latest",
			want: "",
		},
	}
	for _, tt := range cases {
		t.Run(tt.in, func(t *testing.T) {
			fill := cmp.Or(tt.fill, FillNothing)
			if g := ParseNameFromURLPath(tt.in, fill); g.DisplayLong() != tt.want {
				t.Errorf("got = %q; want %q", g.DisplayLong(), tt.want)
			}
		})
	}
}

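// ExampleName_MapHash shows that MapHash is case-insensitive: all three
// spellings of "mistral:latest+q4" hash to the same key, while the name
// without a build hashes to a second key.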
func ExampleName_MapHash() {
	m := map[uint64]bool{}

	// key 1
	m[ParseNameFill("mistral:latest+q4", FillNothing).MapHash()] = true
	m[ParseNameFill("miSTRal:latest+Q4", FillNothing).MapHash()] = true
	m[ParseNameFill("mistral:LATest+Q4", FillNothing).MapHash()] = true

	// key 2
	m[ParseNameFill("mistral:LATest", FillNothing).MapHash()] = true

	fmt.Println(len(m))
	// Output:
	// 2
}

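// ExampleName_CompareFold_sort sorts a slice of names case-insensitively
// using slices.SortFunc with Name.CompareFold.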
func ExampleName_CompareFold_sort() {
	names := []Name{
		ParseNameFill("mistral:latest", FillNothing),
		ParseNameFill("mistRal:7b+q4", FillNothing),
		ParseNameFill("MIstral:7b", FillNothing),
	}

	slices.SortFunc(names, Name.CompareFold)

	for _, n := range names {
		fmt.Println(n.DisplayLong())
	}

	// Output:
	// MIstral:7b
	// mistRal:7b+q4
	// mistral:latest
}

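// ExampleName_completeAndResolved contrasts IsComplete (every name part is
// present) with IsResolved (a digest is attached); a bare digest is resolved
// but not complete.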
func ExampleName_completeAndResolved() {
	for _, s := range []string{
		"x/y/z:latest+q4_0@sha123-abc",
		"x/y/z:latest+q4_0",
		"@sha123-abc",
	} {
		name := ParseNameFill(s, FillNothing)
		fmt.Printf("complete:%v resolved:%v digest:%s\n", name.IsComplete(), name.IsResolved(), name.Digest())
	}

	// Output:
	// complete:true resolved:true digest:sha123-abc
	// complete:true resolved:false digest:
	// complete:false resolved:true digest:sha123-abc
}

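// ExampleName_DisplayShortest shows how DisplayShortest elides the parts of
// a name already covered by the mask, and how the empty mask falls back to
// the default registry.ollama.ai/library mask.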
func ExampleName_DisplayShortest() {
	name := ParseNameFill("example.com/jmorganca/mistral:latest+Q4_0", FillNothing)

	fmt.Println(name.DisplayShortest("example.com/jmorganca/?:latest"))
	fmt.Println(name.DisplayShortest("example.com/?/?:latest"))
	fmt.Println(name.DisplayShortest("example.com/?/?:?"))
	fmt.Println(name.DisplayShortest("?/?/?:?"))

	// Default
	name = ParseNameFill("registry.ollama.ai/library/mistral:latest+Q4_0", FillNothing)
	fmt.Println(name.DisplayShortest(""))

	// Output:
	// mistral
	// jmorganca/mistral
	// jmorganca/mistral:latest
	// example.com/jmorganca/mistral:latest
	// mistral
}

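// keep returns its argument unchanged; routing results through it keeps the
// compiler from optimizing away the calls measured in the allocation tests
// and benchmarks above.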
func keep[T any](v T) T { return v }

@@ -1,2 +1,2 @@
go test fuzz v1
string(":")
string("00@")
@@ -1,2 +0,0 @@
go test fuzz v1
string("/0")
@@ -1,2 +0,0 @@
go test fuzz v1
string("0//0")
@@ -1,2 +0,0 @@
go test fuzz v1
string("0 /0")
@@ -1,2 +0,0 @@
go test fuzz v1
string("+0/00000")
@@ -1,2 +0,0 @@
go test fuzz v1
string("0+.\xf2\x80\xf6\x9d00000\xe5\x99\xe6\xd900\xd90\xa60\x91\xdc0\xff\xbf\x99\xe800\xb9\xdc\xd6\xc300\x970\xfb\xfd0\xe0\x8a\xe1\xad\xd40\x9700\xa80\x980\xdd0000\xb00\x91000\xfe0\x89\x9b\x90\x93\x9f0\xe60\xf7\x84\xb0\x87\xa5\xff0\xa000\x9a\x85\xf6\x85\xfe\xa9\xf9\xe9\xde00\xf4\xe0\x8f\x81\xad\xde00\xd700\xaa\xe000000\xb1\xee0\x91")