types/model: remove (*Digest).Scan and Digest.Value (#3589 )

types/model: remove DisplayLong (#3587 )
types/model: remove MarshalText/UnmarshalText from Digest (#3586 )
2026-01-02 04:29:51 -05:00 · 2024-04-11 00:37:26 -07:00 · 2024-04-10 16:55:12 -07:00 · 2024-04-10 16:52:49 -07:00 · 2024-04-10 16:30:05 -07:00 · 2024-04-10 16:24:37 -07:00
36 changed files with 1673 additions and 189 deletions
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -30,7 +30,7 @@ jobs:
          security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k password build.keychain
      - uses: actions/setup-go@v5
        with:
-          go-version: '1.22'
+          go-version-file: go.mod
          cache: true
      - name: Build Darwin
        env:
@@ -86,7 +86,7 @@ jobs:
          write-host "plugin installed"
      - uses: actions/setup-go@v5
        with:
-          go-version: '1.22'
+          go-version-file: go.mod
          cache: true
      - run: go get ./...
      - run: |
@@ -139,7 +139,7 @@ jobs:
          write-host "plugin installed"
      - uses: actions/setup-go@v5
        with:
-          go-version: '1.22'
+          go-version-file: go.mod
          cache: true
      - name: 'Install ROCm'
        run: |
@@ -214,7 +214,7 @@ jobs:
          write-host "plugin installed"
      - uses: actions/setup-go@v5
        with:
-          go-version: '1.22'
+          go-version-file: go.mod
          cache: true
      - name: 'Install CUDA'
        run: |
@@ -300,7 +300,7 @@ jobs:
          write-host "plugin installed"
      - uses: actions/setup-go@v5
        with:
-          go-version: '1.22'
+          go-version-file: go.mod
          cache: true
      - run: go get
      - uses: actions/download-artifact@v4
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -5,7 +5,6 @@ on:
    paths:
      - '**/*'
      - '!docs/**'
-      - '!examples/**'
      - '!README.md'

 jobs:
@@ -51,7 +50,7 @@ jobs:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
-          go-version: '1.22'
+          go-version-file: go.mod
          cache: true
      - run: go get ./...
      - run: |
@@ -93,7 +92,7 @@ jobs:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v4
        with:
-          go-version: '1.22'
+          go-version-file: go.mod
          cache: true
      - run: go get ./...
      - run: |
@@ -124,7 +123,7 @@ jobs:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v4
        with:
-          go-version: '1.22'
+          go-version-file: go.mod
          cache: true
      - run: go get ./...
      - run: |
@@ -146,7 +145,7 @@ jobs:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
-          go-version: '1.22'
+          go-version-file: go.mod
          cache: true
      - name: 'Install ROCm'
        run: |
@@ -183,7 +182,7 @@ jobs:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
-          go-version: '1.22'
+          go-version-file: go.mod
          cache: true
      - name: 'Install CUDA'
        run: |
@@ -238,7 +237,7 @@ jobs:
          submodules: recursive
      - uses: actions/setup-go@v5
        with:
-          go-version: '1.22'
+          go-version-file: go.mod
          cache: false
      - run: |
          case ${{ matrix.arch }} in
@@ -283,7 +282,7 @@ jobs:
          submodules: recursive
      - uses: actions/setup-go@v5
        with:
-          go-version: '1.22'
+          go-version-file: go.mod
          cache: true
      - run: go get
      - run: |
--- a/api/client.go
+++ b/api/client.go
@@ -1,3 +1,9 @@
+// Package api implements the client-side API for code wishing to interact
+// with the ollama service. The methods of the [Client] type correspond to
+// the ollama REST API as described in https://github.com/ollama/ollama/blob/main/docs/api.md
+//
+// The ollama command-line client itself uses this package to interact with
+// the backend service.
 package api

 import (
@@ -5,7 +11,6 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
-	"errors"
 	"fmt"
 	"io"
 	"net"
@@ -19,6 +24,8 @@ import (
 	"github.com/ollama/ollama/version"
 )

+// Client encapsulates client state for interacting with the ollama
+// service. Use [ClientFromEnvironment] to create new Clients.
 type Client struct {
 	base *url.URL
 	http *http.Client
@@ -40,6 +47,15 @@ func checkError(resp *http.Response, body []byte) error {
 	return apiError
 }

+// ClientFromEnvironment creates a new [Client] using configuration from the
+// environment variable OLLAMA_HOST, which points to the network host and
+// port on which the ollama service is listenting. The format of this variable
+// is:
+//
+//	<scheme>://<host>:<port>
+//
+// If the variable is not specified, a default ollama host and port will be
+// used.
 func ClientFromEnvironment() (*Client, error) {
 	defaultPort := "11434"

@@ -191,8 +207,14 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 	return nil
 }

+// GenerateResponseFunc is a function that [Client.Generate] invokes every time
+// a response is received from the service. If this function returns an error,
+// [Client.Generate] will stop generating and return this error.
 type GenerateResponseFunc func(GenerateResponse) error

+// Generate generates a response for a given prompt. The req parameter should
+// be populated with prompt details. fn is called for each response (there may
+// be multiple responses, e.g. in case streaming is enabled).
 func (c *Client) Generate(ctx context.Context, req *GenerateRequest, fn GenerateResponseFunc) error {
 	return c.stream(ctx, http.MethodPost, "/api/generate", req, func(bts []byte) error {
 		var resp GenerateResponse
@@ -204,8 +226,15 @@ func (c *Client) Generate(ctx context.Context, req *GenerateRequest, fn Generate
 	})
 }

+// ChatResponseFunc is a function that [Client.Chat] invokes every time
+// a response is received from the service. If this function returns an error,
+// [Client.Chat] will stop generating and return this error.
 type ChatResponseFunc func(ChatResponse) error

+// Chat generates the next message in a chat. [ChatRequest] may contain a
+// sequence of messages which can be used to maintain chat history with a model.
+// fn is called for each response (there may be multiple responses, e.g. if case
+// streaming is enabled).
 func (c *Client) Chat(ctx context.Context, req *ChatRequest, fn ChatResponseFunc) error {
 	return c.stream(ctx, http.MethodPost, "/api/chat", req, func(bts []byte) error {
 		var resp ChatResponse
@@ -217,8 +246,14 @@ func (c *Client) Chat(ctx context.Context, req *ChatRequest, fn ChatResponseFunc
 	})
 }

+// PullProgressFunc is a function that [Client.Pull] invokes every time there
+// is progress with a "pull" request sent to the service. If this function
+// returns an error, [Client.Pull] will stop the process and return this error.
 type PullProgressFunc func(ProgressResponse) error

+// Pull downloads a model from the ollama library. fn is called each time
+// progress is made on the request and can be used to display a progress bar,
+// etc.
 func (c *Client) Pull(ctx context.Context, req *PullRequest, fn PullProgressFunc) error {
 	return c.stream(ctx, http.MethodPost, "/api/pull", req, func(bts []byte) error {
 		var resp ProgressResponse
@@ -301,18 +336,7 @@ func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*Embedd
 }

 func (c *Client) CreateBlob(ctx context.Context, digest string, r io.Reader) error {
-	if err := c.do(ctx, http.MethodHead, fmt.Sprintf("/api/blobs/%s", digest), nil, nil); err != nil {
-		var statusError StatusError
-		if !errors.As(err, &statusError) || statusError.StatusCode != http.StatusNotFound {
-			return err
-		}
-
-		if err := c.do(ctx, http.MethodPost, fmt.Sprintf("/api/blobs/%s", digest), r, nil); err != nil {
-			return err
-		}
-	}
-
-	return nil
+	return c.do(ctx, http.MethodPost, fmt.Sprintf("/api/blobs/%s", digest), r, nil)
 }

 func (c *Client) Version(ctx context.Context) (string, error) {
--- a/api/types.go
+++ b/api/types.go
@@ -33,18 +33,46 @@ func (e StatusError) Error() string {

 type ImageData []byte

+// GenerateRequest describes a request sent by [Client.Generate]. While you
+// have to specify the Model and Prompt fields, all the other fields have
+// reasonable defaults for basic uses.
 type GenerateRequest struct {
-	Model     string      `json:"model"`
-	Prompt    string      `json:"prompt"`
-	System    string      `json:"system"`
-	Template  string      `json:"template"`
-	Context   []int       `json:"context,omitempty"`
-	Stream    *bool       `json:"stream,omitempty"`
-	Raw       bool        `json:"raw,omitempty"`
-	Format    string      `json:"format"`
-	KeepAlive *Duration   `json:"keep_alive,omitempty"`
-	Images    []ImageData `json:"images,omitempty"`
+	// Model is the model name; it should be a name familiar to Ollama from
+	// the library at https://ollama.com/library
+	Model string `json:"model"`

+	// Prompt is the textual prompt to send to the model.
+	Prompt string `json:"prompt"`
+
+	// System overrides the model's default system message/prompt.
+	System string `json:"system"`
+
+	// Template overrides the model's default prompt template.
+	Template string `json:"template"`
+
+	// Context is the context parameter returned from a previous call to
+	// Generate call. It can be used to keep a short conversational memory.
+	Context []int `json:"context,omitempty"`
+
+	// Stream specifies whether the response is streaming; it is true by default.
+	Stream *bool `json:"stream,omitempty"`
+
+	// Raw set to true means that no formatting will be applied to the prompt.
+	Raw bool `json:"raw,omitempty"`
+
+	// Format specifies the format to return a response in.
+	Format string `json:"format"`
+
+	// KeepAlive controls how long the model will stay loaded in memory following
+	// this request.
+	KeepAlive *Duration `json:"keep_alive,omitempty"`
+
+	// Images is an optional list of base64-encoded images accompanying this
+	// request, for multimodal models.
+	Images []ImageData `json:"images,omitempty"`
+
+	// Options lists model-specific options. For example, temperature can be
+	// set through this field, if the model supports it.
 	Options map[string]interface{} `json:"options"`
 }

@@ -109,19 +137,24 @@ type Options struct {

 // Runner options which must be set when the model is loaded into memory
 type Runner struct {
-	UseNUMA            bool    `json:"numa,omitempty"`
-	NumCtx             int     `json:"num_ctx,omitempty"`
-	NumBatch           int     `json:"num_batch,omitempty"`
-	NumGQA             int     `json:"num_gqa,omitempty"`
-	NumGPU             int     `json:"num_gpu,omitempty"`
-	MainGPU            int     `json:"main_gpu,omitempty"`
-	LowVRAM            bool    `json:"low_vram,omitempty"`
-	F16KV              bool    `json:"f16_kv,omitempty"`
-	LogitsAll          bool    `json:"logits_all,omitempty"`
-	VocabOnly          bool    `json:"vocab_only,omitempty"`
-	UseMMap            bool    `json:"use_mmap,omitempty"`
-	UseMLock           bool    `json:"use_mlock,omitempty"`
-	NumThread          int     `json:"num_thread,omitempty"`
+	UseNUMA   bool `json:"numa,omitempty"`
+	NumCtx    int  `json:"num_ctx,omitempty"`
+	NumBatch  int  `json:"num_batch,omitempty"`
+	NumGQA    int  `json:"num_gqa,omitempty"`
+	NumGPU    int  `json:"num_gpu,omitempty"`
+	MainGPU   int  `json:"main_gpu,omitempty"`
+	LowVRAM   bool `json:"low_vram,omitempty"`
+	F16KV     bool `json:"f16_kv,omitempty"`
+	LogitsAll bool `json:"logits_all,omitempty"`
+	VocabOnly bool `json:"vocab_only,omitempty"`
+	UseMMap   bool `json:"use_mmap,omitempty"`
+	UseMLock  bool `json:"use_mlock,omitempty"`
+	NumThread int  `json:"num_thread,omitempty"`
+
+	// Unused: RopeFrequencyBase is ignored. Instead the value in the model will be used
+	RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"`
+	// Unused: RopeFrequencyScale is ignored. Instead the value in the model will be used
+	RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
 }

 type EmbeddingRequest struct {
@@ -137,10 +170,11 @@ type EmbeddingResponse struct {
 }

 type CreateRequest struct {
-	Model     string `json:"model"`
-	Path      string `json:"path"`
-	Modelfile string `json:"modelfile"`
-	Stream    *bool  `json:"stream,omitempty"`
+	Model        string `json:"model"`
+	Path         string `json:"path"`
+	Modelfile    string `json:"modelfile"`
+	Stream       *bool  `json:"stream,omitempty"`
+	Quantization string `json:"quantization,omitempty"`

 	// Name is deprecated, see Model
 	Name string `json:"name"`
@@ -380,16 +414,16 @@ func DefaultOptions() Options {

 		Runner: Runner{
 			// options set when the model is loaded
-			NumCtx:             2048,
-			NumBatch:           512,
-			NumGPU:             -1, // -1 here indicates that NumGPU should be set dynamically
-			NumGQA:             1,
-			NumThread:          0, // let the runtime decide
-			LowVRAM:            false,
-			F16KV:              true,
-			UseMLock:           false,
-			UseMMap:            true,
-			UseNUMA:            false,
+			NumCtx:    2048,
+			NumBatch:  512,
+			NumGPU:    -1, // -1 here indicates that NumGPU should be set dynamically
+			NumGQA:    1,
+			NumThread: 0, // let the runtime decide
+			LowVRAM:   false,
+			F16KV:     true,
+			UseMLock:  false,
+			UseMMap:   true,
+			UseNUMA:   false,
 		},
 	}
 }
--- a/app/tray/tray.go
+++ b/app/tray/tray.go
@@ -24,10 +24,5 @@ func NewTray() (commontray.OllamaTray, error) {
 		return nil, fmt.Errorf("failed to load icon %s: %w", iconName, err)
 	}

-	tray, err := InitPlatformTray(icon, updateIcon)
-	if err != nil {
-		return nil, err
-	}
-
-	return tray, nil
+	return InitPlatformTray(icon, updateIcon)
 }
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -194,7 +194,9 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 		return nil
 	}

-	request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile)}
+	quantization, _ := cmd.Flags().GetString("quantization")
+
+	request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile), Quantization: quantization}
 	if err := client.Create(cmd.Context(), &request, fn); err != nil {
 		return err
 	}
@@ -943,6 +945,7 @@ func NewCLI() *cobra.Command {
 	}

 	createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile (default \"Modelfile\")")
+	createCmd.Flags().StringP("quantization", "q", "", "Quantization level.")

 	showCmd := &cobra.Command{
 		Use:     "show MODEL",
--- a/examples/go-generate-streaming/main.go
+++ b/examples/go-generate-streaming/main.go
@@ -0,0 +1,40 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"log"
+
+	"github.com/ollama/ollama/api"
+)
+
+func main() {
+	client, err := api.ClientFromEnvironment()
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// By default, GenerateRequest is streaming.
+	req := &api.GenerateRequest{
+		Model:  "gemma",
+		Prompt: "how many planets are there?",
+	}
+
+	ctx := context.Background()
+	respFunc := func(resp api.GenerateResponse) error {
+		// Only print the response here; GenerateResponse has a number of other
+		// interesting fields you want to examine.
+
+		// In streaming mode, responses are partial so we call fmt.Print (and not
+		// Println) in order to avoid spurious newlines being introduced. The
+		// model will insert its own newlines if it wants.
+		fmt.Print(resp.Response)
+		return nil
+	}
+
+	err = client.Generate(ctx, req, respFunc)
+	if err != nil {
+		log.Fatal(err)
+	}
+	fmt.Println()
+}
--- a/examples/go-generate/main.go
+++ b/examples/go-generate/main.go
@@ -0,0 +1,37 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"log"
+
+	"github.com/ollama/ollama/api"
+)
+
+func main() {
+	client, err := api.ClientFromEnvironment()
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	req := &api.GenerateRequest{
+		Model:  "gemma",
+		Prompt: "how many planets are there?",
+
+		// set streaming to false
+		Stream: new(bool),
+	}
+
+	ctx := context.Background()
+	respFunc := func(resp api.GenerateResponse) error {
+		// Only print the response here; GenerateResponse has a number of other
+		// interesting fields you want to examine.
+		fmt.Println(resp.Response)
+		return nil
+	}
+
+	err = client.Generate(ctx, req, respFunc)
+	if err != nil {
+		log.Fatal(err)
+	}
+}
--- a/examples/golang-simplegenerate/README.md
+++ b/examples/golang-simplegenerate/README.md
--- a/examples/golang-simplegenerate/main.go
+++ b/examples/golang-simplegenerate/main.go
--- a/format/bytes.go
+++ b/format/bytes.go
@@ -50,7 +50,7 @@ func HumanBytes(b int64) string {
 	}
 }

-func HumanBytes2(b int64) string {
+func HumanBytes2(b uint64) string {
 	switch {
 	case b >= MebiByte:
 		return fmt.Sprintf("%.1f MiB", float64(b)/MebiByte)
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -243,7 +243,7 @@ func getCPUMem() (memInfo, error) {
 	return ret, nil
 }

-func CheckVRAM() (int64, error) {
+func CheckVRAM() (uint64, error) {
 	userLimit := os.Getenv("OLLAMA_MAX_VRAM")
 	if userLimit != "" {
 		avail, err := strconv.ParseInt(userLimit, 10, 64)
@@ -251,11 +251,11 @@ func CheckVRAM() (int64, error) {
 			return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
 		}
 		slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
-		return avail, nil
+		return uint64(avail), nil
 	}
 	gpuInfo := GetGPUInfo()
 	if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
-		return int64(gpuInfo.FreeMemory), nil
+		return gpuInfo.FreeMemory, nil
 	}

 	return 0, fmt.Errorf("no GPU detected") // TODO - better handling of CPU based memory determiniation
--- a/gpu/gpu_darwin.go
+++ b/gpu/gpu_darwin.go
@@ -17,7 +17,7 @@ import (
 )

 // CheckVRAM returns the free VRAM in bytes on Linux machines with NVIDIA GPUs
-func CheckVRAM() (int64, error) {
+func CheckVRAM() (uint64, error) {
 	userLimit := os.Getenv("OLLAMA_MAX_VRAM")
 	if userLimit != "" {
 		avail, err := strconv.ParseInt(userLimit, 10, 64)
@@ -25,15 +25,14 @@ func CheckVRAM() (int64, error) {
 			return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
 		}
 		slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
-		return avail, nil
+		return uint64(avail), nil
 	}

 	if runtime.GOARCH == "amd64" {
 		// gpu not supported, this may not be metal
 		return 0, nil
 	}
-	recommendedMaxVRAM := int64(C.getRecommendedMaxVRAM())
-	return recommendedMaxVRAM, nil
+	return uint64(C.getRecommendedMaxVRAM()), nil
 }

 func GetGPUInfo() GpuInfo {
--- a/gpu/types.go
+++ b/gpu/types.go
@@ -15,7 +15,7 @@ type GpuInfo struct {
 	Variant string `json:"variant,omitempty"`

 	// MinimumMemory represents the minimum memory required to use the GPU
-	MinimumMemory int64 `json:"-"`
+	MinimumMemory uint64 `json:"-"`

 	// TODO add other useful attributes about the card here for discovery information
 }
--- a/llm/ggla.go
+++ b/llm/ggla.go
@@ -49,7 +49,7 @@ func (llm *ggla) KV() KV {
 	return llm.kv
 }

-func (llm *ggla) Tensors() []*Tensor {
+func (llm *ggla) Tensors() Tensors {
 	return llm.tensors
 }

--- a/llm/ggml.go
+++ b/llm/ggml.go
@@ -13,16 +13,6 @@ type GGML struct {
 	model
 }

-func (ggml *GGML) LayerSize(prefix string) (n int64) {
-	for _, t := range ggml.Tensors() {
-		if strings.HasPrefix(t.Name, prefix) {
-			n += int64(t.size())
-		}
-	}
-
-	return
-}
-
 const (
 	fileTypeF32 uint32 = iota
 	fileTypeF16
@@ -101,7 +91,7 @@ func fileType(fileType uint32) string {

 type model interface {
 	KV() KV
-	Tensors() []*Tensor
+	Tensors() Tensors
 }

 type KV map[string]any
@@ -167,6 +157,36 @@ func (kv KV) ContextLength() uint64 {
 	return kv.u64(fmt.Sprintf("%s.context_length", kv.Architecture()))
 }

+type Tensors []*Tensor
+
+func (ts Tensors) Layers() map[string]Layer {
+	layers := make(map[string]Layer)
+	for _, t := range ts {
+		parts := strings.Split(t.Name, ".")
+		if parts[0] == "blk" {
+			parts = parts[1:]
+		}
+
+		if _, ok := layers[parts[0]]; !ok {
+			layers[parts[0]] = make(Layer)
+		}
+
+		layers[parts[0]][strings.Join(parts[1:], ".")] = t
+	}
+
+	return layers
+}
+
+type Layer map[string]*Tensor
+
+func (l Layer) size() (size uint64) {
+	for _, t := range l {
+		size += t.size()
+	}
+
+	return size
+}
+
 type Tensor struct {
 	Name   string `json:"name"`
 	Kind   uint32 `json:"kind"`
@@ -304,49 +324,52 @@ func DecodeGGML(rs io.ReadSeeker) (*GGML, int64, error) {
 	}, offset, nil
 }

-func (llm GGML) GraphSize(context, batch int) (int64, bool) {
-	embeddingLength := llm.KV().EmbeddingLength()
-	headCount := llm.KV().HeadCount()
-	headCountKV := llm.KV().HeadCountKV()
-	vocabLength := len(llm.KV()["tokenizer.ggml.tokens"].([]any))
-
-	var attnQKVWeight1 uint64 = 0
-	for _, t := range llm.Tensors() {
-		if strings.HasSuffix(t.Name, ".attn_qkv.weight") && len(t.Shape) >= 2 {
-			attnQKVWeight1 = t.Shape[1]
-			break
-		}
-	}
-
-	var ffnGate1 uint64 = 0
-	for _, t := range llm.Tensors() {
-		if strings.Index(t.Name, ".ffn_gate") > 0 && len(t.Shape) >= 2 {
-			ffnGate1 = t.Shape[1]
-			break
-		}
-	}
+func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload uint64) {
+	embedding := llm.KV().EmbeddingLength()
+	heads := llm.KV().HeadCount()
+	headsKV := llm.KV().HeadCountKV()
+	vocab := uint64(len(llm.KV()["tokenizer.ggml.tokens"].([]any)))

 	switch llm.KV().Architecture() {
-	case "gemma", "command-r":
-		return 4 * int64(batch) * int64(embeddingLength+uint64(vocabLength)), true
-	case "phi2":
-		return max(
-			4*int64(batch)*int64(embeddingLength+uint64(vocabLength)),
-			4*int64(batch)*int64(1+4*embeddingLength+uint64(context)+attnQKVWeight1+uint64(context)*headCount),
-		), true
-	case "qwen2":
-		return max(
-			4*int64(batch)*int64(embeddingLength+uint64(vocabLength)),
-			4*int64(batch)*int64(1+2*embeddingLength+uint64(context)+uint64(context)*headCount),
-		), true
 	case "llama":
-		if ffnGate1 > 0 {
-			// moe
-			return 4 * int64(batch) * int64(2+3*embeddingLength+uint64(context)+uint64(context)*headCount+2*headCountKV+ffnGate1), true
-		}
-	
-		return 4 * int64(batch) * int64(1+4*embeddingLength+uint64(context)+uint64(context)*headCount), true
+		fullOffload = 4 * batch * (1 + 4*embedding + context*(1+heads))
+
+		partialOffload = 4 * batch * embedding
+		partialOffload += max(
+			4*batch*(1+embedding+max(context, embedding))+embedding*embedding*9/16+4*context*(batch*heads+embedding/heads*headsKV),
+			4*batch*(embedding+vocab)+embedding*vocab*105/128,
+		)
+	case "gemma":
+		fullOffload = 4 * batch * (embedding + vocab)
+		partialOffload = 4*batch*(2*embedding+vocab+1) + embedding*vocab*105/128
+	case "command-r":
+		fullOffload = max(
+			4*batch*(embedding+vocab),
+			4*batch*(2+4*embedding+context*(1+heads)),
+		)
+
+		partialOffload = max(
+			4*batch*(embedding+vocab)+embedding*vocab*105/128,
+			4*batch*(1+2*embedding+context*(1+heads))+ 4*embedding*context+embedding*embedding*9/16,
+		)
+	case "qwen2":
+		fullOffload = max(
+			4*batch*(embedding+vocab),
+			4*batch*(1+2*embedding+context+context*heads),
+		)
+
+		partialOffload = max(
+			4*batch*(embedding+vocab)+embedding*vocab*105/128,
+			4*(batch*(1+2*embedding+context*(1+heads))+embedding*(1+context)),
+		)
+	case "phi2":
+		fullOffload = max(
+			4*batch*(embedding+vocab),
+			4*batch*(1+4*embedding+context+context*heads),
+		)
+
+		partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
 	}

-	return 0, false
+	return
 }
--- a/llm/gguf.go
+++ b/llm/gguf.go
@@ -109,7 +109,7 @@ func (llm *gguf) KV() KV {
 	return llm.kv
 }

-func (llm *gguf) Tensors() []*Tensor {
+func (llm *gguf) Tensors() Tensors {
 	return llm.tensors
 }

--- a/llm/llama.cpp
+++ b/llm/llama.cpp
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -6,10 +6,81 @@ package llm
 // #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++
 // #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++
 // #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++
+// #include <stdlib.h>
 // #include "llama.h"
 import "C"
+import (
+	"fmt"
+	"unsafe"
+)

 // SystemInfo is an unused example of calling llama.cpp functions using CGo
 func SystemInfo() string {
 	return C.GoString(C.llama_print_system_info())
 }
+
+func Quantize(infile, outfile, filetype string) error {
+	cinfile := C.CString(infile)
+	defer C.free(unsafe.Pointer(cinfile))
+
+	coutfile := C.CString(outfile)
+	defer C.free(unsafe.Pointer(coutfile))
+
+	params := C.llama_model_quantize_default_params()
+	params.nthread = -1
+
+	switch filetype {
+	case "F32":
+		params.ftype = fileTypeF32
+	case "F16":
+		params.ftype = fileTypeF16
+	case "Q4_0":
+		params.ftype = fileTypeQ4_0
+	case "Q4_1":
+		params.ftype = fileTypeQ4_1
+	case "Q4_1_F16":
+		params.ftype = fileTypeQ4_1_F16
+	case "Q8_0":
+		params.ftype = fileTypeQ8_0
+	case "Q5_0":
+		params.ftype = fileTypeQ5_0
+	case "Q5_1":
+		params.ftype = fileTypeQ5_1
+	case "Q2_K":
+		params.ftype = fileTypeQ2_K
+	case "Q3_K_S":
+		params.ftype = fileTypeQ3_K_S
+	case "Q3_K_M":
+		params.ftype = fileTypeQ3_K_M
+	case "Q3_K_L":
+		params.ftype = fileTypeQ3_K_L
+	case "Q4_K_S":
+		params.ftype = fileTypeQ4_K_S
+	case "Q4_K_M":
+		params.ftype = fileTypeQ4_K_M
+	case "Q5_K_S":
+		params.ftype = fileTypeQ5_K_S
+	case "Q5_K_M":
+		params.ftype = fileTypeQ5_K_M
+	case "Q6_K":
+		params.ftype = fileTypeQ6_K
+	case "IQ2_XXS":
+		params.ftype = fileTypeIQ2_XXS
+	case "IQ2_XS":
+		params.ftype = fileTypeIQ2_XS
+	case "Q2_K_S":
+		params.ftype = fileTypeQ2_K_S
+	case "Q3_K_XS":
+		params.ftype = fileTypeQ3_K_XS
+	case "IQ3_XXS":
+		params.ftype = fileTypeIQ3_XXS
+	default:
+		return fmt.Errorf("unknown filetype: %s", filetype)
+	}
+
+	if retval := C.llama_model_quantize(cinfile, coutfile, &params); retval != 0 {
+		return fmt.Errorf("llama_model_quantize: %d", retval)
+	}
+
+	return nil
+}
--- a/llm/server.go
+++ b/llm/server.go
@@ -41,10 +41,6 @@ var cpuOnlyFamilies = []string{
 }

 func NewLlamaServer(model string, adapters, projectors []string, opts api.Options) (*LlamaServer, error) {
-	if _, err := os.Stat(model); err != nil {
-		return nil, err
-	}
-
 	f, err := os.Open(model)
 	if err != nil {
 		return nil, err
@@ -65,67 +61,79 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 		opts.NumCtx = 4
 	}

-	availableMemory, _ := gpu.CheckVRAM()
+	memoryAvailable, _ := gpu.CheckVRAM()
 	info := gpu.GetGPUInfo()

-	usedMemory := info.MinimumMemory
+	memoryMinimum := info.MinimumMemory
 	for _, projector := range projectors {
-		usedMemory += projectorMemoryRequirements(projector)
+		memoryMinimum += projectorMemoryRequirements(projector)

 		// multimodal models require at least 2048 context
 		opts.NumCtx = max(opts.NumCtx, 2048)
 	}

 	// fp16 k,v = (1 (k) + 1 (v)) * sizeof(float16) * n_ctx * n_layer * n_embd / n_head * n_head_kv
-	kv := 2 * 2 * int64(opts.NumCtx) * int64(ggml.KV().BlockCount()) * int64(ggml.KV().EmbeddingLength()) / int64(ggml.KV().HeadCount()) * int64(ggml.KV().HeadCountKV())
+	var kv uint64 = 2 * 2 * uint64(opts.NumCtx) * ggml.KV().BlockCount() * ggml.KV().EmbeddingLength() / ggml.KV().HeadCount() * ggml.KV().HeadCountKV()

-	graph, ok := ggml.GraphSize(opts.NumCtx, min(opts.NumCtx, opts.NumBatch))
-	if !ok {
-		graph = int64(ggml.KV().GQA()) * kv / 6
+	graphPartialOffload, graphFullOffload := ggml.GraphSize(uint64(opts.NumCtx), uint64(min(opts.NumCtx, opts.NumBatch)))
+	if graphPartialOffload == 0 {
+		graphPartialOffload = ggml.KV().GQA() * kv / 6
 	}

-	usedMemory += graph
-
-	if (usedMemory > availableMemory || slices.Contains(cpuOnlyFamilies, ggml.KV().Architecture())) && info.Library != "metal" {
-		info.Library = "cpu"
+	if graphFullOffload == 0 {
+		graphFullOffload = graphPartialOffload
 	}

-	requiredMemory := usedMemory
+	// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
+	memoryRequiredTotal := memoryMinimum + graphFullOffload

-	var layers int
-	for i := 0; i < int(ggml.KV().BlockCount()); i++ {
-		layerMemory := ggml.LayerSize(fmt.Sprintf("blk.%d.", i)) + kv/int64(ggml.KV().BlockCount())
-		requiredMemory += layerMemory
+	// memoryRequiredPartial represents the memory required for partial GPU offloading (n > 0, n < layers)
+	memoryRequiredPartial := memoryMinimum + graphPartialOffload

-		if availableMemory > usedMemory+layerMemory && (opts.NumGPU < 0 || layers < opts.NumGPU) {
-			usedMemory += layerMemory
-			layers++
+	if info.Library != "metal" {
+		if memoryRequiredPartial > memoryAvailable || slices.Contains(cpuOnlyFamilies, ggml.KV().Architecture()) {
+			info.Library = "cpu"
 		}
 	}

-	memOutputLayer := ggml.LayerSize("output.")
-	requiredMemory += memOutputLayer
+	var layerCount int
+	layers := ggml.Tensors().Layers()
+	for i := 0; i < int(ggml.KV().BlockCount()); i++ {
+		memoryLayer := layers[fmt.Sprintf("%d", i)].size()

-	// only offload output layer if all repeating layers are offloaded
-	if layers >= int(ggml.KV().BlockCount()) && availableMemory > usedMemory+memOutputLayer {
-		usedMemory += memOutputLayer
-		layers++
+		// KV is proportional to the number of layers
+		memoryLayer += kv / ggml.KV().BlockCount()
+
+		memoryRequiredTotal += memoryLayer
+		if memoryAvailable > memoryRequiredPartial+memoryLayer {
+			memoryRequiredPartial += memoryLayer
+			layerCount++
+		}
+	}
+
+	memoryLayerOutput := layers["output"].size()
+	memoryRequiredTotal += memoryLayerOutput
+	if memoryAvailable > memoryRequiredTotal {
+		layerCount = int(ggml.KV().BlockCount()) + 1
+		memoryRequiredPartial = memoryRequiredTotal
+	}
+
+	if opts.NumGPU < 0 {
+		opts.NumGPU = layerCount
 	}

 	slog.Info(
 		"offload to gpu",
-		"layers", layers,
-		"required", format.HumanBytes2(requiredMemory),
-		"used", format.HumanBytes2(usedMemory),
-		"available", format.HumanBytes2(availableMemory),
+		"reallayers", opts.NumGPU,
+		"layers", layerCount,
+		"required", format.HumanBytes2(memoryRequiredTotal),
+		"used", format.HumanBytes2(memoryRequiredPartial),
+		"available", format.HumanBytes2(memoryAvailable),
 		"kv", format.HumanBytes2(kv),
-		"graph", format.HumanBytes2(graph),
+		"fulloffload", format.HumanBytes2(graphFullOffload),
+		"partialoffload", format.HumanBytes2(graphPartialOffload),
 	)

-	if opts.NumGPU < 0 && info.Library != "cpu" {
-		opts.NumGPU = layers
-	}
-
 	if len(adapters) > 1 {
 		return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
 	}
@@ -282,7 +290,7 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 	return nil, finalErr
 }

-func projectorMemoryRequirements(filename string) int64 {
+func projectorMemoryRequirements(filename string) uint64 {
 	file, err := os.Open(filename)
 	if err != nil {
 		return 0
@@ -294,18 +302,12 @@ func projectorMemoryRequirements(filename string) int64 {
 		return 0
 	}

-	prefixes := make(map[string]struct{})
-	for _, layer := range ggml.Tensors() {
-		parts := strings.Split(layer.Name, ".")
-		prefixes[strings.Join(parts[:2], ".")] = struct{}{}
+	var mem uint64
+	for _, layer := range ggml.Tensors().Layers() {
+		mem += layer.size()
 	}

-	var ask int64
-	for prefix := range prefixes {
-		ask += ggml.LayerSize(prefix)
-	}
-
-	return ask
+	return mem
 }

 type ServerStatus int
@@ -383,7 +385,8 @@ func (s *LlamaServer) Ping(ctx context.Context) error {

 func (s *LlamaServer) waitUntilRunning() error {
 	start := time.Now()
-	expiresAt := time.Now().Add(3 * time.Minute) // be generous with timeout, large models can take a while to load
+	// TODO we need to wire up a better way to detect hangs during model load and startup of the server
+	expiresAt := time.Now().Add(10 * time.Minute) // be generous with timeout, large models can take a while to load
 	ticker := time.NewTicker(50 * time.Millisecond)
 	defer ticker.Stop()

--- a/macapp/README.md
+++ b/macapp/README.md
@@ -14,7 +14,7 @@ go build .
 Then run the desktop app with `npm start`:

 ```
-cd app
+cd macapp
 npm install
 npm start
 ```
--- a/server/download.go
+++ b/server/download.go
@@ -247,7 +247,8 @@ func (b *blobDownload) downloadChunk(ctx context.Context, requestURL *url.URL, w
 				}

 				if !part.lastUpdated.IsZero() && time.Since(part.lastUpdated) > 5*time.Second {
-					slog.Info(fmt.Sprintf("%s part %d stalled; retrying", b.Digest[7:19], part.N))
+					const msg = "%s part %d stalled; retrying. If this persists, press ctrl-c to exit, then 'ollama pull' to find a faster connection."
+					slog.Info(fmt.Sprintf(msg, b.Digest[7:19], part.N))
 					// reset last updated
 					part.lastUpdated = time.Time{}
 					return errPartStalled
--- a/server/images.go
+++ b/server/images.go
@@ -284,7 +284,7 @@ func realpath(mfDir, from string) string {
 	return abspath
 }

-func CreateModel(ctx context.Context, name, modelFileDir string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
+func CreateModel(ctx context.Context, name, modelFileDir, quantization string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
 	deleteMap := make(map[string]struct{})
 	if manifest, _, err := GetManifest(ParseModelPath(name)); err == nil {
 		for _, layer := range append(manifest.Layers, manifest.Config) {
@@ -337,8 +337,27 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars

 			if ggufName != "" {
 				pathName = ggufName
-				slog.Debug(fmt.Sprintf("new image layer path: %s", pathName))
 				defer os.RemoveAll(ggufName)
+
+				if quantization != "" {
+					quantization = strings.ToUpper(quantization)
+					fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", "F16", quantization)})
+					tempfile, err := os.CreateTemp(filepath.Dir(ggufName), quantization)
+					if err != nil {
+						return err
+					}
+					defer os.RemoveAll(tempfile.Name())
+
+					if err := llm.Quantize(ggufName, tempfile.Name(), quantization); err != nil {
+						return err
+					}
+
+					if err := tempfile.Close(); err != nil {
+						return err
+					}
+
+					pathName = tempfile.Name()
+				}
 			}

 			bin, err := os.Open(pathName)
--- a/server/routes.go
+++ b/server/routes.go
@@ -647,7 +647,7 @@ func CreateModelHandler(c *gin.Context) {
 		ctx, cancel := context.WithCancel(c.Request.Context())
 		defer cancel()

-		if err := CreateModel(ctx, model, filepath.Dir(req.Path), commands, fn); err != nil {
+		if err := CreateModel(ctx, model, filepath.Dir(req.Path), req.Quantization, commands, fn); err != nil {
 			ch <- gin.H{"error": err.Error()}
 		}
 	}()
@@ -913,6 +913,24 @@ func HeadBlobHandler(c *gin.Context) {
 }

 func CreateBlobHandler(c *gin.Context) {
+	path, err := GetBlobsPath(c.Param("digest"))
+	if err != nil {
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+		return
+	}
+
+	_, err = os.Stat(path)
+	switch {
+	case errors.Is(err, os.ErrNotExist):
+		// noop
+	case err != nil:
+		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+		return
+	default:
+		c.Status(http.StatusOK)
+		return
+	}
+
 	layer, err := NewLayer(c.Request.Body, "")
 	if err != nil {
 		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
--- a/server/routes_test.go
+++ b/server/routes_test.go
@@ -61,7 +61,7 @@ func Test_Routes(t *testing.T) {
 		fn := func(resp api.ProgressResponse) {
 			t.Logf("Status: %s", resp.Status)
 		}
-		err = CreateModel(context.TODO(), name, "", commands, fn)
+		err = CreateModel(context.TODO(), name, "", "", commands, fn)
 		assert.Nil(t, err)
 	}

--- a/types/model/digest.go
+++ b/types/model/digest.go
@@ -0,0 +1,83 @@
+package model
+
+import (
+	"database/sql"
+	"database/sql/driver"
+	"errors"
+	"fmt"
+	"log/slog"
+	"strings"
+	"unicode"
+)
+
+// Digest represents a digest of a model Manifest. It is a comparable value
+// type and is immutable.
+//
+// The zero Digest is not a valid digest.
+type Digest struct {
+	s string
+}
+
+// Type returns the digest type of the digest.
+//
+// Example:
+//
+//	ParseDigest("sha256-1234").Type() // returns "sha256"
+func (d Digest) Type() string {
+	typ, _, _ := strings.Cut(d.s, "-")
+	return typ
+}
+
+// String returns the digest in the form of "<digest-type>-<digest>", or the
+// empty string if the digest is invalid.
+func (d Digest) String() string { return d.s }
+
+// IsValid returns true if the digest is valid (not zero).
+//
+// A valid digest may be created only by ParseDigest, or
+// ParseName(name).Digest().
+func (d Digest) IsValid() bool { return d.s != "" }
+
+// LogValue implements slog.Value.
+func (d Digest) LogValue() slog.Value {
+	return slog.StringValue(d.String())
+}
+
+var (
+	_ slog.LogValuer = Digest{}
+)
+
+// ParseDigest parses a string in the form of "<digest-type>-<digest>" into a
+// Digest.
+func ParseDigest(s string) Digest {
+	typ, digest, ok := strings.Cut(s, "-")
+	if ok && isValidDigestType(typ) && isValidHex(digest) {
+		return Digest{s: s}
+	}
+	return Digest{}
+}
+
+func isValidDigestType(s string) bool {
+	if len(s) == 0 {
+		return false
+	}
+	for _, r := range s {
+		if !unicode.IsLower(r) && !unicode.IsDigit(r) {
+			return false
+		}
+	}
+	return true
+}
+
+func isValidHex(s string) bool {
+	if len(s) == 0 {
+		return false
+	}
+	for i := range s {
+		c := s[i]
+		if c < '0' || c > '9' && c < 'a' || c > 'f' {
+			return false
+		}
+	}
+	return true
+}
--- a/types/model/digest_test.go
+++ b/types/model/digest_test.go
@@ -0,0 +1,46 @@
+package model
+
+import "testing"
+
+var testDigests = map[string]Digest{
+	"":                 {},
+	"sha256-1234":      {s: "sha256-1234"},
+	"sha256-5678":      {s: "sha256-5678"},
+	"blake2-9abc":      {s: "blake2-9abc"},
+	"-1234":            {},
+	"sha256-":          {},
+	"sha256-1234-5678": {},
+	"sha256-P":         {}, //         invalid  hex
+	"sha256-1234P":     {},
+	"---":              {},
+}
+
+func TestDigestParse(t *testing.T) {
+	// Test cases.
+	for s, want := range testDigests {
+		got := ParseDigest(s)
+		t.Logf("ParseDigest(%q) = %#v", s, got)
+		if got != want {
+			t.Errorf("ParseDigest(%q) = %q; want %q", s, got, want)
+		}
+	}
+}
+
+func TestDigestString(t *testing.T) {
+	// Test cases.
+	for s, d := range testDigests {
+		want := s
+		if !d.IsValid() {
+			want = ""
+		}
+		got := d.String()
+		if got != want {
+			t.Errorf("ParseDigest(%q).String() = %q; want %q", s, got, want)
+		}
+
+		got = ParseDigest(s).String()
+		if got != want {
+			t.Errorf("roundtrip ParseDigest(%q).String() = %q; want %q", s, got, want)
+		}
+	}
+}
--- a/types/model/name.go
+++ b/types/model/name.go
@@ -0,0 +1,572 @@
+package model
+
+import (
+	"cmp"
+	"errors"
+	"hash/maphash"
+	"io"
+	"log/slog"
+	"slices"
+	"strings"
+	"sync"
+
+	"github.com/ollama/ollama/types/structs"
+)
+
+// Errors
+var (
+	// ErrInvalidName, ErrIncompleteName, and ErrInvalidDigest are not
+	// used by this package, but are exported so that other packages can
+	// use them, instead of defining their own errors for them.
+	ErrInvalidName    = errors.New("invalid model name")
+	ErrIncompleteName = errors.New("incomplete model name")
+	ErrInvalidDigest  = errors.New("invalid digest")
+)
+
+// Defaults
+const (
+	// DefaultMask is the default mask used by [Name.DisplayShortest].
+	DefaultMask = "registry.ollama.ai/library/_:latest"
+
+	// DefaultFill is the default fill used by [ParseName].
+	DefaultFill = "registry.ollama.ai/library/_:latest"
+)
+
+const MaxNamePartLen = 128
+
+type PartKind int
+
+// Levels of concreteness
+const (
+	// Each value aligns with its index in the Name.parts array.
+
+	PartHost PartKind = iota
+	PartNamespace
+	PartModel
+	PartTag
+	PartBuild
+	PartDigest
+
+	// Invalid is a special part that is used to indicate that a part is
+	// invalid. It is not a valid part of a Name.
+	//
+	// It should be kept as the last part in the list.
+	PartInvalid
+)
+
+var kindNames = map[PartKind]string{
+	PartHost:      "Host",
+	PartNamespace: "Namespace",
+	PartModel:     "Name",
+	PartTag:       "Tag",
+	PartBuild:     "Build",
+	PartDigest:    "Digest",
+	PartInvalid:   "Invalid",
+}
+
+func (k PartKind) String() string {
+	return cmp.Or(kindNames[k], "Unknown")
+}
+
+// Name is an opaque reference to a model. It holds the parts of a model
+// with the case preserved, but is not directly comparable with other Names
+// since model names can be represented with different casing depending on
+// the use case. For instance, "Mistral" and "mistral" are the same model
+// but each version may have come from different sources (e.g. copied from a
+// Web page, or from a file path).
+//
+// Valid Names can ONLY be constructed by calling [ParseName].
+//
+// A Name is valid if and only if is have a valid Model part. The other parts
+// are optional.
+//
+// A Name is considered "complete" if it has all parts present. To check if a
+// Name is complete, use [Name.IsComplete].
+//
+// To compare two names in a case-insensitive manner, use [Name.EqualFold].
+//
+// The parts of a Name are:
+//
+//   - Host: the domain of the model (optional)
+//   - Namespace: the namespace of the model (optional)
+//   - Model: the name of the model (required)
+//   - Tag: the tag of the model (optional)
+//   - Build: the build of the model; usually the quantization or "file type" (optional)
+//
+// The parts can be obtained in their original form by calling [Name.Parts].
+//
+// To check if a Name has at minimum a valid model part, use [Name.IsValid].
+//
+// To make a Name by filling in missing parts from another Name, use [Fill].
+type Name struct {
+	_     structs.Incomparable
+	parts [6]string // host, namespace, model, tag, build, digest
+
+	// TODO(bmizerany): track offsets and hold s (raw string) here? We
+	// could pack the offsets all into a single uint64 since the first
+	// parts take less bits since their max offset is less than the max
+	// offset of the next part. This would save a ton of bytes per Name
+	// and mean zero allocations for String.
+}
+
+// ParseNameFill parses s into a Name, and returns the result of filling it with
+// defaults. The input string must be a valid string
+// representation of a model name in the form:
+//
+//	[host/][namespace/]<model>[:tag][+build][@<digest-type>-<digest>]
+//
+// The name part is required, all others are optional. If a part is missing,
+// it is left empty in the returned Name. If a part is invalid, the zero Ref
+// value is returned.
+//
+// The build part is normalized to uppercase.
+//
+// Examples of valid paths:
+//
+//	"example.com/library/mistral:7b+x"
+//	"example.com/eva/mistral:7b+Q4_0"
+//	"mistral:7b+x"
+//	"example.com/mike/mistral:latest+Q4_0"
+//	"example.com/bruce/mistral:latest"
+//	"example.com/pdevine/thisisfine:7b+Q4_0@sha256-1234567890abcdef"
+//
+// Examples of invalid paths:
+//
+//	"example.com/mistral:7b+"
+//	"example.com/mistral:7b+Q4_0+"
+//	"x/y/z/z:8n+I"
+//	""
+//
+// It returns the zero value if any part is invalid.
+//
+// As a rule of thumb, an valid name is one that can be round-tripped with
+// the [Name.String] method. That means ("x+") is invalid because
+// [Name.String] will not print a "+" if the build is empty.
+//
+// For more about filling in missing parts, see [Fill].
+func ParseNameFill(s, defaults string) Name {
+	var r Name
+	parts(s)(func(kind PartKind, part string) bool {
+		if kind == PartInvalid {
+			r = Name{}
+			return false
+		}
+		if kind == PartDigest && !ParseDigest(part).IsValid() {
+			r = Name{}
+			return false
+		}
+		r.parts[kind] = part
+		return true
+	})
+	if r.IsValid() || r.IsResolved() {
+		if defaults == "" {
+			return r
+		}
+		return Fill(r, ParseNameFill(defaults, ""))
+	}
+	return Name{}
+}
+
+// ParseName is equal to ParseNameFill(s, DefaultFill).
+func ParseName(s string) Name {
+	return ParseNameFill(s, DefaultFill)
+}
+
+func MustParseNameFill(s, defaults string) Name {
+	r := ParseNameFill(s, "")
+	if !r.IsValid() {
+		panic("model.MustParseName: invalid name: " + s)
+	}
+	return r
+}
+
+// Fill fills in the missing parts of dst with the parts of src.
+//
+// The returned Name will only be valid if dst is valid.
+func Fill(dst, src Name) Name {
+	var r Name
+	for i := range r.parts {
+		r.parts[i] = cmp.Or(dst.parts[i], src.parts[i])
+	}
+	return r
+}
+
+// WithBuild returns a copy of r with the build set to the given string.
+func (r Name) WithBuild(build string) Name {
+	r.parts[PartBuild] = build
+	return r
+}
+
+func (r Name) WithDigest(digest Digest) Name {
+	r.parts[PartDigest] = digest.String()
+	return r
+}
+
+var mapHashSeed = maphash.MakeSeed()
+
+// MapHash returns a case insensitive hash for use in maps and equality
+// checks. For a convenient way to compare names, use [Name.EqualFold].
+//
+//nolint:errcheck
+func (r Name) MapHash() uint64 {
+	// correctly hash the parts with case insensitive comparison
+	var h maphash.Hash
+	h.SetSeed(mapHashSeed)
+	for _, part := range r.Parts() {
+		// downcase the part for hashing
+		for i := range part {
+			c := part[i]
+			if c >= 'A' && c <= 'Z' {
+				c = c - 'A' + 'a'
+			}
+			h.WriteByte(c)
+		}
+	}
+	return h.Sum64()
+}
+
+func (r Name) slice(from, to PartKind) Name {
+	var v Name
+	copy(v.parts[from:to+1], r.parts[from:to+1])
+	return v
+}
+
+// DisplayShortest returns the shortest possible display string in form:
+//
+//	[host/][<namespace>/]<model>[:<tag>]
+//
+// The host is omitted if it is the mask host is the same as r.
+// The namespace is omitted if the host and the namespace are the same as r.
+// The tag is omitted if it is the mask tag is the same as r.
+func (r Name) DisplayShortest(mask string) string {
+	mask = cmp.Or(mask, DefaultMask)
+	d := ParseName(mask)
+	if !d.IsValid() {
+		panic("mask is an invalid Name")
+	}
+	equalSlice := func(form, to PartKind) bool {
+		return r.slice(form, to).EqualFold(d.slice(form, to))
+	}
+	if equalSlice(PartHost, PartNamespace) {
+		r.parts[PartNamespace] = ""
+	}
+	if equalSlice(PartHost, PartHost) {
+		r.parts[PartHost] = ""
+	}
+	if equalSlice(PartTag, PartTag) {
+		r.parts[PartTag] = ""
+	}
+	return r.slice(PartHost, PartTag).String()
+}
+
+var seps = [...]string{
+	PartHost:      "/",
+	PartNamespace: "/",
+	PartModel:     ":",
+	PartTag:       "+",
+	PartBuild:     "@",
+	PartDigest:    "",
+}
+
+// WriteTo implements io.WriterTo. It writes the fullest possible display
+// string in form:
+//
+//	<host>/<namespace>/<model>:<tag>+<build>@<digest-type>-<digest>
+//
+// Missing parts and their separators are not written.
+//
+// The full digest is always prefixed with "@". That is if [Name.IsValid]
+// reports false and [Name.IsResolved] reports true, then the string is
+// returned as "@<digest-type>-<digest>".
+func (r Name) writeTo(w io.StringWriter) error {
+	var partsWritten int
+	for i := range r.parts {
+		if r.parts[i] == "" {
+			continue
+		}
+		if partsWritten > 0 || i == int(PartDigest) {
+			if _, err := w.WriteString(seps[i-1]); err != nil {
+				return err
+			}
+		}
+		if _, err := w.WriteString(r.parts[i]); err != nil {
+			return err
+		}
+		partsWritten++
+	}
+	return nil
+}
+
+var builderPool = sync.Pool{
+	New: func() interface{} {
+		return &strings.Builder{}
+	},
+}
+
+// String returns the fullest possible display string in form:
+//
+//	<host>/<namespace>/<model>:<tag>+<build>
+//
+// If any part is missing, it is omitted from the display string.
+//
+// For the fullest possible display string without the build, use
+// [Name.DisplayFullest].
+func (r Name) String() string {
+	b := builderPool.Get().(*strings.Builder)
+	defer builderPool.Put(b)
+	b.Reset()
+	b.Grow(50) // arbitrarily long enough for most names
+	_ = r.writeTo(b)
+	return b.String()
+}
+
+// GoString implements fmt.GoStringer. It returns a string suitable for
+// debugging and logging. It is similar to [Name.String] but it always
+// returns a string that includes all parts of the Name, with missing parts
+// replaced with a ("?").
+func (r Name) GoString() string {
+	for i := range r.parts {
+		r.parts[i] = cmp.Or(r.parts[i], "?")
+	}
+	return r.String()
+}
+
+// LogValue implements slog.Valuer.
+func (r Name) LogValue() slog.Value {
+	return slog.StringValue(r.GoString())
+}
+
+// IsComplete reports whether the Name is fully qualified. That is it has a
+// domain, namespace, name, tag, and build.
+func (r Name) IsComplete() bool {
+	return !slices.Contains(r.parts[:PartDigest], "")
+}
+
+// IsCompleteNoBuild is like [Name.IsComplete] but it does not require the
+// build part to be present.
+func (r Name) IsCompleteNoBuild() bool {
+	return !slices.Contains(r.parts[:PartBuild], "")
+}
+
+// IsResolved reports true if the Name has a valid digest.
+//
+// It is possible to have a valid Name, or a complete Name that is not
+// resolved.
+func (r Name) IsResolved() bool {
+	return r.Digest().IsValid()
+}
+
+// Digest returns the digest part of the Name, if any.
+//
+// If Digest returns a non-empty string, then [Name.IsResolved] will return
+// true, and digest is considered valid.
+func (r Name) Digest() Digest {
+	// This was already validated by ParseName, so we can just return it.
+	return Digest{r.parts[PartDigest]}
+}
+
+// EqualFold reports whether r and o are equivalent model names, ignoring
+// case.
+func (r Name) EqualFold(o Name) bool {
+	return r.CompareFold(o) == 0
+}
+
+// CompareFold performs a case-insensitive cmp.Compare on r and o.
+//
+// This can be used with [slices.SortFunc].
+//
+// For simple equality checks, use [Name.EqualFold].
+func (r Name) CompareFold(o Name) int {
+	return slices.CompareFunc(r.parts[:], o.parts[:], compareFold)
+}
+
+func compareFold(a, b string) int {
+	return slices.CompareFunc([]rune(a), []rune(b), func(a, b rune) int {
+		return cmp.Compare(downcase(a), downcase(b))
+	})
+}
+
+func downcase(r rune) rune {
+	if r >= 'A' && r <= 'Z' {
+		return r - 'A' + 'a'
+	}
+	return r
+}
+
+// TODO(bmizerany): driver.Value? (MarshalText etc should be enough)
+
+// Parts returns the parts of the Name in order of concreteness.
+//
+// The length of the returned slice is always 5.
+func (r Name) Parts() []string {
+	return slices.Clone(r.parts[:])
+}
+
+// iter_Seq2 is a iter.Seq2 defined here to avoid the current build
+// restrictions in the go1.22 iter package requiring the
+// goexperiment.rangefunc tag to be set via the GOEXPERIMENT=rangefunc flag,
+// which we are not yet ready to support.
+//
+// Once we are ready to support rangefunc, this can be removed and replaced
+// with the iter.Seq2 type.
+type iter_Seq2[A, B any] func(func(A, B) bool)
+
+// Parts returns a sequence of the parts of a Name string from most specific
+// to least specific.
+//
+// It normalizes the input string by removing "http://" and "https://" only.
+// No other normalizations are performed.
+func parts(s string) iter_Seq2[PartKind, string] {
+	return func(yield func(PartKind, string) bool) {
+		//nolint:gosimple
+		if strings.HasPrefix(s, "http://") {
+			s = s[len("http://"):]
+		}
+		//nolint:gosimple
+		if strings.HasPrefix(s, "https://") {
+			s = s[len("https://"):]
+		}
+
+		if len(s) > MaxNamePartLen || len(s) == 0 {
+			return
+		}
+
+		yieldValid := func(kind PartKind, part string) bool {
+			if !isValidPart(kind, part) {
+				yield(PartInvalid, "")
+				return false
+			}
+			return yield(kind, part)
+		}
+
+		numConsecutiveDots := 0
+		partLen := 0
+		state, j := PartDigest, len(s)
+		for i := len(s) - 1; i >= 0; i-- {
+			if partLen++; partLen > MaxNamePartLen {
+				// catch a part that is too long early, so
+				// we don't keep spinning on it, waiting for
+				// an isInValidPart check which would scan
+				// over it again.
+				yield(PartInvalid, "")
+				return
+			}
+
+			switch s[i] {
+			case '@':
+				switch state {
+				case PartDigest:
+					if !yieldValid(PartDigest, s[i+1:j]) {
+						return
+					}
+					if i == 0 {
+						// This is the form
+						// "@<digest>" which is valid.
+						//
+						// We're done.
+						return
+					}
+					state, j, partLen = PartBuild, i, 0
+				default:
+					yield(PartInvalid, "")
+					return
+				}
+			case '+':
+				switch state {
+				case PartBuild, PartDigest:
+					if !yieldValid(PartBuild, s[i+1:j]) {
+						return
+					}
+					state, j, partLen = PartTag, i, 0
+				default:
+					yield(PartInvalid, "")
+					return
+				}
+			case ':':
+				switch state {
+				case PartTag, PartBuild, PartDigest:
+					if !yieldValid(PartTag, s[i+1:j]) {
+						return
+					}
+					state, j, partLen = PartModel, i, 0
+				default:
+					yield(PartInvalid, "")
+					return
+				}
+			case '/':
+				switch state {
+				case PartModel, PartTag, PartBuild, PartDigest:
+					if !yieldValid(PartModel, s[i+1:j]) {
+						return
+					}
+					state, j = PartNamespace, i
+				case PartNamespace:
+					if !yieldValid(PartNamespace, s[i+1:j]) {
+						return
+					}
+					state, j, partLen = PartHost, i, 0
+				default:
+					yield(PartInvalid, "")
+					return
+				}
+			default:
+				if s[i] == '.' {
+					if numConsecutiveDots++; numConsecutiveDots > 1 {
+						yield(PartInvalid, "")
+						return
+					}
+				} else {
+					numConsecutiveDots = 0
+				}
+				if !isValidByteFor(state, s[i]) {
+					yield(PartInvalid, "")
+					return
+				}
+			}
+		}
+
+		if state <= PartNamespace {
+			yieldValid(state, s[:j])
+		} else {
+			yieldValid(PartModel, s[:j])
+		}
+	}
+}
+
+func (r Name) IsZero() bool {
+	return r.parts == [6]string{}
+}
+
+// IsValid reports if a model has at minimum a valid model part.
+func (r Name) IsValid() bool {
+	// Parts ensures we only have valid parts, so no need to validate
+	// them here, only check if we have a name or not.
+	return r.parts[PartModel] != ""
+}
+
+// isValidPart reports if s contains all valid characters for the given
+// part kind.
+func isValidPart(kind PartKind, s string) bool {
+	if s == "" {
+		return false
+	}
+	for _, c := range []byte(s) {
+		if !isValidByteFor(kind, c) {
+			return false
+		}
+	}
+	return true
+}
+
+func isValidByteFor(kind PartKind, c byte) bool {
+	if kind == PartNamespace && c == '.' {
+		return false
+	}
+	if c == '.' || c == '-' {
+		return true
+	}
+	if c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '_' {
+		return true
+	}
+	return false
+}
--- a/types/model/name_test.go
+++ b/types/model/name_test.go
@@ -0,0 +1,490 @@
+package model
+
+import (
+	"bytes"
+	"cmp"
+	"fmt"
+	"log/slog"
+	"slices"
+	"strings"
+	"testing"
+)
+
+type fields struct {
+	host, namespace, model, tag, build string
+	digest                             string
+}
+
+func fieldsFromName(p Name) fields {
+	return fields{
+		host:      p.parts[PartHost],
+		namespace: p.parts[PartNamespace],
+		model:     p.parts[PartModel],
+		tag:       p.parts[PartTag],
+		build:     p.parts[PartBuild],
+		digest:    p.parts[PartDigest],
+	}
+}
+
+var testNames = map[string]fields{
+	"mistral:latest":                 {model: "mistral", tag: "latest"},
+	"mistral":                        {model: "mistral"},
+	"mistral:30B":                    {model: "mistral", tag: "30B"},
+	"mistral:7b":                     {model: "mistral", tag: "7b"},
+	"mistral:7b+Q4_0":                {model: "mistral", tag: "7b", build: "Q4_0"},
+	"mistral+KQED":                   {model: "mistral", build: "KQED"},
+	"mistral.x-3:7b+Q4_0":            {model: "mistral.x-3", tag: "7b", build: "Q4_0"},
+	"mistral:7b+q4_0":                {model: "mistral", tag: "7b", build: "q4_0"},
+	"llama2":                         {model: "llama2"},
+	"user/model":                     {namespace: "user", model: "model"},
+	"example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"},
+	"example.com/ns/mistral:7b+X":    {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"},
+
+	// invalid digest
+	"mistral:latest@invalid256-": {},
+	"mistral:latest@-123":        {},
+	"mistral:latest@!-123":       {},
+	"mistral:latest@1-!":         {},
+	"mistral:latest@":            {},
+
+	// resolved
+	"x@sha123-1": {model: "x", digest: "sha123-1"},
+	"@sha456-2":  {digest: "sha456-2"},
+
+	"@@sha123-1": {},
+
+	// preserves case for build
+	"x+b": {model: "x", build: "b"},
+
+	// invalid (includes fuzzing trophies)
+	" / / : + ": {},
+	" / : + ":   {},
+	" : + ":     {},
+	" + ":       {},
+	" : ":       {},
+	" / ":       {},
+	" /":        {},
+	"/ ":        {},
+	"/":         {},
+	":":         {},
+	"+":         {},
+
+	// (".") in namepsace is not allowed
+	"invalid.com/7b+x": {},
+
+	"invalid:7b+Q4_0:latest": {},
+	"in valid":               {},
+	"invalid/y/z/foo":        {},
+	"/0":                     {},
+	"0 /0":                   {},
+	"0 /":                    {},
+	"0/":                     {},
+	":/0":                    {},
+	"+0/00000":               {},
+	"0+.\xf2\x80\xf6\x9d00000\xe5\x99\xe6\xd900\xd90\xa60\x91\xdc0\xff\xbf\x99\xe800\xb9\xdc\xd6\xc300\x970\xfb\xfd0\xe0\x8a\xe1\xad\xd40\x9700\xa80\x980\xdd0000\xb00\x91000\xfe0\x89\x9b\x90\x93\x9f0\xe60\xf7\x84\xb0\x87\xa5\xff0\xa000\x9a\x85\xf6\x85\xfe\xa9\xf9\xe9\xde00\xf4\xe0\x8f\x81\xad\xde00\xd700\xaa\xe000000\xb1\xee0\x91": {},
+	"0//0":                        {},
+	"m+^^^":                       {},
+	"file:///etc/passwd":          {},
+	"file:///etc/passwd:latest":   {},
+	"file:///etc/passwd:latest+u": {},
+
+	":x": {},
+	"+x": {},
+	"x+": {},
+
+	// Disallow ("\.+") in any part to prevent path traversal anywhere
+	// we convert the name to a path.
+	"../etc/passwd":  {},
+	".../etc/passwd": {},
+	"./../passwd":    {},
+	"./0+..":         {},
+
+	strings.Repeat("a", MaxNamePartLen):   {model: strings.Repeat("a", MaxNamePartLen)},
+	strings.Repeat("a", MaxNamePartLen+1): {},
+}
+
+// TestConsecutiveDots tests that consecutive dots are not allowed in any
+// part, to avoid path traversal. There also are some tests in testNames, but
+// this test is more exhaustive and exists to emphasize the importance of
+// preventing path traversal.
+func TestNameConsecutiveDots(t *testing.T) {
+	for i := 1; i < 10; i++ {
+		s := strings.Repeat(".", i)
+		if i > 1 {
+			if g := ParseNameFill(s, "").String(); g != "" {
+				t.Errorf("ParseName(%q) = %q; want empty string", s, g)
+			}
+		} else {
+			if g := ParseNameFill(s, "").String(); g != s {
+				t.Errorf("ParseName(%q) = %q; want %q", s, g, s)
+			}
+		}
+	}
+}
+
+func TestNameParts(t *testing.T) {
+	var p Name
+	if w, g := int(PartDigest+1), len(p.Parts()); w != g {
+		t.Errorf("Parts() = %d; want %d", g, w)
+	}
+}
+
+func TestNamePartString(t *testing.T) {
+	if g := PartKind(-2).String(); g != "Unknown" {
+		t.Errorf("Unknown part = %q; want %q", g, "Unknown")
+	}
+	for kind, name := range kindNames {
+		if g := kind.String(); g != name {
+			t.Errorf("%s = %q; want %q", kind, g, name)
+		}
+	}
+}
+
+func TestParseName(t *testing.T) {
+	for baseName, want := range testNames {
+		for _, prefix := range []string{"", "https://", "http://"} {
+			// We should get the same results with or without the
+			// http(s) prefixes
+			s := prefix + baseName
+
+			t.Run(s, func(t *testing.T) {
+				name := ParseNameFill(s, "")
+				got := fieldsFromName(name)
+				if got != want {
+					t.Errorf("ParseName(%q) = %q; want %q", s, got, want)
+				}
+
+				// test round-trip
+				if !ParseNameFill(name.String(), "").EqualFold(name) {
+					t.Errorf("ParseName(%q).String() = %s; want %s", s, name.String(), baseName)
+				}
+			})
+		}
+	}
+}
+
+func TestCompleteWithAndWithoutBuild(t *testing.T) {
+	cases := []struct {
+		in              string
+		complete        bool
+		completeNoBuild bool
+	}{
+		{"", false, false},
+		{"incomplete/mistral:7b+x", false, false},
+		{"incomplete/mistral:7b+Q4_0", false, false},
+		{"incomplete:7b+x", false, false},
+		{"complete.com/x/mistral:latest+Q4_0", true, true},
+		{"complete.com/x/mistral:latest", false, true},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.in, func(t *testing.T) {
+			p := ParseNameFill(tt.in, "")
+			t.Logf("ParseName(%q) = %#v", tt.in, p)
+			if g := p.IsComplete(); g != tt.complete {
+				t.Errorf("Complete(%q) = %v; want %v", tt.in, g, tt.complete)
+			}
+			if g := p.IsCompleteNoBuild(); g != tt.completeNoBuild {
+				t.Errorf("CompleteNoBuild(%q) = %v; want %v", tt.in, g, tt.completeNoBuild)
+			}
+		})
+	}
+
+	// Complete uses Parts which returns a slice, but it should be
+	// inlined when used in Complete, preventing any allocations or
+	// escaping to the heap.
+	allocs := testing.AllocsPerRun(1000, func() {
+		keep(ParseNameFill("complete.com/x/mistral:latest+Q4_0", "").IsComplete())
+	})
+	if allocs > 0 {
+		t.Errorf("Complete allocs = %v; want 0", allocs)
+	}
+}
+
+func TestNameLogValue(t *testing.T) {
+	cases := []string{
+		"example.com/library/mistral:latest+Q4_0",
+		"mistral:latest",
+		"mistral:7b+Q4_0",
+	}
+	for _, s := range cases {
+		t.Run(s, func(t *testing.T) {
+			var b bytes.Buffer
+			log := slog.New(slog.NewTextHandler(&b, nil))
+			name := ParseNameFill(s, "")
+			log.Info("", "name", name)
+			want := fmt.Sprintf("name=%s", name.GoString())
+			got := b.String()
+			if !strings.Contains(got, want) {
+				t.Errorf("expected log output to contain %q; got %q", want, got)
+			}
+		})
+	}
+}
+
+func TestNameGoString(t *testing.T) {
+	cases := []struct {
+		name         string
+		in           string
+		wantString   string
+		wantGoString string // default is tt.in
+	}{
+		{
+			name:         "Complete Name",
+			in:           "example.com/library/mistral:latest+Q4_0",
+			wantGoString: "example.com/library/mistral:latest+Q4_0@?",
+		},
+		{
+			name:         "Short Name",
+			in:           "mistral:latest",
+			wantGoString: "?/?/mistral:latest+?@?",
+		},
+		{
+			name:         "Long Name",
+			in:           "library/mistral:latest",
+			wantGoString: "?/library/mistral:latest+?@?",
+		},
+		{
+			name:         "Case Preserved",
+			in:           "Library/Mistral:Latest",
+			wantGoString: "?/Library/Mistral:Latest+?@?",
+		},
+		{
+			name:         "With digest",
+			in:           "Library/Mistral:Latest@sha256-123456",
+			wantGoString: "?/Library/Mistral:Latest+?@sha256-123456",
+		},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			p := ParseNameFill(tt.in, "")
+			tt.wantGoString = cmp.Or(tt.wantGoString, tt.in)
+			if g := fmt.Sprintf("%#v", p); g != tt.wantGoString {
+				t.Errorf("GoString() = %q; want %q", g, tt.wantGoString)
+			}
+		})
+	}
+}
+
+func TestDisplayShortest(t *testing.T) {
+	cases := []struct {
+		in        string
+		mask      string
+		want      string
+		wantPanic bool
+	}{
+		{"example.com/library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
+		{"example.com/library/mistral:latest+Q4_0", "example.com/_/_:latest", "library/mistral", false},
+		{"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false},
+		{"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false},
+
+		// case-insensitive
+		{"Example.com/library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
+		{"example.com/Library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
+		{"example.com/library/Mistral:latest+Q4_0", "example.com/library/_:latest", "Mistral", false},
+		{"example.com/library/mistral:Latest+Q4_0", "example.com/library/_:latest", "mistral", false},
+		{"example.com/library/mistral:Latest+q4_0", "example.com/library/_:latest", "mistral", false},
+
+		// invalid mask
+		{"example.com/library/mistral:latest+Q4_0", "example.com/mistral", "", true},
+
+		// DefaultMask
+		{"registry.ollama.ai/library/mistral:latest+Q4_0", DefaultMask, "mistral", false},
+
+		// Auto-Fill
+		{"x", "example.com/library/_:latest", "x", false},
+		{"x", "example.com/library/_:latest+Q4_0", "x", false},
+		{"x/y:z", "a.com/library/_:latest+Q4_0", "x/y:z", false},
+		{"x/y:z", "a.com/library/_:latest+Q4_0", "x/y:z", false},
+	}
+
+	for _, tt := range cases {
+		t.Run("", func(t *testing.T) {
+			defer func() {
+				if tt.wantPanic {
+					if recover() == nil {
+						t.Errorf("expected panic")
+					}
+				}
+			}()
+
+			p := ParseNameFill(tt.in, "")
+			t.Logf("ParseName(%q) = %#v", tt.in, p)
+			if g := p.DisplayShortest(tt.mask); g != tt.want {
+				t.Errorf("got = %q; want %q", g, tt.want)
+			}
+		})
+	}
+}
+
+func TestParseNameAllocs(t *testing.T) {
+	allocs := testing.AllocsPerRun(1000, func() {
+		keep(ParseNameFill("example.com/mistral:7b+Q4_0", ""))
+	})
+	if allocs > 0 {
+		t.Errorf("ParseName allocs = %v; want 0", allocs)
+	}
+}
+
+func BenchmarkParseName(b *testing.B) {
+	b.ReportAllocs()
+
+	for range b.N {
+		keep(ParseNameFill("example.com/mistral:7b+Q4_0", ""))
+	}
+}
+
+func FuzzParseName(f *testing.F) {
+	f.Add("example.com/mistral:7b+Q4_0")
+	f.Add("example.com/mistral:7b+q4_0")
+	f.Add("example.com/mistral:7b+x")
+	f.Add("x/y/z:8n+I")
+	f.Add(":x")
+	f.Add("@sha256-123456")
+	f.Add("example.com/mistral:latest+Q4_0@sha256-123456")
+	f.Add(":@!@")
+	f.Add("...")
+	f.Fuzz(func(t *testing.T, s string) {
+		r0 := ParseNameFill(s, "")
+
+		if strings.Contains(s, "..") && !r0.IsZero() {
+			t.Fatalf("non-zero value for path with '..': %q", s)
+		}
+
+		if !r0.IsValid() && !r0.IsResolved() {
+			if !r0.EqualFold(Name{}) {
+				t.Errorf("expected invalid path to be zero value; got %#v", r0)
+			}
+			t.Skipf("invalid path: %q", s)
+		}
+
+		for _, p := range r0.Parts() {
+			if len(p) > MaxNamePartLen {
+				t.Errorf("part too long: %q", p)
+			}
+		}
+
+		if !strings.EqualFold(r0.String(), s) {
+			t.Errorf("String() did not round-trip with case insensitivity: %q\ngot  = %q\nwant = %q", s, r0.String(), s)
+		}
+
+		r1 := ParseNameFill(r0.String(), "")
+		if !r0.EqualFold(r1) {
+			t.Errorf("round-trip mismatch: %+v != %+v", r0, r1)
+		}
+	})
+}
+
+func TestFill(t *testing.T) {
+	cases := []struct {
+		dst  string
+		src  string
+		want string
+	}{
+		{"mistral", "o.com/library/PLACEHOLDER:latest+Q4_0", "o.com/library/mistral:latest+Q4_0"},
+		{"o.com/library/mistral", "PLACEHOLDER:latest+Q4_0", "o.com/library/mistral:latest+Q4_0"},
+		{"", "o.com/library/mistral:latest+Q4_0", "o.com/library/mistral:latest+Q4_0"},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.dst, func(t *testing.T) {
+			r := Fill(ParseNameFill(tt.dst, ""), ParseNameFill(tt.src, ""))
+			if r.String() != tt.want {
+				t.Errorf("Fill(%q, %q) = %q; want %q", tt.dst, tt.src, r, tt.want)
+			}
+		})
+	}
+}
+
+func TestNameStringAllocs(t *testing.T) {
+	name := ParseNameFill("example.com/ns/mistral:latest+Q4_0", "")
+	allocs := testing.AllocsPerRun(1000, func() {
+		keep(name.String())
+	})
+	if allocs > 1 {
+		t.Errorf("String allocs = %v; want 0", allocs)
+	}
+}
+
+func ExampleFill() {
+	defaults := ParseNameFill("registry.ollama.com/library/PLACEHOLDER:latest+Q4_0", "")
+	r := Fill(ParseNameFill("mistral", ""), defaults)
+	fmt.Println(r)
+
+	// Output:
+	// registry.ollama.com/library/mistral:latest+Q4_0
+}
+
+func ExampleName_MapHash() {
+	m := map[uint64]bool{}
+
+	// key 1
+	m[ParseNameFill("mistral:latest+q4", "").MapHash()] = true
+	m[ParseNameFill("miSTRal:latest+Q4", "").MapHash()] = true
+	m[ParseNameFill("mistral:LATest+Q4", "").MapHash()] = true
+
+	// key 2
+	m[ParseNameFill("mistral:LATest", "").MapHash()] = true
+
+	fmt.Println(len(m))
+	// Output:
+	// 2
+}
+
+func ExampleName_CompareFold_sort() {
+	names := []Name{
+		ParseNameFill("mistral:latest", ""),
+		ParseNameFill("mistRal:7b+q4", ""),
+		ParseNameFill("MIstral:7b", ""),
+	}
+
+	slices.SortFunc(names, Name.CompareFold)
+
+	for _, n := range names {
+		fmt.Println(n)
+	}
+
+	// Output:
+	// MIstral:7b
+	// mistRal:7b+q4
+	// mistral:latest
+}
+
+func ExampleName_completeAndResolved() {
+	for _, s := range []string{
+		"x/y/z:latest+q4_0@sha123-1",
+		"x/y/z:latest+q4_0",
+		"@sha123-1",
+	} {
+		name := ParseNameFill(s, "")
+		fmt.Printf("complete:%v resolved:%v  digest:%s\n", name.IsComplete(), name.IsResolved(), name.Digest())
+	}
+
+	// Output:
+	// complete:true resolved:true  digest:sha123-1
+	// complete:true resolved:false  digest:
+	// complete:false resolved:true  digest:sha123-1
+}
+
+func ExampleName_DisplayShortest() {
+	name := ParseNameFill("example.com/jmorganca/mistral:latest+Q4_0", "")
+
+	fmt.Println(name.DisplayShortest("example.com/jmorganca/_:latest"))
+	fmt.Println(name.DisplayShortest("example.com/_/_:latest"))
+	fmt.Println(name.DisplayShortest("example.com/_/_:_"))
+	fmt.Println(name.DisplayShortest("_/_/_:_"))
+
+	// Default
+	name = ParseNameFill("registry.ollama.ai/library/mistral:latest+Q4_0", "")
+	fmt.Println(name.DisplayShortest(""))
+
+	// Output:
+	// mistral
+	// jmorganca/mistral
+	// jmorganca/mistral:latest
+	// example.com/jmorganca/mistral:latest
+	// mistral
+}
+
+func keep[T any](v T) T { return v }
--- a/types/model/testdata/fuzz/FuzzParseRef/1d43ee52085cb4aa
+++ b/types/model/testdata/fuzz/FuzzParseRef/1d43ee52085cb4aa
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("/0")
--- a/types/model/testdata/fuzz/FuzzParseRef/27fd759314f0e6d6
+++ b/types/model/testdata/fuzz/FuzzParseRef/27fd759314f0e6d6
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("0//0")
--- a/types/model/testdata/fuzz/FuzzParseRef/3e3b70dba384074d
+++ b/types/model/testdata/fuzz/FuzzParseRef/3e3b70dba384074d
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("0 /0")
--- a/types/model/testdata/fuzz/FuzzParseRef/71f1fdff711b6dab
+++ b/types/model/testdata/fuzz/FuzzParseRef/71f1fdff711b6dab
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("+0/00000")
--- a/types/model/testdata/fuzz/FuzzParseRef/82c2975c430ac608
+++ b/types/model/testdata/fuzz/FuzzParseRef/82c2975c430ac608
@@ -0,0 +1,2 @@
+go test fuzz v1
+string(":")
--- a/types/model/testdata/fuzz/FuzzParseRef/b51b1c875e61a948
+++ b/types/model/testdata/fuzz/FuzzParseRef/b51b1c875e61a948
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("0+.\xf2\x80\xf6\x9d00000\xe5\x99\xe6\xd900\xd90\xa60\x91\xdc0\xff\xbf\x99\xe800\xb9\xdc\xd6\xc300\x970\xfb\xfd0\xe0\x8a\xe1\xad\xd40\x9700\xa80\x980\xdd0000\xb00\x91000\xfe0\x89\x9b\x90\x93\x9f0\xe60\xf7\x84\xb0\x87\xa5\xff0\xa000\x9a\x85\xf6\x85\xfe\xa9\xf9\xe9\xde00\xf4\xe0\x8f\x81\xad\xde00\xd700\xaa\xe000000\xb1\xee0\x91")
--- a/types/structs/structs.go
+++ b/types/structs/structs.go
@@ -0,0 +1,15 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+// Package structs contains the Incomparable type.
+package structs
+
+// Incomparable is a zero-width incomparable type. If added as the
+// first field in a struct, it marks that struct as not comparable
+// (can't do == or be a map key) and usually doesn't add any width to
+// the struct (unless the struct has only small fields).
+//
+// By making a struct incomparable, you can prevent misuse (prevent
+// people from using ==), but also you can shrink generated binaries,
+// as the compiler can omit equality funcs from the binary.
+type Incomparable [0]func()
Author	SHA1	Message	Date
Blake Mizerany	42f2cc408e	types/model: remove (*Digest).Scan and Digest.Value (#3589 )	2024-04-11 00:37:26 -07:00
Blake Mizerany	9446b795b5	types/model: remove DisplayLong (#3587 )	2024-04-10 16:55:12 -07:00
Blake Mizerany	62f8cda3b3	types/model: remove MarshalText/UnmarshalText from Digest (#3586 )	2024-04-10 16:52:49 -07:00
Blake Mizerany	6a1de23175	types/model: init with Name and Digest types (#3541 )	2024-04-10 16:30:05 -07:00
Blake Mizerany	a7b431e743	server: provide helpful workaround hint when stalling on pull (#3584 ) This is a quick fix to help users who are stuck on the "pull" step at 99%. In the near future we're introducing a new registry client that should/will hopefully be smarter. In the meantime, this should unblock the users hitting issue #1736.	2024-04-10 16:24:37 -07:00
Michael Yang	5a25f93522	Merge pull request #3478 from ollama/mxyng/tensor-layer refactor tensor query	2024-04-10 12:45:03 -07:00
Michael Yang	7e33a017c0	partial offloading	2024-04-10 11:37:20 -07:00
Michael Yang	8b2c10061c	refactor tensor query	2024-04-10 11:37:20 -07:00
Michael Yang	c5c451ca3b	Merge pull request #3579 from ollama/mxyng/fix-ci fix ci	2024-04-10 11:37:01 -07:00
Michael Yang	2b4ca6cf36	fix ci	2024-04-10 11:35:12 -07:00
Eli Bendersky	ad90b9ab3d	api: start adding documentation to package api (#2878 ) * api: start adding documentation to package api Updates #2840 * Fix lint typo report	2024-04-10 13:31:55 -04:00
Eli Bendersky	4340f8eba4	examples: start adding Go examples using api/ (#2879 ) We can have the same examples as e.g. https://github.com/ollama/ollama-python/tree/main/examples here. Using consistent naming and renaming the existing example to have -http- since it uses direct HTTP requests rather than api/ Updates #2840	2024-04-10 13:26:45 -04:00
Daniel Hiltgen	4c7db6b7e9	Merge pull request #3566 from dhiltgen/more_time Handle very slow model loads	2024-04-09 16:53:49 -07:00
Michael Yang	c03f0e3c3d	Merge pull request #3565 from ollama/mxyng/rope fix: rope	2024-04-09 16:36:55 -07:00
Daniel Hiltgen	c5ff443b9f	Handle very slow model loads During testing, we're seeing some models take over 3 minutes.	2024-04-09 16:35:10 -07:00
Michael Yang	01114b4526	fix: rope	2024-04-09 16:15:24 -07:00
Blake Mizerany	1524f323a3	Revert "build.go: introduce a friendlier way to build Ollama (#3548 )" (#3564 )	2024-04-09 15:57:45 -07:00
Blake Mizerany	fccf3eecaa	build.go: introduce a friendlier way to build Ollama (#3548 ) This commit introduces a more friendly way to build Ollama dependencies and the binary without abusing `go generate` and removing the unnecessary extra steps it brings with it. This script also provides nicer feedback to the user about what is happening during the build process. At the end, it prints a helpful message to the user about what to do next (e.g. run the new local Ollama).	2024-04-09 14:18:47 -07:00
Michael Yang	c77d45d836	Merge pull request #3506 from ollama/mxyng/quantize-redux cgo quantize	2024-04-09 12:32:53 -07:00
Jeffrey Morgan	5ec12cec6c	update llama.cpp submodule to `1b67731` (#3561 )	2024-04-09 15:10:17 -04:00
Michael Yang	d9578d2bad	Merge pull request #3559 from ollama/mxyng/ci ci: use go-version-file	2024-04-09 11:03:18 -07:00
Michael Yang	cb8352d6b4	ci: use go-version-file	2024-04-09 09:50:12 -07:00
Alex Mavrogiannis	fc6558f47f	Correct directory reference in macapp/README (#3555 )	2024-04-09 09:48:46 -04:00
Michael Yang	9502e5661f	cgo quantize	2024-04-08 15:31:08 -07:00
Michael Yang	e1c9a2a00f	no blob create if already exists	2024-04-08 15:09:48 -07:00