some more menu options...

Touch ID for cli install; server restarts
app: fix status item icons
2025-12-30 11:10:25 -05:00 · 2024-04-28 12:40:52 -04:00 · 2024-04-27 22:42:38 -04:00 · 2024-04-27 15:57:57 -04:00 · 2024-04-27 14:20:10 -04:00 · 2024-04-26 21:14:28 -07:00
94 changed files with 1732 additions and 19703 deletions
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -311,29 +311,18 @@ jobs:
      - uses: actions/download-artifact@v4
        with:
          name: generate-windows-cpu
-          path: |
-            llm/build
-            dist/windows-amd64
      - uses: actions/download-artifact@v4
        with:
          name: generate-windows-cuda
-          path: |
-            llm/build
-            dist/windows-amd64
      - uses: actions/download-artifact@v4
        with:
          name: windows-cuda-deps
-          path: dist/deps
      - uses: actions/download-artifact@v4
        with:
          name: windows-rocm-deps
-          path: dist/deps
      - uses: actions/download-artifact@v4
        with:
          name: generate-windows-rocm
-          path: |
-            llm/build
-            dist/windows-amd64
      - run: dir llm/build
      - run: |
          $gopath=(get-command go).source | split-path -parent
@@ -342,8 +331,6 @@ jobs:
          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
          $env:PATH="$gopath;$env:PATH"
          $env:OLLAMA_SKIP_GENERATE="1"
-          $env:NVIDIA_DIR=$(resolve-path ".\dist\deps")
-          $env:HIP_PATH=$(resolve-path ".\dist\deps")
          & .\scripts\build_windows.ps1
      - uses: actions/upload-artifact@v4
        with:
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -1,5 +1,15 @@
 name: test

+concurrency:
+  # For PRs, later CI runs preempt previous ones. e.g. a force push on a PR
+  # cancels running CI jobs and starts all new ones.
+  #
+  # For non-PR pushes, concurrency.group needs to be unique for every distinct
+  # CI run we want to have happen. Use run_id, which in practice means all
+  # non-PR CI runs will be allowed to run without preempting each other.
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
+  cancel-in-progress: true
+
 on:
  pull_request:
    paths:
@@ -21,7 +31,9 @@ jobs:
      - id: changes
        run: |
          changed() {
-            git diff-tree -r --no-commit-id --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
+            git diff-tree -r --no-commit-id --name-only \
+              $(git merge-base ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}) \
+              ${{ github.event.pull_request.head.sha }} \
              | xargs python3 -c "import sys; print(any([x.startswith('$1') for x in sys.argv[1:]]))"
          }

@@ -283,7 +295,6 @@ jobs:
        with:
          go-version-file: go.mod
          cache: true
-      - run: go get
      - run: |
          case ${{ matrix.arch }} in
            amd64) echo ARCH=x86_64 ;;
--- a/.gitignore
+++ b/.gitignore
@@ -11,4 +11,4 @@ ggml-metal.metal
 .idea
 test_data
 *.crt
-llm/build
+llm/build
--- a/api/types.go
+++ b/api/types.go
@@ -396,8 +396,10 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
 func DefaultOptions() Options {
 	return Options{
 		// options set on request to runner
-		NumPredict:       -1,
-		NumKeep:          0,
+		NumPredict: -1,
+
+		// set a minimal num_keep to avoid issues on context shifts
+		NumKeep:          4,
 		Temperature:      0.8,
 		TopK:             40,
 		TopP:             0.9,
--- a/app/.gitignore
+++ b/app/.gitignore
@@ -1 +1,2 @@
 ollama.syso
+app
--- a/app/AppDelegate.h
+++ b/app/AppDelegate.h
@@ -0,0 +1,7 @@
+#import <Cocoa/Cocoa.h>
+
+@interface AppDelegate : NSObject <NSApplicationDelegate>
+
+- (void)applicationDidFinishLaunching:(NSNotification *)aNotification;
+
+@end
--- a/app/README.md
+++ b/app/README.md
@@ -1,10 +1,6 @@
 # Ollama App

-## Linux
-
-TODO
-
-## MacOS
+## macOS

 TODO

--- a/app/app_darwin.go
+++ b/app/app_darwin.go
@@ -0,0 +1,76 @@
+package main
+
+// #cgo CFLAGS: -x objective-c
+// #cgo LDFLAGS: -framework Cocoa -framework LocalAuthentication -framework ServiceManagement
+// #include "app_darwin.h"
+import "C"
+import (
+	"context"
+	"fmt"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"syscall"
+)
+
+func init() {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		panic(err)
+	}
+
+	ServerLogFile = filepath.Join(home, ".ollama", "logs", "server.log")
+}
+
+func run() {
+	initLogging()
+	slog.Info("ollama macOS app started")
+
+	// Ask to move to applications directory
+	moving := C.askToMoveToApplications()
+	if moving {
+		return
+	}
+
+	C.killOtherInstances()
+
+	code := C.installSymlink()
+	if code != 0 {
+		slog.Error("Failed to install symlink")
+	}
+
+	exe, err := os.Executable()
+	if err != nil {
+		panic(err)
+	}
+
+	var options ServerOptions
+
+	ctx, cancel := context.WithCancel(context.Background())
+	var done chan int
+
+	done, err = SpawnServer(ctx, filepath.Join(filepath.Dir(exe), "..", "Resources", "ollama"), options)
+	if err != nil {
+		slog.Error(fmt.Sprintf("Failed to spawn ollama server %s", err))
+		done = make(chan int, 1)
+		done <- 1
+	}
+
+	// Run the native macOS app
+	// Note: this will block until the app is closed
+	C.run()
+
+	slog.Info("ollama macOS app closed")
+
+	cancel()
+	slog.Info("Waiting for ollama server to shutdown...")
+	if done != nil {
+		<-done
+	}
+	slog.Info("Ollama app exiting")
+}
+
+//export Quit
+func Quit() {
+	syscall.Kill(os.Getpid(), syscall.SIGTERM)
+}
--- a/app/app_darwin.h
+++ b/app/app_darwin.h
@@ -0,0 +1,13 @@
+#import <Cocoa/Cocoa.h>
+
+@interface AppDelegate : NSObject <NSApplicationDelegate>
+- (void)applicationDidFinishLaunching:(NSNotification *)aNotification;
+@end
+
+void run();
+void killOtherInstances();
+bool askToMoveToApplications();
+int createSymlinkWithAuthorization();
+int installSymlink();
+extern void Restart();
+extern void Quit();
--- a/app/app_darwin.m
+++ b/app/app_darwin.m
@@ -0,0 +1,282 @@
+#import <AppKit/AppKit.h>
+#import <Cocoa/Cocoa.h>
+#import <CoreServices/CoreServices.h>
+#import <Security/Security.h>
+#import <ServiceManagement/ServiceManagement.h>
+#import "app_darwin.h"
+
+@interface AppDelegate ()
+
+@property (strong, nonatomic) NSStatusItem *statusItem;
+
+@end
+
+@implementation AppDelegate
+
+- (void)applicationDidFinishLaunching:(NSNotification *)aNotification {
+    // show status menu
+    NSMenu *menu = [[NSMenu alloc] init];
+
+    NSMenuItem *aboutMenuItem = [[NSMenuItem alloc] initWithTitle:@"About Ollama" action:@selector(aboutOllama) keyEquivalent:@""];
+    [aboutMenuItem setTarget:self];
+    [menu addItem:aboutMenuItem];
+
+    // Settings submenu
+    NSMenu *settingsMenu = [[NSMenu alloc] initWithTitle:@"Settings"];
+
+    // Submenu items
+    NSMenuItem *chooseModelDirectoryItem = [[NSMenuItem alloc] initWithTitle:@"Choose model directory..." action:@selector(chooseModelDirectory) keyEquivalent:@""];
+    [chooseModelDirectoryItem setTarget:self];
+    [chooseModelDirectoryItem setEnabled:YES];
+    [settingsMenu addItem:chooseModelDirectoryItem];
+
+    NSMenuItem *exposeExternallyItem = [[NSMenuItem alloc] initWithTitle:@"Allow external connections" action:@selector(toggleExposeExternally:) keyEquivalent:@""];
+    [exposeExternallyItem setTarget:self];
+    [exposeExternallyItem setState:NSOffState]; // Set initial state to off
+    [exposeExternallyItem setEnabled:YES];
+    [settingsMenu addItem:exposeExternallyItem];
+
+    NSMenuItem *allowCrossOriginItem = [[NSMenuItem alloc] initWithTitle:@"Allow browser requests" action:@selector(toggleCrossOrigin:) keyEquivalent:@""];
+    [allowCrossOriginItem setTarget:self];
+    [allowCrossOriginItem setState:NSOffState]; // Set initial state to off
+    [allowCrossOriginItem setEnabled:YES];
+    [settingsMenu addItem:allowCrossOriginItem];
+
+    NSMenuItem *settingsMenuItem = [[NSMenuItem alloc] initWithTitle:@"Settings" action:nil keyEquivalent:@""];
+    [settingsMenuItem setSubmenu:settingsMenu];
+    [menu addItem:settingsMenuItem];
+
+    [menu addItemWithTitle:@"Quit Ollama" action:@selector(quit) keyEquivalent:@"q"];
+
+    self.statusItem = [[NSStatusBar systemStatusBar] statusItemWithLength:NSVariableStatusItemLength];
+    [self.statusItem addObserver:self forKeyPath:@"button.effectiveAppearance" options:NSKeyValueObservingOptionNew|NSKeyValueObservingOptionInitial context:nil];
+
+    self.statusItem.menu = menu;
+    [self showIcon];
+}
+
+- (void)aboutOllama {
+    [[NSApplication sharedApplication] orderFrontStandardAboutPanel:nil];
+}
+
+- (void)toggleCrossOrigin:(id)sender {
+    NSMenuItem *item = (NSMenuItem *)sender;
+    if ([item state] == NSOffState) {
+        // Do something when cross-origin requests are allowed
+        [item setState:NSOnState];
+    } else {
+        // Do something when cross-origin requests are disallowed
+        [item setState:NSOffState];
+    }
+}
+
+- (void)toggleExposeExternally:(id)sender {
+    NSMenuItem *item = (NSMenuItem *)sender;
+    if ([item state] == NSOffState) {
+        // Do something when Ollama is exposed externally
+        [item setState:NSOnState];
+    } else {
+        // Do something when Ollama is not exposed externally
+        [item setState:NSOffState];
+    }
+}
+
+- (void)chooseModelDirectory {
+    NSOpenPanel *openPanel = [NSOpenPanel openPanel];
+    [openPanel setCanChooseFiles:NO];
+    [openPanel setCanChooseDirectories:YES];
+    [openPanel setAllowsMultipleSelection:NO];
+
+    NSInteger result = [openPanel runModal];
+    if (result == NSModalResponseOK) {
+        NSURL *selectedDirectoryURL = [openPanel URLs].firstObject;
+        // Do something with the selected directory URL
+    }
+}
+
+-(void) showIcon {
+    NSAppearance* appearance = self.statusItem.button.effectiveAppearance;
+    NSString* appearanceName = (NSString*)(appearance.name);
+    NSString* iconName = [[appearanceName lowercaseString] containsString:@"dark"] ? @"iconDark" : @"icon";
+    NSImage* statusImage = [NSImage imageNamed:iconName];
+    [statusImage setTemplate:YES];
+    self.statusItem.button.image = statusImage;
+}
+
+-(void)observeValueForKeyPath:(NSString *)keyPath ofObject:(id)object change:(NSDictionary<NSKeyValueChangeKey,id> *)change context:(void *)context {
+    [self showIcon];
+}
+
+- (void)quit {
+    [NSApp stop:nil];
+}
+
+@end
+
+void run() {
+    @autoreleasepool {
+        [NSApplication sharedApplication];
+        AppDelegate *appDelegate = [[AppDelegate alloc] init];
+        [NSApp setDelegate:appDelegate];
+        [NSApp run];
+    }
+}
+
+// killOtherInstances kills all other instances of the app currently
+// running. This way we can ensure that only the most recently started
+// instance of Ollama is running
+void killOtherInstances() {
+    pid_t pid = getpid();
+    NSArray *all = [[NSWorkspace sharedWorkspace] runningApplications];
+    NSMutableArray *apps = [NSMutableArray array];
+
+    for (NSRunningApplication *app in all) {
+        if ([app.bundleIdentifier isEqualToString:[[NSBundle mainBundle] bundleIdentifier]] ||
+            [app.bundleIdentifier isEqualToString:@"ai.ollama.ollama"] ||
+            [app.bundleIdentifier isEqualToString:@"com.electron.ollama"]) {
+            if (app.processIdentifier != pid) {
+                [apps addObject:app];
+            }
+        }
+    }
+
+    for (NSRunningApplication *app in apps) {
+        kill(app.processIdentifier, SIGTERM);
+    }
+
+    NSDate *startTime = [NSDate date];
+    for (NSRunningApplication *app in apps) {
+        while (!app.terminated) {
+            if (-[startTime timeIntervalSinceNow] >= 5) {
+                kill(app.processIdentifier, SIGKILL);
+                break;
+            }
+
+            [[NSRunLoop currentRunLoop] runUntilDate:[NSDate dateWithTimeIntervalSinceNow:0.1]];
+        }
+    }
+}
+
+// askToMoveToApplications offers to move the bundle into /Applications when it
+// runs from elsewhere. Returns true once the bundle has been moved and a relaunch
+// from the new location was requested; false when nothing was moved.
+bool askToMoveToApplications() {
+    NSString *bundlePath = [[NSBundle mainBundle] bundlePath];
+    // trailing slash: match the directory itself, not siblings like /ApplicationsOld
+    if ([bundlePath hasPrefix:@"/Applications/"]) {
+        return false;
+    }
+
+    NSAlert *alert = [[NSAlert alloc] init];
+    [alert setMessageText:@"Move to Applications?"];
+    [alert setInformativeText:@"Ollama works best when run from the Applications directory."];
+    [alert addButtonWithTitle:@"Move to Applications"];
+    [alert addButtonWithTitle:@"Don't move"];
+
+    [NSApp activateIgnoringOtherApps:YES];
+
+    if ([alert runModal] != NSAlertFirstButtonReturn) {
+        return false;
+    }
+
+    // move to applications
+    NSString *newPath = [@"/Applications" stringByAppendingPathComponent:@"Ollama.app"];
+    NSFileManager *fileManager = [NSFileManager defaultManager];
+    NSError *error = nil;
+
+    // Failure is signalled by the return value; the NSError is only meaningful then.
+    if ([fileManager fileExistsAtPath:newPath] && ![fileManager removeItemAtPath:newPath error:&error]) {
+        NSLog(@"Error removing file at %@: %@", newPath, error);
+        return false;
+    }
+
+    if (![fileManager moveItemAtPath:bundlePath toPath:newPath error:&error]) {
+        NSLog(@"Error moving file from %@ to %@: %@", bundlePath, newPath, error);
+        return false;
+    }
+
+    NSLog(@"Opening %@", newPath);
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+    NSRunningApplication *launched = [[NSWorkspace sharedWorkspace] launchApplicationAtURL:[NSURL fileURLWithPath:newPath]
+               options:NSWorkspaceLaunchNewInstance | NSWorkspaceLaunchDefault
+               configuration:@{}
+               error:&error];
+    if (launched == nil) {
+        // previously ignored: the caller still exits, so at least record why nothing reopened
+        NSLog(@"Error opening %@: %@", newPath, error);
+    }
+
+    return true;
+}
+
+// installSymlink points /usr/local/bin/ollama at the "ollama" resource in the app
+// bundle, asking for administrator authorization when the link is missing or
+// stale. Returns 0 on success (or when nothing needs doing) and -1 on failure.
+int installSymlink() {
+    NSString *linkPath = @"/usr/local/bin/ollama";
+    NSFileManager *fileManager = [NSFileManager defaultManager];
+    NSString *symlinkPath = [fileManager destinationOfSymbolicLinkAtPath:linkPath error:nil];
+    NSString *resPath = [[NSBundle mainBundle] pathForResource:@"ollama" ofType:nil];
+
+    // a nil resPath would terminate the ln argument list early
+    if (resPath == nil) {
+        NSLog(@"Failed to locate the ollama binary in the app bundle");
+        return -1;
+    }
+
+    // if the symlink already exists and points to the right place, don't prompt
+    if ([symlinkPath isEqualToString:resPath]) {
+        NSLog(@"symbolic link already exists and points to the right place");
+        return 0;
+    }
+
+    NSString *authorizationPrompt = @"Ollama is trying to install its command line interface (CLI) tool.";
+    AuthorizationRef auth = NULL;
+    OSStatus createStatus = AuthorizationCreate(NULL, kAuthorizationEmptyEnvironment, kAuthorizationFlagDefaults, &auth);
+    if (createStatus != errAuthorizationSuccess) {
+        NSLog(@"Error creating authorization");
+        return -1;
+    }
+
+    NSString * bundleIdentifier = [[NSBundle mainBundle] bundleIdentifier];
+    NSString *rightNameString = [NSString stringWithFormat:@"%@.%@", bundleIdentifier, @"auth3"];
+    const char *rightName = rightNameString.UTF8String;
+    OSStatus getRightResult = AuthorizationRightGet(rightName, NULL);
+    if (getRightResult == errAuthorizationDenied) {
+        if (AuthorizationRightSet(auth, rightName, (__bridge CFTypeRef _Nonnull)(@(kAuthorizationRuleAuthenticateAsAdmin)), (__bridge CFStringRef _Nullable)(authorizationPrompt), NULL, NULL) != errAuthorizationSuccess) {
+            NSLog(@"Failed to set right");
+            AuthorizationFree(auth, kAuthorizationFlagDestroyRights);
+            return -1;
+        }
+    }
+
+    AuthorizationItem right = { .name = rightName, .valueLength = 0, .value = NULL, .flags = 0 };
+    AuthorizationRights rights = { .count = 1, .items = &right };
+    AuthorizationFlags flags = (AuthorizationFlags)(kAuthorizationFlagExtendRights | kAuthorizationFlagInteractionAllowed);
+    AuthorizationEnvironment authorizationEnvironment = {.count = 0, .items = NULL};
+
+    OSStatus copyStatus = AuthorizationCopyRights(auth, &rights, &authorizationEnvironment, flags, NULL);
+    if (copyStatus != errAuthorizationSuccess) {
+        if (copyStatus == errAuthorizationCanceled) {
+            NSLog(@"User cancelled authorization");
+        } else {
+            NSLog(@"Failed copying system domain rights: %d", copyStatus);
+        }
+        AuthorizationFree(auth, kAuthorizationFlagDestroyRights);
+        return -1;
+    }
+
+    const char *toolPath = "/bin/ln";
+    const char *args[] = {"-s", "-F", [resPath UTF8String], [linkPath UTF8String], NULL};
+
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+    // NULL communicationsPipe: ln's output is not read, so no FILE* is opened (and leaked)
+    OSStatus status = AuthorizationExecuteWithPrivileges(auth, toolPath, kAuthorizationFlagDefaults, (char *const *)args, NULL);
+    AuthorizationFree(auth, kAuthorizationFlagDestroyRights); // release on success and failure alike
+    if (status != errAuthorizationSuccess) {
+        NSLog(@"Failed to create symlink");
+        return -1;
+    }
+
+    return 0;
+}
--- a/app/app_windows.go
+++ b/app/app_windows.go
@@ -0,0 +1,166 @@
+package main
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"log"
+	"log/slog"
+	"os"
+	"os/exec"
+	"os/signal"
+	"path/filepath"
+	"strings"
+	"syscall"
+
+	"github.com/ollama/ollama/app/lifecycle"
+	"github.com/ollama/ollama/app/store"
+	"github.com/ollama/ollama/app/tray"
+	"github.com/ollama/ollama/app/updater"
+)
+
+func init() {
+	AppName += ".exe" // NOTE(review): AppName/CLIName have no initial value in main.go's var block — confirm the base names are set, otherwise these become just ".exe"
+	CLIName += ".exe"
+	// Logs, configs, downloads go to LOCALAPPDATA
+	localAppData := os.Getenv("LOCALAPPDATA")
+	AppDataDir = filepath.Join(localAppData, "Ollama")
+	AppLogFile = filepath.Join(AppDataDir, "app.log")
+	ServerLogFile = filepath.Join(AppDataDir, "server.log")
+
+	// Executables are stored in LOCALAPPDATA\Programs\Ollama
+	AppDir = filepath.Join(localAppData, "Programs", "Ollama")
+
+	// Make sure we have PATH set correctly for any spawned children
+	paths := strings.Split(os.Getenv("PATH"), ";")
+	// Check whether AppDir is already on PATH (compared case-insensitively)
+	found := false
+	for _, path := range paths {
+		d, err := filepath.Abs(path)
+		if err != nil {
+			continue
+		}
+		if strings.EqualFold(AppDir, d) {
+			found = true
+		}
+	}
+	if !found {
+		paths = append(paths, AppDir)
+
+		pathVal := strings.Join(paths, ";")
+		slog.Debug("setting PATH=" + pathVal)
+		err := os.Setenv("PATH", pathVal)
+		if err != nil {
+			slog.Error(fmt.Sprintf("failed to update PATH: %s", err))
+		}
+	}
+
+	// Make sure our logging dir exists
+	_, err := os.Stat(AppDataDir)
+	if errors.Is(err, os.ErrNotExist) {
+		if err := os.MkdirAll(AppDataDir, 0o755); err != nil {
+			slog.Error(fmt.Sprintf("create ollama dir %s: %v", AppDataDir, err))
+		}
+	}
+}
+
+func ShowLogs() {
+	cmd_path := "c:\\Windows\\system32\\cmd.exe"
+	slog.Debug(fmt.Sprintf("viewing logs with start %s", AppDataDir))
+	cmd := exec.Command(cmd_path, "/c", "start", AppDataDir)
+	cmd.SysProcAttr = &syscall.SysProcAttr{HideWindow: false, CreationFlags: 0x08000000}
+	err := cmd.Start()
+	if err != nil {
+		slog.Error(fmt.Sprintf("Failed to open log dir: %s", err))
+	}
+}
+
+// Start opens AppDataDir through "cmd.exe /c start", exactly as ShowLogs
+// does; it delegates to ShowLogs so the two copies cannot drift apart.
+//
+// NOTE(review): the previous body was a line-for-line copy of ShowLogs,
+// including its "viewing logs" and "Failed to open log dir" messages.
+// Confirm whether Start was meant to launch something else before adding
+// callers.
+func Start() {
+	ShowLogs()
+}
+
+// run drives the Windows tray app: tray callbacks, server spawn, then t.Run() until quit.
+func run() {
+	initLogging()
+	slog.Info("ollama windows app started")
+
+	ctx, cancel := context.WithCancel(context.Background())
+	var done chan int
+
+	t, err := tray.NewTray()
+	if err != nil {
+		log.Fatalf("Failed to start: %s", err)
+	}
+	callbacks := t.GetCallbacks()
+
+	signals := make(chan os.Signal, 1)
+	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
+
+	go func() {
+		slog.Debug("starting callback loop")
+		for {
+			select {
+			case <-callbacks.Quit:
+				slog.Debug("quit called")
+				t.Quit()
+			case <-signals:
+				slog.Debug("shutting down due to signal")
+				t.Quit()
+			case <-callbacks.Update:
+				err := updater.DoUpgrade(cancel, done)
+				if err != nil {
+					slog.Warn(fmt.Sprintf("upgrade attempt failed: %s", err))
+				}
+			case <-callbacks.ShowLogs:
+				ShowLogs()
+			case <-callbacks.DoFirstUse:
+				err := lifecycle.GetStarted()
+				if err != nil {
+					slog.Warn(fmt.Sprintf("Failed to launch getting started shell: %s", err))
+				}
+			}
+		}
+	}()
+
+	if !store.GetFirstTimeRun() {
+		slog.Debug("First time run")
+		err = t.DisplayFirstUseNotification()
+		if err != nil {
+			slog.Debug(fmt.Sprintf("XXX failed to display first use notification %v", err))
+		}
+		store.SetFirstTimeRun(true)
+	} else {
+		slog.Debug("Not first time, skipping first run notification")
+	}
+
+	if isServerRunning(ctx) {
+		slog.Info("Detected another instance of ollama running, exiting")
+		os.Exit(1)
+	}
+
+	done, err = SpawnServer(ctx, CLIName, ServerOptions{}) // SpawnServer now takes options; zero value adds no env overrides
+	if err != nil {
+		// TODO - should we retry in a backoff loop?
+		// TODO - should we pop up a warning and maybe add a menu item to view application logs?
+		slog.Error(fmt.Sprintf("Failed to spawn ollama server %s", err))
+		done = make(chan int, 1)
+		done <- 1
+	}
+
+	updater.StartBackgroundUpdaterChecker(ctx, t.UpdateAvailable)
+
+	t.Run()
+	cancel()
+	slog.Info("Waiting for ollama server to shutdown...")
+	if done != nil {
+		<-done
+	}
+	slog.Info("Ollama app exiting")
+}
--- a/app/darwin/Ollama.app/Contents/Info.plist
+++ b/app/darwin/Ollama.app/Contents/Info.plist
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+  <dict>
+    <key>CFBundleDisplayName</key>
+    <string>Ollama</string>
+    <key>CFBundleExecutable</key>
+    <string>Ollama</string>
+    <key>CFBundleIconFile</key>
+    <string>icon.icns</string>
+    <key>CFBundleIdentifier</key>
+    <string>com.ollama.ollama</string>
+    <key>CFBundleInfoDictionaryVersion</key>
+    <string>6.0</string>
+    <key>CFBundleName</key>
+    <string>Ollama</string>
+    <key>CFBundlePackageType</key>
+    <string>APPL</string>
+    <key>CFBundleShortVersionString</key>
+    <string>0.0.0</string>
+    <key>CFBundleVersion</key>
+    <string>0.0.0</string>
+    <key>DTCompiler</key>
+    <string>com.apple.compilers.llvm.clang.1_0</string>
+    <key>DTSDKBuild</key>
+    <string>22E245</string>
+    <key>DTSDKName</key>
+    <string>macosx13.3</string>
+    <key>DTXcode</key>
+    <string>1431</string>
+    <key>DTXcodeBuild</key>
+    <string>14E300c</string>
+    <key>LSApplicationCategoryType</key>
+    <string>public.app-category.developer-tools</string>
+    <key>LSMinimumSystemVersion</key>
+    <string>11.0</string>
+    <key>LSUIElement</key>
+    <true/>
+  </dict>
+</plist>
--- a/app/darwin/Ollama.app/Contents/Resources/icon.icns
+++ b/app/darwin/Ollama.app/Contents/Resources/icon.icns
--- a/app/darwin/Ollama.app/Contents/Resources/icon.png
+++ b/app/darwin/Ollama.app/Contents/Resources/icon.png
--- a/app/darwin/Ollama.app/Contents/Resources/icon@2x.png
+++ b/app/darwin/Ollama.app/Contents/Resources/icon@2x.png
--- a/app/darwin/Ollama.app/Contents/Resources/iconDark.png
+++ b/app/darwin/Ollama.app/Contents/Resources/iconDark.png
--- a/app/darwin/Ollama.app/Contents/Resources/iconDark@2x.png
+++ b/app/darwin/Ollama.app/Contents/Resources/iconDark@2x.png
--- a/app/lifecycle/getstarted_nonwindows.go
+++ b/app/lifecycle/getstarted_nonwindows.go
@@ -1,5 +1,3 @@
-//go:build !windows
-
 package lifecycle

 import "fmt"
--- a/app/lifecycle/lifecycle.go
+++ b/app/lifecycle/lifecycle.go
@@ -1,92 +0,0 @@
-package lifecycle
-
-import (
-	"context"
-	"fmt"
-	"log"
-	"log/slog"
-	"os"
-	"os/signal"
-	"syscall"
-
-	"github.com/ollama/ollama/app/store"
-	"github.com/ollama/ollama/app/tray"
-)
-
-func Run() {
-	InitLogging()
-
-	ctx, cancel := context.WithCancel(context.Background())
-	var done chan int
-
-	t, err := tray.NewTray()
-	if err != nil {
-		log.Fatalf("Failed to start: %s", err)
-	}
-	callbacks := t.GetCallbacks()
-
-	signals := make(chan os.Signal, 1)
-	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
-
-	go func() {
-		slog.Debug("starting callback loop")
-		for {
-			select {
-			case <-callbacks.Quit:
-				slog.Debug("quit called")
-				t.Quit()
-			case <-signals:
-				slog.Debug("shutting down due to signal")
-				t.Quit()
-			case <-callbacks.Update:
-				err := DoUpgrade(cancel, done)
-				if err != nil {
-					slog.Warn(fmt.Sprintf("upgrade attempt failed: %s", err))
-				}
-			case <-callbacks.ShowLogs:
-				ShowLogs()
-			case <-callbacks.DoFirstUse:
-				err := GetStarted()
-				if err != nil {
-					slog.Warn(fmt.Sprintf("Failed to launch getting started shell: %s", err))
-				}
-			}
-		}
-	}()
-
-	// Are we first use?
-	if !store.GetFirstTimeRun() {
-		slog.Debug("First time run")
-		err = t.DisplayFirstUseNotification()
-		if err != nil {
-			slog.Debug(fmt.Sprintf("XXX failed to display first use notification %v", err))
-		}
-		store.SetFirstTimeRun(true)
-	} else {
-		slog.Debug("Not first time, skipping first run notification")
-	}
-
-	if IsServerRunning(ctx) {
-		slog.Info("Detected another instance of ollama running, exiting")
-		os.Exit(1)
-	} else {
-		done, err = SpawnServer(ctx, CLIName)
-		if err != nil {
-			// TODO - should we retry in a backoff loop?
-			// TODO - should we pop up a warning and maybe add a menu item to view application logs?
-			slog.Error(fmt.Sprintf("Failed to spawn ollama server %s", err))
-			done = make(chan int, 1)
-			done <- 1
-		}
-	}
-
-	StartBackgroundUpdaterChecker(ctx, t.UpdateAvailable)
-
-	t.Run()
-	cancel()
-	slog.Info("Waiting for ollama server to shutdown...")
-	if done != nil {
-		<-done
-	}
-	slog.Info("Ollama app exiting")
-}
--- a/app/lifecycle/logging_nonwindows.go
+++ b/app/lifecycle/logging_nonwindows.go
@@ -1,9 +0,0 @@
-//go:build !windows
-
-package lifecycle
-
-import "log/slog"
-
-func ShowLogs() {
-	slog.Warn("ShowLogs not yet implemented")
-}
--- a/app/lifecycle/logging_windows.go
+++ b/app/lifecycle/logging_windows.go
@@ -1,19 +0,0 @@
-package lifecycle
-
-import (
-	"fmt"
-	"log/slog"
-	"os/exec"
-	"syscall"
-)
-
-func ShowLogs() {
-	cmd_path := "c:\\Windows\\system32\\cmd.exe"
-	slog.Debug(fmt.Sprintf("viewing logs with start %s", AppDataDir))
-	cmd := exec.Command(cmd_path, "/c", "start", AppDataDir)
-	cmd.SysProcAttr = &syscall.SysProcAttr{HideWindow: false, CreationFlags: 0x08000000}
-	err := cmd.Start()
-	if err != nil {
-		slog.Error(fmt.Sprintf("Failed to open log dir: %s", err))
-	}
-}
--- a/app/lifecycle/paths.go
+++ b/app/lifecycle/paths.go
@@ -70,10 +70,5 @@ func init() {
 			}
 		}

-	} else if runtime.GOOS == "darwin" {
-		// TODO
-		AppName += ".app"
-		// } else if runtime.GOOS == "linux" {
-		// TODO
 	}
 }
--- a/app/lifecycle/logging.go
+++ b/app/lifecycle/logging.go
@@ -1,4 +1,4 @@
-package lifecycle
+package main

 import (
 	"fmt"
@@ -7,7 +7,7 @@ import (
 	"path/filepath"
 )

-func InitLogging() {
+func initLogging() {
 	level := slog.LevelInfo

 	if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
@@ -41,6 +41,4 @@ func InitLogging() {
 	})

 	slog.SetDefault(slog.New(handler))
-
-	slog.Info("ollama app started")
 }
--- a/app/main.go
+++ b/app/main.go
@@ -2,11 +2,15 @@ package main

 // Compile with the following to get rid of the cmd pop up on windows
 // go build -ldflags="-H windowsgui" .
-
-import (
-	"github.com/ollama/ollama/app/lifecycle"
+var (
+	AppName       string
+	CLIName       string
+	AppDir        string
+	AppDataDir    string
+	AppLogFile    string
+	ServerLogFile string
 )

 func main() {
-	lifecycle.Run()
+	run()
 }
--- a/app/lifecycle/server.go
+++ b/app/lifecycle/server.go
@@ -1,4 +1,4 @@
-package lifecycle
+package main

 import (
 	"context"
@@ -14,65 +14,41 @@ import (
 	"github.com/ollama/ollama/api"
 )

-func getCLIFullPath(command string) string {
-	cmdPath := ""
-	appExe, err := os.Executable()
-	if err == nil {
-		cmdPath = filepath.Join(filepath.Dir(appExe), command)
-		_, err := os.Stat(cmdPath)
-		if err == nil {
-			return cmdPath
-		}
-	}
-	cmdPath, err = exec.LookPath(command)
-	if err == nil {
-		_, err := os.Stat(cmdPath)
-		if err == nil {
-			return cmdPath
-		}
-	}
-	pwd, err := os.Getwd()
-	if err == nil {
-		cmdPath = filepath.Join(pwd, command)
-		_, err = os.Stat(cmdPath)
-		if err == nil {
-			return cmdPath
-		}
-	}
-
-	return command
+type ServerOptions struct {
+	Cors       bool
+	Expose     bool
+	ModelsPath string
 }

-func SpawnServer(ctx context.Context, command string) (chan int, error) {
-	done := make(chan int)
+func start(ctx context.Context, command string, options ServerOptions) (*exec.Cmd, error) {
+	cmd := getCmd(ctx, command)

-	logDir := filepath.Dir(ServerLogFile)
-	_, err := os.Stat(logDir)
-	if errors.Is(err, os.ErrNotExist) {
-		if err := os.MkdirAll(logDir, 0o755); err != nil {
-			return done, fmt.Errorf("create ollama server log dir %s: %v", logDir, err)
-		}
+	// Seed Env with the inherited environment before adding overrides: appending
+	// to a nil Env would make the child run with only the variables set here.
+	cmd.Env = cmd.Environ()
+	if options.ModelsPath != "" {
+		cmd.Env = append(cmd.Env, fmt.Sprintf("OLLAMA_MODELS=%s", options.ModelsPath))
+	}
+	if options.Cors {
+		cmd.Env = append(cmd.Env, "OLLAMA_ORIGINS=*")
+	}
+	if options.Expose {
+		cmd.Env = append(cmd.Env, "OLLAMA_HOST=0.0.0.0")
+	}

-	cmd := getCmd(ctx, getCLIFullPath(command))
-	// send stdout and stderr to a file
 	stdout, err := cmd.StdoutPipe()
 	if err != nil {
-		return done, fmt.Errorf("failed to spawn server stdout pipe %s", err)
+		return nil, fmt.Errorf("failed to spawn server stdout pipe: %w", err)
 	}
 	stderr, err := cmd.StderrPipe()
 	if err != nil {
-		return done, fmt.Errorf("failed to spawn server stderr pipe %s", err)
-	}
-	stdin, err := cmd.StdinPipe()
-	if err != nil {
-		return done, fmt.Errorf("failed to spawn server stdin pipe %s", err)
+		return nil, fmt.Errorf("failed to spawn server stderr pipe: %w", err)
 	}

 	// TODO - rotation
 	logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
 	if err != nil {
-		return done, fmt.Errorf("failed to create server log %w", err)
+		return nil, fmt.Errorf("failed to create server log: %w", err)
 	}
 	go func() {
 		defer logFile.Close()
@@ -117,19 +93,38 @@ func SpawnServer(ctx context.Context, command string) (chan int, error) {

 	// run the command and wait for it to finish
 	if err := cmd.Start(); err != nil {
-		return done, fmt.Errorf("failed to start server %w", err)
+		return nil, fmt.Errorf("failed to start server %w", err)
 	}
 	if cmd.Process != nil {
 		slog.Info(fmt.Sprintf("started ollama server with pid %d", cmd.Process.Pid))
 	}
 	slog.Info(fmt.Sprintf("ollama server logs %s", ServerLogFile))

+	return cmd, nil
+}
+
+func SpawnServer(ctx context.Context, command string, options ServerOptions) (chan int, error) {
+	logDir := filepath.Dir(ServerLogFile)
+	_, err := os.Stat(logDir)
+	if errors.Is(err, os.ErrNotExist) {
+		if err := os.MkdirAll(logDir, 0o755); err != nil {
+			return nil, fmt.Errorf("create ollama server log dir %s: %v", logDir, err)
+		}
+	}
+
+	done := make(chan int)
+
 	go func() {
 		// Keep the server running unless we're shuttind down the app
 		crashCount := 0
 		for {
+			slog.Info("starting server...")
+			cmd, err := start(ctx, command, options)
+			if err != nil {
+				slog.Error(fmt.Sprintf("failed to start server %s", err))
+				cmd = &exec.Cmd{} // start returned nil: an unstarted Cmd makes Wait return an error instead of panicking, so the backoff below retries
+			}
 			cmd.Wait() //nolint:errcheck
-			stdin.Close()
 			var code int
 			if cmd.ProcessState != nil {
 				code = cmd.ProcessState.ExitCode()
@@ -143,19 +138,16 @@ func SpawnServer(ctx context.Context, command string) (chan int, error) {
 			default:
 				crashCount++
 				slog.Warn(fmt.Sprintf("server crash %d - exit code %d - respawning", crashCount, code))
-				time.Sleep(500 * time.Millisecond)
-				if err := cmd.Start(); err != nil {
-					slog.Error(fmt.Sprintf("failed to restart server %s", err))
-					// Keep trying, but back off if we keep failing
-					time.Sleep(time.Duration(crashCount) * time.Second)
-				}
+				time.Sleep(500 * time.Millisecond * time.Duration(crashCount))
+				break
 			}
 		}
 	}()
+
 	return done, nil
 }

-func IsServerRunning(ctx context.Context) bool {
+func isServerRunning(ctx context.Context) bool {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
 		slog.Info("unable to connect to server")
--- a/app/lifecycle/server_unix.go
+++ b/app/lifecycle/server_unix.go
@@ -1,6 +1,4 @@
-//go:build !windows
-
-package lifecycle
+package main

 import (
 	"context"
--- a/app/lifecycle/server_windows.go
+++ b/app/lifecycle/server_windows.go
@@ -1,4 +1,4 @@
-package lifecycle
+package main

 import (
 	"context"
--- a/app/tray/tray_nonwindows.go
+++ b/app/tray/tray_nonwindows.go
@@ -1,5 +1,3 @@
-//go:build !windows
-
 package tray

 import (
--- a/app/lifecycle/updater.go
+++ b/app/lifecycle/updater.go
@@ -1,4 +1,4 @@
-package lifecycle
+package updater

 import (
 	"context"
@@ -22,6 +22,10 @@ import (
 	"github.com/ollama/ollama/version"
 )

+var (
+	UpdateStageDir string
+)
+
 var (
 	UpdateCheckURLBase  = "https://ollama.com/api/update"
 	UpdateDownloaded    = false
@@ -123,7 +127,7 @@ func DownloadNewRelease(ctx context.Context, updateResp UpdateResponse) error {
 		slog.Debug("no etag detected, falling back to filename based dedup")
 		etag = "_"
 	}
-	filename := Installer
+	filename := "OllamaSetup.exe"
 	_, params, err := mime.ParseMediaType(resp.Header.Get("content-disposition"))
 	if err == nil {
 		filename = params["filename"]
--- a/app/lifecycle/updater_nonwindows.go
+++ b/app/lifecycle/updater_nonwindows.go
@@ -1,6 +1,4 @@
-//go:build !windows
-
-package lifecycle
+package updater

 import (
 	"context"
--- a/app/lifecycle/updater_windows.go
+++ b/app/lifecycle/updater_windows.go
@@ -1,4 +1,4 @@
-package lifecycle
+package updater

 import (
 	"context"
@@ -9,7 +9,13 @@ import (
 	"path/filepath"
 )

+// init sets UpdateStageDir to the "Ollama\updates" directory underneath the
+// location named by the LOCALAPPDATA environment variable.
+func init() {
+	localAppData := os.Getenv("LOCALAPPDATA")
+	UpdateStageDir = filepath.Join(localAppData, "Ollama", "updates")
+}
+
 func DoUpgrade(cancel context.CancelFunc, done chan int) error {
+	logFile := filepath.Join(os.Getenv("LOCALAPPDATA"), "Ollama", "upgrade.log")
+
 	files, err := filepath.Glob(filepath.Join(UpdateStageDir, "*", "*.exe")) // TODO generalize for multiplatform
 	if err != nil {
 		return fmt.Errorf("failed to lookup downloads: %s", err)
@@ -23,13 +29,13 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 	installerExe := files[0]

 	slog.Info("starting upgrade with " + installerExe)
-	slog.Info("upgrade log file " + UpgradeLogFile)
+	slog.Info("upgrade log file " + logFile)

 	// When running in debug mode, we'll be "verbose" and let the installer pop up and prompt
 	installArgs := []string{
-		"/CLOSEAPPLICATIONS",                    // Quit the tray app if it's still running
-		"/LOG=" + filepath.Base(UpgradeLogFile), // Only relative seems reliable, so set pwd
-		"/FORCECLOSEAPPLICATIONS",               // Force close the tray app - might be needed
+		"/CLOSEAPPLICATIONS",             // Quit the tray app if it's still running
+		"/LOG=" + filepath.Base(logFile), // Only relative seems reliable, so set pwd
+		"/FORCECLOSEAPPLICATIONS",        // Force close the tray app - might be needed
 	}
 	// When we're not in debug mode, make the upgrade as quiet as possible (no GUI, no prompts)
 	// TODO - temporarily disable since we're pinning in debug mode for the preview
@@ -53,7 +59,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 	}

 	slog.Debug(fmt.Sprintf("starting installer: %s %v", installerExe, installArgs))
-	os.Chdir(filepath.Dir(UpgradeLogFile)) //nolint:errcheck
+	os.Chdir(filepath.Dir(logFile)) //nolint:errcheck
 	cmd := exec.Command(installerExe, installArgs...)

 	if err := cmd.Start(); err != nil {
--- a/app/windows/ollama.iss
+++ b/app/windows/ollama.iss
@@ -92,12 +92,8 @@ Source: "..\dist\windows-amd64\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64
 Source: "..\dist\windows-amd64\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
 Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
 Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
-; Assumes v5.7, may need adjustments for v6
-#if GetEnv("HIP_PATH") != ""
-  Source: "{#GetEnv('HIP_PATH')}\bin\hipblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
-  Source: "{#GetEnv('HIP_PATH')}\bin\rocblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
-  ; amdhip64.dll dependency comes from the driver and must be installed already
-  Source: "{#GetEnv('HIP_PATH')}\bin\rocblas\library\*"; DestDir: "{app}\rocm\rocblas\library\"; Flags: ignoreversion
+#if DirExists("..\dist\windows-amd64\rocm")
+  Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
 #endif


@@ -133,7 +129,7 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi


 ;FinishedHeadingLabel=Run your first model
-;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n    ollama run llama2
+;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n    ollama run llama3
 ;ClickFinish=%n

 [Registry]
--- a/app/windows/ollama.rc
+++ b/app/windows/ollama.rc
--- a/app/windows/ollama_welcome.ps1
+++ b/app/windows/ollama_welcome.ps1
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -17,6 +17,7 @@ import (
 	"os"
 	"os/signal"
 	"path/filepath"
+	"regexp"
 	"runtime"
 	"strings"
 	"syscall"
@@ -53,8 +54,6 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	p := progress.NewProgress(os.Stderr)
 	defer p.Stop()

-	bars := make(map[string]*progress.Bar)
-
 	modelfile, err := os.ReadFile(filename)
 	if err != nil {
 		return err
@@ -95,95 +94,16 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 				return err
 			}

-			// TODO make this work w/ adapters
 			if fi.IsDir() {
-				tf, err := os.CreateTemp("", "ollama-tf")
+				// this is likely a safetensors or pytorch directory
+				// TODO make this work w/ adapters
+				tempfile, err := tempZipFiles(path)
 				if err != nil {
 					return err
 				}
-				defer os.RemoveAll(tf.Name())
+				defer os.RemoveAll(tempfile)

-				zf := zip.NewWriter(tf)
-
-				files := []string{}
-
-				tfiles, err := filepath.Glob(filepath.Join(path, "pytorch_model-*.bin"))
-				if err != nil {
-					return err
-				} else if len(tfiles) == 0 {
-					tfiles, err = filepath.Glob(filepath.Join(path, "model-*.safetensors"))
-					if err != nil {
-						return err
-					}
-				}
-
-				files = append(files, tfiles...)
-
-				if len(files) == 0 {
-					return fmt.Errorf("no models were found in '%s'", path)
-				}
-
-				// add the safetensor/torch config file + tokenizer
-				files = append(files, filepath.Join(path, "config.json"))
-				files = append(files, filepath.Join(path, "params.json"))
-				files = append(files, filepath.Join(path, "added_tokens.json"))
-				files = append(files, filepath.Join(path, "tokenizer.model"))
-
-				for _, fn := range files {
-					f, err := os.Open(fn)
-
-					// just skip whatever files aren't there
-					if os.IsNotExist(err) {
-						if strings.HasSuffix(fn, "tokenizer.model") {
-							// try the parent dir before giving up
-							parentDir := filepath.Dir(path)
-							newFn := filepath.Join(parentDir, "tokenizer.model")
-							f, err = os.Open(newFn)
-							if os.IsNotExist(err) {
-								continue
-							} else if err != nil {
-								return err
-							}
-						} else {
-							continue
-						}
-					} else if err != nil {
-						return err
-					}
-
-					fi, err := f.Stat()
-					if err != nil {
-						return err
-					}
-
-					h, err := zip.FileInfoHeader(fi)
-					if err != nil {
-						return err
-					}
-
-					h.Name = filepath.Base(fn)
-					h.Method = zip.Store
-
-					w, err := zf.CreateHeader(h)
-					if err != nil {
-						return err
-					}
-
-					_, err = io.Copy(w, f)
-					if err != nil {
-						return err
-					}
-
-				}
-
-				if err := zf.Close(); err != nil {
-					return err
-				}
-
-				if err := tf.Close(); err != nil {
-					return err
-				}
-				path = tf.Name()
+				path = tempfile
 			}

 			digest, err := createBlob(cmd, client, path)
@@ -191,10 +111,17 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 				return err
 			}

-			modelfile = bytes.ReplaceAll(modelfile, []byte(c.Args), []byte("@"+digest))
+			name := c.Name
+			if c.Name == "model" {
+				name = "from"
+			}
+
+			re := regexp.MustCompile(fmt.Sprintf(`(?im)^(%s)\s+%s\s*$`, name, c.Args))
+			modelfile = re.ReplaceAll(modelfile, []byte("$1 @"+digest))
 		}
 	}

+	bars := make(map[string]*progress.Bar)
 	fn := func(resp api.ProgressResponse) error {
 		if resp.Digest != "" {
 			spinner.Stop()
@@ -228,6 +155,114 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	return nil
 }

+// tempZipFiles bundles the model files found in the directory path into a
+// temporary zip archive and returns the archive's file name. On success the
+// caller owns the returned file and is responsible for removing it.
+//
+// Weights come from the first of these patterns that has matches, all of which
+// sniff as the expected content type: model*.safetensors, pytorch_model*.bin,
+// consolidated*.pth. Every *.json file in path is added as well, plus
+// tokenizer.model when one is found.
+//
+// An error is returned when no weights are found, when a *.json file does not
+// sniff as text/plain, or when a file cannot be read or archived. In every
+// error case the temporary file is closed and removed before returning.
+func tempZipFiles(path string) (string, error) {
+	tempfile, err := os.CreateTemp("", "ollama-tf")
+	if err != nil {
+		return "", err
+	}
+
+	// discard cleans up the partially written archive so that failures do not
+	// leave stray files behind in the temp directory.
+	discard := func(err error) (string, error) {
+		tempfile.Close()           //nolint:errcheck
+		os.Remove(tempfile.Name()) //nolint:errcheck
+		return "", err
+	}
+
+	zipfile := zip.NewWriter(tempfile)
+
+	// detectContentType sniffs up to the first 512 bytes of the file and
+	// returns the media type without any parameters (e.g. "text/plain").
+	detectContentType := func(path string) (string, error) {
+		f, err := os.Open(path)
+		if err != nil {
+			return "", err
+		}
+		defer f.Close()
+
+		var b bytes.Buffer
+		b.Grow(512)
+
+		// files shorter than 512 bytes end with io.EOF, which is not a failure
+		if _, err := io.CopyN(&b, f, 512); err != nil && !errors.Is(err, io.EOF) {
+			return "", err
+		}
+
+		contentType, _, _ := strings.Cut(http.DetectContentType(b.Bytes()), ";")
+		return contentType, nil
+	}
+
+	// glob returns the files matching pattern, or an error if any one of them
+	// does not sniff as contentType.
+	glob := func(pattern, contentType string) ([]string, error) {
+		matches, err := filepath.Glob(pattern)
+		if err != nil {
+			return nil, err
+		}
+
+		for _, match := range matches {
+			if ct, err := detectContentType(match); err != nil {
+				return nil, err
+			} else if ct != contentType {
+				return nil, fmt.Errorf("invalid content type: expected %s for %s, got %s", contentType, match, ct)
+			}
+		}
+
+		return matches, nil
+	}
+
+	// Errors from glob are deliberately ignored while probing for weights: a
+	// mismatch just means this format is not usable and the next one is tried.
+	var files []string
+	if st, _ := glob(filepath.Join(path, "model*.safetensors"), "application/octet-stream"); len(st) > 0 {
+		// safetensors files might be unresolved git lfs references; skip if they are
+		// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
+		files = append(files, st...)
+	} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
+		// pytorch files might also be unresolved git lfs references; skip if they are
+		// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
+		files = append(files, pt...)
+	} else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/octet-stream"); len(pt) > 0 {
+		// pytorch files might also be unresolved git lfs references; skip if they are
+		// covers consolidated.x.pth, consolidated.pth
+		files = append(files, pt...)
+	} else {
+		return discard(errors.New("no safetensors or torch files found"))
+	}
+
+	// add configuration files, json files are detected as text/plain
+	js, err := glob(filepath.Join(path, "*.json"), "text/plain")
+	if err != nil {
+		return discard(err)
+	}
+	files = append(files, js...)
+
+	if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
+		// add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
+		// tokenizer.model might be a unresolved git lfs reference; it is skipped if it is
+		files = append(files, tks...)
+	} else if tks, _ := glob(filepath.Join(path, "**/tokenizer.model"), "text/plain"); len(tks) > 0 {
+		// some times tokenizer.model is in a subdirectory (e.g. meta-llama/Meta-Llama-3-8B)
+		// filepath.Glob has no recursive "**"; this matches one directory level below path.
+		// NOTE(review): text/plain is expected here but application/octet-stream above,
+		// which looks inconsistent - confirm which content type is intended.
+		files = append(files, tks...)
+	}
+
+	// addFile copies one file into the archive under its base name. It is a
+	// separate function so each source file is closed as soon as it has been
+	// copied instead of staying open until tempZipFiles returns.
+	addFile := func(name string) error {
+		f, err := os.Open(name)
+		if err != nil {
+			return err
+		}
+		defer f.Close()
+
+		fi, err := f.Stat()
+		if err != nil {
+			return err
+		}
+
+		zfi, err := zip.FileInfoHeader(fi)
+		if err != nil {
+			return err
+		}
+
+		zf, err := zipfile.CreateHeader(zfi)
+		if err != nil {
+			return err
+		}
+
+		_, err = io.Copy(zf, f)
+		return err
+	}
+
+	for _, file := range files {
+		if err := addFile(file); err != nil {
+			return discard(err)
+		}
+	}
+
+	// Close writes the zip central directory; a failure here means the archive
+	// is unreadable, so it must not be reported as success.
+	if err := zipfile.Close(); err != nil {
+		return discard(err)
+	}
+
+	if err := tempfile.Close(); err != nil {
+		os.Remove(tempfile.Name()) //nolint:errcheck
+		return "", err
+	}
+
+	return tempfile.Name(), nil
+}
+
 func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
 	bin, err := os.Open(path)
 	if err != nil {
--- a/docs/windows.md
+++ b/docs/windows.md
@@ -14,7 +14,7 @@ As this is a preview release, you should expect a few bugs here and there.  If
 you run into a problem you can reach out on
 [Discord](https://discord.gg/ollama), or file an 
 [issue](https://github.com/ollama/ollama/issues).
-Logs will often be helpful in dianosing the problem (see
+Logs will often be helpful in diagnosing the problem (see
 [Troubleshooting](#troubleshooting) below)

 ## System Requirements
--- a/gpu/assets.go
+++ b/gpu/assets.go
@@ -32,9 +32,25 @@ func PayloadsDir() (string, error) {
 				slog.Error("failed to lookup executable path", "error", err)
 				return "", err
 			}
+
+			cwd, err := os.Getwd()
+			if err != nil {
+				slog.Error("failed to lookup working directory", "error", err)
+				return "", err
+			}
+
+			var paths []string
+			for _, root := range []string{appExe, cwd} {
+				paths = append(paths,
+					filepath.Join(root),
+					filepath.Join(root, "windows-"+runtime.GOARCH),
+					filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
+				)
+			}
+
 			// Try a few variations to improve developer experience when building from source in the local tree
-			for _, d := range []string{".", "windows-" + runtime.GOARCH, "dist\\windows-" + runtime.GOARCH} {
-				candidate := filepath.Join(filepath.Dir(appExe), d, "ollama_runners")
+			for _, p := range paths {
+				candidate := filepath.Join(p, "ollama_runners")
 				_, err := os.Stat(candidate)
 				if err == nil {
 					runnersDir = candidate
--- a/gpu/gpu_darwin.go
+++ b/gpu/gpu_darwin.go
@@ -1,5 +1,3 @@
-//go:build darwin
-
 package gpu

 /*
--- a/llm/generate/gen_common.sh
+++ b/llm/generate/gen_common.sh
@@ -21,7 +21,7 @@ init_vars() {
        # TODO - add additional optimization flags...
        CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off ${CMAKE_DEFS}"
    fi
-    case $(uname -s) in 
+    case $(uname -s) in
    "Darwin")
        LIB_EXT="dylib"
        WHOLE_ARCHIVE="-Wl,-force_load"
--- a/llm/generate/gen_linux.sh
+++ b/llm/generate/gen_linux.sh
@@ -165,11 +165,11 @@ if [ -d "${CUDA_LIB_DIR}" ]; then
    fi
    if [ "${ARCH}" == "arm64" ]; then
        echo "ARM CPU detected - disabling unsupported AVX instructions"
-        
+
        # ARM-based CPUs such as M1 and Tegra do not support AVX extensions.
        #
-        # CUDA compute < 6.0 lacks proper FP16 support on ARM. 
-        # Disabling has minimal performance effect while maintaining compatibility. 
+        # CUDA compute < 6.0 lacks proper FP16 support on ARM.
+        # Disabling has minimal performance effect while maintaining compatibility.
        ARM64_DEFS="-DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_CUDA_F16=off"
    fi
    # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
--- a/llm/generate/gen_windows.ps1
+++ b/llm/generate/gen_windows.ps1
@@ -26,16 +26,25 @@ function amdGPUs {
    $GPU_LIST -join ';'
 }

+
 function init_vars {
-    $script:SRC_DIR = $(resolve-path "..\..\")
-    $script:llamacppDir = "../llama.cpp"
+    if (!$script:SRC_DIR) {
+        $script:SRC_DIR = $(resolve-path "..\..\")
+    }
+    if (!$script:llamacppDir) {
+        $script:llamacppDir = "../llama.cpp"
+    }
+    if (!$script:cmakeTargets) {
+        $script:cmakeTargets = @("ollama_llama_server")
+    }
    $script:cmakeDefs = @(
        "-DBUILD_SHARED_LIBS=on",
        "-DLLAMA_NATIVE=off"
        )
-    $script:cmakeTargets = @("ollama_llama_server")
+    $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
    $script:ARCH = "amd64" # arm not yet supported.
    $script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_runners"
+    md "$script:DIST_BASE" -ea 0 > $null
    if ($env:CGO_CFLAGS -contains "-g") {
        $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
        $script:config = "RelWithDebInfo"
@@ -166,137 +175,191 @@ function cleanup {
    }
 }

-init_vars
-git_module_setup
-apply_patches

 # -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
 # -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
 # -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver

-$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")

-if ($null -eq ${env:OLLAMA_SKIP_CPU_GENERATE}) {
+# build_static builds the "llama" and "ggml" targets as static libraries with
+# the MinGW toolchain, for direct linking into the Go binary.
+# It runs unless OLLAMA_SKIP_STATIC_GENERATE is set, and only when
+# OLLAMA_CPU_TARGET is unset or equal to "static".
+# The script-level cmake target list is restored afterwards so later builds
+# are not affected.
+function build_static() {
+    if ((-not "${env:OLLAMA_SKIP_STATIC_GENERATE}") -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "static"))) {
+        # GCC build for direct linking into the Go binary
+        init_vars
+        # cmake will silently fallback to msvc compilers if mingw isn't in the path, so detect and fail fast
+        # as we need this to be compiled by gcc for golang to be able to link with it
+        write-host "Checking for MinGW..."
+        # error action ensures we exit on failure
+        get-command gcc
+        get-command mingw32-make
+        # remember the current targets; they are swapped only for this build
+        $oldTargets = $script:cmakeTargets
+        $script:cmakeTargets = @("llama", "ggml")
+        $script:cmakeDefs = @(
+            "-G", "MinGW Makefiles"
+            "-DCMAKE_C_COMPILER=gcc.exe",
+            "-DCMAKE_CXX_COMPILER=g++.exe",
+            "-DBUILD_SHARED_LIBS=off",
+            "-DLLAMA_NATIVE=off",
+            "-DLLAMA_AVX=off",
+            "-DLLAMA_AVX2=off",
+            "-DLLAMA_AVX512=off",
+            "-DLLAMA_F16C=off",
+            "-DLLAMA_FMA=off")
+        $script:buildDir="../build/windows/${script:ARCH}_static"
+        write-host "Building static library"
+        build
+        $script:cmakeTargets = $oldTargets
+    } else {
+        write-host "Skipping static generation step as requested"
+    }
+}
+
+# build_cpu builds the baseline CPU runner with AVX, AVX2, AVX512, FMA and
+# F16C all switched off, then signs it and installs it into "$DIST_BASE\cpu".
+# It runs unless OLLAMA_SKIP_CPU_GENERATE is set, and only when
+# OLLAMA_CPU_TARGET is unset or equal to "cpu".
+function build_cpu() {
+    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu"))) {
+        # remaining llama.cpp builds use MSVC
+        init_vars
+        # CPU-specific flags go first, followed by the defaults set by init_vars
+        $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+        $script:buildDir="../build/windows/${script:ARCH}/cpu"
+        $script:distDir="$script:DIST_BASE\cpu"
+        # "LCD" presumably stands for lowest common denominator - TODO confirm
+        write-host "Building LCD CPU"
+        build
+        sign
+        install
+    } else {
+        write-host "Skipping CPU generation step as requested"
+    }
+}
+
+# build_cpu_avx builds the CPU runner with AVX on and AVX2, AVX512, FMA and
+# F16C off, then signs it and installs it into "$DIST_BASE\cpu_avx".
+# It runs unless OLLAMA_SKIP_CPU_GENERATE is set, and only when
+# OLLAMA_CPU_TARGET is unset or equal to "cpu_avx".
+function build_cpu_avx() {
+    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx"))) {
+        init_vars
+        # CPU-specific flags go first, followed by the defaults set by init_vars
+        $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+        $script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
+        $script:distDir="$script:DIST_BASE\cpu_avx"
+        write-host "Building AVX CPU"
+        build
+        sign
+        install
+    } else {
+        write-host "Skipping CPU AVX generation step as requested"
+    }
+}
+
+# build_cpu_avx2 builds the CPU runner with AVX, AVX2, FMA and F16C on and
+# AVX512 off, then signs it and installs it into "$DIST_BASE\cpu_avx2".
+# It runs unless OLLAMA_SKIP_CPU_GENERATE is set, and only when
+# OLLAMA_CPU_TARGET is unset or equal to "cpu_avx2".
+function build_cpu_avx2() {
+    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx2"))) {
+        init_vars
+        # CPU-specific flags go first, followed by the defaults set by init_vars
+        $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
+        $script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
+        $script:distDir="$script:DIST_BASE\cpu_avx2"
+        write-host "Building AVX2 CPU"
+        build
+        sign
+        install
+    } else {
+        write-host "Skipping CPU AVX2 generation step as requested"
+    }
+}
+
+# build_cuda builds the CUDA runner, signs and installs it into
+# "$DIST_BASE\cuda<variant>", then copies the cudart, cublas and cublasLt DLLs
+# from CUDA_LIB_DIR into the dist\windows-<arch> directory.
+# It runs only when OLLAMA_SKIP_CUDA_GENERATE is not set and
+# $script:CUDA_LIB_DIR is non-empty.
+# Extra cmake definitions can be supplied through OLLAMA_CUSTOM_CUDA_DEFS.
+function build_cuda() {
+    if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${script:CUDA_LIB_DIR}")) {
+        # Then build cuda as a dynamically loaded library
+        $nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
+        # the variant suffix is the name of the directory two levels above nvcc.exe
+        $script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
+        if ($null -ne $script:CUDA_VERSION) {
+            $script:CUDA_VARIANT="_"+$script:CUDA_VERSION
+        }
+        init_vars
+        $script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
+        $script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT"
+        $script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUDA=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
+        if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
+            write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
+            # the custom definitions are appended as a single array element
+            $script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
+            write-host "building custom CUDA GPU"
+        }
+        build
+        sign
+        install
+
+        write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\"
+        cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
+        cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
+        cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
+    } else {
+        write-host "Skipping CUDA generation step"
+    }
+}
+
+# build_rocm builds the ROCm (AMD) runner with Ninja and clang, signs and
+# installs it into "$DIST_BASE\rocm<variant>", then recreates
+# dist\windows-<arch>\rocm with hipblas.dll, rocblas.dll and the rocblas
+# library files copied from HIP_PATH.
+# It runs only when OLLAMA_SKIP_ROCM_GENERATE is not set and HIP_PATH is set.
+# Side effects on the calling environment: HIP_PATH\bin is prepended to PATH
+# and the LIB environment variable is cleared.
+# Extra cmake definitions can be supplied through OLLAMA_CUSTOM_ROCM_DEFS.
+function build_rocm() {
+    if ((-not "${env:OLLAMA_SKIP_ROCM_GENERATE}") -and ("${env:HIP_PATH}")) {
+        # the variant suffix is "_v" plus the name of the HIP_PATH directory
+        $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
+        if ($null -ne $script:ROCM_VERSION) {
+            $script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
+        }
+
+        init_vars
+        $script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
+        $script:distDir="$script:DIST_BASE\rocm$script:ROCM_VARIANT"
+        $script:cmakeDefs += @(
+            "-G", "Ninja", 
+            "-DCMAKE_C_COMPILER=clang.exe",
+            "-DCMAKE_CXX_COMPILER=clang++.exe",
+            "-DLLAMA_HIPBLAS=on",
+            "-DHIP_PLATFORM=amd",
+            "-DLLAMA_AVX=on",
+            "-DLLAMA_AVX2=off",
+            "-DCMAKE_POSITION_INDEPENDENT_CODE=on",
+            "-DAMDGPU_TARGETS=$(amdGPUs)",
+            "-DGPU_TARGETS=$(amdGPUs)"
+            )
+
+        # Make sure the ROCm binary dir is first in the path
+        $env:PATH="$env:HIP_PATH\bin;$env:PATH"
+
+        # We have to clobber the LIB var from the developer shell for clang to work properly
+        $env:LIB=""
+        if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
+            write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
+            # the custom definitions are appended as a single array element
+            $script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
+            write-host "building custom ROCM GPU"
+        }
+        write-host "Building ROCm"
+        build
+        # Ninja doesn't prefix with config name
+        ${script:config}=""
+        # list the DLLs the built server depends on, when dumpbin is available
+        if ($null -ne $script:DUMPBIN) {
+            & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
+        }
+        sign
+        install
+
+        # Assumes v5.7, may need adjustments for v6
+        # start from an empty rocm directory so stale files from earlier builds are not shipped
+        rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
+        md "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" -ea 0 > $null
+        cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
+        cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
+        # amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
+        cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\"
+    } else {
+        write-host "Skipping ROCm generation step"
+    }
+}

-# GCC build for direct linking into the Go binary
 init_vars
-# cmake will silently fallback to msvc compilers if mingw isn't in the path, so detect and fail fast
-# as we need this to be compiled by gcc for golang to be able to link with itx
-write-host "Checking for MinGW..."
-# error action ensures we exit on failure
-get-command gcc
-get-command mingw32-make
-$script:cmakeTargets = @("llama", "ggml")
-$script:cmakeDefs = @(
-    "-G", "MinGW Makefiles"
-    "-DCMAKE_C_COMPILER=gcc.exe",
-    "-DCMAKE_CXX_COMPILER=g++.exe",
-    "-DBUILD_SHARED_LIBS=off",
-    "-DLLAMA_NATIVE=off",
-    "-DLLAMA_AVX=off",
-    "-DLLAMA_AVX2=off",
-    "-DLLAMA_AVX512=off",
-    "-DLLAMA_F16C=off",
-    "-DLLAMA_FMA=off")
-$script:buildDir="../build/windows/${script:ARCH}_static"
-write-host "Building static library"
-build
+if ($($args.count) -eq 0) {
+    git_module_setup
+    apply_patches
+    build_static
+    build_cpu
+    build_cpu_avx
+    build_cpu_avx2
+    build_cuda
+    build_rocm

-# remaining llama.cpp builds use MSVC 
-    init_vars
-    $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
-    $script:buildDir="../build/windows/${script:ARCH}/cpu"
-    $script:distDir="$script:DIST_BASE\cpu"
-    write-host "Building LCD CPU"
-    build
-    sign
-    install
-
-    init_vars
-    $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
-    $script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
-    $script:distDir="$script:DIST_BASE\cpu_avx"
-    write-host "Building AVX CPU"
-    build
-    sign
-    install
-
-    init_vars
-    $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
-    $script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
-    $script:distDir="$script:DIST_BASE\cpu_avx2"
-    write-host "Building AVX2 CPU"
-    build
-    sign
-    install
+    cleanup
+    write-host "`ngo generate completed.  LLM runners: $(get-childitem -path $script:DIST_BASE)"
 } else {
-    write-host "Skipping CPU generation step as requested"
-}
-
-if ($null -ne $script:CUDA_LIB_DIR) {
-    # Then build cuda as a dynamically loaded library
-    $nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
-    $script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
-    if ($null -ne $script:CUDA_VERSION) {
-        $script:CUDA_VARIANT="_"+$script:CUDA_VERSION
-    }
-    init_vars
-    $script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
-    $script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT"
-    $script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUDA=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
-    if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
-        write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
-        $script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
-        write-host "building custom CUDA GPU"
-    }
-    build
-    sign
-    install
-}
-
-if ($null -ne $env:HIP_PATH) {
-    $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
-    if ($null -ne $script:ROCM_VERSION) {
-        $script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
-    }
-
-    init_vars
-    $script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
-    $script:distDir="$script:DIST_BASE\rocm$script:ROCM_VARIANT"
-    $script:cmakeDefs += @(
-        "-G", "Ninja", 
-        "-DCMAKE_C_COMPILER=clang.exe",
-        "-DCMAKE_CXX_COMPILER=clang++.exe",
-        "-DLLAMA_HIPBLAS=on",
-        "-DHIP_PLATFORM=amd",
-        "-DLLAMA_AVX=on",
-        "-DLLAMA_AVX2=off",
-        "-DCMAKE_POSITION_INDEPENDENT_CODE=on",
-        "-DAMDGPU_TARGETS=$(amdGPUs)",
-        "-DGPU_TARGETS=$(amdGPUs)"
-        )
-
-    # Make sure the ROCm binary dir is first in the path
-    $env:PATH="$env:HIP_PATH\bin;$env:PATH"
-
-    # We have to clobber the LIB var from the developer shell for clang to work properly
-    $env:LIB=""
-    if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
-        write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
-        $script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
-        write-host "building custom ROCM GPU"
-    }
-    write-host "Building ROCm"
-    build
-    # Ninja doesn't prefix with config name
-    ${script:config}=""
-    if ($null -ne $script:DUMPBIN) {
-        & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
-    }
-    sign
-    install
-}
-
-
-cleanup
-write-host "`ngo generate completed.  LLM runners: $(get-childitem -path $script:DIST_BASE)"
+    for ( $i = 0; $i -lt $args.count; $i++ ) {
+        write-host "performing $($args[$i])"
+        & $($args[$i])
+    } 
+}
--- a/llm/llama.cpp
+++ b/llm/llama.cpp
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -5,7 +5,6 @@ import (
 	"log/slog"
 	"os"
 	"strconv"
-	"strings"

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/format"
@@ -100,8 +99,26 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
 		return 0, 0
 	}

-	var layerCount int
 	layers := ggml.Tensors().Layers()
+
+	var memoryLayerOutput uint64
+	if layer, ok := layers["output_norm"]; ok {
+		memoryLayerOutput += layer.size()
+	}
+
+	if layer, ok := layers["output"]; ok {
+		memoryLayerOutput += layer.size()
+	} else if layer, ok := layers["token_embd"]; ok {
+		memoryLayerOutput += layer.size()
+	}
+
+	if gpus[0].Library == "metal" && opts.UseMMap {
+		// memory is preallocated for output tensors
+		memoryRequiredTotal += memoryLayerOutput
+		memoryRequiredPartial += memoryLayerOutput
+	}
+
+	var layerCount int
 	for i := 0; i < int(ggml.KV().BlockCount()); i++ {
 		memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size()

@@ -115,15 +132,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
 		}
 	}

-	var memoryLayerOutput uint64
-	for k, v := range layers {
-		if !strings.HasPrefix(k, "blk.") {
-			memoryLayerOutput += v.size()
-		}
+	if gpus[0].Library != "metal" || !opts.UseMMap {
+		// memory was not preallocated for output tensors
+		memoryRequiredTotal += memoryLayerOutput
 	}

-	memoryRequiredTotal += memoryLayerOutput
-
 	if memoryAvailable > memoryRequiredTotal {
 		layerCount = int(ggml.KV().BlockCount()) + 1
 		memoryRequiredPartial = memoryRequiredTotal
--- a/llm/patches/02-clip-log.diff
+++ b/llm/patches/02-clip-log.diff
@@ -0,0 +1,12 @@
+diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
+index e431c7f7..f077e688 100644
+--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
+@@ -3,6 +3,7 @@
+ // I'll gradually clean and extend it
+ // Note: Even when using identical normalized image inputs (see normalize_image_u8_to_f32()) we have a significant difference in resulting embeddings compared to pytorch
+ #include "clip.h"
+#include "common.h"
+ #include "log.h"
+ #include "ggml.h"
+ #include "ggml-alloc.h"
--- a/llm/patches/04-metal.diff
+++ b/llm/patches/04-metal.diff
@@ -0,0 +1,45 @@
+diff --git a/ggml-metal.m b/ggml-metal.m
+index 0207b787..b5e9884b 100644
+--- a/ggml-metal.m
++++ b/ggml-metal.m
+@@ -1396,27 +1396,23 @@ static enum ggml_status ggml_metal_graph_compute(
+                         // to the matrix-vector kernel
+                         int ne11_mm_min = 1;
+ 
+-#if 0
+                         // the numbers below are measured on M2 Ultra for 7B and 13B models
+                         // these numbers do not translate to other devices or model sizes
+                         // TODO: need to find a better approach
+-                        if ([ctx->device.name isEqualToString:@"Apple M2 Ultra"]) {
+-                            switch (src0t) {
+-                                case GGML_TYPE_F16:  ne11_mm_min = 2;  break;
+-                                case GGML_TYPE_Q8_0: ne11_mm_min = 7;  break;
+-                                case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
+-                                case GGML_TYPE_Q3_K: ne11_mm_min = 7;  break;
+-                                case GGML_TYPE_Q4_0:
+-                                case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
+-                                case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
+-                                case GGML_TYPE_Q5_0:                          // not tested yet
+-                                case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
+-                                case GGML_TYPE_Q5_K: ne11_mm_min = 7;  break;
+-                                case GGML_TYPE_Q6_K: ne11_mm_min = 7;  break;
+-                                default:             ne11_mm_min = 1;  break;
+-                            }
++                       switch (src0t) {
++                           case GGML_TYPE_F16:  ne11_mm_min = 2;  break;
++                           case GGML_TYPE_Q8_0: ne11_mm_min = 7;  break;
++                           case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
++                           case GGML_TYPE_Q3_K: ne11_mm_min = 7;  break;
++                           case GGML_TYPE_Q4_0:
++                           case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
++                           case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
++                           case GGML_TYPE_Q5_0:                          // not tested yet
++                           case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
++                           case GGML_TYPE_Q5_K: ne11_mm_min = 7;  break;
++                           case GGML_TYPE_Q6_K: ne11_mm_min = 7;  break;
++                           default:             ne11_mm_min = 1;  break;
+                         }
+-#endif
+ 
+                         // for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
+                         // AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
--- a/llm/server.go
+++ b/llm/server.go
@@ -442,7 +442,7 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
 		select {
 		case <-ctx.Done():
 			slog.Info("context expired before server started")
-			return fmt.Errorf("timed out waiting for llama runner to start")
+			return fmt.Errorf("timed out waiting for llama runner to start: %w", ctx.Err())
 		case err := <-s.done:
 			msg := ""
 			if s.status != nil && s.status.LastErrMsg != "" {
@@ -560,6 +560,13 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
 		return err
 	}
 	defer s.sem.Release(1)
+
+	// allow at most 10 "context shifts" worth of tokens (10 * num_ctx) so generation cannot run forever
+	if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
+		req.Options.NumPredict = 10 * s.options.NumCtx
+		slog.Debug("setting token limit to 10x num_ctx", "num_ctx", s.options.NumCtx, "num_predict", req.Options.NumPredict)
+	}
+
 	request := map[string]any{
 		"prompt":            req.Prompt,
 		"stream":            true,
--- a/macapp/.eslintrc.json
+++ b/macapp/.eslintrc.json
@@ -1,16 +0,0 @@
-{
-  "env": {
-    "browser": true,
-    "es6": true,
-    "node": true
-  },
-  "extends": [
-    "eslint:recommended",
-    "plugin:@typescript-eslint/eslint-recommended",
-    "plugin:@typescript-eslint/recommended",
-    "plugin:import/recommended",
-    "plugin:import/electron",
-    "plugin:import/typescript"
-  ],
-  "parser": "@typescript-eslint/parser"
-}
--- a/macapp/.gitignore
+++ b/macapp/.gitignore
@@ -1,92 +0,0 @@
-# Logs
-logs
-*.log
-npm-debug.log*
-yarn-debug.log*
-yarn-error.log*
-lerna-debug.log*
-
-# Diagnostic reports (https://nodejs.org/api/report.html)
-report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
-
-# Runtime data
-pids
-*.pid
-*.seed
-*.pid.lock
-.DS_Store
-
-# Directory for instrumented libs generated by jscoverage/JSCover
-lib-cov
-
-# Coverage directory used by tools like istanbul
-coverage
-*.lcov
-
-# nyc test coverage
-.nyc_output
-
-# node-waf configuration
-.lock-wscript
-
-# Compiled binary addons (https://nodejs.org/api/addons.html)
-build/Release
-
-# Dependency directories
-node_modules/
-jspm_packages/
-
-# TypeScript v1 declaration files
-typings/
-
-# TypeScript cache
-*.tsbuildinfo
-
-# Optional npm cache directory
-.npm
-
-# Optional eslint cache
-.eslintcache
-
-# Optional REPL history
-.node_repl_history
-
-# Output of 'npm pack'
-*.tgz
-
-# Yarn Integrity file
-.yarn-integrity
-
-# dotenv environment variables file
-.env
-.env.test
-
-# parcel-bundler cache (https://parceljs.org/)
-.cache
-
-# next.js build output
-.next
-
-# nuxt.js build output
-.nuxt
-
-# vuepress build output
-.vuepress/dist
-
-# Serverless directories
-.serverless/
-
-# FuseBox cache
-.fusebox/
-
-# DynamoDB Local files
-.dynamodb/
-
-# Webpack
-.webpack/
-
-# Vite
-.vite/
-
-# Electron-Forge
-out/
--- a/macapp/README.md
+++ b/macapp/README.md
@@ -1,21 +0,0 @@
-# Desktop
-
-This app builds upon Ollama to provide a desktop experience for running models.
-
-## Developing
-
-First, build the `ollama` binary:
-
-```
-cd ..
-go build .
-```
-
-Then run the desktop app with `npm start`:
-
-```
-cd macapp
-npm install
-npm start
-```
-
--- a/macapp/assets/iconDarkTemplate.png
+++ b/macapp/assets/iconDarkTemplate.png
--- a/macapp/assets/iconDarkTemplate@2x.png
+++ b/macapp/assets/iconDarkTemplate@2x.png
--- a/macapp/assets/iconDarkUpdateTemplate.png
+++ b/macapp/assets/iconDarkUpdateTemplate.png
--- a/macapp/assets/iconDarkUpdateTemplate@2x.png
+++ b/macapp/assets/iconDarkUpdateTemplate@2x.png
--- a/macapp/assets/iconTemplate.png
+++ b/macapp/assets/iconTemplate.png
--- a/macapp/assets/iconTemplate@2x.png
+++ b/macapp/assets/iconTemplate@2x.png
--- a/macapp/assets/iconUpdateTemplate.png
+++ b/macapp/assets/iconUpdateTemplate.png
--- a/macapp/assets/iconUpdateTemplate@2x.png
+++ b/macapp/assets/iconUpdateTemplate@2x.png
--- a/macapp/forge.config.ts
+++ b/macapp/forge.config.ts
@@ -1,78 +0,0 @@
-import type { ForgeConfig } from '@electron-forge/shared-types'
-import { MakerSquirrel } from '@electron-forge/maker-squirrel'
-import { MakerZIP } from '@electron-forge/maker-zip'
-import { PublisherGithub } from '@electron-forge/publisher-github'
-import { AutoUnpackNativesPlugin } from '@electron-forge/plugin-auto-unpack-natives'
-import { WebpackPlugin } from '@electron-forge/plugin-webpack'
-import * as path from 'path'
-import * as fs from 'fs'
-
-import { mainConfig } from './webpack.main.config'
-import { rendererConfig } from './webpack.renderer.config'
-
-const packageJson = JSON.parse(fs.readFileSync(path.resolve(__dirname, './package.json'), 'utf8'))
-
-const config: ForgeConfig = {
-  packagerConfig: {
-    appVersion: process.env.VERSION || packageJson.version,
-    asar: true,
-    icon: './assets/icon.icns',
-    extraResource: [
-      '../dist/ollama',
-      path.join(__dirname, './assets/iconTemplate.png'),
-      path.join(__dirname, './assets/iconTemplate@2x.png'),
-      path.join(__dirname, './assets/iconUpdateTemplate.png'),
-      path.join(__dirname, './assets/iconUpdateTemplate@2x.png'),
-      path.join(__dirname, './assets/iconDarkTemplate.png'),
-      path.join(__dirname, './assets/iconDarkTemplate@2x.png'),
-      path.join(__dirname, './assets/iconDarkUpdateTemplate.png'),
-      path.join(__dirname, './assets/iconDarkUpdateTemplate@2x.png'),
-    ],
-    ...(process.env.SIGN
-      ? {
-          osxSign: {
-            identity: process.env.APPLE_IDENTITY,
-          },
-          osxNotarize: {
-            tool: 'notarytool',
-            appleId: process.env.APPLE_ID || '',
-            appleIdPassword: process.env.APPLE_PASSWORD || '',
-            teamId: process.env.APPLE_TEAM_ID || '',
-          },
-        }
-      : {}),
-    osxUniversal: {
-      x64ArchFiles: '**/ollama',
-    },
-  },
-  rebuildConfig: {},
-  makers: [new MakerSquirrel({}), new MakerZIP({}, ['darwin'])],
-  hooks: {
-    readPackageJson: async (_, packageJson) => {
-      return { ...packageJson, version: process.env.VERSION || packageJson.version }
-    },
-  },
-  plugins: [
-    new AutoUnpackNativesPlugin({}),
-    new WebpackPlugin({
-      mainConfig,
-      devContentSecurityPolicy: `default-src * 'unsafe-eval' 'unsafe-inline'; img-src data: 'self'`,
-      renderer: {
-        config: rendererConfig,
-        nodeIntegration: true,
-        entryPoints: [
-          {
-            html: './src/index.html',
-            js: './src/renderer.tsx',
-            name: 'main_window',
-            preload: {
-              js: './src/preload.ts',
-            },
-          },
-        ],
-      },
-    }),
-  ],
-}
-
-export default config
--- a/macapp/package-lock.json
+++ b/macapp/package-lock.json
--- a/macapp/package.json
+++ b/macapp/package.json
@@ -1,84 +0,0 @@
-{
-  "name": "ollama",
-  "productName": "Ollama",
-  "version": "0.0.0",
-  "description": "ollama",
-  "main": ".webpack/main",
-  "scripts": {
-    "start": "electron-forge start",
-    "package": "electron-forge package --arch universal",
-    "package:sign": "SIGN=1 electron-forge package --arch universal",
-    "make": "electron-forge make --arch universal",
-    "make:sign": "SIGN=1 electron-forge make --arch universal",
-    "publish": "SIGN=1 electron-forge publish",
-    "lint": "eslint --ext .ts,.tsx .",
-    "format": "prettier --check . --ignore-path .gitignore",
-    "format:fix": "prettier --write . --ignore-path .gitignore"
-  },
-  "keywords": [],
-  "author": {
-    "name": "Jeffrey Morgan",
-    "email": "jmorganca@gmail.com"
-  },
-  "license": "MIT",
-  "devDependencies": {
-    "@babel/core": "^7.22.5",
-    "@babel/preset-react": "^7.22.5",
-    "@electron-forge/cli": "^6.2.1",
-    "@electron-forge/maker-deb": "^6.2.1",
-    "@electron-forge/maker-rpm": "^6.2.1",
-    "@electron-forge/maker-squirrel": "^6.2.1",
-    "@electron-forge/maker-zip": "^6.2.1",
-    "@electron-forge/plugin-auto-unpack-natives": "^6.2.1",
-    "@electron-forge/plugin-webpack": "^6.2.1",
-    "@electron-forge/publisher-github": "^6.2.1",
-    "@electron/universal": "^1.4.1",
-    "@svgr/webpack": "^8.0.1",
-    "@types/chmodr": "^1.0.0",
-    "@types/node": "^20.4.0",
-    "@types/react": "^18.2.14",
-    "@types/react-dom": "^18.2.6",
-    "@types/uuid": "^9.0.2",
-    "@typescript-eslint/eslint-plugin": "^5.60.0",
-    "@typescript-eslint/parser": "^5.60.0",
-    "@vercel/webpack-asset-relocator-loader": "^1.7.3",
-    "babel-loader": "^9.1.2",
-    "chmodr": "^1.2.0",
-    "copy-webpack-plugin": "^11.0.0",
-    "css-loader": "^6.8.1",
-    "electron": "25.9.2",
-    "eslint": "^8.43.0",
-    "eslint-plugin-import": "^2.27.5",
-    "fork-ts-checker-webpack-plugin": "^7.3.0",
-    "node-loader": "^2.0.0",
-    "postcss": "^8.4.24",
-    "postcss-import": "^15.1.0",
-    "postcss-loader": "^7.3.3",
-    "postcss-preset-env": "^8.5.1",
-    "prettier": "^2.8.8",
-    "prettier-plugin-tailwindcss": "^0.3.0",
-    "style-loader": "^3.3.3",
-    "svg-inline-loader": "^0.8.2",
-    "tailwindcss": "^3.3.2",
-    "ts-loader": "^9.4.3",
-    "ts-node": "^10.9.1",
-    "typescript": "~4.5.4",
-    "url-loader": "^4.1.1",
-    "webpack": "^5.88.0",
-    "webpack-cli": "^5.1.4",
-    "webpack-dev-server": "^4.15.1"
-  },
-  "dependencies": {
-    "@electron/remote": "^2.0.10",
-    "@heroicons/react": "^2.0.18",
-    "@segment/analytics-node": "^1.0.0",
-    "copy-to-clipboard": "^3.3.3",
-    "electron-squirrel-startup": "^1.0.0",
-    "electron-store": "^8.1.0",
-    "react": "^18.2.0",
-    "react-dom": "^18.2.0",
-    "uuid": "^9.0.0",
-    "winston": "^3.10.0",
-    "winston-daily-rotate-file": "^4.7.1"
-  }
-}
--- a/macapp/postcss.config.js
+++ b/macapp/postcss.config.js
@@ -1,7 +0,0 @@
-module.exports = {
-  plugins: {
-    'postcss-import': {},
-    tailwindcss: {},
-    autoprefixer: {},
-  },
-}
--- a/macapp/src/app.css
+++ b/macapp/src/app.css
@@ -1,34 +0,0 @@
-@tailwind base;
-@tailwind components;
-@tailwind utilities;
-
-html,
-body {
-  background: transparent;
-}
-
-.drag {
-  -webkit-app-region: drag;
-}
-
-.no-drag {
-  -webkit-app-region: no-drag;
-}
-
-.blink {
-  -webkit-animation: 1s blink step-end infinite;
-  -moz-animation: 1s blink step-end infinite;
-  -ms-animation: 1s blink step-end infinite;
-  -o-animation: 1s blink step-end infinite;
-  animation: 1s blink step-end infinite;
-}
-
-@keyframes blink {
-  from,
-  to {
-    color: transparent;
-  }
-  50% {
-    color: black;
-  }
-}
--- a/macapp/src/app.tsx
+++ b/macapp/src/app.tsx
@@ -1,122 +0,0 @@
-import { useState } from 'react'
-import copy from 'copy-to-clipboard'
-import { CheckIcon, DocumentDuplicateIcon } from '@heroicons/react/24/outline'
-import Store from 'electron-store'
-import { getCurrentWindow, app } from '@electron/remote'
-
-import { install } from './install'
-import OllamaIcon from './ollama.svg'
-
-const store = new Store()
-
-enum Step {
-  WELCOME = 0,
-  CLI,
-  FINISH,
-}
-
-export default function () {
-  const [step, setStep] = useState<Step>(Step.WELCOME)
-  const [commandCopied, setCommandCopied] = useState<boolean>(false)
-
-  const command = 'ollama run llama2'
-
-  return (
-    <div className='drag'>
-      <div className='mx-auto flex min-h-screen w-full flex-col justify-between bg-white px-4 pt-16'>
-        {step === Step.WELCOME && (
-          <>
-            <div className='mx-auto text-center'>
-              <h1 className='mb-6 mt-4 text-2xl tracking-tight text-gray-900'>Welcome to Ollama</h1>
-              <p className='mx-auto w-[65%] text-sm text-gray-400'>
-                Let's get you up and running with your own large language models.
-              </p>
-              <button
-                onClick={() => setStep(Step.CLI)}
-                className='no-drag rounded-dm mx-auto my-8 w-[40%] rounded-md bg-black px-4 py-2 text-sm text-white hover:brightness-110'
-              >
-                Next
-              </button>
-            </div>
-            <div className='mx-auto'>
-              <OllamaIcon />
-            </div>
-          </>
-        )}
-        {step === Step.CLI && (
-          <>
-            <div className='mx-auto flex flex-col space-y-28 text-center'>
-              <h1 className='mt-4 text-2xl tracking-tight text-gray-900'>Install the command line</h1>
-              <pre className='mx-auto text-4xl text-gray-400'>&gt; ollama</pre>
-              <div className='mx-auto'>
-                <button
-                  onClick={async () => {
-                    try {
-                      await install()
-                      setStep(Step.FINISH)
-                    } catch (e) {
-                      console.error('could not install: ', e)
-                    } finally {
-                      getCurrentWindow().show()
-                      getCurrentWindow().focus()
-                    }
-                  }}
-                  className='no-drag rounded-dm mx-auto w-[60%] rounded-md bg-black px-4 py-2 text-sm text-white hover:brightness-110'
-                >
-                  Install
-                </button>
-                <p className='mx-auto my-4 w-[70%] text-xs text-gray-400'>
-                  You will be prompted for administrator access
-                </p>
-              </div>
-            </div>
-          </>
-        )}
-        {step === Step.FINISH && (
-          <>
-            <div className='mx-auto flex flex-col space-y-20 text-center'>
-              <h1 className='mt-4 text-2xl tracking-tight text-gray-900'>Run your first model</h1>
-              <div className='flex flex-col'>
-                <div className='group relative flex items-center'>
-                  <pre className='language-none text-2xs w-full rounded-md bg-gray-100 px-4 py-3 text-start leading-normal'>
-                    {command}
-                  </pre>
-                  <button
-                    className={`no-drag absolute right-[5px] px-2 py-2 ${
-                      commandCopied
-                        ? 'text-gray-900 opacity-100 hover:cursor-auto'
-                        : 'text-gray-200 opacity-50 hover:cursor-pointer'
-                    } hover:font-bold hover:text-gray-900 group-hover:opacity-100`}
-                    onClick={() => {
-                      copy(command)
-                      setCommandCopied(true)
-                      setTimeout(() => setCommandCopied(false), 3000)
-                    }}
-                  >
-                    {commandCopied ? (
-                      <CheckIcon className='h-4 w-4 font-bold text-gray-500' />
-                    ) : (
-                      <DocumentDuplicateIcon className='h-4 w-4 text-gray-500' />
-                    )}
-                  </button>
-                </div>
-                <p className='mx-auto my-4 w-[70%] text-xs text-gray-400'>
-                  Run this command in your favorite terminal.
-                </p>
-              </div>
-              <button
-                onClick={() => {
-                  store.set('first-time-run', true)
-                  window.close()
-                }}
-                className='no-drag rounded-dm mx-auto w-[60%] rounded-md bg-black px-4 py-2 text-sm text-white hover:brightness-110'
-              >
-                Finish
-              </button>
-            </div>
-          </>
-        )}
-      </div>
-    </div>
-  )
-}
--- a/macapp/src/declarations.d.ts
+++ b/macapp/src/declarations.d.ts
@@ -1,4 +0,0 @@
-declare module '*.svg' {
-  const content: string
-  export default content
-}
--- a/macapp/src/index.html
+++ b/macapp/src/index.html
@@ -1,9 +0,0 @@
-<!DOCTYPE html>
-<html>
-  <head>
-    <meta charset="UTF-8" />
-  </head>
-  <body>
-    <div id="app"></div>
-  </body>
-</html>
--- a/macapp/src/index.ts
+++ b/macapp/src/index.ts
@@ -1,302 +0,0 @@
-import { spawn, ChildProcess } from 'child_process'
-import { app, autoUpdater, dialog, Tray, Menu, BrowserWindow, MenuItemConstructorOptions, nativeTheme } from 'electron'
-import Store from 'electron-store'
-import winston from 'winston'
-import 'winston-daily-rotate-file'
-import * as path from 'path'
-
-import { v4 as uuidv4 } from 'uuid'
-import { installed } from './install'
-
-require('@electron/remote/main').initialize()
-
-if (require('electron-squirrel-startup')) {
-  app.quit()
-}
-
-const store = new Store()
-
-let welcomeWindow: BrowserWindow | null = null
-
-declare const MAIN_WINDOW_WEBPACK_ENTRY: string
-
-const logger = winston.createLogger({
-  transports: [
-    new winston.transports.Console(),
-    new winston.transports.File({
-      filename: path.join(app.getPath('home'), '.ollama', 'logs', 'server.log'),
-      maxsize: 1024 * 1024 * 20,
-      maxFiles: 5,
-    }),
-  ],
-  format: winston.format.printf(info => info.message),
-})
-
-app.on('ready', () => {
-  const gotTheLock = app.requestSingleInstanceLock()
-  if (!gotTheLock) {
-    app.exit(0)
-    return
-  }
-
-  app.on('second-instance', () => {
-    if (app.hasSingleInstanceLock()) {
-      app.releaseSingleInstanceLock()
-    }
-
-    if (proc) {
-      proc.off('exit', restart)
-      proc.kill()
-    }
-
-    app.exit(0)
-  })
-
-  app.focus({ steal: true })
-
-  init()
-})
-
-function firstRunWindow() {
-  // Create the browser window.
-  welcomeWindow = new BrowserWindow({
-    width: 400,
-    height: 500,
-    frame: false,
-    fullscreenable: false,
-    resizable: false,
-    movable: true,
-    show: false,
-    webPreferences: {
-      nodeIntegration: true,
-      contextIsolation: false,
-    },
-  })
-
-  require('@electron/remote/main').enable(welcomeWindow.webContents)
-
-  welcomeWindow.loadURL(MAIN_WINDOW_WEBPACK_ENTRY)
-  welcomeWindow.on('ready-to-show', () => welcomeWindow.show())
-  welcomeWindow.on('closed', () => {
-    if (process.platform === 'darwin') {
-      app.dock.hide()
-    }
-  })
-}
-
-let tray: Tray | null = null
-let updateAvailable = false
-const assetPath = app.isPackaged ? process.resourcesPath : path.join(__dirname, '..', '..', 'assets')
-
-function trayIconPath() {
-  return nativeTheme.shouldUseDarkColors
-    ? updateAvailable
-      ? path.join(assetPath, 'iconDarkUpdateTemplate.png')
-      : path.join(assetPath, 'iconDarkTemplate.png')
-    : updateAvailable
-    ? path.join(assetPath, 'iconUpdateTemplate.png')
-    : path.join(assetPath, 'iconTemplate.png')
-}
-
-function updateTrayIcon() {
-  if (tray) {
-    tray.setImage(trayIconPath())
-  }
-}
-
-function updateTray() {
-  const updateItems: MenuItemConstructorOptions[] = [
-    { label: 'An update is available', enabled: false },
-    {
-      label: 'Restart to update',
-      click: () => autoUpdater.quitAndInstall(),
-    },
-    { type: 'separator' },
-  ]
-
-  const menu = Menu.buildFromTemplate([
-    ...(updateAvailable ? updateItems : []),
-    { role: 'quit', label: 'Quit Ollama', accelerator: 'Command+Q' },
-  ])
-
-  if (!tray) {
-    tray = new Tray(trayIconPath())
-  }
-
-  tray.setToolTip(updateAvailable ? 'An update is available' : 'Ollama')
-  tray.setContextMenu(menu)
-  tray.setImage(trayIconPath())
-
-  nativeTheme.off('updated', updateTrayIcon)
-  nativeTheme.on('updated', updateTrayIcon)
-}
-
-let proc: ChildProcess = null
-
-function server() {
-  const binary = app.isPackaged
-    ? path.join(process.resourcesPath, 'ollama')
-    : path.resolve(process.cwd(), '..', 'ollama')
-
-  proc = spawn(binary, ['serve'])
-
-  proc.stdout.on('data', data => {
-    logger.info(data.toString().trim())
-  })
-
-  proc.stderr.on('data', data => {
-    logger.error(data.toString().trim())
-  })
-
-  proc.on('exit', restart)
-}
-
-function restart() {
-  setTimeout(server, 1000)
-}
-
-app.on('before-quit', () => {
-  if (proc) {
-    proc.off('exit', restart)
-    proc.kill('SIGINT') // send SIGINT signal to the server, which also stops any loaded llms
-  }
-})
-
-const updateURL = `https://ollama.ai/api/update?os=${process.platform}&arch=${
-  process.arch
-}&version=${app.getVersion()}&id=${id()}`
-
-let latest = ''
-async function isNewReleaseAvailable() {
-  try {
-    const response = await fetch(updateURL)
-
-    if (!response.ok) {
-      return false
-    }
-
-    if (response.status === 204) {
-      return false
-    }
-
-    const data = await response.json()
-
-    const url = data?.url
-    if (!url) {
-      return false
-    }
-
-    if (latest === url) {
-      return false
-    }
-
-    latest = url
-
-    return true
-  } catch (error) {
-    logger.error(`update check failed - ${error}`)
-    return false
-  }
-}
-
-async function checkUpdate() {
-  const available = await isNewReleaseAvailable()
-  if (available) {
-    logger.info('checking for update')
-    autoUpdater.checkForUpdates()
-  }
-}
-
-function init() {
-  if (app.isPackaged) {
-    checkUpdate()
-    setInterval(() => {
-      checkUpdate()
-    }, 60 * 60 * 1000)
-  }
-
-  updateTray()
-
-  if (process.platform === 'darwin') {
-    if (app.isPackaged) {
-      if (!app.isInApplicationsFolder()) {
-        const chosen = dialog.showMessageBoxSync({
-          type: 'question',
-          buttons: ['Move to Applications', 'Do Not Move'],
-          message: 'Ollama works best when run from the Applications directory.',
-          defaultId: 0,
-          cancelId: 1,
-        })
-
-        if (chosen === 0) {
-          try {
-            app.moveToApplicationsFolder({
-              conflictHandler: conflictType => {
-                if (conflictType === 'existsAndRunning') {
-                  dialog.showMessageBoxSync({
-                    type: 'info',
-                    message: 'Cannot move to Applications directory',
-                    detail:
-                      'Another version of Ollama is currently running from your Applications directory. Close it first and try again.',
-                  })
-                }
-                return true
-              },
-            })
-            return
-          } catch (e) {
-            logger.error(`[Move to Applications] Failed to move to applications folder - ${e.message}}`)
-          }
-        }
-      }
-    }
-  }
-
-  server()
-
-  if (store.get('first-time-run') && installed()) {
-    if (process.platform === 'darwin') {
-      app.dock.hide()
-    }
-
-    app.setLoginItemSettings({ openAtLogin: app.getLoginItemSettings().openAtLogin })
-    return
-  }
-
-  // This is the first run or the CLI is no longer installed
-  app.setLoginItemSettings({ openAtLogin: true })
-  firstRunWindow()
-}
-
-// Quit when all windows are closed, except on macOS. There, it's common
-// for applications and their menu bar to stay active until the user quits
-// explicitly with Cmd + Q.
-app.on('window-all-closed', () => {
-  if (process.platform !== 'darwin') {
-    app.quit()
-  }
-})
-
-function id(): string {
-  const id = store.get('id') as string
-
-  if (id) {
-    return id
-  }
-
-  const uuid = uuidv4()
-  store.set('id', uuid)
-  return uuid
-}
-
-autoUpdater.setFeedURL({ url: updateURL })
-
-autoUpdater.on('error', e => {
-  logger.error(`update check failed - ${e.message}`)
-  console.error(`update check failed - ${e.message}`)
-})
-
-autoUpdater.on('update-downloaded', () => {
-  updateAvailable = true
-  updateTray()
-})
--- a/macapp/src/install.ts
+++ b/macapp/src/install.ts
@@ -1,21 +0,0 @@
-import * as fs from 'fs'
-import { exec as cbExec } from 'child_process'
-import * as path from 'path'
-import { promisify } from 'util'
-
-const app = process && process.type === 'renderer' ? require('@electron/remote').app : require('electron').app
-const ollama = app.isPackaged ? path.join(process.resourcesPath, 'ollama') : path.resolve(process.cwd(), '..', 'ollama')
-const exec = promisify(cbExec)
-const symlinkPath = '/usr/local/bin/ollama'
-
-export function installed() {
-  return fs.existsSync(symlinkPath) && fs.readlinkSync(symlinkPath) === ollama
-}
-
-export async function install() {
-  const command = `do shell script "mkdir -p ${path.dirname(
-    symlinkPath
-  )} && ln -F -s \\"${ollama}\\" \\"${symlinkPath}\\"" with administrator privileges`
-
-  await exec(`osascript -e '${command}'`)
-}
--- a/macapp/src/ollama.svg
+++ b/macapp/src/ollama.svg
--- a/macapp/src/preload.ts
+++ b/macapp/src/preload.ts
--- a/macapp/src/renderer.tsx
+++ b/macapp/src/renderer.tsx
@@ -1,7 +0,0 @@
-import App from './app'
-import './app.css'
-import { createRoot } from 'react-dom/client'
-
-const container = document.getElementById('app')
-const root = createRoot(container)
-root.render(<App />)
--- a/macapp/tailwind.config.js
+++ b/macapp/tailwind.config.js
@@ -1,6 +0,0 @@
-/** @type {import('tailwindcss').Config} */
-module.exports = {
-  content: ['./src/**/*.{js,ts,jsx,tsx,mdx}'],
-  theme: {},
-  plugins: [],
-}
--- a/macapp/tsconfig.json
+++ b/macapp/tsconfig.json
@@ -1,20 +0,0 @@
-{
-  "compilerOptions": {
-    "target": "ES6",
-    "allowJs": true,
-    "module": "commonjs",
-    "skipLibCheck": true,
-    "esModuleInterop": true,
-    "noImplicitAny": true,
-    "sourceMap": true,
-    "baseUrl": ".",
-    "outDir": "dist",
-    "moduleResolution": "node",
-    "resolveJsonModule": true,
-    "paths": {
-      "*": ["node_modules/*"]
-    },
-    "jsx": "react-jsx"
-  },
-  "include": ["src/**/*"]
-}
--- a/macapp/webpack.main.config.ts
+++ b/macapp/webpack.main.config.ts
@@ -1,20 +0,0 @@
-import type { Configuration } from 'webpack'
-
-import { rules } from './webpack.rules'
-import { plugins } from './webpack.plugins'
-
-export const mainConfig: Configuration = {
-  /**
-   * This is the main entry point for your application, it's the first file
-   * that runs in the main process.
-   */
-  entry: './src/index.ts',
-  // Put your normal webpack config below here
-  module: {
-    rules,
-  },
-  plugins,
-  resolve: {
-    extensions: ['.js', '.ts', '.jsx', '.tsx', '.css', '.json'],
-  },
-}
--- a/macapp/webpack.plugins.ts
+++ b/macapp/webpack.plugins.ts
@@ -1,14 +0,0 @@
-import type IForkTsCheckerWebpackPlugin from 'fork-ts-checker-webpack-plugin'
-import { DefinePlugin } from 'webpack'
-
-// eslint-disable-next-line @typescript-eslint/no-var-requires
-const ForkTsCheckerWebpackPlugin: typeof IForkTsCheckerWebpackPlugin = require('fork-ts-checker-webpack-plugin')
-
-export const plugins = [
-  new ForkTsCheckerWebpackPlugin({
-    logger: 'webpack-infrastructure',
-  }),
-  new DefinePlugin({
-    'process.env.TELEMETRY_WRITE_KEY': JSON.stringify(process.env.TELEMETRY_WRITE_KEY),
-  }),
-]
--- a/macapp/webpack.renderer.config.ts
+++ b/macapp/webpack.renderer.config.ts
@@ -1,19 +0,0 @@
-import type { Configuration } from 'webpack'
-
-import { rules } from './webpack.rules'
-import { plugins } from './webpack.plugins'
-
-rules.push({
-  test: /\.css$/,
-  use: [{ loader: 'style-loader' }, { loader: 'css-loader' }, { loader: 'postcss-loader' }],
-})
-
-export const rendererConfig: Configuration = {
-  module: {
-    rules,
-  },
-  plugins,
-  resolve: {
-    extensions: ['.js', '.ts', '.jsx', '.tsx', '.css'],
-  },
-}
--- a/macapp/webpack.rules.ts
+++ b/macapp/webpack.rules.ts
@@ -1,35 +0,0 @@
-import type { ModuleOptions } from 'webpack'
-
-export const rules: Required<ModuleOptions>['rules'] = [
-  // Add support for native node modules
-  {
-    // We're specifying native_modules in the test because the asset relocator loader generates a
-    // "fake" .node file which is really a cjs file.
-    test: /native_modules[/\\].+\.node$/,
-    use: 'node-loader',
-  },
-  {
-    test: /[/\\]node_modules[/\\].+\.(m?js|node)$/,
-    parser: { amd: false },
-    use: {
-      loader: '@vercel/webpack-asset-relocator-loader',
-      options: {
-        outputAssetBase: 'native_modules',
-      },
-    },
-  },
-  {
-    test: /\.tsx?$/,
-    exclude: /(node_modules|\.webpack)/,
-    use: {
-      loader: 'ts-loader',
-      options: {
-        transpileOnly: true,
-      },
-    },
-  },
-  {
-    test: /\.svg$/,
-    use: ['@svgr/webpack'],
-  },
-]
--- a/scripts/build_darwin.sh
+++ b/scripts/build_darwin.sh
@@ -11,26 +11,37 @@ for TARGETARCH in arm64 amd64; do
    rm -rf llm/llama.cpp/build
    GOOS=darwin GOARCH=$TARGETARCH go generate ./...
    CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -trimpath -o dist/ollama-darwin-$TARGETARCH
-    CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -trimpath -cover -o dist/ollama-darwin-$TARGETARCH-cov
+    CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -C app -trimpath -o ../dist/ollama-app-darwin-$TARGETARCH
 done

 lipo -create -output dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
-rm -f dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
+lipo -create -output dist/ollama-app dist/ollama-app-darwin-arm64 dist/ollama-app-darwin-amd64
+rm -f dist/ollama-darwin-* dist/ollama-app-darwin-*
+
+# create the mac app
+rm -rf dist/Ollama.app
+cp -R app/darwin/Ollama.app dist/
+/usr/libexec/PlistBuddy -c "Set :CFBundleShortVersionString $VERSION" dist/Ollama.app/Contents/Info.plist
+mkdir -p dist/Ollama.app/Contents/MacOS
+mv dist/ollama-app dist/Ollama.app/Contents/MacOS/Ollama
+cp dist/ollama dist/Ollama.app/Contents/Resources/ollama
+
+# sign and notarize the app
 if [ -n "$APPLE_IDENTITY" ]; then
-    codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
+    codesign -f --timestamp --options=runtime --sign "$APPLE_IDENTITY" --identifier ai.ollama.ollama dist/Ollama.app/Contents/MacOS/Ollama
+    codesign -f --timestamp --options=runtime --sign "$APPLE_IDENTITY" --identifier ai.ollama.ollama dist/Ollama.app/Contents/Resources/ollama
+    codesign -f --timestamp --options=runtime --sign "$APPLE_IDENTITY" --identifier ai.ollama.ollama dist/Ollama.app
+    ditto -c -k --keepParent dist/Ollama.app dist/Ollama-darwin.zip
+    rm -rf dist/Ollama.app
+    xcrun notarytool submit dist/Ollama-darwin.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
+    unzip dist/Ollama-darwin.zip -d dist
+    rm -f dist/Ollama-darwin.zip
+    xcrun stapler staple "dist/Ollama.app"
+    ditto -c -k --keepParent dist/Ollama.app dist/Ollama-darwin.zip
+    rm -rf dist/Ollama.app
 else
    echo "Skipping code signing - set APPLE_IDENTITY"
 fi
-chmod +x dist/ollama
-
-# build and optionally sign the mac app
-npm install --prefix macapp
-if [ -n "$APPLE_IDENTITY" ]; then
-    npm run --prefix macapp make:sign
-else 
-    npm run --prefix macapp make
-fi
-cp macapp/out/make/zip/darwin/universal/Ollama-darwin-universal-$VERSION.zip dist/Ollama-darwin.zip

 # sign the binary and rename it
 if [ -n "$APPLE_IDENTITY" ]; then
--- a/scripts/build_windows.ps1
+++ b/scripts/build_windows.ps1
@@ -27,7 +27,7 @@ function checkEnv() {
    } else {
        $script:NVIDIA_DIR=$env:NVIDIA_DIR
    }
-    
+
    $script:INNO_SETUP_DIR=(get-item "C:\Program Files*\Inno Setup*\")[0]

    $script:DEPS_DIR="${script:SRC_DIR}\dist\windows-amd64"
@@ -70,7 +70,7 @@ function buildOllama() {
    write-host "Building ollama CLI"
    if ($null -eq ${env:OLLAMA_SKIP_GENERATE}) {
        & go generate ./...
-        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}    
+        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
    } else {
        write-host "Skipping generate step with OLLAMA_SKIP_GENERATE set"
    }
@@ -82,14 +82,14 @@ function buildOllama() {
        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
    }
    New-Item -ItemType Directory -Path .\dist\windows-amd64\ -Force
-    cp .\ollama.exe .\dist\windows-amd64\ollama-windows-amd64.exe
+    cp .\ollama.exe .\dist\windows-amd64\
 }

 function buildApp() {
    write-host "Building Ollama App"
    cd "${script:SRC_DIR}\app"
-    & windres -l 0 -o ollama.syso ollama.rc
-    & go build -trimpath -ldflags "-s -w -H windowsgui -X=github.com/ollama/ollama/version.Version=$script:VERSION -X=github.com/ollama/ollama/server.mode=release" .
+    & windres -l 0 -o ollama.syso windows\ollama.rc
+    & go build -trimpath -ldflags "-s -w -H windowsgui -X=github.com/ollama/ollama/version.Version=$script:VERSION -X=github.com/ollama/ollama/server.mode=release" .
    if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
    if ("${env:KEY_CONTAINER}") {
        & "${script:SignTool}" sign /v /fd sha256 /t http://timestamp.digicert.com /f "${script:OLLAMA_CERT}" `
@@ -109,11 +109,8 @@ function gatherDependencies() {
    cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\"
    cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\"

-    cp "${script:NVIDIA_DIR}\cudart64_*.dll" "${script:DEPS_DIR}\"
-    cp "${script:NVIDIA_DIR}\cublas64_*.dll" "${script:DEPS_DIR}\"
-    cp "${script:NVIDIA_DIR}\cublasLt64_*.dll" "${script:DEPS_DIR}\"

-    cp "${script:SRC_DIR}\app\ollama_welcome.ps1" "${script:SRC_DIR}\dist\"
+    cp "${script:SRC_DIR}\app\windows\ollama_welcome.ps1" "${script:SRC_DIR}\dist\"
    if ("${env:KEY_CONTAINER}") {
        write-host "about to sign"
        foreach ($file in (get-childitem "${script:DEPS_DIR}/cu*.dll") + @("${script:SRC_DIR}\dist\ollama_welcome.ps1")){
@@ -123,15 +120,6 @@ function gatherDependencies() {
            if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
        }
    }
-    if ($null -ne $env:HIP_PATH) {
-        # Assumes v5.7, may need adjustments for v6
-        rm -ea 0 -recurse -force -path "${script:DEPS_DIR}\rocm\"
-        md "${script:DEPS_DIR}\rocm\rocblas\library\" -ea 0 > $null
-        cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:DEPS_DIR}\rocm\"
-        cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:DEPS_DIR}\rocm\"
-        # amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
-        cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:DEPS_DIR}\rocm\rocblas\library\"
-    }
 }

 function buildInstaller() {
@@ -139,9 +127,9 @@ function buildInstaller() {
    cd "${script:SRC_DIR}\app"
    $env:PKG_VERSION=$script:PKG_VERSION
    if ("${env:KEY_CONTAINER}") {
-        & "${script:INNO_SETUP_DIR}\ISCC.exe" /SMySignTool="${script:SignTool} sign /fd sha256 /t http://timestamp.digicert.com /f ${script:OLLAMA_CERT} /csp `$qGoogle Cloud KMS Provider`$q /kc ${env:KEY_CONTAINER} `$f" .\ollama.iss
+        & "${script:INNO_SETUP_DIR}\ISCC.exe" /SMySignTool="${script:SignTool} sign /fd sha256 /t http://timestamp.digicert.com /f ${script:OLLAMA_CERT} /csp `$qGoogle Cloud KMS Provider`$q /kc ${env:KEY_CONTAINER} `$f" .\windows\ollama.iss
    } else {
-        & "${script:INNO_SETUP_DIR}\ISCC.exe" .\ollama.iss
+        & "${script:INNO_SETUP_DIR}\ISCC.exe" .\windows\ollama.iss
    }
    if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
 }
--- a/scripts/publish.sh
+++ b/scripts/publish.sh
@@ -1,25 +0,0 @@
-# Set your variables here.
-REPO="jmorganca/ollama"
-
-# Check if VERSION is set
-if [[ -z "${VERSION}" ]]; then
-  echo "VERSION is not set. Please set the VERSION environment variable."
-  exit 1
-fi
-
-OS=$(go env GOOS)
-
-./script/build_${OS}.sh
-
-# Create a new tag if it doesn't exist.
-if ! git rev-parse v$VERSION >/dev/null 2>&1; then
-  git tag v$VERSION
-fi
-
-git push origin v$VERSION
-
-# Create a new release.
-gh release create -p v$VERSION -t v$VERSION
-
-# Upload the zip file.
-gh release upload v$VERSION ./dist/* --clobber
--- a/scripts/run_darwin.sh
+++ b/scripts/run_darwin.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Assemble a throwaway Ollama.app under $TMPDIR from the template bundle in
+# app/darwin, build the CLI and the app binary into it, then launch the app.
+# Uses relative paths, so it expects to be run from the repository root.
+
+set -e
+
+# Abort if TMPDIR is unset or empty so the rm -rf below can never hit /Ollama.app.
+APP="${TMPDIR:?TMPDIR must be set}/Ollama.app"
+
+rm -rf "$APP"
+cp -R app/darwin/Ollama.app "$APP"
+mkdir -p "$APP/Contents/Resources" "$APP/Contents/MacOS"
+go build -o "$APP/Contents/Resources/ollama" .
+go build -C app -o "$APP/Contents/MacOS/Ollama" .
+"$APP/Contents/MacOS/Ollama"
--- a/server/images.go
+++ b/server/images.go
@@ -29,6 +29,7 @@ import (
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/parser"
+	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/version"
 )

@@ -701,36 +702,39 @@ func convertModel(name, path string, fn func(resp api.ProgressResponse)) (string
 	return path, nil
 }

-func CopyModel(src, dest string) error {
-	srcModelPath := ParseModelPath(src)
-	srcPath, err := srcModelPath.GetManifestPath()
+func CopyModel(src, dst model.Name) error {
+	if !dst.IsFullyQualified() {
+		return model.Unqualified(dst)
+	}
+	if !src.IsFullyQualified() {
+		return model.Unqualified(src)
+	}
+
+	manifests, err := GetManifestPath()
 	if err != nil {
 		return err
 	}

-	destModelPath := ParseModelPath(dest)
-	destPath, err := destModelPath.GetManifestPath()
-	if err != nil {
-		return err
-	}
-	if err := os.MkdirAll(filepath.Dir(destPath), 0o755); err != nil {
+	dstpath := filepath.Join(manifests, dst.Filepath())
+	if err := os.MkdirAll(filepath.Dir(dstpath), 0o755); err != nil {
 		return err
 	}

-	// copy the file
-	input, err := os.ReadFile(srcPath)
+	srcpath := filepath.Join(manifests, src.Filepath())
+	srcfile, err := os.Open(srcpath)
 	if err != nil {
-		fmt.Println("Error reading file:", err)
 		return err
 	}
+	defer srcfile.Close()

-	err = os.WriteFile(destPath, input, 0o644)
+	dstfile, err := os.Create(dstpath)
 	if err != nil {
-		fmt.Println("Error reading file:", err)
 		return err
 	}
+	defer dstfile.Close()

-	return nil
+	_, err = io.Copy(dstfile, srcfile)
+	return err
 }

 func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{}, dryRun bool) error {
--- a/server/routes.go
+++ b/server/routes.go
@@ -29,6 +29,7 @@ import (
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/parser"
+	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/version"
 )

@@ -145,6 +146,11 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 	select {
 	case runner = <-rCh:
 	case err = <-eCh:
+		if errors.Is(err, context.Canceled) {
+			c.JSON(499, gin.H{"error": "request canceled"})
+			return
+		}
+
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
 	}
@@ -388,6 +394,11 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
 	select {
 	case runner = <-rCh:
 	case err = <-eCh:
+		if errors.Is(err, context.Canceled) {
+			c.JSON(499, gin.H{"error": "request canceled"})
+			return
+		}
+
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
 	}
@@ -788,34 +799,34 @@ func (s *Server) ListModelsHandler(c *gin.Context) {
 }

 func (s *Server) CopyModelHandler(c *gin.Context) {
-	var req api.CopyRequest
-	err := c.ShouldBindJSON(&req)
-	switch {
-	case errors.Is(err, io.EOF):
+	var r api.CopyRequest
+	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
 		return
-	case err != nil:
+	} else if err != nil {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
 		return
 	}

-	if req.Source == "" || req.Destination == "" {
-		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "source add destination are required"})
+	src := model.ParseName(r.Source)
+	if !src.IsValid() {
+		_ = c.Error(fmt.Errorf("source %q is invalid", r.Source))
+	}
+
+	dst := model.ParseName(r.Destination)
+	if !dst.IsValid() {
+		_ = c.Error(fmt.Errorf("destination %q is invalid", r.Destination))
+	}
+
+	if len(c.Errors) > 0 {
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": c.Errors.Errors()})
 		return
 	}

-	if err := ParseModelPath(req.Destination).Validate(); err != nil {
-		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
-		return
-	}
-
-	if err := CopyModel(req.Source, req.Destination); err != nil {
-		if os.IsNotExist(err) {
-			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
-		} else {
-			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		}
-		return
+	if err := CopyModel(src, dst); errors.Is(err, os.ErrNotExist) {
+		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found", r.Source)})
+	} else if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 	}
 }

@@ -1215,6 +1226,11 @@ func (s *Server) ChatHandler(c *gin.Context) {
 	select {
 	case runner = <-rCh:
 	case err = <-eCh:
+		if errors.Is(err, context.Canceled) {
+			c.JSON(499, gin.H{"error": "request canceled"})
+			return
+		}
+
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
 	}
--- a/server/sched.go
+++ b/server/sched.go
@@ -23,7 +23,6 @@ import (
 type LlmRequest struct {
 	ctx             context.Context //nolint:containedctx
 	model           *Model
-	ggml            *llm.GGML // TODO - how large is this, and do we need to free it after we've finished loading?
 	opts            api.Options
 	sessionDuration time.Duration
 	successCh       chan *runnerRef
@@ -39,7 +38,7 @@ type Scheduler struct {
 	loaded   map[string]*runnerRef
 	loadedMu sync.Mutex

-	loadFn      func(req *LlmRequest, gpus gpu.GpuInfoList)
+	loadFn      func(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList)
 	newServerFn func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error)
 	getGpuFn    func() gpu.GpuInfoList
 }
@@ -47,6 +46,7 @@ type Scheduler struct {
 // TODO set this to zero after a release or two, to enable multiple models by default
 var loadedMax = 1          // Maximum runners; < 1 maps to as many as will fit in VRAM (unlimited for CPU runners)
 var maxQueuedRequests = 10 // TODO configurable
+var numParallel = 1

 func InitScheduler(ctx context.Context) *Scheduler {
 	maxRunners := os.Getenv("OLLAMA_MAX_LOADED_MODELS")
@@ -58,6 +58,14 @@ func InitScheduler(ctx context.Context) *Scheduler {
 			loadedMax = m
 		}
 	}
+	if onp := os.Getenv("OLLAMA_NUM_PARALLEL"); onp != "" {
+		p, err := strconv.Atoi(onp)
+		if err != nil || p <= 0 {
+			slog.Error("invalid parallel setting, must be greater than zero", "OLLAMA_NUM_PARALLEL", onp, "error", err)
+		} else {
+			numParallel = p
+		}
+	}

 	sched := &Scheduler{
 		pendingReqCh:  make(chan *LlmRequest, maxQueuedRequests),
@@ -74,20 +82,16 @@ func InitScheduler(ctx context.Context) *Scheduler {

 // context must be canceled to decrement ref count and release the runner
 func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options, sessionDuration time.Duration) (chan *runnerRef, chan error) {
-	ggml, err := llm.LoadModel(model.ModelPath)
+	// context split across parallel threads; scale before opts is copied by value into req
+	opts.NumCtx = opts.NumCtx * numParallel
 	req := &LlmRequest{
 		ctx:             c,
 		model:           model,
-		ggml:            ggml,
 		opts:            opts,
 		sessionDuration: sessionDuration,
 		successCh:       make(chan *runnerRef),
 		errCh:           make(chan error, 1),
 	}
-	if err != nil {
-		req.errCh <- err
-		return req.successCh, req.errCh
-	}
 	select {
 	case s.pendingReqCh <- req:
 	default:
@@ -130,28 +134,39 @@ func (s *Scheduler) processPending(ctx context.Context) {
 						pending.useLoadedRunner(runner, s.finishedReqCh)
 						break
 					}
-				} else if loadedCount == 0 {
-					slog.Debug("loading first model", "model", pending.model.ModelPath)
-					gpus := s.getGpuFn()
-					g := pickBestFitGPUs(pending, gpus)
-					if g != nil {
-						gpus = g
-					}
-					s.loadFn(pending, gpus)
-					break
 				} else if loadedMax > 0 && loadedCount >= loadedMax {
 					slog.Debug("max runners achieved, unloading one to make room", "runner_count", loadedCount)
 					runnerToExpire = s.findRunnerToUnload(pending)
 				} else {
-					// More than one loaded model, so we have to see if the new one fits
+					// Either no models are loaded or below loadedMax
 					// Get a refreshed GPU list
 					gpus := s.getGpuFn()
+
+					// Load model for fitting
+					ggml, err := llm.LoadModel(pending.model.ModelPath)
+					if err != nil {
+						pending.errCh <- err
+						break
+					}
+
+					// No models loaded. Load the model but prefer the best fit.
+					if loadedCount == 0 {
+						slog.Debug("loading first model", "model", pending.model.ModelPath)
+						g := pickBestFitGPUs(pending, ggml, gpus)
+						if g != nil {
+							gpus = g
+						}
+						s.loadFn(pending, ggml, gpus)
+						break
+					}
+
+					// At least one model is already loaded, so we have to see if the new one fits
 					// Update free memory from currently loaded models
 					s.updateFreeSpace(gpus)
-					gpus = pickBestFitGPUs(pending, gpus)
+					gpus = pickBestFitGPUs(pending, ggml, gpus)
 					if gpus != nil {
 						slog.Debug("new model fits with existing models, loading")
-						s.loadFn(pending, gpus)
+						s.loadFn(pending, ggml, gpus)
 						break
 					}
 					runnerToExpire = s.findRunnerToUnload(pending)
@@ -282,8 +297,8 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm
 	}()
 }

-func (s *Scheduler) load(req *LlmRequest, gpus gpu.GpuInfoList) {
-	llama, err := s.newServerFn(gpus, req.model.ModelPath, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts)
+func (s *Scheduler) load(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) {
+	llama, err := s.newServerFn(gpus, req.model.ModelPath, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts)
 	if err != nil {
 		// some older models are not compatible with newer versions of llama.cpp
 		// show a generalized compatibility error until there is a better way to
@@ -417,16 +432,21 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
 	slog.Debug("evaluating already loaded", "model", req.model.ModelPath)
 	runner.refMu.Lock()
 	defer runner.refMu.Unlock()
-	// Ignore the NumGPU settings for comparison
-	optsExisting := runner.Options.Runner
-	optsExisting.NumGPU = -1
-	optsNew := req.opts.Runner
-	optsNew.NumGPU = -1
+
 	timeout := 10 * time.Second
 	if runner.loading {
 		timeout = 2 * time.Minute // Initial load can take a long time for big models on slow systems...
 	}
-	ctx, cancel := context.WithTimeout(ctx, timeout) // BUG -
+
+	// Don't reload runner if num_gpu=-1 was provided
+	optsExisting := runner.Options.Runner
+	optsNew := req.opts.Runner
+	if optsNew.NumGPU < 0 {
+		optsExisting.NumGPU = -1
+		optsNew.NumGPU = -1
+	}
+
+	ctx, cancel := context.WithTimeout(ctx, timeout)
 	defer cancel()
 	if !reflect.DeepEqual(runner.adapters, req.model.AdapterPaths) || // have the adapters changed?
 		!reflect.DeepEqual(runner.projectors, req.model.ProjectorPaths) || // have the projectors changed?
@@ -434,6 +454,7 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
 		runner.llama.Ping(ctx) != nil {
 		return true
 	}
+
 	return false
 }

@@ -454,7 +475,7 @@ func (a ByDuration) Less(i, j int) bool {

 // pickBestFitGPUs will try to find the optimal placement of the model in the available GPUs where the model fully fits
 // If the model can not be fit fully within the available GPU(s) nil is returned
-func pickBestFitGPUs(req *LlmRequest, gpus gpu.GpuInfoList) gpu.GpuInfoList {
+func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu.GpuInfoList {
 	var estimatedVRAM uint64
 	for _, gl := range gpus.ByLibrary() {
 		var ok bool
@@ -466,7 +487,7 @@ func pickBestFitGPUs(req *LlmRequest, gpus gpu.GpuInfoList) gpu.GpuInfoList {

 		// First attempt to fit the model into a single GPU
 		for _, g := range sgl {
-			if ok, estimatedVRAM = llm.PredictServerFit([]gpu.GpuInfo{g}, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
+			if ok, estimatedVRAM = llm.PredictServerFit([]gpu.GpuInfo{g}, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
 				slog.Debug("new model will fit in available VRAM in single GPU, loading", "model", req.model.ModelPath, "gpu", g.ID, "available", g.FreeMemory, "required", format.HumanBytes2(estimatedVRAM))
 				return []gpu.GpuInfo{g}
 			}
@@ -477,7 +498,7 @@ func pickBestFitGPUs(req *LlmRequest, gpus gpu.GpuInfoList) gpu.GpuInfoList {
 		// - try subsets of GPUs instead of just falling back to 1 or all in a family

 		// Now try all the GPUs
-		if ok, estimatedVRAM = llm.PredictServerFit(gl, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
+		if ok, estimatedVRAM = llm.PredictServerFit(gl, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
 			slog.Debug("new model will fit in available VRAM, loading", "model", req.model.ModelPath, "library", gl[0].Library, "required", format.HumanBytes2(estimatedVRAM))
 			return gl
 		}
--- a/server/sched_test.go
+++ b/server/sched_test.go
@@ -47,6 +47,7 @@ func TestLoad(t *testing.T) {
 	ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
 	defer done()
 	s := InitScheduler(ctx)
+	var ggml *llm.GGML // value not used in tests
 	req := &LlmRequest{
 		ctx:             ctx,
 		model:           &Model{ModelPath: "foo"},
@@ -59,7 +60,7 @@ func TestLoad(t *testing.T) {
 		return nil, fmt.Errorf("something failed to load model blah")
 	}
 	gpus := gpu.GpuInfoList{}
-	s.load(req, gpus)
+	s.load(req, ggml, gpus)
 	require.Len(t, req.successCh, 0)
 	require.Len(t, req.errCh, 1)
 	require.Len(t, s.loaded, 0)
@@ -70,7 +71,7 @@ func TestLoad(t *testing.T) {
 	s.newServerFn = func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error) {
 		return server, nil
 	}
-	s.load(req, gpus)
+	s.load(req, ggml, gpus)
 	select {
 	case err := <-req.errCh:
 		require.NoError(t, err)
@@ -82,7 +83,7 @@ func TestLoad(t *testing.T) {

 	req.model.ModelPath = "dummy_model_path"
 	server.waitResp = fmt.Errorf("wait failure")
-	s.load(req, gpus)
+	s.load(req, ggml, gpus)
 	select {
 	case err := <-req.errCh:
 		require.Contains(t, err.Error(), "wait failure")
@@ -101,6 +102,7 @@ type bundle struct {
 	ctxDone func()
 	srv     *mockLlm
 	req     *LlmRequest
+	ggml    *llm.GGML
 }

 func (scenario *bundle) newServer(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error) {
@@ -132,14 +134,15 @@ func newScenario(t *testing.T, ctx context.Context, modelName string, estimatedV
 		{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: &bytes.Reader{}},
 	})
 	assert.Nil(t, err)
+
 	fname := f.Name()
 	model := &Model{Name: modelName, ModelPath: fname}
-	ggml, err := llm.LoadModel(model.ModelPath)
+	scenario.ggml, err = llm.LoadModel(model.ModelPath)
 	require.NoError(t, err)
+
 	scenario.req = &LlmRequest{
 		ctx:             scenario.ctx,
 		model:           model,
-		ggml:            ggml,
 		sessionDuration: 5 * time.Millisecond,
 		successCh:       make(chan *runnerRef, 1),
 		errCh:           make(chan error, 1),
@@ -157,13 +160,13 @@ func TestRequests(t *testing.T) {
 	scenario1a.req.sessionDuration = 0
 	scenario1b := newScenario(t, ctx, "ollama-model-1", 11)
 	scenario1b.req.model = scenario1a.req.model
-	scenario1b.req.ggml = scenario1a.req.ggml
+	scenario1b.ggml = scenario1a.ggml
 	scenario1b.req.sessionDuration = 0

 	// simple reload of same model
 	scenario2a := newScenario(t, ctx, "ollama-model-1", 20)
 	scenario2a.req.model = scenario1a.req.model
-	scenario2a.req.ggml = scenario1a.req.ggml
+	scenario2a.ggml = scenario1a.ggml

 	// Multiple loaded models
 	scenario3a := newScenario(t, ctx, "ollama-model-3a", 1*format.GigaByte)
@@ -322,13 +325,14 @@ func TestGetRunner(t *testing.T) {
 	successCh1c, errCh1c := s.GetRunner(scenario1c.ctx, scenario1c.req.model, scenario1c.req.opts, scenario1c.req.sessionDuration)
 	require.Len(t, s.pendingReqCh, 0)
 	require.Len(t, successCh1c, 0)
+	require.Len(t, errCh1c, 0)
+
+	time.Sleep(5 * time.Millisecond)
+	require.Len(t, s.loaded, 0)
 	require.Len(t, errCh1c, 1)
 	err = <-errCh1c
 	require.Contains(t, err.Error(), "bad path")
 	scenario1b.ctxDone()
-
-	time.Sleep(5 * time.Millisecond)
-	require.Len(t, s.loaded, 0)
 }

 // TODO - add one scenario that triggers the bogus finished event with positive ref count
@@ -366,7 +370,9 @@ func TestPrematureExpired(t *testing.T) {
 	require.LessOrEqual(t, len(s.finishedReqCh), 1)
 	time.Sleep(10 * time.Millisecond)
 	require.Len(t, s.finishedReqCh, 0)
+	s.loadedMu.Lock()
 	require.Len(t, s.loaded, 0)
+	s.loadedMu.Unlock()

 	// also shouldn't happen in real life
 	s.finishedReqCh <- scenario1a.req
@@ -426,7 +432,6 @@ func TestUpdateFreeSpace(t *testing.T) {
 	s.updateFreeSpace(gpus)
 	require.Equal(t, uint64(850), gpus[0].FreeMemory)
 	require.Equal(t, uint64(1850), gpus[1].FreeMemory)
-
 }

 func TestFindRunnerToUnload(t *testing.T) {
@@ -485,6 +490,9 @@ func TestNeedsReload(t *testing.T) {
 	require.False(t, resp)
 	req.opts.NumGPU = 99
 	resp = runner.needsReload(ctx, req)
+	require.True(t, resp)
+	req.opts.NumGPU = -1
+	resp = runner.needsReload(ctx, req)
 	require.False(t, resp)
 }

--- a/types/model/digest.go
+++ b/types/model/digest.go
@@ -1,87 +0,0 @@
-package model
-
-import (
-	"fmt"
-	"log/slog"
-	"strings"
-	"unicode"
-)
-
-// Digest represents a digest of a model Manifest. It is a comparable value
-// type and is immutable.
-//
-// The zero Digest is not a valid digest.
-type Digest struct {
-	s string
-}
-
-// Split returns the digest type and the digest value.
-func (d Digest) Split() (typ, digest string) {
-	typ, digest, _ = strings.Cut(d.s, "-")
-	return
-}
-
-// String returns the digest in the form of "<digest-type>-<digest>", or the
-// empty string if the digest is invalid.
-func (d Digest) String() string { return d.s }
-
-// IsValid returns true if the digest is valid (not zero).
-//
-// A valid digest may be created only by ParseDigest, or
-// ParseName(name).Digest().
-func (d Digest) IsValid() bool { return d.s != "" }
-
-// LogValue implements slog.Value.
-func (d Digest) LogValue() slog.Value {
-	return slog.StringValue(d.String())
-}
-
-var (
-	_ slog.LogValuer = Digest{}
-)
-
-// ParseDigest parses a string in the form of "<digest-type>-<digest>" into a
-// Digest.
-func ParseDigest(s string) Digest {
-	typ, digest, ok := strings.Cut(s, "-")
-	if !ok {
-		typ, digest, ok = strings.Cut(s, ":")
-	}
-	if ok && isValidDigestType(typ) && isValidHex(digest) && len(digest) >= 2 {
-		return Digest{s: fmt.Sprintf("%s-%s", typ, digest)}
-	}
-	return Digest{}
-}
-
-func MustParseDigest(s string) Digest {
-	d := ParseDigest(s)
-	if !d.IsValid() {
-		panic(fmt.Sprintf("invalid digest: %q", s))
-	}
-	return d
-}
-
-func isValidDigestType(s string) bool {
-	if len(s) == 0 {
-		return false
-	}
-	for _, r := range s {
-		if !unicode.IsLower(r) && !unicode.IsDigit(r) {
-			return false
-		}
-	}
-	return true
-}
-
-func isValidHex(s string) bool {
-	if len(s) == 0 {
-		return false
-	}
-	for i := range s {
-		c := s[i]
-		if c < '0' || c > '9' && c < 'a' || c > 'f' {
-			return false
-		}
-	}
-	return true
-}
--- a/types/model/digest_test.go
+++ b/types/model/digest_test.go
@@ -1,46 +0,0 @@
-package model
-
-import "testing"
-
-var testDigests = map[string]Digest{
-	"":                 {},
-	"sha256-1234":      {s: "sha256-1234"},
-	"sha256-5678":      {s: "sha256-5678"},
-	"blake2-9abc":      {s: "blake2-9abc"},
-	"-1234":            {},
-	"sha256-":          {},
-	"sha256-1234-5678": {},
-	"sha256-P":         {}, //         invalid  hex
-	"sha256-1234P":     {},
-	"---":              {},
-}
-
-func TestDigestParse(t *testing.T) {
-	// Test cases.
-	for s, want := range testDigests {
-		got := ParseDigest(s)
-		t.Logf("ParseDigest(%q) = %#v", s, got)
-		if got != want {
-			t.Errorf("ParseDigest(%q) = %q; want %q", s, got, want)
-		}
-	}
-}
-
-func TestDigestString(t *testing.T) {
-	// Test cases.
-	for s, d := range testDigests {
-		want := s
-		if !d.IsValid() {
-			want = ""
-		}
-		got := d.String()
-		if got != want {
-			t.Errorf("ParseDigest(%q).String() = %q; want %q", s, got, want)
-		}
-
-		got = ParseDigest(s).String()
-		if got != want {
-			t.Errorf("roundtrip ParseDigest(%q).String() = %q; want %q", s, got, want)
-		}
-	}
-}
--- a/types/model/name.go
+++ b/types/model/name.go
@@ -1,718 +1,313 @@
+// Package model contains types and utilities for parsing, validating, and
+// working with model names and digests.
 package model

 import (
 	"cmp"
 	"errors"
 	"fmt"
-	"hash/maphash"
-	"io"
 	"log/slog"
-	"path"
 	"path/filepath"
-	"slices"
 	"strings"
-	"sync"
-
-	"github.com/ollama/ollama/types/structs"
 )

 // Errors
 var (
-	// ErrInvalidName, ErrIncompleteName, and ErrInvalidDigest are not
-	// used by this package, but are exported so that other packages can
-	// use them, instead of defining their own errors for them.
-	ErrInvalidName    = errors.New("invalid model name")
-	ErrIncompleteName = errors.New("incomplete model name")
-	ErrInvalidDigest  = errors.New("invalid digest")
+	// ErrUnqualifiedName represents an error where a name is not fully
+	// qualified. It is not used directly in this package, but is here
+	// to avoid other packages inventing their own error type.
+	// Additionally, it can be conveniently used via [Unqualified].
+	ErrUnqualifiedName = errors.New("unqualified name")
 )

-// Defaults
-const (
-	// MaskDefault is the default mask used by [Name.DisplayShortest].
-	MaskDefault = "registry.ollama.ai/library/?:latest"
-
-	// MaskNothing is a mask that masks nothing.
-	MaskNothing = "?/?/?:?"
-
-	// DefaultFill is the default fill used by [ParseName].
-	FillDefault = "registry.ollama.ai/library/?:latest+Q4_0"
-
-	// FillNothing is a fill that fills nothing.
-	FillNothing = "?/?/?:?+?"
-)
-
-const MaxNamePartLen = 128
-
-type PartKind int
-
-// Levels of concreteness
-const (
-	// Each value aligns with its index in the Name.parts array.
-
-	PartHost PartKind = iota
-	PartNamespace
-	PartModel
-	PartTag
-	PartBuild
-	PartDigest
-
-	// NumParts is the number of parts in a Name. In this list, it must
-	// follow the final part.
-	NumParts
-
-	PartExtraneous = -1
-)
-
-var kindNames = map[PartKind]string{
-	PartHost:      "Host",
-	PartNamespace: "Namespace",
-	PartModel:     "Name",
-	PartTag:       "Tag",
-	PartBuild:     "Build",
-	PartDigest:    "Digest",
+// Unqualified is a helper function that returns an error with
+// ErrUnqualifiedName as the cause and the name as the message.
+func Unqualified(n Name) error {
+	return fmt.Errorf("%w: %s", ErrUnqualifiedName, n)
 }

-func (k PartKind) String() string {
-	return cmp.Or(kindNames[k], "Unknown")
+// MissingPart is used to indicate any part of a name that was "promised" by
+// the presence of a separator, but is missing.
+//
+// The value was chosen because it is deemed unlikely to be set by a user,
+// is not a valid part when checked by [Name.IsValid], and is easy to
+// spot in logs.
+const MissingPart = "!MISSING!"
+
+// DefaultName returns a name with the default values for the host, namespace,
+// and tag parts. The model and digest parts are empty.
+//
+//   - The default host is ("registry.ollama.ai")
+//   - The default namespace is ("library")
+//   - The default tag is ("latest")
+func DefaultName() Name {
+	return Name{
+		Host:      "registry.ollama.ai",
+		Namespace: "library",
+		Tag:       "latest",
+	}
 }

-// Name is an opaque reference to a model. It holds the parts of a model
-// with the case preserved, but is not directly comparable with other Names
-// since model names can be represented with different casing depending on
-// the use case. For instance, "Mistral" and "mistral" are the same model
-// but each version may have come from different sources (e.g. copied from a
-// Web page, or from a file path).
+type partKind int
+
+const (
+	kindHost partKind = iota
+	kindNamespace
+	kindModel
+	kindTag
+	kindDigest
+)
+
+func (k partKind) String() string {
+	switch k {
+	case kindHost:
+		return "host"
+	case kindNamespace:
+		return "namespace"
+	case kindModel:
+		return "model"
+	case kindTag:
+		return "tag"
+	case kindDigest:
+		return "digest"
+	default:
+		return "unknown"
+	}
+}
+
+// Name is a structured representation of a model name string, as defined by
+// [ParseName].
 //
-// Valid Names can ONLY be constructed by calling [ParseName].
+// It is not guaranteed to be valid. Use [Name.IsValid] to check if the name
+// is valid.
 //
-// A Name is valid if and only if is have a valid Model part. The other parts
-// are optional.
-//
-// A Name is considered "complete" if it has all parts present. To check if a
-// Name is complete, use [Name.IsComplete].
-//
-// To compare two names in a case-insensitive manner, use [Name.EqualFold].
-//
-// The parts of a Name are:
-//
-//   - Host: the domain of the model (optional)
-//   - Namespace: the namespace of the model (optional)
-//   - Model: the name of the model (required)
-//   - Tag: the tag of the model (optional)
-//   - Build: the build of the model; usually the quantization or "file type" (optional)
-//
-// The parts can be obtained in their original form by calling [Name.Parts].
-//
-// To check if a Name has at minimum a valid model part, use [Name.IsValid].
+// It is not directly comparable with other Names. Use [Name.Equal] and
+// [Name.MapHash] for determining equality and using as a map key.
 type Name struct {
-	_     structs.Incomparable
-	parts [NumParts]string // host, namespace, model, tag, build, digest
-
-	// TODO(bmizerany): track offsets and hold s (raw string) here? We
-	// could pack the offsets all into a single uint64 since the first
-	// parts take less bits since their max offset is less than the max
-	// offset of the next part. This would save a ton of bytes per Name
-	// and mean zero allocations for String.
+	Host      string
+	Namespace string
+	Model     string
+	Tag       string
+	RawDigest string
 }

-// ParseName parses s into a Name, and returns the result of filling it with
-// defaults. The input string must be a valid string
-// representation of a model name in the form:
+// ParseName parses and assembles a Name from a name string. The
+// format of a valid name string is:
 //
-//	[host/][namespace/]<model>[:tag][+build][@<digest-type>-<digest>]
+//	  s:
+//		  { host } "/" { namespace } "/" { model } ":" { tag } "@" { digest }
+//		  { host } "/" { namespace } "/" { model } ":" { tag }
+//		  { host } "/" { namespace } "/" { model } "@" { digest }
+//		  { host } "/" { namespace } "/" { model }
+//		  { namespace } "/" { model } ":" { tag } "@" { digest }
+//		  { namespace } "/" { model } ":" { tag }
+//		  { namespace } "/" { model } "@" { digest }
+//		  { namespace } "/" { model }
+//		  { model } ":" { tag } "@" { digest }
+//		  { model } ":" { tag }
+//		  { model } "@" { digest }
+//		  { model }
+//		  "@" { digest }
+//	  host:
+//	      pattern: alphanum { alphanum | "-" | "_" | "." | ":" }*
+//	      length:  [1, 350]
+//	  namespace:
+//	      pattern: alphanum { alphanum | "-" | "_" }*
+//	      length:  [2, 80]
+//	  model:
+//	      pattern: alphanum { alphanum | "-" | "_" | "." }*
+//	      length:  [2, 80]
+//	  tag:
+//	      pattern: alphanum { alphanum | "-" | "_" | "." }*
+//	      length:  [1, 80]
+//	  digest:
+//	      pattern: alphanum { alphanum | "-" | ":" }*
+//	      length:  [2, 80]
 //
-// The name part is required, all others are optional. If a part is missing,
-// it is left empty in the returned Name. If a part is invalid, the zero Ref
-// value is returned.
+// ParseName fills any missing host, namespace, or tag from [DefaultName].
+// Use [ParseNameBare] when different defaults, or none, are needed.
 //
-// The build part is normalized to uppercase.
-//
-// Examples of valid paths:
-//
-//	"example.com/library/mistral:7b+x"
-//	"example.com/eva/mistral:7b+Q4_0"
-//	"mistral:7b+x"
-//	"example.com/mike/mistral:latest+Q4_0"
-//	"example.com/bruce/mistral:latest"
-//	"example.com/pdevine/thisisfine:7b+Q4_0@sha256-1234567890abcdef"
-//
-// Examples of invalid paths:
-//
-//	"example.com/mistral:7b+"
-//	"example.com/mistral:7b+Q4_0+"
-//	"x/y/z/z:8n+I"
-//	""
-//
-// It returns the zero value if any part is invalid.
-//
-// # Fills
-//
-// For any valid s, the fill string is used to fill in missing parts of the
-// Name. The fill string must be a valid Name with the exception that any part
-// may be the string ("?"), which will not be considered for filling.
-func ParseNameFill(s, fill string) Name {
-	var r Name
-	parts(s)(func(kind PartKind, part string) bool {
-		if kind == PartDigest && !ParseDigest(part).IsValid() {
-			r = Name{}
-			return false
-		}
-		if kind == PartExtraneous || !IsValidNamePart(kind, part) {
-			r = Name{}
-			return false
-		}
-		r.parts[kind] = part
-		return true
-	})
-	if r.IsValid() || r.IsResolved() {
-		return fillName(r, fill)
-	}
-	return Name{}
-}
-
-// ParseName parses s into a Name, and returns the result of filling it
-// with FillDefault. The input string must be a valid string representation
-// of a model
+// The name returned is not guaranteed to be valid. If it is not valid, the
+// field values are left in an undefined state. Use [Name.IsValid] to check
+// if the name is valid.
 func ParseName(s string) Name {
-	return ParseNameFill(s, "")
+	return Merge(ParseNameBare(s), DefaultName())
 }

-func parseMask(s string) Name {
-	var r Name
-	parts(s)(func(kind PartKind, part string) bool {
-		if part == "?" {
-			// mask part; treat as empty but valid
-			return true
-		}
-		if !IsValidNamePart(kind, part) {
-			panic(fmt.Errorf("invalid mask part %s: %q", kind, part))
-		}
-		r.parts[kind] = part
-		return true
-	})
-	return r
-}
+// ParseNameBare parses s as a name string and returns a Name. No merge with
+// [DefaultName] is performed.
+func ParseNameBare(s string) Name {
+	var n Name
+	var promised bool

-func MustParseName(s, fill string) Name {
-	r := ParseNameFill(s, fill)
-	if !r.IsValid() {
-		panic("invalid Name: " + s)
+	s, n.RawDigest, promised = cutLast(s, "@")
+	if promised && n.RawDigest == "" {
+		n.RawDigest = MissingPart
 	}
-	return r
-}

-// fillName fills in the missing parts of dst with the parts of src.
-//
-// The returned Name will only be valid if dst is valid.
-//
-// It skipps fill parts that are "?".
-func fillName(r Name, fill string) Name {
-	fill = cmp.Or(fill, FillDefault)
-	f := parseMask(fill)
-	if fill != FillNothing && f.IsZero() {
-		panic("invalid fill")
+	s, n.Tag, _ = cutPromised(s, ":")
+	s, n.Model, promised = cutPromised(s, "/")
+	if !promised {
+		n.Model = s
+		return n
 	}
-	for i := range r.parts {
-		if f.parts[i] == "?" {
-			continue
-		}
-		r.parts[i] = cmp.Or(r.parts[i], f.parts[i])
+	s, n.Namespace, promised = cutPromised(s, "/")
+	if !promised {
+		n.Namespace = s
+		return n
 	}
-	return r
+	n.Host = s
+
+	return n
 }

-// WithBuild returns a copy of r with the build set to the given string.
-func (r Name) WithBuild(build string) Name {
-	r.parts[PartBuild] = build
-	return r
+// Merge merges the host, namespace, and tag parts of the two names,
+// preferring the non-empty parts of a.
+func Merge(a, b Name) Name {
+	a.Host = cmp.Or(a.Host, b.Host)
+	a.Namespace = cmp.Or(a.Namespace, b.Namespace)
+	a.Tag = cmp.Or(a.Tag, b.Tag)
+	return a
 }

-func (r Name) WithDigest(digest Digest) Name {
-	r.parts[PartDigest] = digest.String()
-	return r
-}
-
-var mapHashSeed = maphash.MakeSeed()
-
-// MapHash returns a case insensitive hash for use in maps and equality
-// checks. For a convenient way to compare names, use [Name.EqualFold].
-//
-//nolint:errcheck
-func (r Name) MapHash() uint64 {
-	// correctly hash the parts with case insensitive comparison
-	var h maphash.Hash
-	h.SetSeed(mapHashSeed)
-	for _, part := range r.parts {
-		// downcase the part for hashing
-		for i := range part {
-			c := part[i]
-			if c >= 'A' && c <= 'Z' {
-				c = c - 'A' + 'a'
-			}
-			h.WriteByte(c)
-		}
+// String returns the name string, in the format described by [ParseName].
+// Empty parts are omitted together with their separators. No validity
+// check is made; use [Name.IsValid] before relying on the result.
+func (n Name) String() string {
+	var b strings.Builder
+	if n.Host != "" {
+		b.WriteString(n.Host)
+		b.WriteByte('/')
 	}
-	return h.Sum64()
-}
-
-func (r Name) slice(from, to PartKind) Name {
-	var v Name
-	copy(v.parts[from:to+1], r.parts[from:to+1])
-	return v
-}
-
-// DisplayShortest returns the shortest possible, masked display string in form:
-//
-//	[host/][<namespace>/]<model>[:<tag>]
-//
-// # Masks
-//
-// The mask is a string that specifies which parts of the name to omit based
-// on case-insensitive comparison. [Name.DisplayShortest] omits parts of the name
-// that are the same as the mask, moving from left to right until the first
-// unequal part is found. It then moves right to left until the first unequal
-// part is found. The result is the shortest possible display string.
-//
-// Unlike a [Name] the mask can contain "?" characters which are treated as
-// wildcards. A "?" will never match a part of the name, since a valid name
-// can never contain a "?" character.
-//
-// For example: Given a Name ("registry.ollama.ai/library/mistral:latest") masked
-// with ("registry.ollama.ai/library/?:latest") will produce the display string
-// ("mistral").
-//
-// If mask is the empty string, then [MaskDefault] is used.
-//
-// DisplayShortest panics if the mask is not the empty string, MaskNothing, and
-// invalid.
-//
-// # Builds
-//
-// For now, DisplayShortest does consider the build or return one in the
-// result. We can lift this restriction when needed.
-func (r Name) DisplayShortest(mask string) string {
-	mask = cmp.Or(mask, MaskDefault)
-	d := parseMask(mask)
-	if mask != MaskNothing && r.IsZero() {
-		panic("invalid Name")
+	if n.Namespace != "" {
+		b.WriteString(n.Namespace)
+		b.WriteByte('/')
 	}
-	for i := range PartTag {
-		if !strings.EqualFold(r.parts[i], d.parts[i]) {
-			break
-		}
-		r.parts[i] = ""
+	b.WriteString(n.Model)
+	if n.Tag != "" {
+		b.WriteByte(':')
+		b.WriteString(n.Tag)
 	}
-	for i := PartTag; i >= 0; i-- {
-		if !strings.EqualFold(r.parts[i], d.parts[i]) {
-			break
-		}
-		r.parts[i] = ""
+	if n.RawDigest != "" {
+		b.WriteByte('@')
+		b.WriteString(n.RawDigest)
 	}
-	return r.slice(PartHost, PartTag).DisplayLong()
-}
-
-// DisplayLongest returns the result of r.DisplayShortest(MaskNothing).
-func (r Name) DisplayLongest() string {
-	return r.DisplayShortest(MaskNothing)
-}
-
-var seps = [...]string{
-	PartHost:      "/",
-	PartNamespace: "/",
-	PartModel:     ":",
-	PartTag:       "+",
-	PartBuild:     "@",
-	PartDigest:    "",
-}
-
-// WriteTo implements io.WriterTo. It writes the fullest possible display
-// string in form:
-//
-//	<host>/<namespace>/<model>:<tag>+<build>@<digest-type>-<digest>
-//
-// Missing parts and their separators are not written.
-//
-// The full digest is always prefixed with "@". That is if [Name.IsValid]
-// reports false and [Name.IsResolved] reports true, then the string is
-// returned as "@<digest-type>-<digest>".
-func (r Name) writeTo(w io.StringWriter) error {
-	var partsWritten int
-	for i := range r.parts {
-		if r.parts[i] == "" {
-			continue
-		}
-		if partsWritten > 0 || i == int(PartDigest) {
-			if _, err := w.WriteString(seps[i-1]); err != nil {
-				return err
-			}
-		}
-		if _, err := w.WriteString(r.parts[i]); err != nil {
-			return err
-		}
-		partsWritten++
-	}
-	return nil
-}
-
-var builderPool = sync.Pool{
-	New: func() interface{} {
-		return &strings.Builder{}
-	},
-}
-
-// DisplayLong returns the fullest possible display string in form:
-//
-//	<host>/<namespace>/<model>:<tag>+<build>
-//
-// If any part is missing, it is omitted from the display string.
-func (r Name) DisplayLong() string {
-	b := builderPool.Get().(*strings.Builder)
-	defer builderPool.Put(b)
-	b.Reset()
-	b.Grow(50) // arbitrarily long enough for most names
-	_ = r.writeTo(b)
 	return b.String()
 }

-// GoString implements fmt.GoStringer. It returns a string suitable for
-// debugging and logging. It is similar to [Name.DisplayLong] but it always
-// returns a string that includes all parts of the Name, with missing parts
-// replaced with a ("?").
-func (r Name) GoString() string {
-	for i := range r.parts {
-		r.parts[i] = cmp.Or(r.parts[i], "?")
-	}
-	return r.DisplayLong()
-}
-
-// LogValue implements slog.Valuer.
-func (r Name) LogValue() slog.Value {
-	return slog.StringValue(r.GoString())
-}
-
-// IsComplete reports whether the Name is fully qualified. That is it has a
-// domain, namespace, name, tag, and build.
-func (r Name) IsComplete() bool {
-	return !slices.Contains(r.parts[:PartDigest], "")
-}
-
-// IsCompleteNoBuild is like [Name.IsComplete] but it does not require the
-// build part to be present.
-func (r Name) IsCompleteNoBuild() bool {
-	return !slices.Contains(r.parts[:PartBuild], "")
-}
-
-// IsResolved reports true if the Name has a valid digest.
-//
-// It is possible to have a valid Name, or a complete Name that is not
-// resolved.
-func (r Name) IsResolved() bool {
-	return r.Digest().IsValid()
-}
-
-// Digest returns the digest part of the Name, if any.
-//
-// If Digest returns a non-empty string, then [Name.IsResolved] will return
-// true, and digest is considered valid.
-func (r Name) Digest() Digest {
-	// This was already validated by ParseName, so we can just return it.
-	return Digest{r.parts[PartDigest]}
-}
-
-// EqualFold reports whether r and o are equivalent model names, ignoring
-// case.
-func (r Name) EqualFold(o Name) bool {
-	return r.CompareFold(o) == 0
-}
-
-// CompareFold performs a case-insensitive cmp.Compare on r and o.
-//
-// This can be used with [slices.SortFunc].
-//
-// For simple equality checks, use [Name.EqualFold].
-func (r Name) CompareFold(o Name) int {
-	return slices.CompareFunc(r.parts[:], o.parts[:], compareFold)
-}
-
-func compareFold(a, b string) int {
-	return slices.CompareFunc([]rune(a), []rune(b), func(a, b rune) int {
-		return cmp.Compare(downcase(a), downcase(b))
-	})
-}
-
-func downcase(r rune) rune {
-	if r >= 'A' && r <= 'Z' {
-		return r - 'A' + 'a'
-	}
-	return r
-}
-
-func (r Name) Host() string      { return r.parts[PartHost] }
-func (r Name) Namespace() string { return r.parts[PartNamespace] }
-func (r Name) Model() string     { return r.parts[PartModel] }
-func (r Name) Build() string     { return r.parts[PartBuild] }
-func (r Name) Tag() string       { return r.parts[PartTag] }
-
-// iter_Seq2 is a iter.Seq2 defined here to avoid the current build
-// restrictions in the go1.22 iter package requiring the
-// goexperiment.rangefunc tag to be set via the GOEXPERIMENT=rangefunc flag,
-// which we are not yet ready to support.
-//
-// Once we are ready to support rangefunc, this can be removed and replaced
-// with the iter.Seq2 type.
-type iter_Seq2[A, B any] func(func(A, B) bool)
-
-// Parts returns a sequence of the parts of a Name string from most specific
-// to least specific.
-//
-// It normalizes the input string by removing "http://" and "https://" only.
-// No other normalizations are performed.
-func parts(s string) iter_Seq2[PartKind, string] {
-	return func(yield func(PartKind, string) bool) {
-		if strings.HasPrefix(s, "http://") {
-			s = strings.TrimPrefix(s, "http://")
-		} else {
-			s = strings.TrimPrefix(s, "https://")
-		}
-
-		if len(s) > MaxNamePartLen || len(s) == 0 {
-			return
-		}
-
-		numConsecutiveDots := 0
-		partLen := 0
-		state, j := PartDigest, len(s)
-		for i := len(s) - 1; i >= 0; i-- {
-			if partLen++; partLen > MaxNamePartLen {
-				// catch a part that is too long early, so
-				// we don't keep spinning on it, waiting for
-				// an isInValidPart check which would scan
-				// over it again.
-				yield(state, s[i+1:j])
-				return
-			}
-
-			switch s[i] {
-			case '@':
-				switch state {
-				case PartDigest:
-					if !yield(PartDigest, s[i+1:j]) {
-						return
-					}
-					if i == 0 {
-						// This is the form
-						// "@<digest>" which is valid.
-						//
-						// We're done.
-						return
-					}
-					state, j, partLen = PartBuild, i, 0
-				default:
-					yield(PartExtraneous, s[i+1:j])
-					return
-				}
-			case '+':
-				switch state {
-				case PartBuild, PartDigest:
-					if !yield(PartBuild, s[i+1:j]) {
-						return
-					}
-					state, j, partLen = PartTag, i, 0
-				default:
-					yield(PartExtraneous, s[i+1:j])
-					return
-				}
-			case ':':
-				switch state {
-				case PartTag, PartBuild, PartDigest:
-					if !yield(PartTag, s[i+1:j]) {
-						return
-					}
-					state, j, partLen = PartModel, i, 0
-				case PartHost:
-					// noop: support for host:port
-				default:
-					yield(PartExtraneous, s[i+1:j])
-					return
-				}
-			case '/':
-				switch state {
-				case PartModel, PartTag, PartBuild, PartDigest:
-					if !yield(PartModel, s[i+1:j]) {
-						return
-					}
-					state, j = PartNamespace, i
-				case PartNamespace:
-					if !yield(PartNamespace, s[i+1:j]) {
-						return
-					}
-					state, j, partLen = PartHost, i, 0
-				default:
-					yield(PartExtraneous, s[i+1:j])
-					return
-				}
-			default:
-				if s[i] == '.' {
-					if numConsecutiveDots++; numConsecutiveDots > 1 {
-						yield(state, "")
-						return
-					}
-				} else {
-					numConsecutiveDots = 0
-				}
-			}
-		}
-
-		if state <= PartNamespace {
-			yield(state, s[:j])
-		} else {
-			yield(PartModel, s[:j])
-		}
-	}
-}
-
-func (r Name) IsZero() bool {
-	return r.parts == [NumParts]string{}
-}
-
-// IsValid reports if a model has at minimum a valid model part.
-func (r Name) IsValid() bool {
-	// Parts ensures we only have valid parts, so no need to validate
-	// them here, only check if we have a name or not.
-	return r.parts[PartModel] != ""
-}
-
-// ParseNameFromURLPath parses forms of a URL path into a Name. Specifically,
-// it trims any leading "/" and then calls [ParseName] with fill.
-func ParseNameFromURLPath(s, fill string) Name {
-	s = strings.TrimPrefix(s, "/")
-	return ParseNameFill(s, fill)
-}
-
-func ParseNameFromURLPathFill(s, fill string) Name {
-	return ParseNameFill(s, fill)
-}
-
-// URLPath returns a complete, canonicalized, relative URL path using the parts of a
-// complete Name.
-//
-// The parts maintain their original case.
-//
-// Example:
-//
-//	ParseName("example.com/namespace/model:tag+build").URLPath() // returns "/example.com/namespace/model:tag"
-func (r Name) DisplayURLPath() string {
-	return r.DisplayShortest(MaskNothing)
-}
-
-// URLPath returns a complete, canonicalized, relative URL path using the parts of a
-// complete Name in the form:
-//
-//	<host>/<namespace>/<model>/<tag>
-//
-// The parts are downcased.
-func (r Name) URLPath() string {
-	return strings.ToLower(path.Join(r.parts[:PartBuild]...))
-}
-
-// ParseNameFromFilepath parses a file path into a Name. The input string must be a
-// valid file path representation of a model name in the form:
-//
-//	host/namespace/model/tag/build
-//
-// The zero valid is returned if s does not contain all path elements
-// leading up to the model part, or if any path element is an invalid part
-// for the its corresponding part kind.
-//
-// The fill string is used to fill in missing parts of any constructed Name.
-// See [ParseName] for more information on the fill string.
-func ParseNameFromFilepath(s, fill string) Name {
-	var r Name
-	for i := range PartBuild + 1 {
-		part, rest, _ := strings.Cut(s, string(filepath.Separator))
-		if !IsValidNamePart(i, part) {
-			return Name{}
-		}
-		r.parts[i] = part
-		s = rest
-		if s == "" {
-			break
-		}
-	}
-	if s != "" {
-		return Name{}
-	}
-	if !r.IsValid() {
-		return Name{}
-	}
-	return fillName(r, fill)
-}
-
-// Filepath returns a complete, canonicalized, relative file path using the
-// parts of a complete Name.
-//
-// Each parts is downcased, except for the build part which is upcased.
-//
-// Example:
-//
-//	ParseName("example.com/namespace/model:tag+build").Filepath() // returns "example.com/namespace/model/tag/BUILD"
-func (r Name) Filepath() string {
-	for i := range r.parts {
-		if PartKind(i) == PartBuild {
-			r.parts[i] = strings.ToUpper(r.parts[i])
-		} else {
-			r.parts[i] = strings.ToLower(r.parts[i])
-		}
-	}
-	return filepath.Join(r.parts[:]...)
-}
-
-// FilepathNoBuild returns a complete, canonicalized, relative file path using
-// the parts of a complete Name, but without the build part.
-func (r Name) FilepathNoBuild() string {
-	for i := range PartBuild {
-		r.parts[i] = strings.ToLower(r.parts[i])
-	}
-	return filepath.Join(r.parts[:PartBuild]...)
-}
-
-// IsValidNamePart reports if s contains all valid characters for the given
-// part kind and is under MaxNamePartLen bytes.
-func IsValidNamePart(kind PartKind, s string) bool {
-	if len(s) > MaxNamePartLen {
+// IsValid reports whether all parts of the name are present and valid. The
+// digest is a special case, and is checked for validity only if present.
+func (n Name) IsValid() bool {
+	if n.RawDigest != "" && !isValidPart(kindDigest, n.RawDigest) {
 		return false
 	}
-	if s == "" {
-		return false
+	return n.IsFullyQualified()
+}
+
+// IsFullyQualified returns true if all parts of the name are present and
+// valid without the digest.
+func (n Name) IsFullyQualified() bool {
+	var parts = []string{
+		n.Host,
+		n.Namespace,
+		n.Model,
+		n.Tag,
 	}
-	var consecutiveDots int
-	for _, c := range []byte(s) {
-		if c == '.' {
-			if consecutiveDots++; consecutiveDots >= 2 {
-				return false
-			}
-		} else {
-			consecutiveDots = 0
-		}
-		if !isValidByteFor(kind, c) {
+	for i, part := range parts {
+		if !isValidPart(partKind(i), part) {
 			return false
 		}
 	}
 	return true
 }

-func isValidByteFor(kind PartKind, c byte) bool {
-	if kind == PartNamespace && c == '.' {
+// Filepath returns a canonical filepath that represents the name with each part from
+// host to tag as a directory in the form:
+//
+//	{host}/{namespace}/{model}/{tag}
+//
+// It uses the system's filepath separator and ensures the path is clean.
+//
+// It panics if the name is not fully qualified. Use [Name.IsFullyQualified]
+// to check if the name is fully qualified.
+func (n Name) Filepath() string {
+	if !n.IsFullyQualified() {
+		panic("illegal attempt to get filepath of invalid name")
+	}
+	return filepath.Join(
+		strings.ToLower(n.Host),
+		strings.ToLower(n.Namespace),
+		strings.ToLower(n.Model),
+		strings.ToLower(n.Tag),
+	)
+}
+
+// LogValue returns a slog.Value that represents the name as a string.
+func (n Name) LogValue() slog.Value {
+	return slog.StringValue(n.String())
+}
+
+func isValidLen(kind partKind, s string) bool {
+	switch kind {
+	case kindHost:
+		return len(s) >= 1 && len(s) <= 350
+	case kindTag:
+		return len(s) >= 1 && len(s) <= 80
+	default:
+		return len(s) >= 2 && len(s) <= 80
+	}
+}
+
+func isValidPart(kind partKind, s string) bool {
+	if !isValidLen(kind, s) {
 		return false
 	}
-	if kind == PartHost && c == ':' {
-		return true
+	for i := range s {
+		if i == 0 {
+			if !isAlphanumeric(s[i]) {
+				return false
+			}
+			continue
+		}
+		switch s[i] {
+		case '_', '-':
+		case '.':
+			if kind == kindNamespace {
+				return false
+			}
+		case ':':
+			if kind != kindHost && kind != kindDigest {
+				return false
+			}
+		default:
+			if !isAlphanumeric(s[i]) {
+				return false
+			}
+		}
 	}
-	if c == '.' || c == '-' {
-		return true
-	}
-	if c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '_' {
-		return true
-	}
-	return false
+	return true
+}
+
+func isAlphanumeric(c byte) bool {
+	return c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c >= '0' && c <= '9'
+}
+
+func cutLast(s, sep string) (before, after string, ok bool) {
+	i := strings.LastIndex(s, sep)
+	if i >= 0 {
+		return s[:i], s[i+len(sep):], true
+	}
+	return s, "", false
+}
+
+// cutPromised cuts the last part of s at the last occurrence of sep. If sep is
+// found, the parts before and after sep are returned as-is unless empty, in
+// which case they are returned as MissingPart, which will cause
+// [Name.IsValid] to return false.
+func cutPromised(s, sep string) (before, after string, ok bool) {
+	before, after, ok = cutLast(s, sep)
+	if !ok {
+		return before, after, false
+	}
+	return cmp.Or(before, MissingPart), cmp.Or(after, MissingPart), true
 }
--- a/types/model/name_test.go
+++ b/types/model/name_test.go
@@ -1,715 +1,237 @@
 package model

 import (
-	"bytes"
-	"cmp"
-	"fmt"
-	"log/slog"
-	"path/filepath"
-	"slices"
-	"strings"
+	"reflect"
 	"testing"
 )

-type fields struct {
-	host, namespace, model, tag, build string
-	digest                             string
-}
+const (
+	part80  = "88888888888888888888888888888888888888888888888888888888888888888888888888888888"
+	part350 = "33333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333"
+)

-func fieldsFromName(p Name) fields {
-	return fields{
-		host:      p.parts[PartHost],
-		namespace: p.parts[PartNamespace],
-		model:     p.parts[PartModel],
-		tag:       p.parts[PartTag],
-		build:     p.parts[PartBuild],
-		digest:    p.parts[PartDigest],
-	}
-}
-
-var testNames = map[string]fields{
-	"mistral:latest":                 {model: "mistral", tag: "latest"},
-	"mistral":                        {model: "mistral"},
-	"mistral:30B":                    {model: "mistral", tag: "30B"},
-	"mistral:7b":                     {model: "mistral", tag: "7b"},
-	"mistral:7b+Q4_0":                {model: "mistral", tag: "7b", build: "Q4_0"},
-	"mistral+KQED":                   {model: "mistral", build: "KQED"},
-	"mistral.x-3:7b+Q4_0":            {model: "mistral.x-3", tag: "7b", build: "Q4_0"},
-	"mistral:7b+q4_0":                {model: "mistral", tag: "7b", build: "q4_0"},
-	"llama2":                         {model: "llama2"},
-	"user/model":                     {namespace: "user", model: "model"},
-	"example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"},
-	"example.com/ns/mistral:7b+X":    {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"},
-	"localhost:5000/ns/mistral":      {host: "localhost:5000", namespace: "ns", model: "mistral"},
-
-	// invalid digest
-	"mistral:latest@invalid256-": {},
-	"mistral:latest@-123":        {},
-	"mistral:latest@!-123":       {},
-	"mistral:latest@1-!":         {},
-	"mistral:latest@":            {},
-
-	// resolved
-	"x@sha123-12": {model: "x", digest: "sha123-12"},
-	"@sha456-22":  {digest: "sha456-22"},
-	"@sha456-1":  {},
-	"@@sha123-22": {},
-
-	// preserves case for build
-	"x+b": {model: "x", build: "b"},
-
-	// invalid (includes fuzzing trophies)
-	" / / : + ": {},
-	" / : + ":   {},
-	" : + ":     {},
-	" + ":       {},
-	" : ":       {},
-	" / ":       {},
-	" /":        {},
-	"/ ":        {},
-	"/":         {},
-	":":         {},
-	"+":         {},
-
-	// (".") in namepsace is not allowed
-	"invalid.com/7b+x": {},
-
-	"invalid:7b+Q4_0:latest": {},
-	"in valid":               {},
-	"invalid/y/z/foo":        {},
-	"/0":                     {},
-	"0 /0":                   {},
-	"0 /":                    {},
-	"0/":                     {},
-	":/0":                    {},
-	"+0/00000":               {},
-	"0+.\xf2\x80\xf6\x9d00000\xe5\x99\xe6\xd900\xd90\xa60\x91\xdc0\xff\xbf\x99\xe800\xb9\xdc\xd6\xc300\x970\xfb\xfd0\xe0\x8a\xe1\xad\xd40\x9700\xa80\x980\xdd0000\xb00\x91000\xfe0\x89\x9b\x90\x93\x9f0\xe60\xf7\x84\xb0\x87\xa5\xff0\xa000\x9a\x85\xf6\x85\xfe\xa9\xf9\xe9\xde00\xf4\xe0\x8f\x81\xad\xde00\xd700\xaa\xe000000\xb1\xee0\x91": {},
-	"0//0":                        {},
-	"m+^^^":                       {},
-	"file:///etc/passwd":          {},
-	"file:///etc/passwd:latest":   {},
-	"file:///etc/passwd:latest+u": {},
-
-	":x": {},
-	"+x": {},
-	"x+": {},
-
-	// Disallow ("\.+") in any part to prevent path traversal anywhere
-	// we convert the name to a path.
-	"../etc/passwd":  {},
-	".../etc/passwd": {},
-	"./../passwd":    {},
-	"./0+..":         {},
-
-	strings.Repeat("a", MaxNamePartLen):   {model: strings.Repeat("a", MaxNamePartLen)},
-	strings.Repeat("a", MaxNamePartLen+1): {},
-}
-
-func TestIsValidNameLen(t *testing.T) {
-	if IsValidNamePart(PartNamespace, strings.Repeat("a", MaxNamePartLen+1)) {
-		t.Errorf("unexpectedly valid long name")
-	}
-}
-
-// TestConsecutiveDots tests that consecutive dots are not allowed in any
-// part, to avoid path traversal. There also are some tests in testNames, but
-// this test is more exhaustive and exists to emphasize the importance of
-// preventing path traversal.
-func TestNameConsecutiveDots(t *testing.T) {
-	for i := 1; i < 10; i++ {
-		s := strings.Repeat(".", i)
-		if i > 1 {
-			if g := ParseNameFill(s, FillNothing).DisplayLong(); g != "" {
-				t.Errorf("ParseName(%q) = %q; want empty string", s, g)
-			}
-		} else {
-			if g := ParseNameFill(s, FillNothing).DisplayLong(); g != s {
-				t.Errorf("ParseName(%q) = %q; want %q", s, g, s)
-			}
-		}
-	}
-}
-
-func TestNameParts(t *testing.T) {
-	var p Name
-	if w, g := int(NumParts), len(p.parts); w != g {
-		t.Errorf("Parts() = %d; want %d", g, w)
-	}
-}
-
-func TestNamePartString(t *testing.T) {
-	if g := PartKind(-2).String(); g != "Unknown" {
-		t.Errorf("Unknown part = %q; want %q", g, "Unknown")
-	}
-	for kind, name := range kindNames {
-		if g := kind.String(); g != name {
-			t.Errorf("%s = %q; want %q", kind, g, name)
-		}
-	}
-}
-
-func TestParseName(t *testing.T) {
-	for baseName, want := range testNames {
-		for _, prefix := range []string{"", "https://", "http://"} {
-			// We should get the same results with or without the
-			// http(s) prefixes
-			s := prefix + baseName
-
-			t.Run(s, func(t *testing.T) {
-				name := ParseNameFill(s, FillNothing)
-				got := fieldsFromName(name)
-				if got != want {
-					t.Errorf("ParseName(%q) = %q; want %q", s, got, want)
-				}
-
-				// test round-trip
-				if !ParseNameFill(name.DisplayLong(), FillNothing).EqualFold(name) {
-					t.Errorf("ParseName(%q).String() = %s; want %s", s, name.DisplayLong(), baseName)
-				}
-			})
-		}
-	}
-}
-
-func TestParseNameFill(t *testing.T) {
-	cases := []struct {
-		in   string
-		fill string
-		want string
-	}{
-		{"mistral", "example.com/library/?:latest+Q4_0", "example.com/library/mistral:latest+Q4_0"},
-		{"mistral", "example.com/library/?:latest", "example.com/library/mistral:latest"},
-		{"llama2:x", "example.com/library/?:latest+Q4_0", "example.com/library/llama2:x+Q4_0"},
-
-		// Invalid
-		{"", "example.com/library/?:latest+Q4_0", ""},
-		{"llama2:?", "example.com/library/?:latest+Q4_0", ""},
-	}
-
-	for _, tt := range cases {
-		t.Run(tt.in, func(t *testing.T) {
-			name := ParseNameFill(tt.in, tt.fill)
-			if g := name.DisplayLong(); g != tt.want {
-				t.Errorf("ParseName(%q, %q) = %q; want %q", tt.in, tt.fill, g, tt.want)
-			}
-		})
-	}
-
-	t.Run("invalid fill", func(t *testing.T) {
-		defer func() {
-			if recover() == nil {
-				t.Fatal("expected panic")
-			}
-		}()
-		ParseNameFill("x", "^")
-	})
-}
-
-func TestParseNameHTTPDoublePrefixStrip(t *testing.T) {
-	cases := []string{
-		"http://https://valid.com/valid/valid:latest",
-		"https://http://valid.com/valid/valid:latest",
-	}
-	for _, s := range cases {
-		t.Run(s, func(t *testing.T) {
-			name := ParseNameFill(s, FillNothing)
-			if name.IsValid() {
-				t.Errorf("expected invalid path; got %#v", name)
-			}
-		})
-	}
-
-}
-
-func TestCompleteWithAndWithoutBuild(t *testing.T) {
+func TestParseNameParts(t *testing.T) {
 	cases := []struct {
 		in              string
-		complete        bool
-		completeNoBuild bool
+		want            Name
+		wantValidDigest bool
 	}{
-		{"", false, false},
-		{"incomplete/mistral:7b+x", false, false},
-		{"incomplete/mistral:7b+Q4_0", false, false},
-		{"incomplete:7b+x", false, false},
-		{"complete.com/x/mistral:latest+Q4_0", true, true},
-		{"complete.com/x/mistral:latest", false, true},
+		{
+			in: "host/namespace/model:tag",
+			want: Name{
+				Host:      "host",
+				Namespace: "namespace",
+				Model:     "model",
+				Tag:       "tag",
+			},
+		},
+		{
+			in: "host/namespace/model",
+			want: Name{
+				Host:      "host",
+				Namespace: "namespace",
+				Model:     "model",
+			},
+		},
+		{
+			in: "namespace/model",
+			want: Name{
+				Namespace: "namespace",
+				Model:     "model",
+			},
+		},
+		{
+			in: "model",
+			want: Name{
+				Model: "model",
+			},
+		},
+		{
+			in: "h/nn/mm:t",
+			want: Name{
+				Host:      "h",
+				Namespace: "nn",
+				Model:     "mm",
+				Tag:       "t",
+			},
+		},
+		{
+			in: part80 + "/" + part80 + "/" + part80 + ":" + part80,
+			want: Name{
+				Host:      part80,
+				Namespace: part80,
+				Model:     part80,
+				Tag:       part80,
+			},
+		},
+		{
+			in: part350 + "/" + part80 + "/" + part80 + ":" + part80,
+			want: Name{
+				Host:      part350,
+				Namespace: part80,
+				Model:     part80,
+				Tag:       part80,
+			},
+		},
+		{
+			in: "@digest",
+			want: Name{
+				RawDigest: "digest",
+			},
+			wantValidDigest: false,
+		},
+		{
+			in: "model@sha256:123",
+			want: Name{
+				Model:     "model",
+				RawDigest: "sha256:123",
+			},
+			wantValidDigest: true,
+		},
 	}

 	for _, tt := range cases {
 		t.Run(tt.in, func(t *testing.T) {
-			p := ParseNameFill(tt.in, FillNothing)
-			t.Logf("ParseName(%q) = %#v", tt.in, p)
-			if g := p.IsComplete(); g != tt.complete {
-				t.Errorf("Complete(%q) = %v; want %v", tt.in, g, tt.complete)
-			}
-			if g := p.IsCompleteNoBuild(); g != tt.completeNoBuild {
-				t.Errorf("CompleteNoBuild(%q) = %v; want %v", tt.in, g, tt.completeNoBuild)
-			}
-		})
-	}
-
-	// Complete uses Parts which returns a slice, but it should be
-	// inlined when used in Complete, preventing any allocations or
-	// escaping to the heap.
-	allocs := testing.AllocsPerRun(1000, func() {
-		keep(ParseNameFill("complete.com/x/mistral:latest+Q4_0", FillNothing).IsComplete())
-	})
-	if allocs > 0 {
-		t.Errorf("Complete allocs = %v; want 0", allocs)
-	}
-}
-
-func TestNameLogValue(t *testing.T) {
-	cases := []string{
-		"example.com/library/mistral:latest+Q4_0",
-		"mistral:latest",
-		"mistral:7b+Q4_0",
-	}
-	for _, s := range cases {
-		t.Run(s, func(t *testing.T) {
-			var b bytes.Buffer
-			log := slog.New(slog.NewTextHandler(&b, nil))
-			name := ParseNameFill(s, FillNothing)
-			log.Info("", "name", name)
-			want := fmt.Sprintf("name=%s", name.GoString())
-			got := b.String()
-			if !strings.Contains(got, want) {
-				t.Errorf("expected log output to contain %q; got %q", want, got)
+			got := ParseNameBare(tt.in)
+			if !reflect.DeepEqual(got, tt.want) {
+				t.Errorf("parseName(%q) = %v; want %v", tt.in, got, tt.want)
 			}
 		})
 	}
 }

-func TestNameGoString(t *testing.T) {
+var testCases = map[string]bool{ // name -> valid
+	"host/namespace/model:tag": true,
+	"host/namespace/model":     false,
+	"namespace/model":          false,
+	"model":                    false,
+	"@sha256-1000000000000000000000000000000000000000000000000000000000000000":      false,
+	"model@sha256-1000000000000000000000000000000000000000000000000000000000000000": false,
+	"model@sha256:1000000000000000000000000000000000000000000000000000000000000000": false,
+
+	// long (but valid)
+	part80 + "/" + part80 + "/" + part80 + ":" + part80:  true,
+	part350 + "/" + part80 + "/" + part80 + ":" + part80: true,
+
+	"h/nn/mm:t@sha256-1000000000000000000000000000000000000000000000000000000000000000": true, // bare minimum part sizes
+	"h/nn/mm:t@sha256:1000000000000000000000000000000000000000000000000000000000000000": true, // bare minimum part sizes
+
+	"m":        false, // model too short
+	"n/mm:":    false, // namespace too short
+	"h/n/mm:t": false, // namespace too short
+	"@t":       false, // digest too short
+	"mm@d":     false, // digest too short
+
+	// invalids
+	"^":      false,
+	"mm:":    false,
+	"/nn/mm": false,
+	"//":     false,
+	"//mm":   false,
+	"hh//":   false,
+	"//mm:@": false,
+	"00@":    false,
+	"@":      false,
+
+	// not starting with alphanum
+	"-hh/nn/mm:tt@dd": false,
+	"hh/-nn/mm:tt@dd": false,
+	"hh/nn/-mm:tt@dd": false,
+	"hh/nn/mm:-tt@dd": false,
+	"hh/nn/mm:tt@-dd": false,
+
+	"": false,
+
+	// hosts
+	"host:https/namespace/model:tag": true,
+
+	// colon in non-host part before tag
+	"host/name:space/model:tag": false,
+}
+
+func TestNameparseNameDefault(t *testing.T) { // expects ParseName("xx").String() to be the fully qualified form in want
+	const name = "xx"
+	n := ParseName(name)
+	got := n.String()
+	want := "registry.ollama.ai/library/xx:latest"
+	if got != want {
+		t.Errorf("parseName(%q).String() = %q; want %q", name, got, want)
+	}
+}
+
+func TestNameIsValid(t *testing.T) { // checks IsValid for every testCases entry and round-trips the valid ones through String
+	var numStringTests int // number of round-trip checks performed; must end up non-zero
+	for s, want := range testCases {
+		n := ParseNameBare(s)
+		t.Logf("n: %#v", n)
+		got := n.IsValid()
+		if got != want {
+			t.Errorf("parseName(%q).IsValid() = %v; want %v", s, got, want)
+		}
+
+		// Round-trip: for inputs reported valid, String must reproduce the input exactly.
+		if got {
+			got := ParseNameBare(s).String() // shadows the outer got with the string form
+			if got != s {
+				t.Errorf("parseName(%q).String() = %q; want %q", s, got, s)
+			}
+			numStringTests++
+		}
+	}
+
+	if numStringTests == 0 { // guards against a table with no valid entries silently skipping the String check
+		t.Errorf("no tests for Name.String")
+	}
+}
+
+func TestNameIsValidPart(t *testing.T) {
 	cases := []struct {
-		name         string
-		in           string
-		wantString   string
-		wantGoString string // default is tt.in
+		kind partKind
+		s    string
+		want bool
 	}{
-		{
-			name:         "Complete Name",
-			in:           "example.com/library/mistral:latest+Q4_0",
-			wantGoString: "example.com/library/mistral:latest+Q4_0@?",
-		},
-		{
-			name:         "Short Name",
-			in:           "mistral:latest",
-			wantGoString: "?/?/mistral:latest+?@?",
-		},
-		{
-			name:         "Long Name",
-			in:           "library/mistral:latest",
-			wantGoString: "?/library/mistral:latest+?@?",
-		},
-		{
-			name:         "Case Preserved",
-			in:           "Library/Mistral:Latest",
-			wantGoString: "?/Library/Mistral:Latest+?@?",
-		},
-		{
-			name:         "With digest",
-			in:           "Library/Mistral:Latest@sha256-123456",
-			wantGoString: "?/Library/Mistral:Latest+?@sha256-123456",
-		},
+		{kind: kindHost, s: "", want: false},
+		{kind: kindHost, s: "a", want: true},
+		{kind: kindHost, s: "a.", want: true},
+		{kind: kindHost, s: "a.b", want: true},
+		{kind: kindHost, s: "a:123", want: true},
+		{kind: kindHost, s: "a:123/aa/bb", want: false},
+		{kind: kindNamespace, s: "bb", want: true},
+		{kind: kindNamespace, s: "a.", want: false},
+		{kind: kindModel, s: "-h", want: false},
+		{kind: kindDigest, s: "sha256-1000000000000000000000000000000000000000000000000000000000000000", want: true},
 	}
-
 	for _, tt := range cases {
-		t.Run(tt.name, func(t *testing.T) {
-			p := ParseNameFill(tt.in, FillNothing)
-			tt.wantGoString = cmp.Or(tt.wantGoString, tt.in)
-			if g := fmt.Sprintf("%#v", p); g != tt.wantGoString {
-				t.Errorf("GoString() = %q; want %q", g, tt.wantGoString)
+		t.Run(tt.s, func(t *testing.T) {
+			got := isValidPart(tt.kind, tt.s)
+			if got != tt.want {
+				t.Errorf("isValidPart(%s, %q) = %v; want %v", tt.kind, tt.s, got, tt.want)
 			}
 		})
 	}
+
 }

-func TestDisplayLongest(t *testing.T) {
-	g := ParseNameFill("example.com/library/mistral:latest+Q4_0", FillNothing).DisplayLongest()
-	if g != "example.com/library/mistral:latest" {
-		t.Errorf("got = %q; want %q", g, "example.com/library/mistral:latest")
+func FuzzName(f *testing.F) {
+	for s := range testCases {
+		f.Add(s)
 	}
-}
-
-func TestDisplayShortest(t *testing.T) {
-	cases := []struct {
-		in        string
-		mask      string
-		want      string
-		wantPanic bool
-	}{
-		{"example.com/library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
-		{"example.com/library/mistral:latest+Q4_0", "example.com/_/_:latest", "library/mistral", false},
-		{"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false},
-		{"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false},
-
-		// case-insensitive
-		{"Example.com/library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
-		{"example.com/Library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
-		{"example.com/library/Mistral:latest+Q4_0", "example.com/library/_:latest", "Mistral", false},
-		{"example.com/library/mistral:Latest+Q4_0", "example.com/library/_:latest", "mistral", false},
-		{"example.com/library/mistral:Latest+q4_0", "example.com/library/_:latest", "mistral", false},
-
-		// zero value
-		{"", MaskDefault, "", true},
-
-		// invalid mask
-		{"example.com/library/mistral:latest+Q4_0", "example.com/mistral", "", true},
-
-		// DefaultMask
-		{"registry.ollama.ai/library/mistral:latest+Q4_0", MaskDefault, "mistral", false},
-
-		// Auto-Fill
-		{"x", "example.com/library/_:latest", "x", false},
-		{"x", "example.com/library/_:latest+Q4_0", "x", false},
-		{"x/y:z", "a.com/library/_:latest+Q4_0", "x/y:z", false},
-		{"x/y:z", "a.com/library/_:latest+Q4_0", "x/y:z", false},
-	}
-
-	for _, tt := range cases {
-		t.Run("", func(t *testing.T) {
-			defer func() {
-				if tt.wantPanic {
-					if recover() == nil {
-						t.Errorf("expected panic")
-					}
+	f.Fuzz(func(t *testing.T, s string) {
+		n := ParseNameBare(s)
+		if n.IsValid() {
+			parts := [...]string{n.Host, n.Namespace, n.Model, n.Tag, n.RawDigest}
+			for _, part := range parts {
+				if part == ".." {
+					t.Errorf("unexpected .. as valid part")
+				}
+				if len(part) > 350 {
+					t.Errorf("part too long: %q", part)
 				}
-			}()
-
-			p := ParseNameFill(tt.in, FillNothing)
-			t.Logf("ParseName(%q) = %#v", tt.in, p)
-			if g := p.DisplayShortest(tt.mask); g != tt.want {
-				t.Errorf("got = %q; want %q", g, tt.want)
 			}
-		})
-	}
-}
-
-func TestParseNameAllocs(t *testing.T) {
-	allocs := testing.AllocsPerRun(1000, func() {
-		keep(ParseNameFill("example.com/mistral:7b+Q4_0", FillNothing))
-	})
-	if allocs > 0 {
-		t.Errorf("ParseName allocs = %v; want 0", allocs)
-	}
-}
-
-func BenchmarkParseName(b *testing.B) {
-	b.ReportAllocs()
-
-	for range b.N {
-		keep(ParseNameFill("example.com/mistral:7b+Q4_0", FillNothing))
-	}
-}
-
-func FuzzParseNameFromFilepath(f *testing.F) {
-	f.Add("example.com/library/mistral/7b/Q4_0")
-	f.Add("example.com/../mistral/7b/Q4_0")
-	f.Add("example.com/x/../7b/Q4_0")
-	f.Add("example.com/x/../7b")
-	f.Fuzz(func(t *testing.T, s string) {
-		name := ParseNameFromFilepath(s, FillNothing)
-		if strings.Contains(s, "..") && !name.IsZero() {
-			t.Fatalf("non-zero value for path with '..': %q", s)
-		}
-		if name.IsValid() == name.IsZero() {
-			t.Errorf("expected valid path to be non-zero value; got %#v", name)
+			if n.String() != s {
+				t.Errorf("String() = %q; want %q", n.String(), s)
+			}
 		}
+
 	})
 }
-
-func FuzzParseName(f *testing.F) {
-	f.Add("example.com/mistral:7b+Q4_0")
-	f.Add("example.com/mistral:7b+q4_0")
-	f.Add("example.com/mistral:7b+x")
-	f.Add("x/y/z:8n+I")
-	f.Add(":x")
-	f.Add("@sha256-123456")
-	f.Add("example.com/mistral:latest+Q4_0@sha256-123456")
-	f.Add(":@!@")
-	f.Add("...")
-	f.Fuzz(func(t *testing.T, s string) {
-		r0 := ParseNameFill(s, FillNothing)
-
-		if strings.Contains(s, "..") && !r0.IsZero() {
-			t.Fatalf("non-zero value for path with '..': %q", s)
-		}
-
-		if !r0.IsValid() && !r0.IsResolved() {
-			if !r0.EqualFold(Name{}) {
-				t.Errorf("expected invalid path to be zero value; got %#v", r0)
-			}
-			t.Skipf("invalid path: %q", s)
-		}
-
-		for _, p := range r0.parts {
-			if len(p) > MaxNamePartLen {
-				t.Errorf("part too long: %q", p)
-			}
-		}
-
-		if !strings.EqualFold(r0.DisplayLong(), s) {
-			t.Errorf("String() did not round-trip with case insensitivity: %q\ngot  = %q\nwant = %q", s, r0.DisplayLong(), s)
-		}
-
-		r1 := ParseNameFill(r0.DisplayLong(), FillNothing)
-		if !r0.EqualFold(r1) {
-			t.Errorf("round-trip mismatch: %+v != %+v", r0, r1)
-		}
-	})
-}
-
-func TestNameStringAllocs(t *testing.T) {
-	name := ParseNameFill("example.com/ns/mistral:latest+Q4_0", FillNothing)
-	allocs := testing.AllocsPerRun(1000, func() {
-		keep(name.DisplayLong())
-	})
-	if allocs > 1 {
-		t.Errorf("String allocs = %v; want 0", allocs)
-	}
-}
-
-func TestNamePath(t *testing.T) {
-	cases := []struct {
-		in   string
-		want string
-	}{
-		{"example.com/library/mistral:latest+Q4_0", "example.com/library/mistral:latest"},
-
-		// incomplete
-		{"example.com/library/mistral:latest", "example.com/library/mistral:latest"},
-		{"", ""},
-	}
-	for _, tt := range cases {
-		t.Run(tt.in, func(t *testing.T) {
-			p := ParseNameFill(tt.in, FillNothing)
-			t.Logf("ParseName(%q) = %#v", tt.in, p)
-			if g := p.DisplayURLPath(); g != tt.want {
-				t.Errorf("got = %q; want %q", g, tt.want)
-			}
-		})
-	}
-}
-
-func TestNameFilepath(t *testing.T) {
-	cases := []struct {
-		in          string
-		want        string
-		wantNoBuild string
-	}{
-		{
-			in:          "example.com/library/mistral:latest+Q4_0",
-			want:        "example.com/library/mistral/latest/Q4_0",
-			wantNoBuild: "example.com/library/mistral/latest",
-		},
-		{
-			in:          "Example.Com/Library/Mistral:Latest+Q4_0",
-			want:        "example.com/library/mistral/latest/Q4_0",
-			wantNoBuild: "example.com/library/mistral/latest",
-		},
-		{
-			in:          "Example.Com/Library/Mistral:Latest+Q4_0",
-			want:        "example.com/library/mistral/latest/Q4_0",
-			wantNoBuild: "example.com/library/mistral/latest",
-		},
-		{
-			in:          "example.com/library/mistral:latest",
-			want:        "example.com/library/mistral/latest",
-			wantNoBuild: "example.com/library/mistral/latest",
-		},
-		{
-			in:          "",
-			want:        "",
-			wantNoBuild: "",
-		},
-	}
-	for _, tt := range cases {
-		t.Run(tt.in, func(t *testing.T) {
-			p := ParseNameFill(tt.in, FillNothing)
-			t.Logf("ParseName(%q) = %#v", tt.in, p)
-			g := p.Filepath()
-			g = filepath.ToSlash(g)
-			if g != tt.want {
-				t.Errorf("got = %q; want %q", g, tt.want)
-			}
-			g = p.FilepathNoBuild()
-			g = filepath.ToSlash(g)
-			if g != tt.wantNoBuild {
-				t.Errorf("got = %q; want %q", g, tt.wantNoBuild)
-			}
-		})
-	}
-}
-
-func TestParseNameFilepath(t *testing.T) {
-	cases := []struct {
-		in   string
-		fill string // default is FillNothing
-		want string
-	}{
-		{
-			in:   "example.com/library/mistral/latest/Q4_0",
-			want: "example.com/library/mistral:latest+Q4_0",
-		},
-		{
-			in:   "example.com/library/mistral/latest",
-			fill: "?/?/?:latest+Q4_0",
-			want: "example.com/library/mistral:latest+Q4_0",
-		},
-		{
-			in:   "example.com/library/mistral",
-			fill: "?/?/?:latest+Q4_0",
-			want: "example.com/library/mistral:latest+Q4_0",
-		},
-		{
-			in:   "example.com/library",
-			want: "",
-		},
-		{
-			in:   "example.com/",
-			want: "",
-		},
-		{
-			in:   "example.com/^/mistral/latest/Q4_0",
-			want: "",
-		},
-		{
-			in:   "example.com/library/mistral/../Q4_0",
-			want: "",
-		},
-		{
-			in:   "example.com/library/mistral/latest/Q4_0/extra",
-			want: "",
-		},
-	}
-	for _, tt := range cases {
-		t.Run(tt.in, func(t *testing.T) {
-			in := strings.ReplaceAll(tt.in, "/", string(filepath.Separator))
-			fill := cmp.Or(tt.fill, FillNothing)
-			want := ParseNameFill(tt.want, fill)
-			if g := ParseNameFromFilepath(in, fill); !g.EqualFold(want) {
-				t.Errorf("got = %q; want %q", g.DisplayLong(), tt.want)
-			}
-		})
-	}
-}
-
-func TestParseNameFromPath(t *testing.T) {
-	cases := []struct {
-		in   string
-		want string
-		fill string // default is FillNothing
-	}{
-		{
-			in:   "example.com/library/mistral:latest+Q4_0",
-			want: "example.com/library/mistral:latest+Q4_0",
-		},
-		{
-			in:   "/example.com/library/mistral:latest+Q4_0",
-			want: "example.com/library/mistral:latest+Q4_0",
-		},
-		{
-			in:   "/example.com/library/mistral",
-			want: "example.com/library/mistral",
-		},
-		{
-			in:   "/example.com/library/mistral",
-			fill: "?/?/?:latest+Q4_0",
-			want: "example.com/library/mistral:latest+Q4_0",
-		},
-		{
-			in:   "/example.com/library",
-			want: "",
-		},
-		{
-			in:   "/example.com/",
-			want: "",
-		},
-		{
-			in:   "/example.com/^/mistral/latest",
-			want: "",
-		},
-	}
-	for _, tt := range cases {
-		t.Run(tt.in, func(t *testing.T) {
-			fill := cmp.Or(tt.fill, FillNothing)
-			if g := ParseNameFromURLPath(tt.in, fill); g.DisplayLong() != tt.want {
-				t.Errorf("got = %q; want %q", g.DisplayLong(), tt.want)
-			}
-		})
-	}
-}
-
-func ExampleName_MapHash() {
-	m := map[uint64]bool{}
-
-	// key 1
-	m[ParseNameFill("mistral:latest+q4", FillNothing).MapHash()] = true
-	m[ParseNameFill("miSTRal:latest+Q4", FillNothing).MapHash()] = true
-	m[ParseNameFill("mistral:LATest+Q4", FillNothing).MapHash()] = true
-
-	// key 2
-	m[ParseNameFill("mistral:LATest", FillNothing).MapHash()] = true
-
-	fmt.Println(len(m))
-	// Output:
-	// 2
-}
-
-func ExampleName_CompareFold_sort() {
-	names := []Name{
-		ParseNameFill("mistral:latest", FillNothing),
-		ParseNameFill("mistRal:7b+q4", FillNothing),
-		ParseNameFill("MIstral:7b", FillNothing),
-	}
-
-	slices.SortFunc(names, Name.CompareFold)
-
-	for _, n := range names {
-		fmt.Println(n.DisplayLong())
-	}
-
-	// Output:
-	// MIstral:7b
-	// mistRal:7b+q4
-	// mistral:latest
-}
-
-func ExampleName_completeAndResolved() {
-	for _, s := range []string{
-		"x/y/z:latest+q4_0@sha123-abc",
-		"x/y/z:latest+q4_0",
-		"@sha123-abc",
-	} {
-		name := ParseNameFill(s, FillNothing)
-		fmt.Printf("complete:%v resolved:%v  digest:%s\n", name.IsComplete(), name.IsResolved(), name.Digest())
-	}
-
-	// Output:
-	// complete:true resolved:true  digest:sha123-abc
-	// complete:true resolved:false  digest:
-	// complete:false resolved:true  digest:sha123-abc
-}
-
-func ExampleName_DisplayShortest() {
-	name := ParseNameFill("example.com/jmorganca/mistral:latest+Q4_0", FillNothing)
-
-	fmt.Println(name.DisplayShortest("example.com/jmorganca/_:latest"))
-	fmt.Println(name.DisplayShortest("example.com/_/_:latest"))
-	fmt.Println(name.DisplayShortest("example.com/_/_:_"))
-	fmt.Println(name.DisplayShortest("_/_/_:_"))
-
-	// Default
-	name = ParseNameFill("registry.ollama.ai/library/mistral:latest+Q4_0", FillNothing)
-	fmt.Println(name.DisplayShortest(""))
-
-	// Output:
-	// mistral
-	// jmorganca/mistral
-	// jmorganca/mistral:latest
-	// example.com/jmorganca/mistral:latest
-	// mistral
-}
-
-func keep[T any](v T) T { return v }
--- a/types/model/testdata/fuzz/FuzzParseRef/1d43ee52085cb4aa
+++ b/types/model/testdata/fuzz/FuzzParseRef/1d43ee52085cb4aa
@@ -1,2 +1,2 @@
 go test fuzz v1
-string("/0")
+string("00@")
--- a/types/model/testdata/fuzz/FuzzParseRef/27fd759314f0e6d6
+++ b/types/model/testdata/fuzz/FuzzParseRef/27fd759314f0e6d6
@@ -1,2 +0,0 @@
-go test fuzz v1
-string("0//0")
--- a/types/model/testdata/fuzz/FuzzParseRef/3e3b70dba384074d
+++ b/types/model/testdata/fuzz/FuzzParseRef/3e3b70dba384074d
@@ -1,2 +0,0 @@
-go test fuzz v1
-string("0 /0")
--- a/types/model/testdata/fuzz/FuzzParseRef/71f1fdff711b6dab
+++ b/types/model/testdata/fuzz/FuzzParseRef/71f1fdff711b6dab
@@ -1,2 +0,0 @@
-go test fuzz v1
-string("+0/00000")
--- a/types/model/testdata/fuzz/FuzzParseRef/82c2975c430ac608
+++ b/types/model/testdata/fuzz/FuzzParseRef/82c2975c430ac608
@@ -1,2 +0,0 @@
-go test fuzz v1
-string(":")
--- a/types/model/testdata/fuzz/FuzzParseRef/b51b1c875e61a948
+++ b/types/model/testdata/fuzz/FuzzParseRef/b51b1c875e61a948
@@ -1,2 +0,0 @@
-go test fuzz v1
-string("0+.\xf2\x80\xf6\x9d00000\xe5\x99\xe6\xd900\xd90\xa60\x91\xdc0\xff\xbf\x99\xe800\xb9\xdc\xd6\xc300\x970\xfb\xfd0\xe0\x8a\xe1\xad\xd40\x9700\xa80\x980\xdd0000\xb00\x91000\xfe0\x89\x9b\x90\x93\x9f0\xe60\xf7\x84\xb0\x87\xa5\xff0\xa000\x9a\x85\xf6\x85\xfe\xa9\xf9\xe9\xde00\xf4\xe0\x8f\x81\xad\xde00\xd700\xaa\xe000000\xb1\xee0\x91")
Author	SHA1	Message	Date
jmorganca	201a987ff9	some more menu options...	2024-04-28 12:40:52 -04:00
jmorganca	2d8125042a	Touch ID for cli install; server restarts	2024-04-27 22:42:38 -04:00
jmorganca	776e7bb5e4	app: fix status item icons	2024-04-27 15:57:57 -04:00
jmorganca	b8d7ca1a7b	Native implementation of macOS app	2024-04-27 14:20:10 -04:00
Blake Mizerany	2bed62926e	types/model: remove Digest (for now) (#3970 ) The Digest type needs more thought and is not necessary at the moment.	2024-04-26 21:14:28 -07:00
Jeffrey Morgan	aad8d128a0	also look at cwd as a root for windows runners (#3959 )	2024-04-26 19:14:08 -04:00
Daniel Hiltgen	ec1acbb867	Merge pull request #3968 from dhiltgen/win_generate Fine grain control over windows generate steps	2024-04-26 16:03:38 -07:00
Daniel Hiltgen	e4859c4563	Fine grain control over windows generate steps This will speed up CI which already tries to only build static for unit tests	2024-04-26 15:49:46 -07:00
Nataly Merezhuk	8e30eb26bd	Updates the setup command to use llama3. (#3962 )	2024-04-26 18:41:01 -04:00
Daniel Hiltgen	0b5c589ca2	Merge pull request #3966 from dhiltgen/bump Fix target in gen_windows.ps1	2024-04-26 15:36:53 -07:00
Michael Yang	65fadddc85	Merge pull request #3964 from ollama/mxyng/weights fix gemma, command-r layer weights	2024-04-26 15:23:33 -07:00
Daniel Hiltgen	ed5fb088c4	Fix target in gen_windows.ps1	2024-04-26 15:10:42 -07:00
Michael Yang	f81f308118	fix gemma, command-r layer weights	2024-04-26 15:00:55 -07:00
Blake Mizerany	b1390a7b37	types/model: export ParseNameBare and Merge (#3957 ) These are useful outside this package.	2024-04-26 14:58:07 -07:00
Michael Yang	11d83386a5	Merge pull request #3951 from ollama/mxyng/zip check file type before zip	2024-04-26 14:51:23 -07:00
Jeffrey Morgan	bb31def011	return code `499` when user cancels request while a model is loading (#3955 )	2024-04-26 17:38:29 -04:00
Michael Yang	41e03ede95	check file type before zip	2024-04-26 14:18:07 -07:00
Michael Yang	7fea1ecdf6	Merge pull request #3958 from ollama/mxyng/fix-workflow use merge base for diff-tree	2024-04-26 14:17:56 -07:00
Blake Mizerany	054894271d	.github/workflows/test.yaml: add in-flight cancellations on new push (#3956 ) Also, remove a superfluous 'go get'	2024-04-26 13:54:24 -07:00
Michael Yang	6fef042f0b	use merge base for diff-tree	2024-04-26 13:54:15 -07:00
Daniel Hiltgen	5c0c2d1d09	Merge pull request #3954 from dhiltgen/ci_fixes Put back non-avx CPU build for windows	2024-04-26 13:09:03 -07:00
Blake Mizerany	37f9c8ad99	types/model: overhaul Name and Digest types (#3924 )	2024-04-26 13:08:32 -07:00
Quinten van Buul	2a80f55e2a	Update windows.md (#3855 ) Fixed a typo	2024-04-26 16:04:15 -04:00
Daniel Hiltgen	421c878a2d	Put back non-avx CPU build for windows	2024-04-26 12:44:07 -07:00
Daniel Hiltgen	36666c2142	Merge pull request #3925 from dhiltgen/bump Bump llama.cpp to b2737	2024-04-26 10:09:38 -07:00
Daniel Hiltgen	85801317d1	Fix clip log import	2024-04-26 09:43:46 -07:00
Daniel Hiltgen	2ed0d65948	Bump llama.cpp to b2737	2024-04-26 09:43:28 -07:00
Daniel Hiltgen	d459dc4ad1	Merge pull request #3950 from dhiltgen/windows_packaging Fix exe name for zip packaging on windows	2024-04-26 09:27:37 -07:00
Daniel Hiltgen	40bc4622ef	Fix exe name for zip packaging on windows The zip file encodes the OS and architecture, so keep the short exe name	2024-04-26 09:18:05 -07:00
Daniel Hiltgen	c0f818a07a	Merge pull request #3948 from dhiltgen/win_generate Refactor windows generate for more modular usage	2024-04-26 09:17:20 -07:00
Daniel Hiltgen	8671fdeda6	Refactor windows generate for more modular usage	2024-04-26 08:35:50 -07:00
Daniel Hiltgen	2619850fb4	Merge pull request #3933 from dhiltgen/ci_fixes Move cuda/rocm dependency gathering into generate script	2024-04-26 07:01:24 -07:00
Daniel Hiltgen	8feb97dc0d	Move cuda/rocm dependency gathering into generate script This will make it simpler for CI to accumulate artifacts from prior steps	2024-04-25 22:38:44 -07:00
Daniel Hiltgen	4e1ff6dcbb	Merge pull request #3926 from dhiltgen/ci_fixes Fix release CI	2024-04-25 17:42:31 -07:00
Daniel Hiltgen	8589d752ac	Fix release CI download-artifact path was being used incorrectly. It is where to extract the zip not the files in the zip to extract. Default is workspace dir which is what we want, so omit it	2024-04-25 17:27:11 -07:00
Michael Yang	de4ded68b0	Merge pull request #3923 from ollama/mxyng/mem only count output tensors	2024-04-25 16:34:17 -07:00
Daniel Hiltgen	9b5a3c5991	Merge pull request #3914 from dhiltgen/mac_perf Improve mac parallel performance	2024-04-25 16:28:31 -07:00
Jeffrey Morgan	00b0699c75	Reload model if `num_gpu` changes (#3920 ) * reload model if `num_gpu` changes * dont reload on -1 * fix tests	2024-04-25 19:02:40 -04:00
Jeffrey Morgan	993cf8bf55	llm: limit generation to 10x context size to avoid run on generations (#3918 ) * llm: limit generation to 10x context size to avoid run on generations * add comment * simplify condition statement	2024-04-25 19:02:30 -04:00
Michael Yang	7bb7cb8a60	only count output tensors	2024-04-25 15:24:08 -07:00
Daniel Hiltgen	b123be5b71	Adjust context size for parallelism	2024-04-25 13:58:54 -07:00
jmorganca	ddf5c09a9b	use matrix multiplcation kernels in more cases	2024-04-25 13:58:54 -07:00
Roy Yang	5f73c08729	Remove trailing spaces (#3889 )	2024-04-25 14:32:26 -04:00
Daniel Hiltgen	f503a848c2	Merge pull request #3895 from brycereitano/shiftloading Move ggml loading to when attempting to fit	2024-04-25 09:24:08 -07:00
Bryce Reitano	36a6daccab	Restructure loading conditional chain	2024-04-24 17:37:03 -06:00
Bryce Reitano	ceb0e26e5e	Provide variable ggml for TestLoad	2024-04-24 17:19:55 -06:00
Bryce Reitano	284e02bed0	Move ggml loading to when we attempt fitting	2024-04-24 17:17:24 -06:00
Michael Yang	3450a57d4a	Merge pull request #3713 from ollama/mxyng/modelname update copy handler to use model.Name	2024-04-24 16:00:32 -07:00
Michael Yang	592dae31c8	update copy to use model.Name	2024-04-24 15:54:54 -07:00
Michael Yang	2010cbc5fa	Merge pull request #3833 from ollama/mxyng/fix-from fix: from blob	2024-04-24 15:13:47 -07:00
Michael Yang	ac0801eced	only replace if it matches command	2024-04-24 14:49:26 -07:00
Michael Yang	ad66e5b060	split temp zip files	2024-04-24 14:18:01 -07:00