Add support for IQ1_S, IQ3_S, IQ2_S, IQ4_XS, IQ4_NL

Co-authored-by: ManniX-ITA <20623405+mann1x@users.noreply.github.com>
s/DisplayLongest/String/
2026-02-25 19:46:55 -05:00 · 2024-05-03 14:51:07 -07:00 · 2024-05-03 13:18:28 -07:00 · 2024-05-03 13:18:28 -07:00 · 2024-05-03 13:18:28 -07:00 · 2024-05-03 13:18:28 -07:00
126 changed files with 20215 additions and 1926 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,4 @@ ggml-metal.metal
 test_data
 *.crt
 llm/build
+__debug_bin*
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 <div align="center">
-  <img alt="ollama" height="200px" src="https://github.com/ollama/ollama/assets/3325447/0d0b44e2-8f4a-4e99-9b52-a5c1c741c8f7">
+ <img alt="ollama" height="200px" src="https://github.com/ollama/ollama/assets/3325447/0d0b44e2-8f4a-4e99-9b52-a5c1c741c8f7">
 </div>

 # Ollama
@@ -51,7 +51,7 @@ Here are some example models that can be downloaded:
 | ------------------ | ---------- | ----- | ------------------------------ |
 | Llama 3            | 8B         | 4.7GB | `ollama run llama3`            |
 | Llama 3            | 70B        | 40GB  | `ollama run llama3:70b`        |
-| Phi-3              | 3,8B       | 2.3GB | `ollama run phi3`              |
+| Phi-3              | 3.8B       | 2.3GB | `ollama run phi3`              |
 | Mistral            | 7B         | 4.1GB | `ollama run mistral`           |
 | Neural Chat        | 7B         | 4.1GB | `ollama run neural-chat`       |
 | Starling           | 7B         | 4.1GB | `ollama run starling-lm`       |
@@ -173,7 +173,7 @@ I'm a basic program that prints the famous "Hello, world!" message to the consol
 The image features a yellow smiley face, which is likely the central focus of the picture.
 ```

-### Pass in prompt as arguments
+### Pass the prompt as an argument

 ```
 $ ollama run llama3 "Summarize this file: $(cat README.md)"
@@ -294,7 +294,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [RAGFlow: Open-source Retrieval-Augmented Generation engine based on deep document understanding](https://github.com/infiniflow/ragflow)
 - [chat: chat web app for teams](https://github.com/swuecho/chat)
 - [Lobe Chat](https://github.com/lobehub/lobe-chat) with [Integrating Doc](https://lobehub.com/docs/self-hosting/examples/ollama)
-- [Ollama RAG Chatbot: Local Chat with multiples PDFs using Ollama and RAG.](https://github.com/datvodinh/rag-chatbot.git)
+- [Ollama RAG Chatbot: Local Chat with multiple PDFs using Ollama and RAG.](https://github.com/datvodinh/rag-chatbot.git)

 ### Terminal

@@ -384,4 +384,4 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)

 ### Supported backends 
-- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov. 
+- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov. 
--- a/api/client.go
+++ b/api/client.go
@@ -18,6 +18,7 @@ import (
 	"net/url"
 	"os"
 	"runtime"
+	"strconv"
 	"strings"

 	"github.com/ollama/ollama/format"
@@ -57,12 +58,36 @@ func checkError(resp *http.Response, body []byte) error {
 // If the variable is not specified, a default ollama host and port will be
 // used.
 func ClientFromEnvironment() (*Client, error) {
+	ollamaHost, err := GetOllamaHost()
+	if err != nil {
+		return nil, err
+	}
+
+	return &Client{
+		base: &url.URL{
+			Scheme: ollamaHost.Scheme,
+			Host:   net.JoinHostPort(ollamaHost.Host, ollamaHost.Port),
+		},
+		http: http.DefaultClient,
+	}, nil
+}
+
+type OllamaHost struct {
+	Scheme string
+	Host   string
+	Port   string
+}
+
+func GetOllamaHost() (OllamaHost, error) {
 	defaultPort := "11434"

-	scheme, hostport, ok := strings.Cut(os.Getenv("OLLAMA_HOST"), "://")
+	hostVar := os.Getenv("OLLAMA_HOST")
+	hostVar = strings.TrimSpace(strings.Trim(strings.TrimSpace(hostVar), "\"'"))
+
+	scheme, hostport, ok := strings.Cut(hostVar, "://")
 	switch {
 	case !ok:
-		scheme, hostport = "http", os.Getenv("OLLAMA_HOST")
+		scheme, hostport = "http", hostVar
 	case scheme == "http":
 		defaultPort = "80"
 	case scheme == "https":
@@ -82,12 +107,14 @@ func ClientFromEnvironment() (*Client, error) {
 		}
 	}

-	return &Client{
-		base: &url.URL{
-			Scheme: scheme,
-			Host:   net.JoinHostPort(host, port),
-		},
-		http: http.DefaultClient,
+	if portNum, err := strconv.ParseInt(port, 10, 32); err != nil || portNum > 65535 || portNum < 0 {
+		return OllamaHost{}, ErrInvalidHostPort
+	}
+
+	return OllamaHost{
+		Scheme: scheme,
+		Host:   host,
+		Port:   port,
 	}, nil
 }

--- a/api/client_test.go
+++ b/api/client_test.go
@@ -1,6 +1,12 @@
 package api

-import "testing"
+import (
+	"fmt"
+	"net"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)

 func TestClientFromEnvironment(t *testing.T) {
 	type testCase struct {
@@ -40,4 +46,40 @@ func TestClientFromEnvironment(t *testing.T) {
 			}
 		})
 	}
+
+	hostTestCases := map[string]*testCase{
+		"empty":               {value: "", expect: "127.0.0.1:11434"},
+		"only address":        {value: "1.2.3.4", expect: "1.2.3.4:11434"},
+		"only port":           {value: ":1234", expect: ":1234"},
+		"address and port":    {value: "1.2.3.4:1234", expect: "1.2.3.4:1234"},
+		"hostname":            {value: "example.com", expect: "example.com:11434"},
+		"hostname and port":   {value: "example.com:1234", expect: "example.com:1234"},
+		"zero port":           {value: ":0", expect: ":0"},
+		"too large port":      {value: ":66000", err: ErrInvalidHostPort},
+		"too small port":      {value: ":-1", err: ErrInvalidHostPort},
+		"ipv6 localhost":      {value: "[::1]", expect: "[::1]:11434"},
+		"ipv6 world open":     {value: "[::]", expect: "[::]:11434"},
+		"ipv6 no brackets":    {value: "::1", expect: "[::1]:11434"},
+		"ipv6 + port":         {value: "[::1]:1337", expect: "[::1]:1337"},
+		"extra space":         {value: " 1.2.3.4 ", expect: "1.2.3.4:11434"},
+		"extra quotes":        {value: "\"1.2.3.4\"", expect: "1.2.3.4:11434"},
+		"extra space+quotes":  {value: " \" 1.2.3.4 \" ", expect: "1.2.3.4:11434"},
+		"extra single quotes": {value: "'1.2.3.4'", expect: "1.2.3.4:11434"},
+	}
+
+	for k, v := range hostTestCases {
+		t.Run(k, func(t *testing.T) {
+			t.Setenv("OLLAMA_HOST", v.value)
+
+			oh, err := GetOllamaHost()
+			if err != v.err {
+				t.Fatalf("expected %s, got %s", v.err, err)
+			}
+
+			if err == nil {
+				host := net.JoinHostPort(oh.Host, oh.Port)
+				assert.Equal(t, v.expect, host, fmt.Sprintf("%s: expected %s, got %s", k, v.expect, host))
+			}
+		})
+	}
 }
--- a/api/types.go
+++ b/api/types.go
@@ -309,6 +309,7 @@ func (m *Metrics) Summary() {
 }

 var ErrInvalidOpts = errors.New("invalid options")
+var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")

 func (opts *Options) FromMap(m map[string]interface{}) error {
 	valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct
--- a/app/.gitignore
+++ b/app/.gitignore
@@ -1,2 +1 @@
 ollama.syso
-app
--- a/app/AppDelegate.h
+++ b/app/AppDelegate.h
@@ -1,7 +0,0 @@
-#import <Cocoa/Cocoa.h>
-
-@interface AppDelegate : NSObject <NSApplicationDelegate>
-
- (void)applicationDidFinishLaunching:(NSNotification *)aNotification;
-
-@end
--- a/app/README.md
+++ b/app/README.md
@@ -1,6 +1,10 @@
 # Ollama App

-## macOS
+## Linux
+
+TODO
+
+## macOS

 TODO

--- a/app/app_darwin.go
+++ b/app/app_darwin.go
@@ -1,76 +0,0 @@
-package main
-
-// #cgo CFLAGS: -x objective-c
-// #cgo LDFLAGS: -framework Cocoa -framework LocalAuthentication -framework ServiceManagement
-// #include "app_darwin.h"
-import "C"
-import (
-	"context"
-	"fmt"
-	"log/slog"
-	"os"
-	"path/filepath"
-	"syscall"
-)
-
-func init() {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		panic(err)
-	}
-
-	ServerLogFile = filepath.Join(home, ".ollama", "logs", "server.log")
-}
-
-func run() {
-	initLogging()
-	slog.Info("ollama macOS app started")
-
-	// Ask to move to applications directory
-	moving := C.askToMoveToApplications()
-	if moving {
-		return
-	}
-
-	C.killOtherInstances()
-
-	code := C.installSymlink()
-	if code != 0 {
-		slog.Error("Failed to install symlink")
-	}
-
-	exe, err := os.Executable()
-	if err != nil {
-		panic(err)
-	}
-
-	var options ServerOptions
-
-	ctx, cancel := context.WithCancel(context.Background())
-	var done chan int
-
-	done, err = SpawnServer(ctx, filepath.Join(filepath.Dir(exe), "..", "Resources", "ollama"), options)
-	if err != nil {
-		slog.Error(fmt.Sprintf("Failed to spawn ollama server %s", err))
-		done = make(chan int, 1)
-		done <- 1
-	}
-
-	// Run the native macOS app
-	// Note: this will block until the app is closed
-	C.run()
-
-	slog.Info("ollama macOS app closed")
-
-	cancel()
-	slog.Info("Waiting for ollama server to shutdown...")
-	if done != nil {
-		<-done
-	}
-	slog.Info("Ollama app exiting")
-}
-
-//export Quit
-func Quit() {
-	syscall.Kill(os.Getpid(), syscall.SIGTERM)
-}
--- a/app/app_darwin.h
+++ b/app/app_darwin.h
@@ -1,13 +0,0 @@
-#import <Cocoa/Cocoa.h>
-
-@interface AppDelegate : NSObject <NSApplicationDelegate>
- (void)applicationDidFinishLaunching:(NSNotification *)aNotification;
-@end
-
-void run();
-void killOtherInstances();
-bool askToMoveToApplications();
-int createSymlinkWithAuthorization();
-int installSymlink();
-extern void Restart();
-extern void Quit();
--- a/app/app_darwin.m
+++ b/app/app_darwin.m
@@ -1,282 +0,0 @@
-#import <AppKit/AppKit.h>
-#import <Cocoa/Cocoa.h>
-#import <CoreServices/CoreServices.h>
-#import <Security/Security.h>
-#import <ServiceManagement/ServiceManagement.h>
-#import "app_darwin.h"
-
-@interface AppDelegate ()
-
-@property (strong, nonatomic) NSStatusItem *statusItem;
-
-@end
-
-@implementation AppDelegate
-
- (void)applicationDidFinishLaunching:(NSNotification *)aNotification {
-    // show status menu
-    NSMenu *menu = [[NSMenu alloc] init];
-
-    NSMenuItem *aboutMenuItem = [[NSMenuItem alloc] initWithTitle:@"About Ollama" action:@selector(aboutOllama) keyEquivalent:@""];
-    [aboutMenuItem setTarget:self];
-    [menu addItem:aboutMenuItem];
-
-    // Settings submenu
-    NSMenu *settingsMenu = [[NSMenu alloc] initWithTitle:@"Settings"];
-
-    // Submenu items
-    NSMenuItem *chooseModelDirectoryItem = [[NSMenuItem alloc] initWithTitle:@"Choose model directory..." action:@selector(chooseModelDirectory) keyEquivalent:@""];
-    [chooseModelDirectoryItem setTarget:self];
-    [chooseModelDirectoryItem setEnabled:YES];
-    [settingsMenu addItem:chooseModelDirectoryItem];
-
-    NSMenuItem *exposeExternallyItem = [[NSMenuItem alloc] initWithTitle:@"Allow external connections" action:@selector(toggleExposeExternally:) keyEquivalent:@""];
-    [exposeExternallyItem setTarget:self];
-    [exposeExternallyItem setState:NSOffState]; // Set initial state to off
-    [exposeExternallyItem setEnabled:YES];
-    [settingsMenu addItem:exposeExternallyItem];
-
-    NSMenuItem *allowCrossOriginItem = [[NSMenuItem alloc] initWithTitle:@"Allow browser requests" action:@selector(toggleCrossOrigin:) keyEquivalent:@""];
-    [allowCrossOriginItem setTarget:self];
-    [allowCrossOriginItem setState:NSOffState]; // Set initial state to off
-    [allowCrossOriginItem setEnabled:YES];
-    [settingsMenu addItem:allowCrossOriginItem];
-
-    NSMenuItem *settingsMenuItem = [[NSMenuItem alloc] initWithTitle:@"Settings" action:nil keyEquivalent:@""];
-    [settingsMenuItem setSubmenu:settingsMenu];
-    [menu addItem:settingsMenuItem];
-
-    [menu addItemWithTitle:@"Quit Ollama" action:@selector(quit) keyEquivalent:@"q"];
-
-    self.statusItem = [[NSStatusBar systemStatusBar] statusItemWithLength:NSVariableStatusItemLength];
-    [self.statusItem addObserver:self forKeyPath:@"button.effectiveAppearance" options:NSKeyValueObservingOptionNew|NSKeyValueObservingOptionInitial context:nil];
-
-    self.statusItem.menu = menu;
-    [self showIcon];
-}
-
- (void)aboutOllama {
-    [[NSApplication sharedApplication] orderFrontStandardAboutPanel:nil];
-}
-
- (void)toggleCrossOrigin:(id)sender {
-    NSMenuItem *item = (NSMenuItem *)sender;
-    if ([item state] == NSOffState) {
-        // Do something when cross-origin requests are allowed
-        [item setState:NSOnState];
-    } else {
-        // Do something when cross-origin requests are disallowed
-        [item setState:NSOffState];
-    }
-}
-
- (void)toggleExposeExternally:(id)sender {
-    NSMenuItem *item = (NSMenuItem *)sender;
-    if ([item state] == NSOffState) {
-        // Do something when Ollama is exposed externally
-        [item setState:NSOnState];
-    } else {
-        // Do something when Ollama is not exposed externally
-        [item setState:NSOffState];
-    }
-}
-
- (void)chooseModelDirectory {
-    NSOpenPanel *openPanel = [NSOpenPanel openPanel];
-    [openPanel setCanChooseFiles:NO];
-    [openPanel setCanChooseDirectories:YES];
-    [openPanel setAllowsMultipleSelection:NO];
-
-    NSInteger result = [openPanel runModal];
-    if (result == NSModalResponseOK) {
-        NSURL *selectedDirectoryURL = [openPanel URLs].firstObject;
-        // Do something with the selected directory URL
-    }
-}
-
-(void) showIcon {
-    NSAppearance* appearance = self.statusItem.button.effectiveAppearance;
-    NSString* appearanceName = (NSString*)(appearance.name);
-    NSString* iconName = [[appearanceName lowercaseString] containsString:@"dark"] ? @"iconDark" : @"icon";
-    NSImage* statusImage = [NSImage imageNamed:iconName];
-    [statusImage setTemplate:YES];
-    self.statusItem.button.image = statusImage;
-}
-
-(void)observeValueForKeyPath:(NSString *)keyPath ofObject:(id)object change:(NSDictionary<NSKeyValueChangeKey,id> *)change context:(void *)context {
-    [self showIcon];
-}
-
- (void)quit {
-    [NSApp stop:nil];
-}
-
-@end
-
-void run() {
-    @autoreleasepool {
-        [NSApplication sharedApplication];
-        AppDelegate *appDelegate = [[AppDelegate alloc] init];
-        [NSApp setDelegate:appDelegate];
-        [NSApp run];
-    }
-}
-
-// killOtherInstances kills all other instances of the app currently
-// running. This way we can ensure that only the most recently started
-// instance of Ollama is running
-void killOtherInstances() {
-    pid_t pid = getpid();
-    NSArray *all = [[NSWorkspace sharedWorkspace] runningApplications];
-    NSMutableArray *apps = [NSMutableArray array];
-
-    for (NSRunningApplication *app in all) {
-        if ([app.bundleIdentifier isEqualToString:[[NSBundle mainBundle] bundleIdentifier]] ||
-            [app.bundleIdentifier isEqualToString:@"ai.ollama.ollama"] ||
-            [app.bundleIdentifier isEqualToString:@"com.electron.ollama"]) {
-            if (app.processIdentifier != pid) {
-                [apps addObject:app];
-            }
-        }
-    }
-
-    for (NSRunningApplication *app in apps) {
-        kill(app.processIdentifier, SIGTERM);
-    }
-
-    NSDate *startTime = [NSDate date];
-    for (NSRunningApplication *app in apps) {
-        while (!app.terminated) {
-            if (-[startTime timeIntervalSinceNow] >= 5) {
-                kill(app.processIdentifier, SIGKILL);
-                break;
-            }
-
-            [[NSRunLoop currentRunLoop] runUntilDate:[NSDate dateWithTimeIntervalSinceNow:0.1]];
-        }
-    }
-}
-
-bool askToMoveToApplications() {
-    NSString *bundlePath = [[NSBundle mainBundle] bundlePath];
-    if ([bundlePath hasPrefix:@"/Applications"]) {
-        return false;
-    }
-
-    NSAlert *alert = [[NSAlert alloc] init];
-    [alert setMessageText:@"Move to Applications?"];
-    [alert setInformativeText:@"Ollama works best when run from the Applications directory."];
-    [alert addButtonWithTitle:@"Move to Applications"];
-    [alert addButtonWithTitle:@"Don't move"];
-
-    [NSApp activateIgnoringOtherApps:YES];
-
-    if ([alert runModal] != NSAlertFirstButtonReturn) {
-        return false;
-    }
-
-    // move to applications
-    NSString *applicationsPath = @"/Applications";
-    NSString *newPath = [applicationsPath stringByAppendingPathComponent:@"Ollama.app"];
-    NSFileManager *fileManager = [NSFileManager defaultManager];
-
-    // Check if the newPath already exists
-    if ([fileManager fileExistsAtPath:newPath]) {
-        NSError *removeError = nil;
-        [fileManager removeItemAtPath:newPath error:&removeError];
-        if (removeError) {
-            NSLog(@"Error removing file at %@: %@", newPath, removeError);
-            return false; // or handle the error
-        }
-    }
-
-    NSError *moveError = nil;
-    [fileManager moveItemAtPath:bundlePath toPath:newPath error:&moveError];
-    if (moveError) {
-        NSLog(@"Error moving file from %@ to %@: %@", bundlePath, newPath, moveError);
-        return false;
-    }
-
-    NSLog(@"Opening %@", newPath);
-    NSError *error = nil;
-    NSWorkspace *workspace = [NSWorkspace sharedWorkspace];
-#pragma clang diagnostic ignored "-Wdeprecated-declarations"
-    [workspace launchApplicationAtURL:[NSURL fileURLWithPath:newPath]
-               options:NSWorkspaceLaunchNewInstance | NSWorkspaceLaunchDefault
-               configuration:@{}
-               error:&error];
-
-    return true;
-}
-
-int installSymlink() {
-    NSString *linkPath = @"/usr/local/bin/ollama";
-    NSError *error = nil;
-
-    NSFileManager *fileManager = [NSFileManager defaultManager];
-    NSString *symlinkPath = [fileManager destinationOfSymbolicLinkAtPath:linkPath error:&error];
-    NSString *bundlePath = [[NSBundle mainBundle] bundlePath];
-    NSString *execPath = [[NSBundle mainBundle] executablePath];
-    NSString *resPath = [[NSBundle mainBundle] pathForResource:@"ollama" ofType:nil];
-
-    // if the symlink already exists and points to the right place, don't prompt
-    if ([symlinkPath isEqualToString:resPath]) {
-        NSLog(@"symbolic link already exists and points to the right place");
-        return 0;
-    }
-
-    NSString *authorizationPrompt = @"Ollama is trying to install its command line interface (CLI) tool.";
-
-    AuthorizationRef auth = NULL;
-    OSStatus createStatus = AuthorizationCreate(NULL, kAuthorizationEmptyEnvironment, kAuthorizationFlagDefaults, &auth);
-    if (createStatus != errAuthorizationSuccess) {
-        NSLog(@"Error creating authorization");
-        return -1;
-    }
-
-    NSString * bundleIdentifier = [[NSBundle mainBundle] bundleIdentifier];
-    NSString *rightNameString = [NSString stringWithFormat:@"%@.%@", bundleIdentifier, @"auth3"];
-    const char *rightName = rightNameString.UTF8String;
-
-    OSStatus getRightResult = AuthorizationRightGet(rightName, NULL);
-    if (getRightResult == errAuthorizationDenied) {
-        if (AuthorizationRightSet(auth, rightName, (__bridge CFTypeRef _Nonnull)(@(kAuthorizationRuleAuthenticateAsAdmin)), (__bridge CFStringRef _Nullable)(authorizationPrompt), NULL, NULL) != errAuthorizationSuccess) {
-            NSLog(@"Failed to set right");
-            return -1;
-        }
-    }
-
-    AuthorizationItem right = { .name = rightName, .valueLength = 0, .value = NULL, .flags = 0 };
-    AuthorizationRights rights = { .count = 1, .items = &right };
-    AuthorizationFlags flags = (AuthorizationFlags)(kAuthorizationFlagExtendRights | kAuthorizationFlagInteractionAllowed);
-    AuthorizationItem iconAuthorizationItem = {.name = kAuthorizationEnvironmentIcon, .valueLength = 0, .value = NULL, .flags = 0};
-    AuthorizationEnvironment authorizationEnvironment = {.count = 0, .items = NULL};
-
-    BOOL failedToUseSystemDomain = NO;
-    OSStatus copyStatus = AuthorizationCopyRights(auth, &rights, &authorizationEnvironment, flags, NULL);
-    if (copyStatus != errAuthorizationSuccess) {
-        failedToUseSystemDomain = YES;
-
-        if (copyStatus == errAuthorizationCanceled) {
-            NSLog(@"User cancelled authorization");
-            return -1;
-        } else {
-            NSLog(@"Failed copying system domain rights: %d", copyStatus);
-            return -1;
-        }
-    }
-
-    const char *toolPath = "/bin/ln";
-    const char *args[] = {"-s", "-F", [resPath UTF8String], "/usr/local/bin/ollama", NULL};
-    FILE *pipe = NULL;
-
-#pragma clang diagnostic ignored "-Wdeprecated-declarations"
-    OSStatus status = AuthorizationExecuteWithPrivileges(auth, toolPath, kAuthorizationFlagDefaults, (char *const *)args, &pipe);
-    if (status != errAuthorizationSuccess) {
-        NSLog(@"Failed to create symlink");
-        return -1;
-    }
-
-    AuthorizationFree(auth, kAuthorizationFlagDestroyRights);
-    return 0;
-}
--- a/app/app_windows.go
+++ b/app/app_windows.go
@@ -1,166 +0,0 @@
-package main
-
-import (
-	"context"
-	"errors"
-	"fmt"
-	"log"
-	"log/slog"
-	"os"
-	"os/exec"
-	"os/signal"
-	"path/filepath"
-	"strings"
-	"syscall"
-
-	"github.com/ollama/ollama/app/lifecycle"
-	"github.com/ollama/ollama/app/store"
-	"github.com/ollama/ollama/app/tray"
-	"github.com/ollama/ollama/app/updater"
-)
-
-func init() {
-	AppName += ".exe"
-	CLIName += ".exe"
-	// Logs, configs, downloads go to LOCALAPPDATA
-	localAppData := os.Getenv("LOCALAPPDATA")
-	AppDataDir = filepath.Join(localAppData, "Ollama")
-	AppLogFile = filepath.Join(AppDataDir, "app.log")
-	ServerLogFile = filepath.Join(AppDataDir, "server.log")
-
-	// Executables are stored in APPDATA
-	AppDir = filepath.Join(localAppData, "Programs", "Ollama")
-
-	// Make sure we have PATH set correctly for any spawned children
-	paths := strings.Split(os.Getenv("PATH"), ";")
-	// Start with whatever we find in the PATH/LD_LIBRARY_PATH
-	found := false
-	for _, path := range paths {
-		d, err := filepath.Abs(path)
-		if err != nil {
-			continue
-		}
-		if strings.EqualFold(AppDir, d) {
-			found = true
-		}
-	}
-	if !found {
-		paths = append(paths, AppDir)
-
-		pathVal := strings.Join(paths, ";")
-		slog.Debug("setting PATH=" + pathVal)
-		err := os.Setenv("PATH", pathVal)
-		if err != nil {
-			slog.Error(fmt.Sprintf("failed to update PATH: %s", err))
-		}
-	}
-
-	// Make sure our logging dir exists
-	_, err := os.Stat(AppDataDir)
-	if errors.Is(err, os.ErrNotExist) {
-		if err := os.MkdirAll(AppDataDir, 0o755); err != nil {
-			slog.Error(fmt.Sprintf("create ollama dir %s: %v", AppDataDir, err))
-		}
-	}
-}
-
-func ShowLogs() {
-	cmd_path := "c:\\Windows\\system32\\cmd.exe"
-	slog.Debug(fmt.Sprintf("viewing logs with start %s", AppDataDir))
-	cmd := exec.Command(cmd_path, "/c", "start", AppDataDir)
-	cmd.SysProcAttr = &syscall.SysProcAttr{HideWindow: false, CreationFlags: 0x08000000}
-	err := cmd.Start()
-	if err != nil {
-		slog.Error(fmt.Sprintf("Failed to open log dir: %s", err))
-	}
-}
-
-func Start() {
-	cmd_path := "c:\\Windows\\system32\\cmd.exe"
-	slog.Debug(fmt.Sprintf("viewing logs with start %s", AppDataDir))
-	cmd := exec.Command(cmd_path, "/c", "start", AppDataDir)
-	cmd.SysProcAttr = &syscall.SysProcAttr{HideWindow: false, CreationFlags: 0x08000000}
-	err := cmd.Start()
-	if err != nil {
-		slog.Error(fmt.Sprintf("Failed to open log dir: %s", err))
-	}
-}
-
-func run() {
-	initLogging()
-
-	slog.Info("ollama windows app started")
-
-	ctx, cancel := context.WithCancel(context.Background())
-	var done chan int
-
-	t, err := tray.NewTray()
-	if err != nil {
-		log.Fatalf("Failed to start: %s", err)
-	}
-	callbacks := t.GetCallbacks()
-
-	signals := make(chan os.Signal, 1)
-	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
-
-	go func() {
-		slog.Debug("starting callback loop")
-		for {
-			select {
-			case <-callbacks.Quit:
-				slog.Debug("quit called")
-				t.Quit()
-			case <-signals:
-				slog.Debug("shutting down due to signal")
-				t.Quit()
-			case <-callbacks.Update:
-				err := updater.DoUpgrade(cancel, done)
-				if err != nil {
-					slog.Warn(fmt.Sprintf("upgrade attempt failed: %s", err))
-				}
-			case <-callbacks.ShowLogs:
-				ShowLogs()
-			case <-callbacks.DoFirstUse:
-				err := lifecycle.GetStarted()
-				if err != nil {
-					slog.Warn(fmt.Sprintf("Failed to launch getting started shell: %s", err))
-				}
-			}
-		}
-	}()
-
-	if !store.GetFirstTimeRun() {
-		slog.Debug("First time run")
-		err = t.DisplayFirstUseNotification()
-		if err != nil {
-			slog.Debug(fmt.Sprintf("XXX failed to display first use notification %v", err))
-		}
-		store.SetFirstTimeRun(true)
-	} else {
-		slog.Debug("Not first time, skipping first run notification")
-	}
-
-	if isServerRunning(ctx) {
-		slog.Info("Detected another instance of ollama running, exiting")
-		os.Exit(1)
-	}
-
-	done, err = SpawnServer(ctx, CLIName)
-	if err != nil {
-		// TODO - should we retry in a backoff loop?
-		// TODO - should we pop up a warning and maybe add a menu item to view application logs?
-		slog.Error(fmt.Sprintf("Failed to spawn ollama server %s", err))
-		done = make(chan int, 1)
-		done <- 1
-	}
-
-	updater.StartBackgroundUpdaterChecker(ctx, t.UpdateAvailable)
-
-	t.Run()
-	cancel()
-	slog.Info("Waiting for ollama server to shutdown...")
-	if done != nil {
-		<-done
-	}
-	slog.Info("Ollama app exiting")
-}
--- a/app/darwin/Ollama.app/Contents/Info.plist
+++ b/app/darwin/Ollama.app/Contents/Info.plist
@@ -1,40 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-  <dict>
-    <key>CFBundleDisplayName</key>
-    <string>Ollama</string>
-    <key>CFBundleExecutable</key>
-    <string>Ollama</string>
-    <key>CFBundleIconFile</key>
-    <string>icon.icns</string>
-    <key>CFBundleIdentifier</key>
-    <string>com.ollama.ollama</string>
-    <key>CFBundleInfoDictionaryVersion</key>
-    <string>6.0</string>
-    <key>CFBundleName</key>
-    <string>Ollama</string>
-    <key>CFBundlePackageType</key>
-    <string>APPL</string>
-    <key>CFBundleShortVersionString</key>
-    <string>0.0.0</string>
-    <key>CFBundleVersion</key>
-    <string>0.0.0</string>
-    <key>DTCompiler</key>
-    <string>com.apple.compilers.llvm.clang.1_0</string>
-    <key>DTSDKBuild</key>
-    <string>22E245</string>
-    <key>DTSDKName</key>
-    <string>macosx13.3</string>
-    <key>DTXcode</key>
-    <string>1431</string>
-    <key>DTXcodeBuild</key>
-    <string>14E300c</string>
-    <key>LSApplicationCategoryType</key>
-    <string>public.app-category.developer-tools</string>
-    <key>LSMinimumSystemVersion</key>
-    <string>11.0</string>
-    <key>LSUIElement</key>
-    <true/>
-  </dict>
-</plist>
--- a/app/darwin/Ollama.app/Contents/Resources/icon.png
+++ b/app/darwin/Ollama.app/Contents/Resources/icon.png
--- a/app/darwin/Ollama.app/Contents/Resources/icon@2x.png
+++ b/app/darwin/Ollama.app/Contents/Resources/icon@2x.png
--- a/app/darwin/Ollama.app/Contents/Resources/iconDark.png
+++ b/app/darwin/Ollama.app/Contents/Resources/iconDark.png
--- a/app/darwin/Ollama.app/Contents/Resources/iconDark@2x.png
+++ b/app/darwin/Ollama.app/Contents/Resources/iconDark@2x.png
--- a/app/lifecycle/getstarted_nonwindows.go
+++ b/app/lifecycle/getstarted_nonwindows.go
@@ -1,3 +1,5 @@
+//go:build !windows
+
 package lifecycle

 import "fmt"
--- a/app/lifecycle/lifecycle.go
+++ b/app/lifecycle/lifecycle.go
@@ -0,0 +1,92 @@
+package lifecycle
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"log/slog"
+	"os"
+	"os/signal"
+	"syscall"
+
+	"github.com/ollama/ollama/app/store"
+	"github.com/ollama/ollama/app/tray"
+)
+
+func Run() {
+	InitLogging()
+
+	ctx, cancel := context.WithCancel(context.Background())
+	var done chan int
+
+	t, err := tray.NewTray()
+	if err != nil {
+		log.Fatalf("Failed to start: %s", err)
+	}
+	callbacks := t.GetCallbacks()
+
+	signals := make(chan os.Signal, 1)
+	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
+
+	go func() {
+		slog.Debug("starting callback loop")
+		for {
+			select {
+			case <-callbacks.Quit:
+				slog.Debug("quit called")
+				t.Quit()
+			case <-signals:
+				slog.Debug("shutting down due to signal")
+				t.Quit()
+			case <-callbacks.Update:
+				err := DoUpgrade(cancel, done)
+				if err != nil {
+					slog.Warn(fmt.Sprintf("upgrade attempt failed: %s", err))
+				}
+			case <-callbacks.ShowLogs:
+				ShowLogs()
+			case <-callbacks.DoFirstUse:
+				err := GetStarted()
+				if err != nil {
+					slog.Warn(fmt.Sprintf("Failed to launch getting started shell: %s", err))
+				}
+			}
+		}
+	}()
+
+	// Are we first use?
+	if !store.GetFirstTimeRun() {
+		slog.Debug("First time run")
+		err = t.DisplayFirstUseNotification()
+		if err != nil {
+			slog.Debug(fmt.Sprintf("XXX failed to display first use notification %v", err))
+		}
+		store.SetFirstTimeRun(true)
+	} else {
+		slog.Debug("Not first time, skipping first run notification")
+	}
+
+	if IsServerRunning(ctx) {
+		slog.Info("Detected another instance of ollama running, exiting")
+		os.Exit(1)
+	} else {
+		done, err = SpawnServer(ctx, CLIName)
+		if err != nil {
+			// TODO - should we retry in a backoff loop?
+			// TODO - should we pop up a warning and maybe add a menu item to view application logs?
+			slog.Error(fmt.Sprintf("Failed to spawn ollama server %s", err))
+			done = make(chan int, 1)
+			done <- 1
+		}
+	}
+
+	StartBackgroundUpdaterChecker(ctx, t.UpdateAvailable)
+
+	t.Run()
+	cancel()
+	slog.Info("Waiting for ollama server to shutdown...")
+	if done != nil {
+		<-done
+	}
+	slog.Info("Ollama app exiting")
+}
--- a/app/lifecycle/logging.go
+++ b/app/lifecycle/logging.go
@@ -1,4 +1,4 @@
-package main
+package lifecycle

 import (
 	"fmt"
@@ -7,7 +7,7 @@ import (
 	"path/filepath"
 )

-func initLogging() {
+func InitLogging() {
 	level := slog.LevelInfo

 	if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
@@ -41,4 +41,6 @@ func initLogging() {
 	})

 	slog.SetDefault(slog.New(handler))
+
+	slog.Info("ollama app started")
 }
--- a/app/lifecycle/logging_nonwindows.go
+++ b/app/lifecycle/logging_nonwindows.go
@@ -0,0 +1,9 @@
+//go:build !windows
+
+package lifecycle
+
+import "log/slog"
+
+func ShowLogs() {
+	slog.Warn("ShowLogs not yet implemented")
+}
--- a/app/lifecycle/logging_windows.go
+++ b/app/lifecycle/logging_windows.go
@@ -0,0 +1,19 @@
+package lifecycle
+
+import (
+	"fmt"
+	"log/slog"
+	"os/exec"
+	"syscall"
+)
+
+func ShowLogs() {
+	cmd_path := "c:\\Windows\\system32\\cmd.exe"
+	slog.Debug(fmt.Sprintf("viewing logs with start %s", AppDataDir))
+	cmd := exec.Command(cmd_path, "/c", "start", AppDataDir)
+	cmd.SysProcAttr = &syscall.SysProcAttr{HideWindow: false, CreationFlags: 0x08000000}
+	err := cmd.Start()
+	if err != nil {
+		slog.Error(fmt.Sprintf("Failed to open log dir: %s", err))
+	}
+}
--- a/app/lifecycle/paths.go
+++ b/app/lifecycle/paths.go
@@ -70,5 +70,10 @@ func init() {
 			}
 		}

+	} else if runtime.GOOS == "darwin" {
+		// TODO
+		AppName += ".app"
+		// } else if runtime.GOOS == "linux" {
+		// TODO
 	}
 }
--- a/app/lifecycle/server.go
+++ b/app/lifecycle/server.go
@@ -1,4 +1,4 @@
-package main
+package lifecycle

 import (
 	"context"
@@ -14,28 +14,37 @@ import (
 	"github.com/ollama/ollama/api"
 )

-type ServerOptions struct {
-	Cors       bool
-	Expose     bool
-	ModelsPath string
+func getCLIFullPath(command string) string {
+	cmdPath := ""
+	appExe, err := os.Executable()
+	if err == nil {
+		cmdPath = filepath.Join(filepath.Dir(appExe), command)
+		_, err := os.Stat(cmdPath)
+		if err == nil {
+			return cmdPath
+		}
+	}
+	cmdPath, err = exec.LookPath(command)
+	if err == nil {
+		_, err := os.Stat(cmdPath)
+		if err == nil {
+			return cmdPath
+		}
+	}
+	pwd, err := os.Getwd()
+	if err == nil {
+		cmdPath = filepath.Join(pwd, command)
+		_, err = os.Stat(cmdPath)
+		if err == nil {
+			return cmdPath
+		}
+	}
+
+	return command
 }

-func start(ctx context.Context, command string, options ServerOptions) (*exec.Cmd, error) {
-	cmd := getCmd(ctx, command)
-
-	// set environment variables
-	if options.ModelsPath != "" {
-		cmd.Env = append(cmd.Env, fmt.Sprintf("OLLAMA_MODELS=%s", options.ModelsPath))
-	}
-
-	if options.Cors {
-		cmd.Env = append(cmd.Env, "OLLAMA_ORIGINS=*")
-	}
-
-	if options.Expose {
-		cmd.Env = append(cmd.Env, "OLLAMA_HOST=0.0.0.0")
-	}
-
+func start(ctx context.Context, command string) (*exec.Cmd, error) {
+	cmd := getCmd(ctx, getCLIFullPath(command))
 	stdout, err := cmd.StdoutPipe()
 	if err != nil {
 		return nil, fmt.Errorf("failed to spawn server stdout pipe: %w", err)
@@ -50,6 +59,20 @@ func start(ctx context.Context, command string, options ServerOptions) (*exec.Cm
 	if err != nil {
 		return nil, fmt.Errorf("failed to create server log: %w", err)
 	}
+
+	logDir := filepath.Dir(ServerLogFile)
+	_, err = os.Stat(logDir)
+	if err != nil {
+		if !errors.Is(err, os.ErrNotExist) {
+			return nil, fmt.Errorf("stat ollama server log dir %s: %v", logDir, err)
+
+		}
+
+		if err := os.MkdirAll(logDir, 0o755); err != nil {
+			return nil, fmt.Errorf("create ollama server log dir %s: %v", logDir, err)
+		}
+	}
+
 	go func() {
 		defer logFile.Close()
 		io.Copy(logFile, stdout) //nolint:errcheck
@@ -103,25 +126,20 @@ func start(ctx context.Context, command string, options ServerOptions) (*exec.Cm
 	return cmd, nil
 }

-func SpawnServer(ctx context.Context, command string, options ServerOptions) (chan int, error) {
-	logDir := filepath.Dir(ServerLogFile)
-	_, err := os.Stat(logDir)
-	if errors.Is(err, os.ErrNotExist) {
-		if err := os.MkdirAll(logDir, 0o755); err != nil {
-			return nil, fmt.Errorf("create ollama server log dir %s: %v", logDir, err)
-		}
-	}
-
+func SpawnServer(ctx context.Context, command string) (chan int, error) {
 	done := make(chan int)

 	go func() {
 		// Keep the server running unless we're shuttind down the app
 		crashCount := 0
 		for {
-			slog.Info(fmt.Sprintf("starting server..."))
-			cmd, err := start(ctx, command, options)
+			slog.Info("starting server...")
+			cmd, err := start(ctx, command)
 			if err != nil {
+				crashCount++
 				slog.Error(fmt.Sprintf("failed to start server %s", err))
+				time.Sleep(500 * time.Millisecond * time.Duration(crashCount))
+				continue
 			}

 			cmd.Wait() //nolint:errcheck
@@ -147,7 +165,7 @@ func SpawnServer(ctx context.Context, command string, options ServerOptions) (ch
 	return done, nil
 }

-func isServerRunning(ctx context.Context) bool {
+func IsServerRunning(ctx context.Context) bool {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
 		slog.Info("unable to connect to server")
--- a/app/lifecycle/server_unix.go
+++ b/app/lifecycle/server_unix.go
@@ -1,4 +1,6 @@
-package main
+//go:build !windows
+
+package lifecycle

 import (
 	"context"
--- a/app/lifecycle/server_windows.go
+++ b/app/lifecycle/server_windows.go
@@ -1,4 +1,4 @@
-package main
+package lifecycle

 import (
 	"context"
--- a/app/lifecycle/updater.go
+++ b/app/lifecycle/updater.go
@@ -1,4 +1,4 @@
-package updater
+package lifecycle

 import (
 	"context"
@@ -22,10 +22,6 @@ import (
 	"github.com/ollama/ollama/version"
 )

-var (
-	UpdateStageDir string
-)
-
 var (
 	UpdateCheckURLBase  = "https://ollama.com/api/update"
 	UpdateDownloaded    = false
@@ -127,7 +123,7 @@ func DownloadNewRelease(ctx context.Context, updateResp UpdateResponse) error {
 		slog.Debug("no etag detected, falling back to filename based dedup")
 		etag = "_"
 	}
-	filename := "OllamaSetup.exe"
+	filename := Installer
 	_, params, err := mime.ParseMediaType(resp.Header.Get("content-disposition"))
 	if err == nil {
 		filename = params["filename"]
--- a/app/lifecycle/updater_nonwindows.go
+++ b/app/lifecycle/updater_nonwindows.go
@@ -1,4 +1,6 @@
-package updater
+//go:build !windows
+
+package lifecycle

 import (
 	"context"
--- a/app/lifecycle/updater_windows.go
+++ b/app/lifecycle/updater_windows.go
@@ -1,4 +1,4 @@
-package updater
+package lifecycle

 import (
 	"context"
@@ -9,13 +9,7 @@ import (
 	"path/filepath"
 )

-func init() {
-	UpdateStageDir = filepath.Join(os.Getenv("LOCALAPPDATA"), "Ollama", "updates")
-}
-
 func DoUpgrade(cancel context.CancelFunc, done chan int) error {
-	logFile := filepath.Join(os.Getenv("LOCALAPPDATA"), "Ollama", "upgrade.log")
-
 	files, err := filepath.Glob(filepath.Join(UpdateStageDir, "*", "*.exe")) // TODO generalize for multiplatform
 	if err != nil {
 		return fmt.Errorf("failed to lookup downloads: %s", err)
@@ -29,13 +23,13 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 	installerExe := files[0]

 	slog.Info("starting upgrade with " + installerExe)
-	slog.Info("upgrade log file " + logFile)
+	slog.Info("upgrade log file " + UpgradeLogFile)

 	// When running in debug mode, we'll be "verbose" and let the installer pop up and prompt
 	installArgs := []string{
-		"/CLOSEAPPLICATIONS",             // Quit the tray app if it's still running
-		"/LOG=" + filepath.Base(logFile), // Only relative seems reliable, so set pwd
-		"/FORCECLOSEAPPLICATIONS",        // Force close the tray app - might be needed
+		"/CLOSEAPPLICATIONS",                    // Quit the tray app if it's still running
+		"/LOG=" + filepath.Base(UpgradeLogFile), // Only relative seems reliable, so set pwd
+		"/FORCECLOSEAPPLICATIONS",               // Force close the tray app - might be needed
 	}
 	// When we're not in debug mode, make the upgrade as quiet as possible (no GUI, no prompts)
 	// TODO - temporarily disable since we're pinning in debug mode for the preview
@@ -59,7 +53,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 	}

 	slog.Debug(fmt.Sprintf("starting installer: %s %v", installerExe, installArgs))
-	os.Chdir(filepath.Dir(logFile)) //nolint:errcheck
+	os.Chdir(filepath.Dir(UpgradeLogFile)) //nolint:errcheck
 	cmd := exec.Command(installerExe, installArgs...)

 	if err := cmd.Start(); err != nil {
--- a/app/main.go
+++ b/app/main.go
@@ -2,15 +2,11 @@ package main

 // Compile with the following to get rid of the cmd pop up on windows
 // go build -ldflags="-H windowsgui" .
-var (
-	AppName       string
-	CLIName       string
-	AppDir        string
-	AppDataDir    string
-	AppLogFile    string
-	ServerLogFile string
+
+import (
+	"github.com/ollama/ollama/app/lifecycle"
 )

 func main() {
-	run()
+	lifecycle.Run()
 }
--- a/app/windows/ollama.iss
+++ b/app/windows/ollama.iss
@@ -88,8 +88,8 @@ DialogFontSize=12
 [Files]
 Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
 Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
-Source: "..\dist\windows-amd64\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit
-Source: "..\dist\windows-amd64\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
+Source: "..\dist\windows-{#ARCH}\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit
+Source: "..\dist\windows-{#ARCH}\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
 Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
 Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
 #if DirExists("..\dist\windows-amd64\rocm")
--- a/app/windows/ollama.rc
+++ b/app/windows/ollama.rc
--- a/app/windows/ollama_welcome.ps1
+++ b/app/windows/ollama_welcome.ps1
--- a/app/tray/tray_nonwindows.go
+++ b/app/tray/tray_nonwindows.go
@@ -1,3 +1,5 @@
+//go:build !windows
+
 package tray

 import (
--- a/auth/auth.go
+++ b/auth/auth.go
@@ -10,12 +10,44 @@ import (
 	"log/slog"
 	"os"
 	"path/filepath"
+	"strings"

 	"golang.org/x/crypto/ssh"
 )

 const defaultPrivateKey = "id_ed25519"

+// keyPath returns <user home>/.ollama/<defaultPrivateKey>, or the home lookup error.
+func keyPath() (string, error) {
+	homeDir, err := os.UserHomeDir()
+	if err != nil {
+		return "", err
+	}
+	return filepath.Join(homeDir, ".ollama", defaultPrivateKey), nil
+}
+
+// GetPublicKey reads the private key at keyPath, parses it, and returns the
+// corresponding public key in authorized_keys form with surrounding
+// whitespace trimmed. A read failure is logged before being returned.
+func GetPublicKey() (string, error) {
+	path, err := keyPath()
+	if err != nil {
+		return "", err
+	}
+
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
+		return "", err
+	}
+
+	signer, err := ssh.ParsePrivateKey(raw)
+	if err != nil {
+		return "", err
+	}
+	return strings.TrimSpace(string(ssh.MarshalAuthorizedKey(signer.PublicKey()))), nil
+}
+
 func NewNonce(r io.Reader, length int) (string, error) {
 	nonce := make([]byte, length)
 	if _, err := io.ReadFull(r, nonce); err != nil {
@@ -26,13 +58,11 @@ func NewNonce(r io.Reader, length int) (string, error) {
 }

 func Sign(ctx context.Context, bts []byte) (string, error) {
-	home, err := os.UserHomeDir()
+	keyPath, err := keyPath()
 	if err != nil {
 		return "", err
 	}

-	keyPath := filepath.Join(home, ".ollama", defaultPrivateKey)
-
 	privateKeyFile, err := os.ReadFile(keyPath)
 	if err != nil {
 		slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -32,10 +32,12 @@ import (
 	"golang.org/x/term"

 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/auth"
 	"github.com/ollama/ollama/format"
-	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/progress"
 	"github.com/ollama/ollama/server"
+	"github.com/ollama/ollama/types/errtypes"
+	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/version"
 )

@@ -54,12 +56,13 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	p := progress.NewProgress(os.Stderr)
 	defer p.Stop()

-	modelfile, err := os.ReadFile(filename)
+	f, err := os.Open(filename)
 	if err != nil {
 		return err
 	}
+	defer f.Close()

-	commands, err := parser.Parse(bytes.NewReader(modelfile))
+	modelfile, err := model.ParseFile(f)
 	if err != nil {
 		return err
 	}
@@ -73,10 +76,10 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	spinner := progress.NewSpinner(status)
 	p.Add(status, spinner)

-	for _, c := range commands {
-		switch c.Name {
+	for i := range modelfile.Commands {
+		switch modelfile.Commands[i].Name {
 		case "model", "adapter":
-			path := c.Args
+			path := modelfile.Commands[i].Args
 			if path == "~" {
 				path = home
 			} else if strings.HasPrefix(path, "~/") {
@@ -88,7 +91,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 			}

 			fi, err := os.Stat(path)
-			if errors.Is(err, os.ErrNotExist) && c.Name == "model" {
+			if errors.Is(err, os.ErrNotExist) && modelfile.Commands[i].Name == "model" {
 				continue
 			} else if err != nil {
 				return err
@@ -111,13 +114,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 				return err
 			}

-			name := c.Name
-			if c.Name == "model" {
-				name = "from"
-			}
-
-			re := regexp.MustCompile(fmt.Sprintf(`(?im)^(%s)\s+%s\s*$`, name, c.Args))
-			modelfile = re.ReplaceAll(modelfile, []byte("$1 @"+digest))
+			modelfile.Commands[i].Args = "@" + digest
 		}
 	}

@@ -147,7 +144,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {

 	quantization, _ := cmd.Flags().GetString("quantization")

-	request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile), Quantization: quantization}
+	request := api.CreateRequest{Name: args[0], Modelfile: modelfile.String(), Quantization: quantization}
 	if err := client.Create(cmd.Context(), &request, fn); err != nil {
 		return err
 	}
@@ -357,6 +354,47 @@ func RunHandler(cmd *cobra.Command, args []string) error {
 	return generateInteractive(cmd, opts)
 }

+// errFromUnknownKey rewrites an unknown-key push error into one that also
+// shows the user's public key and the page where it can be registered. The
+// original error is returned untouched when no SSH public key can be found in
+// its text, when the local key cannot be loaded, or when the key quoted by
+// the server matches neither the user's key nor (on Linux) the ollama
+// service key.
+func errFromUnknownKey(unknownKeyErr error) error {
+	// find SSH public key in the error message
+	re := regexp.MustCompile(`ssh-\w+ [^\s"]+`)
+	serverPubKey := re.FindString(unknownKeyErr.Error())
+	if serverPubKey == "" {
+		return unknownKeyErr
+	}
+
+	localPubKey, err := auth.GetPublicKey()
+	if err != nil {
+		return unknownKeyErr
+	}
+
+	if runtime.GOOS == "linux" && serverPubKey != localPubKey {
+		// try the ollama service public key
+		svcPubKey, err := os.ReadFile("/usr/share/ollama/.ollama/id_ed25519.pub")
+		if err != nil {
+			return unknownKeyErr
+		}
+		localPubKey = strings.TrimSpace(string(svcPubKey))
+	}
+
+	// check if the returned public key matches the local public key, this prevents adding a remote key to the user's account
+	if serverPubKey != localPubKey {
+		return unknownKeyErr
+	}
+
+	msg := unknownKeyErr.Error() +
+		"\n\nYour ollama key is:\n" +
+		localPubKey +
+		"\nAdd your key at:\n" +
+		"https://ollama.com/settings/keys"
+	return errors.New(msg)
+}
+
 func PushHandler(cmd *cobra.Command, args []string) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
@@ -404,6 +442,20 @@ func PushHandler(cmd *cobra.Command, args []string) error {

 	request := api.PushRequest{Name: args[0], Insecure: insecure}
 	if err := client.Push(cmd.Context(), &request, fn); err != nil {
+		if spinner != nil {
+			spinner.Stop()
+		}
+		if strings.Contains(err.Error(), "access denied") {
+			return errors.New("you are not authorized to push to this namespace, create the model under a namespace you own")
+		}
+		host := model.ParseName(args[0]).Host
+		isOllamaHost := strings.HasSuffix(host, ".ollama.ai") || strings.HasSuffix(host, ".ollama.com")
+		if strings.Contains(err.Error(), errtypes.UnknownOllamaKeyErrMsg) && isOllamaHost {
+			// the user has not added their ollama key to ollama.com
+			// re-throw an error with a more user-friendly message
+			return errFromUnknownKey(err)
+		}
+
 		return err
 	}

@@ -831,19 +883,17 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 }

 func RunServer(cmd *cobra.Command, _ []string) error {
-	host, port, err := net.SplitHostPort(strings.Trim(os.Getenv("OLLAMA_HOST"), "\"'"))
+	// retrieve the OLLAMA_HOST environment variable
+	ollamaHost, err := api.GetOllamaHost()
 	if err != nil {
-		host, port = "127.0.0.1", "11434"
-		if ip := net.ParseIP(strings.Trim(os.Getenv("OLLAMA_HOST"), "[]")); ip != nil {
-			host = ip.String()
-		}
+		return err
 	}

 	if err := initializeKeypair(); err != nil {
 		return err
 	}

-	ln, err := net.Listen("tcp", net.JoinHostPort(host, port))
+	ln, err := net.Listen("tcp", net.JoinHostPort(ollamaHost.Host, ollamaHost.Port))
 	if err != nil {
 		return err
 	}
@@ -1069,7 +1119,7 @@ Environment Variables:
 		RunE:    ListHandler,
 	}
 	copyCmd := &cobra.Command{
-		Use:     "cp SOURCE TARGET",
+		Use:     "cp SOURCE DESTINATION",
 		Short:   "Copy a model",
 		Args:    cobra.ExactArgs(2),
 		PreRunE: checkServerHeartbeat,
--- a/cmd/interactive.go
+++ b/cmd/interactive.go
@@ -94,6 +94,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 		fmt.Fprintln(os.Stderr, "  /show           Show model information")
 		fmt.Fprintln(os.Stderr, "  /load <model>   Load a session or model")
 		fmt.Fprintln(os.Stderr, "  /save <model>   Save your current session")
+		fmt.Fprintln(os.Stderr, "  /clear          Clear session context")
 		fmt.Fprintln(os.Stderr, "  /bye            Exit")
 		fmt.Fprintln(os.Stderr, "  /?, /help       Help for a command")
 		fmt.Fprintln(os.Stderr, "  /? shortcuts    Help for keyboard shortcuts")
@@ -280,6 +281,10 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 			}
 			fmt.Printf("Created new model '%s'\n", args[1])
 			continue
+		case strings.HasPrefix(line, "/clear"):
+			opts.Messages = []api.Message{}
+			fmt.Println("Cleared session context")
+			continue
 		case strings.HasPrefix(line, "/set"):
 			args := strings.Fields(line)
 			if len(args) > 1 {
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -5,6 +5,7 @@ import (
 	"encoding/binary"
 	"encoding/json"
 	"fmt"
+	"io"
 	"log/slog"
 	"os"
 	"path/filepath"
@@ -47,7 +48,7 @@ type ByteOrder interface {
 type ModelArch interface {
 	GetTensors() error
 	LoadVocab() error
-	WriteGGUF() (string, error)
+	WriteGGUF(io.WriteSeeker) error
 }

 type ModelFormat interface {
--- a/convert/gemma.go
+++ b/convert/gemma.go
@@ -94,7 +94,7 @@ func (m *GemmaModel) LoadVocab() error {
 	return nil
 }

-func (m *GemmaModel) WriteGGUF() (string, error) {
+func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error {
 	kv := llm.KV{
 		"general.architecture":                   "gemma",
 		"general.name":                           m.Name,
@@ -122,16 +122,5 @@ func (m *GemmaModel) WriteGGUF() (string, error) {
 		"tokenizer.ggml.add_eos_token":    false,
 	}

-	f, err := os.CreateTemp("", "ollama-gguf")
-	if err != nil {
-		return "", err
-	}
-	defer f.Close()
-
-	mod := llm.NewGGUFV3(m.Params.ByteOrder)
-	if err := mod.Encode(f, kv, m.Tensors); err != nil {
-		return "", err
-	}
-
-	return f.Name(), nil
+	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
 }
--- a/convert/llama.go
+++ b/convert/llama.go
@@ -132,7 +132,7 @@ func (m *LlamaModel) LoadVocab() error {
 	return nil
 }

-func (m *LlamaModel) WriteGGUF() (string, error) {
+func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
 	kv := llm.KV{
 		"general.architecture":                   "llama",
 		"general.name":                           m.Name,
@@ -161,16 +161,4 @@ func (m *LlamaModel) WriteGGUF() (string, error) {

-	f, err := os.CreateTemp("", "ollama-gguf")
-	if err != nil {
-		return "", err
-	}
-	defer f.Close()
-
-	mod := llm.NewGGUFV3(m.Params.ByteOrder)
-	if err := mod.Encode(f, kv, m.Tensors); err != nil {
-		return "", err
-	}
-
-	slog.Debug(fmt.Sprintf("gguf file = %s", f.Name()))
-
-	return f.Name(), nil
+	// Encode straight into the caller-supplied writer; no temporary file is needed.
+	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
 }
--- a/convert/mistral.go
+++ b/convert/mistral.go
@@ -132,7 +132,7 @@ func (m *MistralModel) LoadVocab() error {
 	return nil
 }

-func (m *MistralModel) WriteGGUF() (string, error) {
+func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
 	kv := llm.KV{
 		"general.architecture":                   "llama",
 		"general.name":                           m.Name,
@@ -158,16 +158,5 @@ func (m *MistralModel) WriteGGUF() (string, error) {
 		"tokenizer.ggml.unknown_token_id": uint32(0),
 	}

-	f, err := os.CreateTemp("", "ollama-gguf")
-	if err != nil {
-		return "", err
-	}
-	defer f.Close()
-
-	mod := llm.NewGGUFV3(m.Params.ByteOrder)
-	if err := mod.Encode(f, kv, m.Tensors); err != nil {
-		return "", err
-	}
-
-	return f.Name(), nil
+	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
 }
--- a/convert/mixtral.go
+++ b/convert/mixtral.go
@@ -1,7 +1,7 @@
 package convert

 import (
-	"os"
+	"io"
 	"regexp"

 	"github.com/ollama/ollama/llm"
@@ -47,7 +47,7 @@ func (m *MixtralModel) LoadVocab() error {
 	return nil
 }

-func (m *MixtralModel) WriteGGUF() (string, error) {
+func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error {
 	kv := llm.KV{
 		"general.architecture":          "llama",
 		"general.name":                  m.Name,
@@ -81,16 +81,5 @@ func (m *MixtralModel) WriteGGUF() (string, error) {
 		"tokenizer.ggml.add_eos_token":    false,
 	}

-	f, err := os.CreateTemp("", "ollama-gguf")
-	if err != nil {
-		return "", err
-	}
-	defer f.Close()
-
-	mod := llm.NewGGUFV3(m.Params.ByteOrder)
-	if err := mod.Encode(f, kv, m.Tensors); err != nil {
-		return "", err
-	}
-
-	return f.Name(), nil
+	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
 }
--- a/docs/api.md
+++ b/docs/api.md
@@ -17,7 +17,7 @@

 ### Model names

-Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q4_1` and `llama2:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.
+Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q4_1` and `llama3:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.

 ### Durations

@@ -66,7 +66,7 @@ Enable JSON mode by setting the `format` parameter to `json`. This will structur

 ```shell
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama2",
+  "model": "llama3",
  "prompt": "Why is the sky blue?"
 }'
 ```
@@ -77,7 +77,7 @@ A stream of JSON objects is returned:

 ```json
 {
-  "model": "llama2",
+  "model": "llama3",
  "created_at": "2023-08-04T08:52:19.385406455-07:00",
  "response": "The",
  "done": false
@@ -99,7 +99,7 @@ To calculate how fast the response is generated in tokens per second (token/s),

 ```json
 {
-  "model": "llama2",
+  "model": "llama3",
  "created_at": "2023-08-04T19:22:45.499127Z",
  "response": "",
  "done": true,
@@ -121,7 +121,7 @@ A response can be received in one reply when streaming is off.

 ```shell
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama2",
+  "model": "llama3",
  "prompt": "Why is the sky blue?",
  "stream": false
 }'
@@ -133,7 +133,7 @@ If `stream` is set to `false`, the response will be a single JSON object:

 ```json
 {
-  "model": "llama2",
+  "model": "llama3",
  "created_at": "2023-08-04T19:22:45.499127Z",
  "response": "The sky is blue because it is the color of the sky.",
  "done": true,
@@ -155,7 +155,7 @@ If `stream` is set to `false`, the response will be a single JSON object:

 ```shell
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama2",
+  "model": "llama3",
  "prompt": "What color is the sky at different times of the day? Respond using JSON",
  "format": "json",
  "stream": false
@@ -166,7 +166,7 @@ curl http://localhost:11434/api/generate -d '{

 ```json
 {
-  "model": "llama2",
+  "model": "llama3",
  "created_at": "2023-11-09T21:07:55.186497Z",
  "response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n",
  "done": true,
@@ -289,7 +289,7 @@ If you want to set custom options for the model at runtime rather than in the Mo

 ```shell
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama2",
+  "model": "llama3",
  "prompt": "Why is the sky blue?",
  "stream": false,
  "options": {
@@ -332,7 +332,7 @@ curl http://localhost:11434/api/generate -d '{

 ```json
 {
-  "model": "llama2",
+  "model": "llama3",
  "created_at": "2023-08-04T19:22:45.499127Z",
  "response": "The sky is blue because it is the color of the sky.",
  "done": true,
@@ -354,7 +354,7 @@ If an empty prompt is provided, the model will be loaded into memory.

 ```shell
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama2"
+  "model": "llama3"
 }'
 ```

@@ -364,7 +364,7 @@ A single JSON object is returned:

 ```json
 {
-  "model": "llama2",
+  "model": "llama3",
  "created_at": "2023-12-18T19:52:07.071755Z",
  "response": "",
  "done": true
@@ -407,7 +407,7 @@ Send a chat message with a streaming response.

 ```shell
 curl http://localhost:11434/api/chat -d '{
-  "model": "llama2",
+  "model": "llama3",
  "messages": [
    {
      "role": "user",
@@ -423,7 +423,7 @@ A stream of JSON objects is returned:

 ```json
 {
-  "model": "llama2",
+  "model": "llama3",
  "created_at": "2023-08-04T08:52:19.385406455-07:00",
  "message": {
    "role": "assistant",
@@ -438,7 +438,7 @@ Final response:

 ```json
 {
-  "model": "llama2",
+  "model": "llama3",
  "created_at": "2023-08-04T19:22:45.499127Z",
  "done": true,
  "total_duration": 4883583458,
@@ -456,7 +456,7 @@ Final response:

 ```shell
 curl http://localhost:11434/api/chat -d '{
-  "model": "llama2",
+  "model": "llama3",
  "messages": [
    {
      "role": "user",
@@ -471,7 +471,7 @@ curl http://localhost:11434/api/chat -d '{

 ```json
 {
-  "model": "registry.ollama.ai/library/llama2:latest",
+  "model": "registry.ollama.ai/library/llama3:latest",
  "created_at": "2023-12-12T14:13:43.416799Z",
  "message": {
    "role": "assistant",
@@ -495,7 +495,7 @@ Send a chat message with a conversation history. You can use this same approach

 ```shell
 curl http://localhost:11434/api/chat -d '{
-  "model": "llama2",
+  "model": "llama3",
  "messages": [
    {
      "role": "user",
@@ -519,7 +519,7 @@ A stream of JSON objects is returned:

 ```json
 {
-  "model": "llama2",
+  "model": "llama3",
  "created_at": "2023-08-04T08:52:19.385406455-07:00",
  "message": {
    "role": "assistant",
@@ -533,7 +533,7 @@ Final response:

 ```json
 {
-  "model": "llama2",
+  "model": "llama3",
  "created_at": "2023-08-04T19:22:45.499127Z",
  "done": true,
  "total_duration": 8113331500,
@@ -591,7 +591,7 @@ curl http://localhost:11434/api/chat -d '{

 ```shell
 curl http://localhost:11434/api/chat -d '{
-  "model": "llama2",
+  "model": "llama3",
  "messages": [
    {
      "role": "user",
@@ -609,7 +609,7 @@ curl http://localhost:11434/api/chat -d '{

 ```json
 {
-  "model": "registry.ollama.ai/library/llama2:latest",
+  "model": "registry.ollama.ai/library/llama3:latest",
  "created_at": "2023-12-12T14:13:43.416799Z",
  "message": {
    "role": "assistant",
@@ -651,7 +651,7 @@ Create a new model from a `Modelfile`.
 ```shell
 curl http://localhost:11434/api/create -d '{
  "name": "mario",
-  "modelfile": "FROM llama2\nSYSTEM You are mario from Super Mario Bros."
+  "modelfile": "FROM llama3\nSYSTEM You are mario from Super Mario Bros."
 }'
 ```

@@ -758,7 +758,7 @@ A single JSON object will be returned.
      }
    },
    {
-      "name": "llama2:latest",
+      "name": "llama3:latest",
      "modified_at": "2023-12-07T09:32:18.757212583-08:00",
      "size": 3825819519,
      "digest": "fe938a131f40e6f6d40083c9f0f430a515233eb2edaa6d72eb85c50d64f2300e",
@@ -792,7 +792,7 @@ Show information about a model including details, modelfile, template, parameter

 ```shell
 curl http://localhost:11434/api/show -d '{
-  "name": "llama2"
+  "name": "llama3"
 }'
 ```

@@ -827,8 +827,8 @@ Copy a model. Creates a model with another name from an existing model.

 ```shell
 curl http://localhost:11434/api/copy -d '{
-  "source": "llama2",
-  "destination": "llama2-backup"
+  "source": "llama3",
+  "destination": "llama3-backup"
 }'
 ```

@@ -854,7 +854,7 @@ Delete a model and its data.

 ```shell
 curl -X DELETE http://localhost:11434/api/delete -d '{
-  "name": "llama2:13b"
+  "name": "llama3:13b"
 }'
 ```

@@ -882,7 +882,7 @@ Download a model from the ollama library. Cancelled pulls are resumed from where

 ```shell
 curl http://localhost:11434/api/pull -d '{
-  "name": "llama2"
+  "name": "llama3"
 }'
 ```

--- a/docs/development.md
+++ b/docs/development.md
@@ -51,7 +51,7 @@ Typically the build scripts will auto-detect CUDA, however, if your Linux distro
 or installation approach uses unusual paths, you can specify the location by
 specifying an environment variable `CUDA_LIB_DIR` to the location of the shared
 libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize
-set set of target CUDA architectues by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70")
+the set of target CUDA architectures by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70")

 Then generate dependencies:

@@ -142,4 +142,4 @@ In addition to the common Windows development tools described above, install AMD
 - [AMD HIP](https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html)
 - [Strawberry Perl](https://strawberryperl.com/)

-Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`).
+Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`).
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -32,7 +32,7 @@ When using the API, specify the `num_ctx` parameter:

 ```
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama2",
+  "model": "llama3",
  "prompt": "Why is the sky blue?",
  "options": {
    "num_ctx": 4096
@@ -88,9 +88,9 @@ On windows, Ollama inherits your user and system environment variables.

 3. Edit or create New variable(s) for your user account for `OLLAMA_HOST`, `OLLAMA_MODELS`, etc.

-4. Click OK/Apply to save 
+4. Click OK/Apply to save

-5. Run `ollama` from a new terminal window 
+5. Run `ollama` from a new terminal window


 ## How can I expose Ollama on my network?
@@ -221,12 +221,12 @@ The `keep_alive` parameter can be set to:

 For example, to preload a model and leave it in memory use:
 ```shell
-curl http://localhost:11434/api/generate -d '{"model": "llama2", "keep_alive": -1}'
+curl http://localhost:11434/api/generate -d '{"model": "llama3", "keep_alive": -1}'
 ```

 To unload the model and free up memory use:
 ```shell
-curl http://localhost:11434/api/generate -d '{"model": "llama2", "keep_alive": 0}'
+curl http://localhost:11434/api/generate -d '{"model": "llama3", "keep_alive": 0}'
 ```

 Alternatively, you can change the amount of time all models are loaded into memory by setting the `OLLAMA_KEEP_ALIVE` environment variable when starting the Ollama server. The `OLLAMA_KEEP_ALIVE` variable uses the same parameter types as the `keep_alive` parameter types mentioned above. Refer to section explaining [how to configure the Ollama server](#how-do-i-configure-ollama-server) to correctly set the environment variable.
--- a/docs/modelfile.md
+++ b/docs/modelfile.md
@@ -10,7 +10,7 @@ A model file is the blueprint to create and share models with Ollama.
 - [Examples](#examples)
 - [Instructions](#instructions)
  - [FROM (Required)](#from-required)
-    - [Build from llama2](#build-from-llama2)
+    - [Build from llama3](#build-from-llama3)
    - [Build from a bin file](#build-from-a-bin-file)
  - [PARAMETER](#parameter)
    - [Valid Parameters and Values](#valid-parameters-and-values)
@@ -48,7 +48,7 @@ INSTRUCTION arguments
 An example of a `Modelfile` creating a mario blueprint:

 ```modelfile
-FROM llama2
+FROM llama3
 # sets the temperature to 1 [higher is more creative, lower is more coherent]
 PARAMETER temperature 1
 # sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
@@ -67,33 +67,25 @@ To use this:

 More examples are available in the [examples directory](../examples).

-### `Modelfile`s in [ollama.com/library][1]
-
-There are two ways to view `Modelfile`s underlying the models in [ollama.com/library][1]:
-
- Option 1: view a details page from a model's tags page:
-  1.  Go to a particular model's tags (e.g. https://ollama.com/library/llama2/tags)
-  2.  Click on a tag (e.g. https://ollama.com/library/llama2:13b)
-  3.  Scroll down to "Layers"
-      - Note: if the [`FROM` instruction](#from-required) is not present,
-        it means the model was created from a local file
- Option 2: use `ollama show` to print the `Modelfile` for any local models like so:
+To view the Modelfile of a given model, use the `ollama show --modelfile` command.

  ```bash
-  > ollama show --modelfile llama2:13b
+  > ollama show --modelfile llama3
  # Modelfile generated by "ollama show"
  # To build a new Modelfile based on this one, replace the FROM line with:
-  # FROM llama2:13b
+  # FROM llama3:latest
+  FROM /Users/pdevine/.ollama/models/blobs/sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29
+  TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>

-  FROM /root/.ollama/models/blobs/sha256:123abc
-  TEMPLATE """[INST] {{ if .System }}<<SYS>>{{ .System }}<</SYS>>
+  {{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>

-  {{ end }}{{ .Prompt }} [/INST] """
-  SYSTEM """"""
-  PARAMETER stop [INST]
-  PARAMETER stop [/INST]
-  PARAMETER stop <<SYS>>
-  PARAMETER stop <</SYS>>
+  {{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
+
+  {{ .Response }}<|eot_id|>"""
+  PARAMETER stop "<|start_header_id|>"
+  PARAMETER stop "<|end_header_id|>"
+  PARAMETER stop "<|eot_id|>"
+  PARAMETER stop "<|reserved_special_token"
  ```

 ## Instructions
@@ -106,10 +98,10 @@ The `FROM` instruction defines the base model to use when creating a model.
 FROM <model name>:<tag>
 ```

-#### Build from llama2
+#### Build from llama3

 ```modelfile
-FROM llama2
+FROM llama3
 ```

 A list of available base models:
--- a/docs/openai.md
+++ b/docs/openai.md
@@ -25,7 +25,7 @@ chat_completion = client.chat.completions.create(
            'content': 'Say this is a test',
        }
    ],
-    model='llama2',
+    model='llama3',
 )
 ```

@@ -43,7 +43,7 @@ const openai = new OpenAI({

 const chatCompletion = await openai.chat.completions.create({
  messages: [{ role: 'user', content: 'Say this is a test' }],
-  model: 'llama2',
+  model: 'llama3',
 })
 ```

@@ -53,7 +53,7 @@ const chatCompletion = await openai.chat.completions.create({
 curl http://localhost:11434/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
-        "model": "llama2",
+        "model": "llama3",
        "messages": [
            {
                "role": "system",
@@ -113,7 +113,7 @@ curl http://localhost:11434/v1/chat/completions \
 Before using a model, pull it locally `ollama pull`:

 ```shell
-ollama pull llama2
+ollama pull llama3
 ```

 ### Default model names
@@ -121,7 +121,7 @@ ollama pull llama2
 For tooling that relies on default OpenAI model names such as `gpt-3.5-turbo`, use `ollama cp` to copy an existing model name to a temporary name:

 ```
-ollama cp llama2 gpt-3.5-turbo
+ollama cp llama3 gpt-3.5-turbo
 ```

 Afterwards, this new model name can be specified the `model` field:
--- a/docs/tutorials/langchainjs.md
+++ b/docs/tutorials/langchainjs.md
@@ -15,7 +15,7 @@ import { Ollama } from "langchain/llms/ollama";

 const ollama = new Ollama({
  baseUrl: "http://localhost:11434",
-  model: "llama2",
+  model: "llama3",
 });

 const answer = await ollama.invoke(`why is the sky blue?`);
@@ -23,10 +23,10 @@ const answer = await ollama.invoke(`why is the sky blue?`);
 console.log(answer);
 ```

-That will get us the same thing as if we ran `ollama run llama2 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
+That will get us the same thing as if we ran `ollama run llama3 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.

 ```bash
-npm install cheerio 
+npm install cheerio
 ```

 ```javascript
--- a/docs/tutorials/langchainpy.md
+++ b/docs/tutorials/langchainpy.md
@@ -17,10 +17,12 @@ Let's start by asking a simple question that we can get an answer to from the **
 Then we can create a model and ask the question:

 ```python
-from langchain.llms import Ollama
-ollama = Ollama(base_url='http://localhost:11434',
-model="llama2")
-print(ollama("why is the sky blue"))
+from langchain_community.llms import Ollama
+ollama = Ollama(
+    base_url='http://localhost:11434',
+    model="llama3"
+)
+print(ollama.invoke("why is the sky blue"))
 ```

 Notice that we are defining the model and the base URL for Ollama.
--- a/docs/windows.md
+++ b/docs/windows.md
@@ -1,47 +1,48 @@
-# Ollama Windows Preview
-
-Welcome to the Ollama Windows preview.
-
-No more WSL required!
-
-Ollama now runs as a native Windows application, including NVIDIA and AMD Radeon GPU support.
-After installing Ollama Windows Preview, Ollama will run in the background and
-the `ollama` command line is available in `cmd`, `powershell` or your favorite
-terminal application. As usual the Ollama [api](./api.md) will be served on
-`http://localhost:11434`.
-
-As this is a preview release, you should expect a few bugs here and there.  If
-you run into a problem you can reach out on
-[Discord](https://discord.gg/ollama), or file an 
-[issue](https://github.com/ollama/ollama/issues).
-Logs will often be helpful in diagnosing the problem (see
-[Troubleshooting](#troubleshooting) below)
-
-## System Requirements
-
-* Windows 10 or newer, Home or Pro
-* NVIDIA 452.39 or newer Drivers if you have an NVIDIA card
-* AMD Radeon Driver https://www.amd.com/en/support if you have a Radeon card
-
-## API Access
-
-Here's a quick example showing API access from `powershell`
-```powershell
-(Invoke-WebRequest -method POST -Body '{"model":"llama2", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json
-```
-
-## Troubleshooting
-
-While we're in preview, `OLLAMA_DEBUG` is always enabled, which adds
-a "view logs" menu item to the app, and increses logging for the GUI app and
-server.
-
-Ollama on Windows stores files in a few different locations.  You can view them in
-the explorer window by hitting `<cmd>+R` and type in:
- `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates
-    - *app.log* contains logs from the GUI application
-    - *server.log* contains the server logs
-    - *upgrade.log* contains log output for upgrades
- `explorer %LOCALAPPDATA%\Programs\Ollama` contains the binaries (The installer adds this to your user PATH)
- `explorer %HOMEPATH%\.ollama` contains models and configuration
- `explorer %TEMP%` contains temporary executable files in one or more `ollama*` directories
+
+# Ollama Windows Preview
+
+Welcome to the Ollama Windows preview.
+
+No more WSL required!
+
+Ollama now runs as a native Windows application, including NVIDIA and AMD Radeon GPU support.
+After installing Ollama Windows Preview, Ollama will run in the background and
+the `ollama` command line is available in `cmd`, `powershell` or your favorite
+terminal application. As usual the Ollama [api](./api.md) will be served on
+`http://localhost:11434`.
+
+As this is a preview release, you should expect a few bugs here and there.  If
+you run into a problem you can reach out on
+[Discord](https://discord.gg/ollama), or file an
+[issue](https://github.com/ollama/ollama/issues).
+Logs will often be helpful in diagnosing the problem (see
+[Troubleshooting](#troubleshooting) below).
+
+## System Requirements
+
+* Windows 10 or newer, Home or Pro
+* NVIDIA 452.39 or newer Drivers if you have an NVIDIA card
+* AMD Radeon Driver https://www.amd.com/en/support if you have a Radeon card
+
+## API Access
+
+Here's a quick example showing API access from `powershell`
+```powershell
+(Invoke-WebRequest -method POST -Body '{"model":"llama3", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json
+```
+
+## Troubleshooting
+
+While we're in preview, `OLLAMA_DEBUG` is always enabled, which adds
+a "view logs" menu item to the app, and increases logging for the GUI app and
+server.
+
+Ollama on Windows stores files in a few different locations.  You can view them in
+the explorer window by pressing `<Win>+R` and typing in:
+- `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates
+    - *app.log* contains logs from the GUI application
+    - *server.log* contains the server logs
+    - *upgrade.log* contains log output for upgrades
+- `explorer %LOCALAPPDATA%\Programs\Ollama` contains the binaries (The installer adds this to your user PATH)
+- `explorer %HOMEPATH%\.ollama` contains models and configuration
+- `explorer %TEMP%` contains temporary executable files in one or more `ollama*` directories
--- a/examples/bash-comparemodels/README.md
+++ b/examples/bash-comparemodels/README.md
@@ -2,7 +2,7 @@

 When calling `ollama`, you can pass it a file to run all the prompts in the file, one after the other:

-`ollama run llama2 < sourcequestions.txt`
+`ollama run llama3 < sourcequestions.txt`

 This concept is used in the following example.

--- a/examples/go-chat/main.go
+++ b/examples/go-chat/main.go
@@ -35,7 +35,7 @@ func main() {

 	ctx := context.Background()
 	req := &api.ChatRequest{
-		Model:    "llama2",
+		Model:    "llama3",
 		Messages: messages,
 	}

--- a/examples/langchain-python-rag-document/main.py
+++ b/examples/langchain-python-rag-document/main.py
@@ -40,9 +40,9 @@ while True:
        continue

    # Prompt
-    template = """Use the following pieces of context to answer the question at the end. 
-    If you don't know the answer, just say that you don't know, don't try to make up an answer. 
-    Use three sentences maximum and keep the answer as concise as possible. 
+    template = """Use the following pieces of context to answer the question at the end.
+    If you don't know the answer, just say that you don't know, don't try to make up an answer.
+    Use three sentences maximum and keep the answer as concise as possible.
    {context}
    Question: {question}
    Helpful Answer:"""
@@ -51,11 +51,11 @@ while True:
        template=template,
    )

-    llm = Ollama(model="llama2:13b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
+    llm = Ollama(model="llama3:8b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=vectorstore.as_retriever(),
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    )

-    result = qa_chain({"query": query})
+    result = qa_chain({"query": query})
--- a/examples/langchain-python-simple/README.md
+++ b/examples/langchain-python-simple/README.md
@@ -4,10 +4,10 @@ This example is a basic "hello world" of using LangChain with Ollama.

 ## Running the Example

-1. Ensure you have the `llama2` model installed:
+1. Ensure you have the `llama3` model installed:

   ```bash
-   ollama pull llama2
+   ollama pull llama3
   ```

 2. Install the Python Requirements.
@@ -21,4 +21,3 @@ This example is a basic "hello world" of using LangChain with Ollama.
   ```bash
   python main.py
   ```
-  
--- a/examples/langchain-python-simple/main.py
+++ b/examples/langchain-python-simple/main.py
@@ -1,6 +1,6 @@
 from langchain.llms import Ollama

 input = input("What is your question?")
-llm = Ollama(model="llama2")
+llm = Ollama(model="llama3")
 res = llm.predict(input)
 print (res)
--- a/examples/modelfile-mario/Modelfile
+++ b/examples/modelfile-mario/Modelfile
@@ -1,4 +1,4 @@
-FROM llama2
+FROM llama3
 PARAMETER temperature 1
 SYSTEM """
 You are Mario from super mario bros, acting as an assistant.
--- a/examples/modelfile-mario/readme.md
+++ b/examples/modelfile-mario/readme.md
@@ -2,12 +2,12 @@

 # Example character: Mario

-This example shows how to create a basic character using Llama2 as the base model.
+This example shows how to create a basic character using Llama 3 as the base model.

 To run this example:

 1. Download the Modelfile
-2. `ollama pull llama2` to get the base model used in the model file.
+2. `ollama pull llama3` to get the base model used in the model file.
 3. `ollama create NAME -f ./Modelfile`
 4. `ollama run NAME`

@@ -18,7 +18,7 @@ Ask it some questions like "Who are you?" or "Is Peach in trouble again?"
 What the model file looks like:

 ```
-FROM llama2
+FROM llama3
 PARAMETER temperature 1
 SYSTEM """
 You are Mario from Super Mario Bros, acting as an assistant.
--- a/examples/python-json-datagenerator/predefinedschema.py
+++ b/examples/python-json-datagenerator/predefinedschema.py
@@ -2,16 +2,16 @@ import requests
 import json
 import random

-model = "llama2"
+model = "llama3"
 template = {
-  "firstName": "", 
-  "lastName": "", 
+  "firstName": "",
+  "lastName": "",
  "address": {
-    "street": "", 
-    "city": "", 
-    "state": "", 
+    "street": "",
+    "city": "",
+    "state": "",
    "zipCode": ""
-  }, 
+  },
  "phoneNumber": ""
 }

--- a/examples/python-json-datagenerator/randomaddresses.py
+++ b/examples/python-json-datagenerator/randomaddresses.py
@@ -12,7 +12,7 @@ countries = [
    "France",
 ]
 country = random.choice(countries)
-model = "llama2"
+model = "llama3"

 prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters."

--- a/examples/python-json-datagenerator/readme.md
+++ b/examples/python-json-datagenerator/readme.md
@@ -6,10 +6,10 @@ There are two python scripts in this example. `randomaddresses.py` generates ran

 ## Running the Example

-1. Ensure you have the `llama2` model installed:
+1. Ensure you have the `llama3` model installed:

   ```bash
-   ollama pull llama2
+   ollama pull llama3
   ```

 2. Install the Python Requirements.
--- a/examples/python-simplechat/client.py
+++ b/examples/python-simplechat/client.py
@@ -2,7 +2,7 @@ import json
 import requests

 # NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve`
-model = "llama2"  # TODO: update this for whatever model you wish to use
+model = "llama3"  # TODO: update this for whatever model you wish to use


 def chat(messages):
--- a/examples/python-simplechat/readme.md
+++ b/examples/python-simplechat/readme.md
@@ -4,10 +4,10 @@ The **chat** endpoint is one of two ways to generate text from an LLM with Ollam

 ## Running the Example

-1. Ensure you have the `llama2` model installed:
+1. Ensure you have the `llama3` model installed:

   ```bash
-   ollama pull llama2
+   ollama pull llama3
   ```

 2. Install the Python Requirements.
--- a/examples/typescript-mentors/README.md
+++ b/examples/typescript-mentors/README.md
@@ -4,10 +4,10 @@ This example demonstrates how one would create a set of 'mentors' you can have a

 ## Usage

-1. Add llama2 to have the mentors ask your questions:
+1. Pull llama3 so that the mentors can answer your questions:

   ```bash
-   ollama pull llama2
+   ollama pull llama3
   ```

 2. Install prerequisites:
--- a/examples/typescript-mentors/character-generator.ts
+++ b/examples/typescript-mentors/character-generator.ts
@@ -15,7 +15,7 @@ async function characterGenerator() {
  ollama.setModel("stablebeluga2:70b-q4_K_M");
  const bio = await ollama.generate(`create a bio of ${character} in a single long paragraph. Instead of saying '${character} is...' or '${character} was...' use language like 'You are...' or 'You were...'. Then create a paragraph describing the speaking mannerisms and style of ${character}. Don't include anything about how ${character} looked or what they sounded like, just focus on the words they said. Instead of saying '${character} would say...' use language like 'You should say...'. If you use quotes, always use single quotes instead of double quotes. If there are any specific words or phrases you used a lot, show how you used them. `);

-  const thecontents = `FROM llama2\nSYSTEM """\n${bio.response.replace(/(\r\n|\n|\r)/gm, " ").replace('would', 'should')} All answers to questions should be related back to what you are most known for.\n"""`;
+  const thecontents = `FROM llama3\nSYSTEM """\n${bio.response.replace(/(\r\n|\n|\r)/gm, " ").replace('would', 'should')} All answers to questions should be related back to what you are most known for.\n"""`;

  fs.writeFile(path.join(directory, 'Modelfile'), thecontents, (err: any) => {
    if (err) throw err;
@@ -23,4 +23,4 @@ async function characterGenerator() {
  });
 }

-characterGenerator();
+characterGenerator();
--- a/examples/typescript-simplechat/client.ts
+++ b/examples/typescript-simplechat/client.ts
@@ -1,6 +1,6 @@
 import * as readline from "readline";

-const model = "llama2";
+const model = "llama3";
 type Message = {
  role: "assistant" | "user" | "system";
  content: string;
@@ -74,4 +74,4 @@ async function main() {

 }

-main();
+main();
--- a/gpu/assets.go
+++ b/gpu/assets.go
@@ -40,7 +40,7 @@ func PayloadsDir() (string, error) {
 			}

 			var paths []string
-			for _, root := range []string{appExe, cwd} {
+			for _, root := range []string{filepath.Dir(appExe), cwd} {
 				paths = append(paths,
 					filepath.Join(root),
 					filepath.Join(root, "windows-"+runtime.GOARCH),
--- a/gpu/gpu_darwin.go
+++ b/gpu/gpu_darwin.go
@@ -1,3 +1,5 @@
+//go:build darwin
+
 package gpu

 /*
@@ -8,6 +10,12 @@ package gpu
 import "C"
 import (
 	"runtime"
+
+	"github.com/ollama/ollama/format"
+)
+
+const (
+	metalMinimumMemory = 512 * format.MebiByte
 )

 func GetGPUInfo() GpuInfoList {
@@ -30,7 +38,7 @@ func GetGPUInfo() GpuInfoList {
 	// TODO is there a way to gather actual allocated video memory? (currentAllocatedSize doesn't work)
 	info.FreeMemory = info.TotalMemory

-	info.MinimumMemory = 0
+	info.MinimumMemory = metalMinimumMemory
 	return []GpuInfo{info}
 }

--- a/integration/utils_test.go
+++ b/integration/utils_test.go
@@ -107,7 +107,7 @@ func startServer(ctx context.Context, ollamaHost string) error {

 	if tmp := os.Getenv("OLLAMA_HOST"); tmp != ollamaHost {
 		slog.Info("setting env", "OLLAMA_HOST", ollamaHost)
-		os.Setenv("OLLAMA_HOST", ollamaHost)
+		t.Setenv("OLLAMA_HOST", ollamaHost)
 	}

 	slog.Info("starting server", "url", ollamaHost)
--- a/llm/ext_server/server.cpp
+++ b/llm/ext_server/server.cpp
@@ -1032,7 +1032,7 @@ struct llama_server_context
            slot.has_next_token = false;
        }

-        if (!slot.cache_tokens.empty() && result.tok == llama_token_eos(model))
+        if (!slot.cache_tokens.empty() && llama_token_is_eog(model, result.tok))
        {
            slot.stopped_eos = true;
            slot.has_next_token = false;
@@ -1144,12 +1144,15 @@ struct llama_server_context

        res.result_json = json
        {
-            {"content",    tkn.text_to_send},
            {"stop",       false},
            {"slot_id",    slot.id},
            {"multimodal", multimodal}
        };

+        if (!llama_token_is_eog(model, tkn.tok)) {
+            res.result_json["content"] = tkn.text_to_send;
+        }
+
        if (slot.sparams.n_probs > 0)
        {
            std::vector<completion_token_output> probs_output = {};
@@ -2644,18 +2647,18 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
            if (strncmp(sep, "int:", 4) == 0) {
                sep += 4;
                kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
-                kvo.int_value = std::atol(sep);
+                kvo.val_i64 = std::atol(sep);
            } else if (strncmp(sep, "float:", 6) == 0) {
                sep += 6;
                kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
-                kvo.float_value = std::atof(sep);
+                kvo.val_f64 = std::atof(sep);
            } else if (strncmp(sep, "bool:", 5) == 0) {
                sep += 5;
                kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
                if (std::strcmp(sep, "true") == 0) {
-                    kvo.bool_value = true;
+                    kvo.val_bool = true;
                } else if (std::strcmp(sep, "false") == 0) {
-                    kvo.bool_value = false;
+                    kvo.val_bool = false;
                } else {
                    fprintf(stderr, "error: Invalid boolean value for KV override: %s\n", argv[i]);
                    invalid_param = true;
--- a/llm/filetype.go
+++ b/llm/filetype.go
@@ -0,0 +1,165 @@
+package llm
+
+import "fmt"
+
+type fileType uint32
+
+const (
+	fileTypeF32 fileType = iota
+	fileTypeF16
+	fileTypeQ4_0
+	fileTypeQ4_1
+	fileTypeQ4_1_F16
+	fileTypeQ4_2 // unused
+	fileTypeQ4_3 // unused
+	fileTypeQ8_0
+	fileTypeQ5_0
+	fileTypeQ5_1
+	fileTypeQ2_K
+	fileTypeQ3_K_S
+	fileTypeQ3_K_M
+	fileTypeQ3_K_L
+	fileTypeQ4_K_S
+	fileTypeQ4_K_M
+	fileTypeQ5_K_S
+	fileTypeQ5_K_M
+	fileTypeQ6_K
+	fileTypeIQ2_XXS
+	fileTypeIQ2_XS
+	fileTypeQ2_K_S
+	fileTypeQ3_K_XS
+	fileTypeIQ3_XXS
+	fileTypeIQ1_S
+	fileTypeIQ4_NL
+	fileTypeIQ3_S
+	fileTypeIQ2_S
+	fileTypeIQ4_XS
+
+	fileTypeUnknown
+)
+
+func ParseFileType(s string) (fileType, error) {
+	switch s {
+	case "F32":
+		return fileTypeF32, nil
+	case "F16":
+		return fileTypeF16, nil
+	case "Q4_0":
+		return fileTypeQ4_0, nil
+	case "Q4_1":
+		return fileTypeQ4_1, nil
+	case "Q4_1_F16":
+		return fileTypeQ4_1_F16, nil
+	case "Q8_0":
+		return fileTypeQ8_0, nil
+	case "Q5_0":
+		return fileTypeQ5_0, nil
+	case "Q5_1":
+		return fileTypeQ5_1, nil
+	case "Q2_K":
+		return fileTypeQ2_K, nil
+	case "Q3_K_S":
+		return fileTypeQ3_K_S, nil
+	case "Q3_K_M":
+		return fileTypeQ3_K_M, nil
+	case "Q3_K_L":
+		return fileTypeQ3_K_L, nil
+	case "Q4_K_S":
+		return fileTypeQ4_K_S, nil
+	case "Q4_K_M":
+		return fileTypeQ4_K_M, nil
+	case "Q5_K_S":
+		return fileTypeQ5_K_S, nil
+	case "Q5_K_M":
+		return fileTypeQ5_K_M, nil
+	case "Q6_K":
+		return fileTypeQ6_K, nil
+	case "IQ2_XXS":
+		return fileTypeIQ2_XXS, nil
+	case "IQ2_XS":
+		return fileTypeIQ2_XS, nil
+	case "Q2_K_S":
+		return fileTypeQ2_K_S, nil
+	case "Q3_K_XS":
+		return fileTypeQ3_K_XS, nil
+	case "IQ1_S":
+		return fileTypeIQ1_S, nil
+	case "IQ4_NL":
+		return fileTypeIQ4_NL, nil
+	case "IQ3_S":
+		return fileTypeIQ3_S, nil
+	case "IQ2_S":
+		return fileTypeIQ2_S, nil
+	case "IQ4_XS":
+		return fileTypeIQ4_XS, nil
+	case "IQ3_XXS":
+		return fileTypeIQ3_XXS, nil
+	default:
+		return fileTypeUnknown, fmt.Errorf("unknown fileType: %s", s)
+	}
+}
+
+func (t fileType) String() string {
+	switch t {
+	case fileTypeF32:
+		return "F32"
+	case fileTypeF16:
+		return "F16"
+	case fileTypeQ4_0:
+		return "Q4_0"
+	case fileTypeQ4_1:
+		return "Q4_1"
+	case fileTypeQ4_1_F16:
+		return "Q4_1_F16"
+	case fileTypeQ8_0:
+		return "Q8_0"
+	case fileTypeQ5_0:
+		return "Q5_0"
+	case fileTypeQ5_1:
+		return "Q5_1"
+	case fileTypeQ2_K:
+		return "Q2_K"
+	case fileTypeQ3_K_S:
+		return "Q3_K_S"
+	case fileTypeQ3_K_M:
+		return "Q3_K_M"
+	case fileTypeQ3_K_L:
+		return "Q3_K_L"
+	case fileTypeQ4_K_S:
+		return "Q4_K_S"
+	case fileTypeQ4_K_M:
+		return "Q4_K_M"
+	case fileTypeQ5_K_S:
+		return "Q5_K_S"
+	case fileTypeQ5_K_M:
+		return "Q5_K_M"
+	case fileTypeQ6_K:
+		return "Q6_K"
+	case fileTypeIQ2_XXS:
+		return "IQ2_XXS"
+	case fileTypeIQ2_XS:
+		return "IQ2_XS"
+	case fileTypeQ2_K_S:
+		return "Q2_K_S"
+	case fileTypeQ3_K_XS:
+		return "Q3_K_XS"
+	case fileTypeIQ1_S:
+		return "IQ1_S"
+	case fileTypeIQ4_NL:
+		return "IQ4_NL"
+	case fileTypeIQ3_S:
+		return "IQ3_S"
+	case fileTypeIQ2_S:
+		return "IQ2_S"
+	case fileTypeIQ4_XS:
+		return "IQ4_XS"
+	case fileTypeIQ3_XXS:
+		return "IQ3_XXS"
+	default:
+		return "unknown"
+	}
+}
+
+func (t fileType) Value() uint32 {
+	return uint32(t)
+}
--- a/llm/generate/gen_windows.ps1
+++ b/llm/generate/gen_windows.ps1
@@ -42,7 +42,7 @@ function init_vars {
        "-DLLAMA_NATIVE=off"
        )
    $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
-    $script:ARCH = "amd64" # arm not yet supported.
+    $script:ARCH = $Env:PROCESSOR_ARCHITECTURE.ToLower()
    $script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_runners"
    md "$script:DIST_BASE" -ea 0 > $null
    if ($env:CGO_CFLAGS -contains "-g") {
@@ -213,11 +213,11 @@ function build_static() {
    }
 }

-function build_cpu() {
+function build_cpu($gen_arch) {
    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu"))) {
        # remaining llama.cpp builds use MSVC 
        init_vars
-        $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+        $script:cmakeDefs = $script:commonCpuDefs + @("-A", $gen_arch, "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
        $script:buildDir="../build/windows/${script:ARCH}/cpu"
        $script:distDir="$script:DIST_BASE\cpu"
        write-host "Building LCD CPU"
@@ -349,11 +349,15 @@ if ($($args.count) -eq 0) {
    git_module_setup
    apply_patches
    build_static
-    build_cpu
-    build_cpu_avx
-    build_cpu_avx2
-    build_cuda
-    build_rocm
+    if ($script:ARCH -eq "arm64") {
+        build_cpu("ARM64")
+    } else { # amd64
+        build_cpu("x64")
+        build_cpu_avx
+        build_cpu_avx2
+        build_cuda
+        build_rocm
+    }

    cleanup
    write-host "`ngo generate completed.  LLM runners: $(get-childitem -path $script:DIST_BASE)"
--- a/llm/ggml.go
+++ b/llm/ggml.go
@@ -13,82 +13,6 @@ type GGML struct {
 	model
 }

-const (
-	fileTypeF32 uint32 = iota
-	fileTypeF16
-	fileTypeQ4_0
-	fileTypeQ4_1
-	fileTypeQ4_1_F16
-	fileTypeQ8_0 uint32 = iota + 2
-	fileTypeQ5_0
-	fileTypeQ5_1
-	fileTypeQ2_K
-	fileTypeQ3_K_S
-	fileTypeQ3_K_M
-	fileTypeQ3_K_L
-	fileTypeQ4_K_S
-	fileTypeQ4_K_M
-	fileTypeQ5_K_S
-	fileTypeQ5_K_M
-	fileTypeQ6_K
-	fileTypeIQ2_XXS
-	fileTypeIQ2_XS
-	fileTypeQ2_K_S
-	fileTypeQ3_K_XS
-	fileTypeIQ3_XXS
-)
-
-func fileType(fileType uint32) string {
-	switch fileType {
-	case fileTypeF32:
-		return "F32"
-	case fileTypeF16:
-		return "F16"
-	case fileTypeQ4_0:
-		return "Q4_0"
-	case fileTypeQ4_1:
-		return "Q4_1"
-	case fileTypeQ4_1_F16:
-		return "Q4_1_F16"
-	case fileTypeQ8_0:
-		return "Q8_0"
-	case fileTypeQ5_0:
-		return "Q5_0"
-	case fileTypeQ5_1:
-		return "Q5_1"
-	case fileTypeQ2_K:
-		return "Q2_K"
-	case fileTypeQ3_K_S:
-		return "Q3_K_S"
-	case fileTypeQ3_K_M:
-		return "Q3_K_M"
-	case fileTypeQ3_K_L:
-		return "Q3_K_L"
-	case fileTypeQ4_K_S:
-		return "Q4_K_S"
-	case fileTypeQ4_K_M:
-		return "Q4_K_M"
-	case fileTypeQ5_K_S:
-		return "Q5_K_S"
-	case fileTypeQ5_K_M:
-		return "Q5_K_M"
-	case fileTypeQ6_K:
-		return "Q6_K"
-	case fileTypeIQ2_XXS:
-		return "IQ2_XXS"
-	case fileTypeIQ2_XS:
-		return "IQ2_XS"
-	case fileTypeQ2_K_S:
-		return "Q2_K_S"
-	case fileTypeQ3_K_XS:
-		return "Q3_K_XS"
-	case fileTypeIQ3_XXS:
-		return "IQ3_XXS"
-	default:
-		return "unknown"
-	}
-}
-
 type model interface {
 	KV() KV
 	Tensors() Tensors
@@ -123,7 +47,7 @@ func (kv KV) ParameterCount() uint64 {

 func (kv KV) FileType() string {
 	if u64 := kv.u64("general.file_type"); u64 > 0 {
-		return fileType(uint32(u64))
+		return fileType(uint32(u64)).String()
 	}

 	return "unknown"
@@ -201,9 +125,9 @@ type Tensor struct {

 func (t Tensor) blockSize() uint64 {
 	switch {
-	case t.Kind < 2:
+	case t.Kind < 2 || (t.Kind > 23 && t.Kind < 29):
 		return 1
-	case t.Kind < 10:
+	case t.Kind < 10 || t.Kind == 20:
 		return 32
 	default:
 		return 256
@@ -248,6 +172,16 @@ func (t Tensor) typeSize() uint64 {
 		return 2 + 2*blockSize/8 + blockSize/32
 	case 18: // IQ3_XXS
 		return 2 + 3*blockSize/8
+	case 19: // IQ1_S
+		return 2 + blockSize/8 + blockSize/16
+	case 20: // IQ4_NL
+		return 2 + blockSize/2
+	case 21: // IQ3_S
+		return 2 + 2*blockSize/8 + blockSize/8 + blockSize/32 + 4
+	case 22: // IQ2_S
+		return 2 + blockSize/4 + blockSize/16
+	case 23: // IQ4_XS
+		return 4 + blockSize/2 + blockSize/64
 	default:
 		return 0
 	}
@@ -286,6 +220,23 @@ const (

 var ErrUnsupportedFormat = errors.New("unsupported model format")

+func DetectGGMLType(b []byte) string {
+	switch binary.LittleEndian.Uint32(b[:4]) {
+	case FILE_MAGIC_GGML:
+		return "ggml"
+	case FILE_MAGIC_GGMF:
+		return "ggmf"
+	case FILE_MAGIC_GGJT:
+		return "ggjt"
+	case FILE_MAGIC_GGLA:
+		return "ggla"
+	case FILE_MAGIC_GGUF_LE, FILE_MAGIC_GGUF_BE:
+		return "gguf"
+	default:
+		return ""
+	}
+}
+
 func DecodeGGML(rs io.ReadSeeker) (*GGML, int64, error) {
 	var magic uint32
 	if err := binary.Read(rs, binary.LittleEndian, &magic); err != nil {
--- a/llm/llama.cpp
+++ b/llm/llama.cpp
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -4,6 +4,7 @@ package llm
 // #cgo darwin,arm64 LDFLAGS: ${SRCDIR}/build/darwin/arm64_static/libllama.a -lstdc++
 // #cgo darwin,amd64 LDFLAGS: ${SRCDIR}/build/darwin/x86_64_static/libllama.a -lstdc++
 // #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++
+// #cgo windows,arm64 LDFLAGS: ${SRCDIR}/build/windows/arm64_static/libllama.a -static -lstdc++
 // #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++
 // #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++
 // #include <stdlib.h>
@@ -19,7 +20,7 @@ func SystemInfo() string {
 	return C.GoString(C.llama_print_system_info())
 }

-func Quantize(infile, outfile, filetype string) error {
+func Quantize(infile, outfile string, ftype fileType) error {
 	cinfile := C.CString(infile)
 	defer C.free(unsafe.Pointer(cinfile))

@@ -28,58 +29,10 @@ func Quantize(infile, outfile, filetype string) error {

 	params := C.llama_model_quantize_default_params()
 	params.nthread = -1
+	params.ftype = ftype.Value()

-	switch filetype {
-	case "F32":
-		params.ftype = fileTypeF32
-	case "F16":
-		params.ftype = fileTypeF16
-	case "Q4_0":
-		params.ftype = fileTypeQ4_0
-	case "Q4_1":
-		params.ftype = fileTypeQ4_1
-	case "Q4_1_F16":
-		params.ftype = fileTypeQ4_1_F16
-	case "Q8_0":
-		params.ftype = fileTypeQ8_0
-	case "Q5_0":
-		params.ftype = fileTypeQ5_0
-	case "Q5_1":
-		params.ftype = fileTypeQ5_1
-	case "Q2_K":
-		params.ftype = fileTypeQ2_K
-	case "Q3_K_S":
-		params.ftype = fileTypeQ3_K_S
-	case "Q3_K_M":
-		params.ftype = fileTypeQ3_K_M
-	case "Q3_K_L":
-		params.ftype = fileTypeQ3_K_L
-	case "Q4_K_S":
-		params.ftype = fileTypeQ4_K_S
-	case "Q4_K_M":
-		params.ftype = fileTypeQ4_K_M
-	case "Q5_K_S":
-		params.ftype = fileTypeQ5_K_S
-	case "Q5_K_M":
-		params.ftype = fileTypeQ5_K_M
-	case "Q6_K":
-		params.ftype = fileTypeQ6_K
-	case "IQ2_XXS":
-		params.ftype = fileTypeIQ2_XXS
-	case "IQ2_XS":
-		params.ftype = fileTypeIQ2_XS
-	case "Q2_K_S":
-		params.ftype = fileTypeQ2_K_S
-	case "Q3_K_XS":
-		params.ftype = fileTypeQ3_K_XS
-	case "IQ3_XXS":
-		params.ftype = fileTypeIQ3_XXS
-	default:
-		return fmt.Errorf("unknown filetype: %s", filetype)
-	}
-
-	if retval := C.llama_model_quantize(cinfile, coutfile, &params); retval != 0 {
-		return fmt.Errorf("llama_model_quantize: %d", retval)
+	if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 {
+		return fmt.Errorf("llama_model_quantize: %d", rc)
 	}

 	return nil
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -88,6 +88,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
 	graphFullOffload *= uint64(len(gpus))
 	graphPartialOffload *= uint64(len(gpus))

+	// on metal there's no partial offload overhead
+	if gpus[0].Library == "metal" {
+		graphPartialOffload = graphFullOffload
+	}
+
 	// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
 	memoryRequiredTotal := memoryMinimum + graphFullOffload

--- a/llm/server.go
+++ b/llm/server.go
@@ -73,8 +73,7 @@ func LoadModel(model string) (*GGML, error) {
 func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, projectors []string, opts api.Options) (LlamaServer, error) {
 	var err error
 	if opts.NumCtx > int(ggml.KV().ContextLength()) {
-		slog.Warn("requested context length is greater than model max context length", "requested", opts.NumCtx, "model", ggml.KV().ContextLength())
-		opts.NumCtx = int(ggml.KV().ContextLength())
+		slog.Warn("requested context length is greater than the model's training context window size", "requested", opts.NumCtx, "training size", ggml.KV().ContextLength())
 	}

 	if opts.NumCtx < 4 {
@@ -301,12 +300,6 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 			continue
 		}

-		// reap subprocess when it exits
-		go func() {
-			// Exit status managed via getServerStatus
-			_ = s.cmd.Wait()
-		}()
-
 		// TODO - make sure this is all wired up correctly
 		// if err = s.WaitUntilRunning(); err != nil {
 		// 	slog.Error("error starting llama server", "server", servers[i], "error", err)
@@ -900,7 +893,13 @@ func (s *llmServer) Detokenize(ctx context.Context, tokens []int) (string, error
 func (s *llmServer) Close() error {
 	if s.cmd != nil {
 		slog.Debug("stopping llama server")
-		return s.cmd.Process.Kill()
+		if err := s.cmd.Process.Kill(); err != nil {
+			return err
+		}
+
+		_ = s.cmd.Wait()
+
+		slog.Debug("llama server stopped")
 	}

 	return nil
--- a/macapp/.eslintrc.json
+++ b/macapp/.eslintrc.json
@@ -0,0 +1,16 @@
+{
+  "env": {
+    "browser": true,
+    "es6": true,
+    "node": true
+  },
+  "extends": [
+    "eslint:recommended",
+    "plugin:@typescript-eslint/eslint-recommended",
+    "plugin:@typescript-eslint/recommended",
+    "plugin:import/recommended",
+    "plugin:import/electron",
+    "plugin:import/typescript"
+  ],
+  "parser": "@typescript-eslint/parser"
+}
--- a/macapp/.gitignore
+++ b/macapp/.gitignore
@@ -0,0 +1,92 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+lerna-debug.log*
+
+# Diagnostic reports (https://nodejs.org/api/report.html)
+report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
+
+# Runtime data
+pids
+*.pid
+*.seed
+*.pid.lock
+.DS_Store
+
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+
+# Coverage directory used by tools like istanbul
+coverage
+*.lcov
+
+# nyc test coverage
+.nyc_output
+
+# node-waf configuration
+.lock-wscript
+
+# Compiled binary addons (https://nodejs.org/api/addons.html)
+build/Release
+
+# Dependency directories
+node_modules/
+jspm_packages/
+
+# TypeScript v1 declaration files
+typings/
+
+# TypeScript cache
+*.tsbuildinfo
+
+# Optional npm cache directory
+.npm
+
+# Optional eslint cache
+.eslintcache
+
+# Optional REPL history
+.node_repl_history
+
+# Output of 'npm pack'
+*.tgz
+
+# Yarn Integrity file
+.yarn-integrity
+
+# dotenv environment variables file
+.env
+.env.test
+
+# parcel-bundler cache (https://parceljs.org/)
+.cache
+
+# next.js build output
+.next
+
+# nuxt.js build output
+.nuxt
+
+# vuepress build output
+.vuepress/dist
+
+# Serverless directories
+.serverless/
+
+# FuseBox cache
+.fusebox/
+
+# DynamoDB Local files
+.dynamodb/
+
+# Webpack
+.webpack/
+
+# Vite
+.vite/
+
+# Electron-Forge
+out/
--- a/macapp/README.md
+++ b/macapp/README.md
@@ -0,0 +1,21 @@
+# Desktop
+
+This app builds upon Ollama to provide a desktop experience for running models.
+
+## Developing
+
+First, build the `ollama` binary:
+
+```
+cd ..
+go build .
+```
+
+Then run the desktop app with `npm start`:
+
+```
+cd macapp
+npm install
+npm start
+```
+
--- a/app/darwin/Ollama.app/Contents/Resources/icon.icns
+++ b/app/darwin/Ollama.app/Contents/Resources/icon.icns
--- a/macapp/assets/iconDarkTemplate.png
+++ b/macapp/assets/iconDarkTemplate.png
--- a/macapp/assets/iconDarkTemplate@2x.png
+++ b/macapp/assets/iconDarkTemplate@2x.png
--- a/macapp/assets/iconDarkUpdateTemplate.png
+++ b/macapp/assets/iconDarkUpdateTemplate.png
--- a/macapp/assets/iconDarkUpdateTemplate@2x.png
+++ b/macapp/assets/iconDarkUpdateTemplate@2x.png
--- a/macapp/assets/iconTemplate.png
+++ b/macapp/assets/iconTemplate.png
--- a/macapp/assets/iconTemplate@2x.png
+++ b/macapp/assets/iconTemplate@2x.png
--- a/macapp/assets/iconUpdateTemplate.png
+++ b/macapp/assets/iconUpdateTemplate.png
--- a/macapp/assets/iconUpdateTemplate@2x.png
+++ b/macapp/assets/iconUpdateTemplate@2x.png
--- a/macapp/forge.config.ts
+++ b/macapp/forge.config.ts
@@ -0,0 +1,78 @@
+import type { ForgeConfig } from '@electron-forge/shared-types'
+import { MakerSquirrel } from '@electron-forge/maker-squirrel'
+import { MakerZIP } from '@electron-forge/maker-zip'
+import { PublisherGithub } from '@electron-forge/publisher-github'
+import { AutoUnpackNativesPlugin } from '@electron-forge/plugin-auto-unpack-natives'
+import { WebpackPlugin } from '@electron-forge/plugin-webpack'
+import * as path from 'path'
+import * as fs from 'fs'
+
+import { mainConfig } from './webpack.main.config'
+import { rendererConfig } from './webpack.renderer.config'
+
+const packageJson = JSON.parse(fs.readFileSync(path.resolve(__dirname, './package.json'), 'utf8'))
+
+// Electron Forge configuration for packaging the desktop app.
+const config: ForgeConfig = {
+  packagerConfig: {
+    // A VERSION environment variable takes precedence over package.json's version.
+    appVersion: process.env.VERSION || packageJson.version,
+    asar: true,
+    icon: './assets/icon.icns',
+    // Files listed as extra resources: the ollama binary and the tray icon variants.
+    extraResource: [
+      '../dist/ollama',
+      path.join(__dirname, './assets/iconTemplate.png'),
+      path.join(__dirname, './assets/iconTemplate@2x.png'),
+      path.join(__dirname, './assets/iconUpdateTemplate.png'),
+      path.join(__dirname, './assets/iconUpdateTemplate@2x.png'),
+      path.join(__dirname, './assets/iconDarkTemplate.png'),
+      path.join(__dirname, './assets/iconDarkTemplate@2x.png'),
+      path.join(__dirname, './assets/iconDarkUpdateTemplate.png'),
+      path.join(__dirname, './assets/iconDarkUpdateTemplate@2x.png'),
+    ],
+    // Signing and notarization settings are only included when SIGN is set.
+    ...(process.env.SIGN
+      ? {
+          osxSign: {
+            identity: process.env.APPLE_IDENTITY,
+          },
+          osxNotarize: {
+            tool: 'notarytool',
+            appleId: process.env.APPLE_ID || '',
+            appleIdPassword: process.env.APPLE_PASSWORD || '',
+            teamId: process.env.APPLE_TEAM_ID || '',
+          },
+        }
+      : {}),
+    osxUniversal: {
+      x64ArchFiles: '**/ollama',
+    },
+  },
+  rebuildConfig: {},
+  makers: [new MakerSquirrel({}), new MakerZIP({}, ['darwin'])],
+  hooks: {
+    // Applies the same VERSION override to the package.json that Forge reads.
+    readPackageJson: async (_, packageJson) => {
+      return { ...packageJson, version: process.env.VERSION || packageJson.version }
+    },
+  },
+  plugins: [
+    new AutoUnpackNativesPlugin({}),
+    new WebpackPlugin({
+      mainConfig,
+      devContentSecurityPolicy: `default-src * 'unsafe-eval' 'unsafe-inline'; img-src data: 'self'`,
+      renderer: {
+        config: rendererConfig,
+        nodeIntegration: true,
+        // Single renderer entry point; exposed to the main process as `main_window`.
+        entryPoints: [
+          {
+            html: './src/index.html',
+            js: './src/renderer.tsx',
+            name: 'main_window',
+            preload: {
+              js: './src/preload.ts',
+            },
+          },
+        ],
+      },
+    }),
+  ],
+}
+
+export default config
--- a/macapp/package-lock.json
+++ b/macapp/package-lock.json
--- a/macapp/package.json
+++ b/macapp/package.json
@@ -0,0 +1,84 @@
+{
+  "name": "ollama",
+  "productName": "Ollama",
+  "version": "0.0.0",
+  "description": "ollama",
+  "main": ".webpack/main",
+  "scripts": {
+    "start": "electron-forge start",
+    "package": "electron-forge package --arch universal",
+    "package:sign": "SIGN=1 electron-forge package --arch universal",
+    "make": "electron-forge make --arch universal",
+    "make:sign": "SIGN=1 electron-forge make --arch universal",
+    "publish": "SIGN=1 electron-forge publish",
+    "lint": "eslint --ext .ts,.tsx .",
+    "format": "prettier --check . --ignore-path .gitignore",
+    "format:fix": "prettier --write . --ignore-path .gitignore"
+  },
+  "keywords": [],
+  "author": {
+    "name": "Jeffrey Morgan",
+    "email": "jmorganca@gmail.com"
+  },
+  "license": "MIT",
+  "devDependencies": {
+    "@babel/core": "^7.22.5",
+    "@babel/preset-react": "^7.22.5",
+    "@electron-forge/cli": "^6.2.1",
+    "@electron-forge/maker-deb": "^6.2.1",
+    "@electron-forge/maker-rpm": "^6.2.1",
+    "@electron-forge/maker-squirrel": "^6.2.1",
+    "@electron-forge/maker-zip": "^6.2.1",
+    "@electron-forge/plugin-auto-unpack-natives": "^6.2.1",
+    "@electron-forge/plugin-webpack": "^6.2.1",
+    "@electron-forge/publisher-github": "^6.2.1",
+    "@electron/universal": "^1.4.1",
+    "@svgr/webpack": "^8.0.1",
+    "@types/chmodr": "^1.0.0",
+    "@types/node": "^20.4.0",
+    "@types/react": "^18.2.14",
+    "@types/react-dom": "^18.2.6",
+    "@types/uuid": "^9.0.2",
+    "@typescript-eslint/eslint-plugin": "^5.60.0",
+    "@typescript-eslint/parser": "^5.60.0",
+    "@vercel/webpack-asset-relocator-loader": "^1.7.3",
+    "babel-loader": "^9.1.2",
+    "chmodr": "^1.2.0",
+    "copy-webpack-plugin": "^11.0.0",
+    "css-loader": "^6.8.1",
+    "electron": "25.9.2",
+    "eslint": "^8.43.0",
+    "eslint-plugin-import": "^2.27.5",
+    "fork-ts-checker-webpack-plugin": "^7.3.0",
+    "node-loader": "^2.0.0",
+    "postcss": "^8.4.24",
+    "postcss-import": "^15.1.0",
+    "postcss-loader": "^7.3.3",
+    "postcss-preset-env": "^8.5.1",
+    "prettier": "^2.8.8",
+    "prettier-plugin-tailwindcss": "^0.3.0",
+    "style-loader": "^3.3.3",
+    "svg-inline-loader": "^0.8.2",
+    "tailwindcss": "^3.3.2",
+    "ts-loader": "^9.4.3",
+    "ts-node": "^10.9.1",
+    "typescript": "~4.5.4",
+    "url-loader": "^4.1.1",
+    "webpack": "^5.88.0",
+    "webpack-cli": "^5.1.4",
+    "webpack-dev-server": "^4.15.1"
+  },
+  "dependencies": {
+    "@electron/remote": "^2.0.10",
+    "@heroicons/react": "^2.0.18",
+    "@segment/analytics-node": "^1.0.0",
+    "copy-to-clipboard": "^3.3.3",
+    "electron-squirrel-startup": "^1.0.0",
+    "electron-store": "^8.1.0",
+    "react": "^18.2.0",
+    "react-dom": "^18.2.0",
+    "uuid": "^9.0.0",
+    "winston": "^3.10.0",
+    "winston-daily-rotate-file": "^4.7.1"
+  }
+}
--- a/macapp/postcss.config.js
+++ b/macapp/postcss.config.js
@@ -0,0 +1,7 @@
+module.exports = {
+  plugins: {
+    'postcss-import': {},
+    tailwindcss: {},
+    autoprefixer: {},
+  },
+}
--- a/macapp/src/app.css
+++ b/macapp/src/app.css
@@ -0,0 +1,34 @@
+@tailwind base;
+@tailwind components;
+@tailwind utilities;
+
+html,
+body {
+  background: transparent;
+}
+
+.drag {
+  -webkit-app-region: drag;
+}
+
+.no-drag {
+  -webkit-app-region: no-drag;
+}
+
+.blink {
+  -webkit-animation: 1s blink step-end infinite;
+  -moz-animation: 1s blink step-end infinite;
+  -ms-animation: 1s blink step-end infinite;
+  -o-animation: 1s blink step-end infinite;
+  animation: 1s blink step-end infinite;
+}
+
+@keyframes blink {
+  from,
+  to {
+    color: transparent;
+  }
+  50% {
+    color: black;
+  }
+}
--- a/macapp/src/app.tsx
+++ b/macapp/src/app.tsx
@@ -0,0 +1,122 @@
+import { useState } from 'react'
+import copy from 'copy-to-clipboard'
+import { CheckIcon, DocumentDuplicateIcon } from '@heroicons/react/24/outline'
+import Store from 'electron-store'
+import { getCurrentWindow, app } from '@electron/remote'
+
+import { install } from './install'
+import OllamaIcon from './ollama.svg'
+
+const store = new Store()
+
+enum Step {
+  WELCOME = 0,
+  CLI,
+  FINISH,
+}
+
+/**
+ * First-run welcome wizard.
+ *
+ * Renders one of three screens depending on `step`:
+ *  - WELCOME: greeting with a "Next" button that advances to CLI.
+ *  - CLI: an "Install" button that calls `install()` and, on success, advances to FINISH.
+ *  - FINISH: shows the example command with a copy button, and a "Finish" button
+ *    that records `first-time-run` in the store and closes the window.
+ */
+export default function () {
+  // Current wizard screen; starts on the welcome screen.
+  const [step, setStep] = useState<Step>(Step.WELCOME)
+  // True for a short period after the command was copied, to swap the icon.
+  const [commandCopied, setCommandCopied] = useState<boolean>(false)
+
+  // Example command displayed (and copied) on the final screen.
+  const command = 'ollama run llama3'
+
+  return (
+    <div className='drag'>
+      <div className='mx-auto flex min-h-screen w-full flex-col justify-between bg-white px-4 pt-16'>
+        {step === Step.WELCOME && (
+          <>
+            <div className='mx-auto text-center'>
+              <h1 className='mb-6 mt-4 text-2xl tracking-tight text-gray-900'>Welcome to Ollama</h1>
+              <p className='mx-auto w-[65%] text-sm text-gray-400'>
+                Let's get you up and running with your own large language models.
+              </p>
+              <button
+                onClick={() => setStep(Step.CLI)}
+                className='no-drag rounded-dm mx-auto my-8 w-[40%] rounded-md bg-black px-4 py-2 text-sm text-white hover:brightness-110'
+              >
+                Next
+              </button>
+            </div>
+            <div className='mx-auto'>
+              <OllamaIcon />
+            </div>
+          </>
+        )}
+        {step === Step.CLI && (
+          <>
+            <div className='mx-auto flex flex-col space-y-28 text-center'>
+              <h1 className='mt-4 text-2xl tracking-tight text-gray-900'>Install the command line</h1>
+              <pre className='mx-auto text-4xl text-gray-400'>&gt; ollama</pre>
+              <div className='mx-auto'>
+                <button
+                  onClick={async () => {
+                    try {
+                      await install()
+                      setStep(Step.FINISH)
+                    } catch (e) {
+                      // A failed install is only logged; the wizard stays on this screen.
+                      console.error('could not install: ', e)
+                    } finally {
+                      // Show and focus the window again whether or not the install succeeded.
+                      getCurrentWindow().show()
+                      getCurrentWindow().focus()
+                    }
+                  }}
+                  className='no-drag rounded-dm mx-auto w-[60%] rounded-md bg-black px-4 py-2 text-sm text-white hover:brightness-110'
+                >
+                  Install
+                </button>
+                <p className='mx-auto my-4 w-[70%] text-xs text-gray-400'>
+                  You will be prompted for administrator access
+                </p>
+              </div>
+            </div>
+          </>
+        )}
+        {step === Step.FINISH && (
+          <>
+            <div className='mx-auto flex flex-col space-y-20 text-center'>
+              <h1 className='mt-4 text-2xl tracking-tight text-gray-900'>Run your first model</h1>
+              <div className='flex flex-col'>
+                <div className='group relative flex items-center'>
+                  <pre className='language-none text-2xs w-full rounded-md bg-gray-100 px-4 py-3 text-start leading-normal'>
+                    {command}
+                  </pre>
+                  <button
+                    className={`no-drag absolute right-[5px] px-2 py-2 ${
+                      commandCopied
+                        ? 'text-gray-900 opacity-100 hover:cursor-auto'
+                        : 'text-gray-200 opacity-50 hover:cursor-pointer'
+                    } hover:font-bold hover:text-gray-900 group-hover:opacity-100`}
+                    onClick={() => {
+                      copy(command)
+                      setCommandCopied(true)
+                      // Revert the "copied" indicator after three seconds.
+                      setTimeout(() => setCommandCopied(false), 3000)
+                    }}
+                  >
+                    {commandCopied ? (
+                      <CheckIcon className='h-4 w-4 font-bold text-gray-500' />
+                    ) : (
+                      <DocumentDuplicateIcon className='h-4 w-4 text-gray-500' />
+                    )}
+                  </button>
+                </div>
+                <p className='mx-auto my-4 w-[70%] text-xs text-gray-400'>
+                  Run this command in your favorite terminal.
+                </p>
+              </div>
+              <button
+                onClick={() => {
+                  // Persist that the first-run flow has completed, then close the wizard.
+                  store.set('first-time-run', true)
+                  window.close()
+                }}
+                className='no-drag rounded-dm mx-auto w-[60%] rounded-md bg-black px-4 py-2 text-sm text-white hover:brightness-110'
+              >
+                Finish
+              </button>
+            </div>
+          </>
+        )}
+      </div>
+    </div>
+  )
+}
--- a/macapp/src/declarations.d.ts
+++ b/macapp/src/declarations.d.ts
@@ -0,0 +1,4 @@
+declare module '*.svg' {
+  const content: string
+  export default content
+}
--- a/macapp/src/index.html
+++ b/macapp/src/index.html
@@ -0,0 +1,9 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta charset="UTF-8" />
+  </head>
+  <body>
+    <div id="app"></div>
+  </body>
+</html>
--- a/macapp/src/index.ts
+++ b/macapp/src/index.ts
@@ -0,0 +1,302 @@
+import { spawn, ChildProcess } from 'child_process'
+import { app, autoUpdater, dialog, Tray, Menu, BrowserWindow, MenuItemConstructorOptions, nativeTheme } from 'electron'
+import Store from 'electron-store'
+import winston from 'winston'
+import 'winston-daily-rotate-file'
+import * as path from 'path'
+
+import { v4 as uuidv4 } from 'uuid'
+import { installed } from './install'
+
+require('@electron/remote/main').initialize()
+
+if (require('electron-squirrel-startup')) {
+  app.quit()
+}
+
+const store = new Store()
+
+let welcomeWindow: BrowserWindow | null = null
+
+declare const MAIN_WINDOW_WEBPACK_ENTRY: string
+
+const logger = winston.createLogger({
+  transports: [
+    new winston.transports.Console(),
+    new winston.transports.File({
+      filename: path.join(app.getPath('home'), '.ollama', 'logs', 'server.log'),
+      maxsize: 1024 * 1024 * 20,
+      maxFiles: 5,
+    }),
+  ],
+  format: winston.format.printf(info => info.message),
+})
+
+// Entry point once Electron is ready: enforce a single running instance,
+// then hand over to init().
+app.on('ready', () => {
+  if (!app.requestSingleInstanceLock()) {
+    // Another instance already holds the lock; exit this one immediately.
+    app.exit(0)
+    return
+  }
+
+  // When a second copy is launched, this instance releases the lock, stops
+  // the server child process without triggering a restart, and exits.
+  const onSecondInstance = () => {
+    if (app.hasSingleInstanceLock()) {
+      app.releaseSingleInstanceLock()
+    }
+
+    if (proc) {
+      proc.off('exit', restart)
+      proc.kill()
+    }
+
+    app.exit(0)
+  }
+  app.on('second-instance', onSecondInstance)
+
+  app.focus({ steal: true })
+  init()
+})
+
+/**
+ * Opens the frameless, fixed-size welcome window and stores it in
+ * `welcomeWindow`. The window is created hidden and shown once it is ready;
+ * when it is closed on macOS the dock icon is hidden.
+ */
+function firstRunWindow() {
+  const windowOptions = {
+    width: 400,
+    height: 500,
+    frame: false,
+    fullscreenable: false,
+    resizable: false,
+    movable: true,
+    show: false,
+    webPreferences: {
+      nodeIntegration: true,
+      contextIsolation: false,
+    },
+  }
+  welcomeWindow = new BrowserWindow(windowOptions)
+
+  // Allow the renderer to use @electron/remote for this window.
+  require('@electron/remote/main').enable(welcomeWindow.webContents)
+
+  welcomeWindow.loadURL(MAIN_WINDOW_WEBPACK_ENTRY)
+
+  welcomeWindow.on('ready-to-show', () => welcomeWindow.show())
+
+  welcomeWindow.on('closed', () => {
+    if (process.platform !== 'darwin') {
+      return
+    }
+    app.dock.hide()
+  })
+}
+
+let tray: Tray | null = null
+let updateAvailable = false
+const assetPath = app.isPackaged ? process.resourcesPath : path.join(__dirname, '..', '..', 'assets')
+
+/**
+ * Returns the absolute path of the tray icon matching the current theme
+ * (dark or light) and whether an update is pending.
+ */
+function trayIconPath() {
+  let fileName: string
+  if (nativeTheme.shouldUseDarkColors) {
+    fileName = updateAvailable ? 'iconDarkUpdateTemplate.png' : 'iconDarkTemplate.png'
+  } else {
+    fileName = updateAvailable ? 'iconUpdateTemplate.png' : 'iconTemplate.png'
+  }
+
+  return path.join(assetPath, fileName)
+}
+
+/** Refreshes the tray image if the tray has been created; otherwise does nothing. */
+function updateTrayIcon() {
+  if (!tray) {
+    return
+  }
+
+  tray.setImage(trayIconPath())
+}
+
+/**
+ * Creates the tray on first use and (re)applies its tooltip, context menu and
+ * icon. When an update is available the menu gains a "Restart to update"
+ * entry. Also makes sure the theme-change listener is registered exactly once.
+ */
+function updateTray() {
+  const template: MenuItemConstructorOptions[] = []
+
+  if (updateAvailable) {
+    template.push(
+      { label: 'An update is available', enabled: false },
+      {
+        label: 'Restart to update',
+        click: () => autoUpdater.quitAndInstall(),
+      },
+      { type: 'separator' }
+    )
+  }
+
+  template.push({ role: 'quit', label: 'Quit Ollama', accelerator: 'Command+Q' })
+
+  const menu = Menu.buildFromTemplate(template)
+
+  tray = tray || new Tray(trayIconPath())
+
+  const toolTip = updateAvailable ? 'An update is available' : 'Ollama'
+  tray.setToolTip(toolTip)
+  tray.setContextMenu(menu)
+  tray.setImage(trayIconPath())
+
+  // Remove before adding so repeated calls never stack duplicate listeners.
+  nativeTheme.off('updated', updateTrayIcon)
+  nativeTheme.on('updated', updateTrayIcon)
+}
+
+let proc: ChildProcess = null
+
+/**
+ * Spawns `ollama serve` as a child process and stores the handle in `proc`.
+ *
+ * The binary is taken from the app's resources directory when packaged, or
+ * from the parent of the current working directory during development.
+ * stdout is forwarded to the logger at info level and stderr at error level.
+ * When the child exits, `restart` schedules a new one.
+ */
+function server() {
+  const binary = app.isPackaged
+    ? path.join(process.resourcesPath, 'ollama')
+    : path.resolve(process.cwd(), '..', 'ollama')
+
+  proc = spawn(binary, ['serve'])
+
+  // A ChildProcess emits 'error' when it cannot be spawned or killed. Without
+  // a listener that event is thrown as an uncaught exception in the main
+  // process, so log it instead.
+  proc.on('error', err => {
+    logger.error(`ollama server process error - ${err.message}`)
+  })
+
+  proc.stdout.on('data', data => {
+    logger.info(data.toString().trim())
+  })
+
+  proc.stderr.on('data', data => {
+    logger.error(data.toString().trim())
+  })
+
+  proc.on('exit', restart)
+}
+
+/** Schedules the server to be started again after a one-second delay. */
+function restart() {
+  const delayMs = 1000
+  setTimeout(server, delayMs)
+}
+
+// On quit, stop the server child process and make sure its exit does not
+// trigger an automatic restart.
+app.on('before-quit', () => {
+  if (!proc) {
+    return
+  }
+
+  proc.off('exit', restart)
+  // SIGINT lets the server shut down, which also stops any loaded llms.
+  proc.kill('SIGINT')
+})
+
+const updateURL = `https://ollama.ai/api/update?os=${process.platform}&arch=${
+  process.arch
+}&version=${app.getVersion()}&id=${id()}`
+
+let latest = ''
+/**
+ * Queries the update endpoint and resolves to true only when it reports a
+ * release URL different from the one seen on the previous successful check.
+ * The newly seen URL is remembered in `latest`.
+ *
+ * Resolves to false on a non-OK or 204 response, a payload without a `url`,
+ * an unchanged URL, or any error (which is logged).
+ */
+async function isNewReleaseAvailable() {
+  try {
+    const res = await fetch(updateURL)
+    if (!res.ok || res.status === 204) {
+      return false
+    }
+
+    const payload = await res.json()
+    const releaseURL = payload?.url
+
+    if (!releaseURL || releaseURL === latest) {
+      return false
+    }
+
+    latest = releaseURL
+    return true
+  } catch (error) {
+    logger.error(`update check failed - ${error}`)
+    return false
+  }
+}
+
+/** Asks the auto-updater to check for updates, but only when a new release is reported. */
+async function checkUpdate() {
+  if (!(await isNewReleaseAvailable())) {
+    return
+  }
+
+  logger.info('checking for update')
+  autoUpdater.checkForUpdates()
+}
+
+/**
+ * Application start-up sequence, called from the `ready` handler.
+ *
+ * In order:
+ *  1. For packaged builds, checks for updates now and then every hour.
+ *  2. Creates or refreshes the tray.
+ *  3. For packaged macOS builds running outside the Applications folder,
+ *     offers to move the app there; if the move call does not throw,
+ *     initialization stops here.
+ *  4. Starts the server child process.
+ *  5. If the first-run flow was completed and the CLI is installed, hides the
+ *     dock icon on macOS and returns; otherwise enables open-at-login and
+ *     shows the welcome window.
+ */
+function init() {
+  if (app.isPackaged) {
+    checkUpdate()
+    setInterval(() => {
+      checkUpdate()
+    }, 60 * 60 * 1000)
+  }
+
+  updateTray()
+
+  if (process.platform === 'darwin' && app.isPackaged && !app.isInApplicationsFolder()) {
+    const chosen = dialog.showMessageBoxSync({
+      type: 'question',
+      buttons: ['Move to Applications', 'Do Not Move'],
+      message: 'Ollama works best when run from the Applications directory.',
+      defaultId: 0,
+      cancelId: 1,
+    })
+
+    if (chosen === 0) {
+      try {
+        app.moveToApplicationsFolder({
+          conflictHandler: conflictType => {
+            if (conflictType === 'existsAndRunning') {
+              dialog.showMessageBoxSync({
+                type: 'info',
+                message: 'Cannot move to Applications directory',
+                detail:
+                  'Another version of Ollama is currently running from your Applications directory. Close it first and try again.',
+              })
+            }
+            return true
+          },
+        })
+        // The move was requested without throwing; do not continue starting
+        // up from the current location.
+        return
+      } catch (e) {
+        // Log and fall through so the app still starts from where it is.
+        logger.error(`[Move to Applications] Failed to move to applications folder - ${e.message}`)
+      }
+    }
+  }
+
+  server()
+
+  if (store.get('first-time-run') && installed()) {
+    if (process.platform === 'darwin') {
+      app.dock.hide()
+    }
+
+    // Re-apply the current open-at-login setting unchanged.
+    app.setLoginItemSettings({ openAtLogin: app.getLoginItemSettings().openAtLogin })
+    return
+  }
+
+  // This is the first run or the CLI is no longer installed
+  app.setLoginItemSettings({ openAtLogin: true })
+  firstRunWindow()
+}
+
+// Closing the last window quits the app everywhere except macOS, where the
+// app and its menu bar conventionally stay active until the user quits
+// explicitly with Cmd + Q.
+app.on('window-all-closed', () => {
+  if (process.platform === 'darwin') {
+    return
+  }
+
+  app.quit()
+})
+
+/**
+ * Returns the identifier stored under the `id` key, generating a new UUID
+ * and saving it when none is stored yet.
+ */
+function id(): string {
+  const stored = store.get('id') as string
+  if (stored) {
+    return stored
+  }
+
+  const generated = uuidv4()
+  store.set('id', generated)
+  return generated
+}
+
+// Point the auto-updater at the same endpoint used by the manual update check.
+autoUpdater.setFeedURL({ url: updateURL })
+
+// Report updater failures to both the log file and the console.
+autoUpdater.on('error', e => {
+  const message = `update check failed - ${e.message}`
+  logger.error(message)
+  console.error(message)
+})
+
+// Once an update has been downloaded, flag it and rebuild the tray so the
+// "Restart to update" entry and update icon appear.
+autoUpdater.on('update-downloaded', () => {
+  updateAvailable = true
+  updateTray()
+})
--- a/macapp/src/install.ts
+++ b/macapp/src/install.ts
@@ -0,0 +1,21 @@
+import * as fs from 'fs'
+import { exec as cbExec } from 'child_process'
+import * as path from 'path'
+import { promisify } from 'util'
+
+const app = process && process.type === 'renderer' ? require('@electron/remote').app : require('electron').app
+const ollama = app.isPackaged ? path.join(process.resourcesPath, 'ollama') : path.resolve(process.cwd(), '..', 'ollama')
+const exec = promisify(cbExec)
+const symlinkPath = '/usr/local/bin/ollama'
+
+/**
+ * Reports whether the CLI symlink at `symlinkPath` exists and points at this
+ * app's `ollama` binary.
+ *
+ * Returns false, rather than throwing, when the path is missing, is not a
+ * symbolic link, or cannot be read.
+ */
+export function installed() {
+  try {
+    // readlinkSync throws when the path exists but is not a symlink (for
+    // example a regular binary placed there by another installer).
+    return fs.existsSync(symlinkPath) && fs.readlinkSync(symlinkPath) === ollama
+  } catch {
+    return false
+  }
+}
+
+/**
+ * Creates (or replaces) the CLI symlink at `symlinkPath` pointing to the
+ * app's `ollama` binary. The shell command is run through AppleScript with
+ * administrator privileges; the returned promise rejects if `osascript` fails.
+ */
+export async function install() {
+  const binDir = path.dirname(symlinkPath)
+  // NOTE(review): paths are interpolated without escaping; a path containing
+  // a quote character would break this command — confirm that is acceptable.
+  const shellScript = `mkdir -p ${binDir} && ln -F -s \\"${ollama}\\" \\"${symlinkPath}\\"`
+  const appleScript = `do shell script "${shellScript}" with administrator privileges`
+
+  await exec(`osascript -e '${appleScript}'`)
+}
--- a/macapp/src/ollama.svg
+++ b/macapp/src/ollama.svg
--- a/macapp/src/preload.ts
+++ b/macapp/src/preload.ts
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Bruce MacDonald	d40497b9a2	Add support for IQ1_S, IQ3_S, IQ2_S, IQ4_XS. IQ4_NL Co-authored-by: ManniX-ITA <20623405+mann1x@users.noreply.github.com>	2024-05-03 14:51:07 -07:00
Michael Yang	828e4bf101	s/DisplayLongest/String/	2024-05-03 13:18:28 -07:00
Michael Yang	05105903d8	only quantize language models	2024-05-03 13:18:28 -07:00
Michael Yang	abf3b1fb34	no iterator	2024-05-03 13:18:28 -07:00
Michael Yang	82fcc0601d	rebase	2024-05-03 13:18:28 -07:00
Michael Yang	185a927210	comments	2024-05-03 13:18:28 -07:00
Michael Yang	096ea2c8c3	update tests	2024-05-03 13:18:28 -07:00
Michael Yang	06b31e2e24	quantize any fp16/fp32 model - FROM /path/to/{safetensors,pytorch} - FROM /path/to/fp{16,32}.bin - FROM model:fp{16,32}	2024-05-03 13:18:28 -07:00
Michael Yang	b7a87a22b6	Merge pull request #4059 from ollama/mxyng/parser-2 rename parser to model/file	2024-05-03 13:01:22 -07:00
Dr Nic Williams	e8aaea030e	Update 'llama2' -> 'llama3' in most places (#4116 ) * Update 'llama2' -> 'llama3' in most places --------- Co-authored-by: Patrick Devine <patrick@infrahq.com>	2024-05-03 15:25:04 -04:00
Daniel Hiltgen	267e25a750	Merge pull request #4129 from dhiltgen/unit_tests Soften timeouts on sched unit tests	2024-05-03 11:10:26 -07:00
Daniel Hiltgen	9a32c514cb	Soften timeouts on sched unit tests This gives us more headroom on the scheduler tests to tamp down some flakes.	2024-05-03 09:08:33 -07:00
Michael Yang	e9ae607ece	Merge pull request #3892 from ollama/mxyng/parser refactor modelfile parser	2024-05-02 17:04:47 -07:00
Michael Yang	93707fa3f2	Merge pull request #4108 from ollama/mxyng/lf fix line ending	2024-05-02 14:55:15 -07:00
Michael Yang	94c369095f	fix line ending replace CRLF with LF	2024-05-02 14:53:13 -07:00
Jeffrey Morgan	9164b0161b	Update .gitattributes	2024-05-02 14:06:31 -04:00
Bryce Reitano	bf4fc25f7b	Add a /clear command (#3947 ) * Add a /clear command * change help messages --------- Co-authored-by: Patrick Devine <patrick@infrahq.com>	2024-05-01 17:44:36 -04:00
Michael Yang	5b806d8d24	Merge pull request #4089 from ollama/mxyng/target-invalid server: destination invalid	2024-05-01 12:46:35 -07:00
Michael Yang	cb1e072643	Merge pull request #4087 from ollama/mxyng/fix-host-port types/model: fix name for hostport	2024-05-01 12:42:07 -07:00
Michael Yang	45b6a12e45	server: target invalid	2024-05-01 12:40:45 -07:00
alwqx	68755f1f5e	chore: fix typo in docs/development.md (#4073 )	2024-05-01 15:39:11 -04:00
Michael Yang	997a455039	want filepath	2024-05-01 12:33:41 -07:00
Michael Yang	88775e1ff9	strip scheme from name	2024-05-01 12:26:19 -07:00
Michael Yang	8867e744ff	types/model: fix name for hostport	2024-05-01 12:14:53 -07:00
Daniel Hiltgen	4fd064bea6	Merge pull request #4031 from MarkWard0110/fix/issue-3736 Fix/issue 3736: When runners are closing or expiring. Scheduler is getting dirty VRAM size readings.	2024-05-01 12:13:26 -07:00
Jeffrey Morgan	59fbceedcc	use lf for line endings (#4085 )	2024-05-01 15:02:45 -04:00
Mark Ward	321d57e1a0	Removing go routine calling .wait from load.	2024-05-01 18:51:10 +00:00
Mark Ward	ba26c7aa00	it will always return an error due to Kill() discarding Wait() errors	2024-05-01 18:51:10 +00:00
Mark Ward	63c763685f	log when the waiting for the process to stop to help debug when other tasks execute during this wait. expire timer clear the timer reference because it will not be reused. close will clean up expireTimer if calling code has not already done this.	2024-05-01 18:51:10 +00:00
Mark Ward	34a4a94f13	ignore debug bin files	2024-05-01 18:51:10 +00:00
Mark Ward	f4a73d57a4	fix runner expire during active use. Clearing the expire timer as it is used. Allowing the finish to assign an expire timer so that the runner will expire after no use.	2024-05-01 18:51:10 +00:00
Mark Ward	948114e3e3	fix sched to wait for the runner to terminate to ensure following vram check will be more accurate	2024-05-01 18:51:10 +00:00
Arpit Jain	a3e60d9058	README.md: fix typos (#4007 ) Co-authored-by: Blake Mizerany <blake.mizerany@gmail.com>	2024-05-01 10:39:38 -07:00
Michael Yang	8acb233668	use strings.Builder	2024-05-01 10:01:09 -07:00
Michael Yang	119589fcb3	rename parser to model/file	2024-05-01 09:53:50 -07:00
Michael Yang	5ea844964e	cmd: import regexp	2024-05-01 09:53:45 -07:00
Michael Yang	bd8eed57fc	fix parser name	2024-05-01 09:52:54 -07:00
Michael Yang	9cf0f2e973	use parser.Format instead of templating modelfile	2024-05-01 09:52:54 -07:00
Michael Yang	176ad3aa6e	parser: add commands format	2024-05-01 09:52:54 -07:00
Michael Yang	4d08363580	comments	2024-05-01 09:52:54 -07:00
Michael Yang	8907bf51d2	fix multiline	2024-05-01 09:52:54 -07:00
Michael Yang	abe614c705	tests	2024-05-01 09:52:54 -07:00
Michael Yang	238715037d	linting	2024-05-01 09:52:54 -07:00
Michael Yang	c0a00f68ae	refactor modelfile parser	2024-05-01 09:52:54 -07:00
Jeffrey Morgan	f0c454ab57	gpu: add 512MiB to darwin minimum, metal doesn't have partial offloading overhead (#4068 )	2024-05-01 11:46:03 -04:00
Blake Mizerany	b9f74ff3d6	types/model: reintroduce Digest (#4065 )	2024-04-30 16:38:03 -07:00
jmorganca	fcf4d60eee	llm: add back check for empty token cache	2024-04-30 17:38:44 -04:00
jmorganca	e33d5c2dbc	update llama.cpp commit to `952d03d`	2024-04-30 17:31:20 -04:00
Jeffrey Morgan	18d9a7e1f1	update llama.cpp submodule to `f364eb6` (#4060 )	2024-04-30 17:25:39 -04:00
Michael	8488388cbd	Update README.md	2024-04-30 15:45:56 -04:00
Blake Mizerany	588901f449	types/model: reduce Name.Filepath allocs from 5 to 2 (#4039 )	2024-04-30 11:09:19 -07:00
Bruce MacDonald	0a7fdbe533	prompt to display and add local ollama keys to account (#3717 ) - return descriptive error messages when unauthorized to create blob or push a model - display the local public key associated with the request that was denied	2024-04-30 11:02:08 -07:00
Christian Frantzen	5950c176ca	Update langchainpy.md (#4037 ) Updated the code a bit	2024-04-29 23:19:06 -04:00
Daniel Hiltgen	23d23409a0	Update llama.cpp (#4036 ) * Bump llama.cpp to b2761 * Adjust types for bump	2024-04-29 23:18:48 -04:00
Patrick Devine	9009bedf13	better checking for OLLAMA_HOST variable (#3661 )	2024-04-29 19:14:07 -04:00
Daniel Hiltgen	d4ac57e240	Merge pull request #4035 from dhiltgen/fix_relative_paths Fix relative path lookup	2024-04-29 16:08:06 -07:00
Daniel Hiltgen	7b59d1770f	Fix relative path lookup	2024-04-29 16:00:08 -07:00
Jeffrey Morgan	95ead8ffba	Restart server on failure when running Windows app (#3985 ) * app: restart server on failure * fix linter * address comments * refactor log directory creation to be where logs are written * check all log dir creation errors	2024-04-29 10:07:52 -04:00
Jeffrey Morgan	7aa08a77ca	llm: dont cap context window limit to training context window (#3988 )	2024-04-29 10:07:30 -04:00
Blake Mizerany	7e432cdfac	types/model: remove old comment (#4020 )	2024-04-28 20:52:26 -07:00
Jeffrey Morgan	586672f490	fix copying model to itself (#4019 )	2024-04-28 23:47:49 -04:00
Daniel Hiltgen	b03408de74	Merge pull request #3972 from hmartinez82/win_arm64 Add support for building on Windows ARM64	2024-04-28 14:52:58 -07:00
Daniel Hiltgen	1e6a28bf5b	Merge pull request #4009 from dhiltgen/cpu_concurrency Fix concurrency for CPU mode	2024-04-28 14:20:27 -07:00
Daniel Hiltgen	d6e3b64582	Fix concurrency for CPU mode Prior refactoring passes accidentally removed the logic to bypass VRAM checks for CPU loads. This adds that back, along with test coverage. This also fixes loaded map access in the unit test to be behind the mutex which was likely the cause of various flakes in the tests.	2024-04-28 13:42:39 -07:00
Blake Mizerany	114c932a8e	types/model: allow _ as starter character in Name parts (#3991 )	2024-04-27 21:24:52 -07:00
Jeffrey Morgan	7f7103de06	mac: update setup command to `llama3` (#3986 )	2024-04-27 22:52:10 -04:00
Blake Mizerany	c631a9c726	types/model: relax name length constraint from 2 to 1 (#3984 )	2024-04-27 17:58:41 -07:00
Blake Mizerany	8fd9e56804	types/structs: drop unused structs package (#3981 )	2024-04-27 14:06:11 -07:00
Hernan Martinez	8a65717f55	Do not build AVX runners on ARM64	2024-04-26 23:55:32 -06:00
Hernan Martinez	6d3152a98a	Use architecture specific folders in installer script	2024-04-26 23:35:16 -06:00
Hernan Martinez	b438d485f1	Use architecture specific folders in the generate script	2024-04-26 23:34:12 -06:00
Hernan Martinez	204349b17b	Use architecture specific folders in the build script	2024-04-26 23:26:03 -06:00
Hernan Martinez	86e67fc4a9	Add import declaration for windows,arm64 to llm.go	2024-04-26 23:23:53 -06:00