Compare commits

...

52 Commits

Author SHA1 Message Date
jmorganca
201a987ff9 some more menu options... 2024-04-28 12:40:52 -04:00
jmorganca
2d8125042a Touch ID for cli install; server restarts 2024-04-27 22:42:38 -04:00
jmorganca
776e7bb5e4 app: fix status item icons 2024-04-27 15:57:57 -04:00
jmorganca
b8d7ca1a7b Native implementation of macOS app 2024-04-27 14:20:10 -04:00
Blake Mizerany
2bed62926e types/model: remove Digest (for now) (#3970)
The Digest type needs more thought and is not necessary at the moment.
2024-04-26 21:14:28 -07:00
Jeffrey Morgan
aad8d128a0 also look at cwd as a root for windows runners (#3959) 2024-04-26 19:14:08 -04:00
Daniel Hiltgen
ec1acbb867 Merge pull request #3968 from dhiltgen/win_generate
Fine grain control over windows generate steps
2024-04-26 16:03:38 -07:00
Daniel Hiltgen
e4859c4563 Fine grain control over windows generate steps
This will speed up CI which already tries to only build static for unit tests
2024-04-26 15:49:46 -07:00
Nataly Merezhuk
8e30eb26bd Updates the setup command to use llama3. (#3962) 2024-04-26 18:41:01 -04:00
Daniel Hiltgen
0b5c589ca2 Merge pull request #3966 from dhiltgen/bump
Fix target in gen_windows.ps1
2024-04-26 15:36:53 -07:00
Michael Yang
65fadddc85 Merge pull request #3964 from ollama/mxyng/weights
fix gemma, command-r layer weights
2024-04-26 15:23:33 -07:00
Daniel Hiltgen
ed5fb088c4 Fix target in gen_windows.ps1 2024-04-26 15:10:42 -07:00
Michael Yang
f81f308118 fix gemma, command-r layer weights 2024-04-26 15:00:55 -07:00
Blake Mizerany
b1390a7b37 types/model: export ParseNameBare and Merge (#3957)
These are useful outside this package.
2024-04-26 14:58:07 -07:00
Michael Yang
11d83386a5 Merge pull request #3951 from ollama/mxyng/zip
check file type before zip
2024-04-26 14:51:23 -07:00
Jeffrey Morgan
bb31def011 return code 499 when user cancels request while a model is loading (#3955) 2024-04-26 17:38:29 -04:00
Michael Yang
41e03ede95 check file type before zip 2024-04-26 14:18:07 -07:00
Michael Yang
7fea1ecdf6 Merge pull request #3958 from ollama/mxyng/fix-workflow
use merge base for diff-tree
2024-04-26 14:17:56 -07:00
Blake Mizerany
054894271d .github/workflows/test.yaml: add in-flight cancellations on new push (#3956)
Also, remove a superfluous 'go get'
2024-04-26 13:54:24 -07:00
Michael Yang
6fef042f0b use merge base for diff-tree 2024-04-26 13:54:15 -07:00
Daniel Hiltgen
5c0c2d1d09 Merge pull request #3954 from dhiltgen/ci_fixes
Put back non-avx CPU build for windows
2024-04-26 13:09:03 -07:00
Blake Mizerany
37f9c8ad99 types/model: overhaul Name and Digest types (#3924) 2024-04-26 13:08:32 -07:00
Quinten van Buul
2a80f55e2a Update windows.md (#3855)
Fixed a typo
2024-04-26 16:04:15 -04:00
Daniel Hiltgen
421c878a2d Put back non-avx CPU build for windows 2024-04-26 12:44:07 -07:00
Daniel Hiltgen
36666c2142 Merge pull request #3925 from dhiltgen/bump
Bump llama.cpp to b2737
2024-04-26 10:09:38 -07:00
Daniel Hiltgen
85801317d1 Fix clip log import 2024-04-26 09:43:46 -07:00
Daniel Hiltgen
2ed0d65948 Bump llama.cpp to b2737 2024-04-26 09:43:28 -07:00
Daniel Hiltgen
d459dc4ad1 Merge pull request #3950 from dhiltgen/windows_packaging
Fix exe name for zip packaging on windows
2024-04-26 09:27:37 -07:00
Daniel Hiltgen
40bc4622ef Fix exe name for zip packaging on windows
The zip file encodes the OS and architecture, so keep the short exe name
2024-04-26 09:18:05 -07:00
Daniel Hiltgen
c0f818a07a Merge pull request #3948 from dhiltgen/win_generate
Refactor windows generate for more modular usage
2024-04-26 09:17:20 -07:00
Daniel Hiltgen
8671fdeda6 Refactor windows generate for more modular usage 2024-04-26 08:35:50 -07:00
Daniel Hiltgen
2619850fb4 Merge pull request #3933 from dhiltgen/ci_fixes
Move cuda/rocm dependency gathering into generate script
2024-04-26 07:01:24 -07:00
Daniel Hiltgen
8feb97dc0d Move cuda/rocm dependency gathering into generate script
This will make it simpler for CI to accumulate artifacts from prior steps
2024-04-25 22:38:44 -07:00
Daniel Hiltgen
4e1ff6dcbb Merge pull request #3926 from dhiltgen/ci_fixes
Fix release CI
2024-04-25 17:42:31 -07:00
Daniel Hiltgen
8589d752ac Fix release CI
download-artifact path was being used incorrectly.  It is where to
extract the zip not the files in the zip to extract.  Default is
workspace dir which is what we want, so omit it
2024-04-25 17:27:11 -07:00
Michael Yang
de4ded68b0 Merge pull request #3923 from ollama/mxyng/mem
only count output tensors
2024-04-25 16:34:17 -07:00
Daniel Hiltgen
9b5a3c5991 Merge pull request #3914 from dhiltgen/mac_perf
Improve mac parallel performance
2024-04-25 16:28:31 -07:00
Jeffrey Morgan
00b0699c75 Reload model if num_gpu changes (#3920)
* reload model if `num_gpu` changes

* dont reload on -1

* fix tests
2024-04-25 19:02:40 -04:00
Jeffrey Morgan
993cf8bf55 llm: limit generation to 10x context size to avoid run on generations (#3918)
* llm: limit generation to 10x context size to avoid run on generations

* add comment

* simplify condition statement
2024-04-25 19:02:30 -04:00
Michael Yang
7bb7cb8a60 only count output tensors 2024-04-25 15:24:08 -07:00
Daniel Hiltgen
b123be5b71 Adjust context size for parallelism 2024-04-25 13:58:54 -07:00
jmorganca
ddf5c09a9b use matrix multiplcation kernels in more cases 2024-04-25 13:58:54 -07:00
Roy Yang
5f73c08729 Remove trailing spaces (#3889) 2024-04-25 14:32:26 -04:00
Daniel Hiltgen
f503a848c2 Merge pull request #3895 from brycereitano/shiftloading
Move ggml loading to when attempting to fit
2024-04-25 09:24:08 -07:00
Bryce Reitano
36a6daccab Restructure loading conditional chain 2024-04-24 17:37:03 -06:00
Bryce Reitano
ceb0e26e5e Provide variable ggml for TestLoad 2024-04-24 17:19:55 -06:00
Bryce Reitano
284e02bed0 Move ggml loading to when we attempt fitting 2024-04-24 17:17:24 -06:00
Michael Yang
3450a57d4a Merge pull request #3713 from ollama/mxyng/modelname
update copy handler to use model.Name
2024-04-24 16:00:32 -07:00
Michael Yang
592dae31c8 update copy to use model.Name 2024-04-24 15:54:54 -07:00
Michael Yang
2010cbc5fa Merge pull request #3833 from ollama/mxyng/fix-from
fix: from blob
2024-04-24 15:13:47 -07:00
Michael Yang
ac0801eced only replace if it matches command 2024-04-24 14:49:26 -07:00
Michael Yang
ad66e5b060 split temp zip files 2024-04-24 14:18:01 -07:00
94 changed files with 1732 additions and 19703 deletions

View File

@@ -311,29 +311,18 @@ jobs:
- uses: actions/download-artifact@v4
with:
name: generate-windows-cpu
path: |
llm/build
dist/windows-amd64
- uses: actions/download-artifact@v4
with:
name: generate-windows-cuda
path: |
llm/build
dist/windows-amd64
- uses: actions/download-artifact@v4
with:
name: windows-cuda-deps
path: dist/deps
- uses: actions/download-artifact@v4
with:
name: windows-rocm-deps
path: dist/deps
- uses: actions/download-artifact@v4
with:
name: generate-windows-rocm
path: |
llm/build
dist/windows-amd64
- run: dir llm/build
- run: |
$gopath=(get-command go).source | split-path -parent
@@ -342,8 +331,6 @@ jobs:
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
$env:PATH="$gopath;$env:PATH"
$env:OLLAMA_SKIP_GENERATE="1"
$env:NVIDIA_DIR=$(resolve-path ".\dist\deps")
$env:HIP_PATH=$(resolve-path ".\dist\deps")
& .\scripts\build_windows.ps1
- uses: actions/upload-artifact@v4
with:

View File

@@ -1,5 +1,15 @@
name: test
concurrency:
  # For PRs, later CI runs preempt previous ones. e.g. a force push on a PR
  # cancels running CI jobs and starts all new ones.
  #
  # For non-PR pushes, concurrency.group needs to be unique for every distinct
  # CI run we want to have happen. Use run_id, which in practice means all
  # non-PR CI runs will be allowed to run without preempting each other.
  #
  # The PR number lives under github.event.pull_request; the github context has
  # no pull_request field of its own, so reading it there always yields an empty
  # value and every run would silently fall back to run_id.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
  cancel-in-progress: true
on:
pull_request:
paths:
@@ -21,7 +31,9 @@ jobs:
- id: changes
run: |
changed() {
git diff-tree -r --no-commit-id --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
git diff-tree -r --no-commit-id --name-only \
$(git merge-base ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}) \
${{ github.event.pull_request.head.sha }} \
| xargs python3 -c "import sys; print(any([x.startswith('$1') for x in sys.argv[1:]]))"
}
@@ -283,7 +295,6 @@ jobs:
with:
go-version-file: go.mod
cache: true
- run: go get
- run: |
case ${{ matrix.arch }} in
amd64) echo ARCH=x86_64 ;;

2
.gitignore vendored
View File

@@ -11,4 +11,4 @@ ggml-metal.metal
.idea
test_data
*.crt
llm/build
llm/build

View File

@@ -396,8 +396,10 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
func DefaultOptions() Options {
return Options{
// options set on request to runner
NumPredict: -1,
NumKeep: 0,
NumPredict: -1,
// set a minimal num_keep to avoid issues on context shifts
NumKeep: 4,
Temperature: 0.8,
TopK: 40,
TopP: 0.9,

1
app/.gitignore vendored
View File

@@ -1 +1,2 @@
ollama.syso
app

7
app/AppDelegate.h Normal file
View File

@@ -0,0 +1,7 @@
#import <Cocoa/Cocoa.h>
// AppDelegate is the application delegate class for the macOS app. It adopts
// NSApplicationDelegate and declares the launch callback.
// NOTE(review): app_darwin.h declares the same interface — confirm both headers
// are never imported into one translation unit.
@interface AppDelegate : NSObject <NSApplicationDelegate>
// NSApplicationDelegate launch callback.
- (void)applicationDidFinishLaunching:(NSNotification *)aNotification;
@end

View File

@@ -1,10 +1,6 @@
# Ollama App
## Linux
TODO
## MacOS
## macOS
TODO

76
app/app_darwin.go Normal file
View File

@@ -0,0 +1,76 @@
package main
// #cgo CFLAGS: -x objective-c
// #cgo LDFLAGS: -framework Cocoa -framework LocalAuthentication -framework ServiceManagement
// #include "app_darwin.h"
import "C"
import (
"context"
"fmt"
"log/slog"
"os"
"path/filepath"
"syscall"
)
// init points ServerLogFile at <home>/.ollama/logs/server.log.
//
// It panics when the user's home directory cannot be determined, because there
// is then no location to write the server log to.
func init() {
	homeDir, homeErr := os.UserHomeDir()
	if homeErr != nil {
		panic(homeErr)
	}

	logDir := filepath.Join(homeDir, ".ollama", "logs")
	ServerLogFile = filepath.Join(logDir, "server.log")
}
// run drives the macOS app from start to exit.
//
// In order it: sets up logging, offers to move the bundle into the
// Applications directory (returning immediately when the native side reports
// that a move is under way), terminates other running instances, tries to
// install the CLI symlink, spawns the bundled ollama server, and then blocks
// inside the native event loop. Once that loop returns, the server context is
// cancelled and run waits for the server to report that it has stopped.
//
// It panics if the path of the running executable cannot be resolved.
func run() {
	initLogging()
	slog.Info("ollama macOS app started")

	// Ask to move to applications directory
	if C.askToMoveToApplications() {
		return
	}

	C.killOtherInstances()

	// A failed symlink install is logged but does not stop the app.
	if C.installSymlink() != 0 {
		slog.Error("Failed to install symlink")
	}

	exePath, err := os.Executable()
	if err != nil {
		panic(err)
	}
	// The server binary sits in the Resources directory next to the directory
	// holding the app executable.
	serverPath := filepath.Join(filepath.Dir(exePath), "..", "Resources", "ollama")

	ctx, cancel := context.WithCancel(context.Background())

	done, err := SpawnServer(ctx, serverPath, ServerOptions{})
	if err != nil {
		slog.Error(fmt.Sprintf("Failed to spawn ollama server %s", err))
		// Pre-fill a buffered channel so the wait during shutdown returns at once.
		done = make(chan int, 1)
		done <- 1
	}

	// Run the native macOS app
	// Note: this will block until the app is closed
	C.run()

	slog.Info("ollama macOS app closed")

	cancel()
	slog.Info("Waiting for ollama server to shutdown...")
	if done != nil {
		<-done
	}
	slog.Info("Ollama app exiting")
}
// Quit asks the current process to terminate by sending SIGTERM to its own
// pid. It is exported to C (declared as `extern void Quit();` in
// app_darwin.h) so that the native side can call it.
//
// A failure to deliver the signal is logged rather than silently dropped.
//
//export Quit
func Quit() {
	if err := syscall.Kill(os.Getpid(), syscall.SIGTERM); err != nil {
		slog.Error("failed to send SIGTERM to self", "error", err)
	}
}

13
app/app_darwin.h Normal file
View File

@@ -0,0 +1,13 @@
#import <Cocoa/Cocoa.h>
// AppDelegate is the NSApplicationDelegate installed by run(); its
// implementation lives in app_darwin.m.
@interface AppDelegate : NSObject <NSApplicationDelegate>
- (void)applicationDidFinishLaunching:(NSNotification *)aNotification;
@end
// Functions implemented in app_darwin.m and called from Go through cgo.
// run creates the shared NSApplication, installs an AppDelegate and runs the
// application event loop.
void run();
// killOtherInstances terminates other running instances of the app.
void killOtherInstances();
// askToMoveToApplications returns true when the bundle was moved to
// /Applications and a relaunch was requested; false otherwise.
bool askToMoveToApplications();
// NOTE(review): no definition of createSymlinkWithAuthorization is visible in
// app_darwin.m — confirm whether this declaration is still needed.
int createSymlinkWithAuthorization();
// installSymlink returns 0 on success (or when the link is already correct)
// and -1 on failure.
int installSymlink();
// Functions expected to be provided by the Go side via //export.
// NOTE(review): only Quit has a visible Go implementation (app_darwin.go);
// no exported Restart is visible — confirm before calling it.
extern void Restart();
extern void Quit();

282
app/app_darwin.m Normal file
View File

@@ -0,0 +1,282 @@
#import <AppKit/AppKit.h>
#import <Cocoa/Cocoa.h>
#import <CoreServices/CoreServices.h>
#import <Security/Security.h>
#import <ServiceManagement/ServiceManagement.h>
#import "app_darwin.h"
// Private class extension: holds the menu bar status item for the lifetime of
// the delegate.
@interface AppDelegate ()
@property (strong, nonatomic) NSStatusItem *statusItem;
@end
// AppDelegate builds the status bar menu (About, Settings submenu, Quit) and
// keeps the status item icon in sync with the button's effective appearance.
@implementation AppDelegate
// Builds the status menu and the status item once the application has
// finished launching.
- (void)applicationDidFinishLaunching:(NSNotification *)aNotification {
// show status menu
NSMenu *menu = [[NSMenu alloc] init];
NSMenuItem *aboutMenuItem = [[NSMenuItem alloc] initWithTitle:@"About Ollama" action:@selector(aboutOllama) keyEquivalent:@""];
[aboutMenuItem setTarget:self];
[menu addItem:aboutMenuItem];
// Settings submenu
NSMenu *settingsMenu = [[NSMenu alloc] initWithTitle:@"Settings"];
// Submenu items
NSMenuItem *chooseModelDirectoryItem = [[NSMenuItem alloc] initWithTitle:@"Choose model directory..." action:@selector(chooseModelDirectory) keyEquivalent:@""];
[chooseModelDirectoryItem setTarget:self];
[chooseModelDirectoryItem setEnabled:YES];
[settingsMenu addItem:chooseModelDirectoryItem];
NSMenuItem *exposeExternallyItem = [[NSMenuItem alloc] initWithTitle:@"Allow external connections" action:@selector(toggleExposeExternally:) keyEquivalent:@""];
[exposeExternallyItem setTarget:self];
[exposeExternallyItem setState:NSOffState]; // Set initial state to off
[exposeExternallyItem setEnabled:YES];
[settingsMenu addItem:exposeExternallyItem];
NSMenuItem *allowCrossOriginItem = [[NSMenuItem alloc] initWithTitle:@"Allow browser requests" action:@selector(toggleCrossOrigin:) keyEquivalent:@""];
[allowCrossOriginItem setTarget:self];
[allowCrossOriginItem setState:NSOffState]; // Set initial state to off
[allowCrossOriginItem setEnabled:YES];
[settingsMenu addItem:allowCrossOriginItem];
NSMenuItem *settingsMenuItem = [[NSMenuItem alloc] initWithTitle:@"Settings" action:nil keyEquivalent:@""];
[settingsMenuItem setSubmenu:settingsMenu];
[menu addItem:settingsMenuItem];
// Unlike the items above, no explicit target is set on the Quit item.
[menu addItemWithTitle:@"Quit Ollama" action:@selector(quit) keyEquivalent:@"q"];
self.statusItem = [[NSStatusBar systemStatusBar] statusItemWithLength:NSVariableStatusItemLength];
// Observe the button's effective appearance so the icon can be re-selected
// when it changes; the Initial option also requests a notification right away.
[self.statusItem addObserver:self forKeyPath:@"button.effectiveAppearance" options:NSKeyValueObservingOptionNew|NSKeyValueObservingOptionInitial context:nil];
self.statusItem.menu = menu;
[self showIcon];
}
// Shows the standard About panel.
- (void)aboutOllama {
[[NSApplication sharedApplication] orderFrontStandardAboutPanel:nil];
}
// Flips the check mark on the "Allow browser requests" item. Only the menu
// item state changes here; the placeholders below mark where the behavior is
// still to be implemented.
- (void)toggleCrossOrigin:(id)sender {
NSMenuItem *item = (NSMenuItem *)sender;
if ([item state] == NSOffState) {
// Do something when cross-origin requests are allowed
[item setState:NSOnState];
} else {
// Do something when cross-origin requests are disallowed
[item setState:NSOffState];
}
}
// Flips the check mark on the "Allow external connections" item. Only the menu
// item state changes here; the placeholders below mark where the behavior is
// still to be implemented.
- (void)toggleExposeExternally:(id)sender {
NSMenuItem *item = (NSMenuItem *)sender;
if ([item state] == NSOffState) {
// Do something when Ollama is exposed externally
[item setState:NSOnState];
} else {
// Do something when Ollama is not exposed externally
[item setState:NSOffState];
}
}
// Presents a modal open panel restricted to choosing a single directory. The
// selected URL is read but not yet used.
- (void)chooseModelDirectory {
NSOpenPanel *openPanel = [NSOpenPanel openPanel];
[openPanel setCanChooseFiles:NO];
[openPanel setCanChooseDirectories:YES];
[openPanel setAllowsMultipleSelection:NO];
NSInteger result = [openPanel runModal];
if (result == NSModalResponseOK) {
NSURL *selectedDirectoryURL = [openPanel URLs].firstObject;
// Do something with the selected directory URL
}
}
// Sets the status item image: "iconDark" when the button's effective
// appearance name contains "dark" (case-insensitive), otherwise "icon". The
// image is marked as a template image.
-(void) showIcon {
NSAppearance* appearance = self.statusItem.button.effectiveAppearance;
NSString* appearanceName = (NSString*)(appearance.name);
NSString* iconName = [[appearanceName lowercaseString] containsString:@"dark"] ? @"iconDark" : @"icon";
NSImage* statusImage = [NSImage imageNamed:iconName];
[statusImage setTemplate:YES];
self.statusItem.button.image = statusImage;
}
// KVO callback: refreshes the icon on every observed change. The key path is
// not inspected, so any observation registered on this object lands here.
-(void)observeValueForKeyPath:(NSString *)keyPath ofObject:(id)object change:(NSDictionary<NSKeyValueChangeKey,id> *)change context:(void *)context {
[self showIcon];
}
// Menu action for "Quit Ollama": asks NSApp to stop.
// NOTE(review): presumably this is what lets the [NSApp run] call in run()
// return — confirm.
- (void)quit {
[NSApp stop:nil];
}
@end
// run creates the shared NSApplication instance, installs a fresh AppDelegate
// on it and runs the application event loop inside an autorelease pool.
void run() {
    @autoreleasepool {
        NSApplication *application = [NSApplication sharedApplication];
        AppDelegate *delegate = [[AppDelegate alloc] init];
        application.delegate = delegate;
        [application run];
    }
}
// killOtherInstances terminates every other running copy of the app so that
// only the most recently started instance remains.
//
// A running application counts as a copy when its bundle identifier equals this
// bundle's identifier, "ai.ollama.ollama" or "com.electron.ollama". Each match
// (other than the current process) is sent SIGTERM; the function then waits,
// spinning the current run loop in 0.1 second slices, and sends SIGKILL to an
// application that is still not terminated once 5 seconds have elapsed since
// the wait began.
void killOtherInstances() {
    const pid_t selfPid = getpid();
    NSString *ownIdentifier = [[NSBundle mainBundle] bundleIdentifier];
    NSMutableArray *others = [NSMutableArray array];

    for (NSRunningApplication *candidate in [[NSWorkspace sharedWorkspace] runningApplications]) {
        if (candidate.processIdentifier == selfPid) {
            continue;
        }

        NSString *identifier = candidate.bundleIdentifier;
        BOOL isOllama = [identifier isEqualToString:ownIdentifier] ||
                        [identifier isEqualToString:@"ai.ollama.ollama"] ||
                        [identifier isEqualToString:@"com.electron.ollama"];
        if (isOllama) {
            [others addObject:candidate];
        }
    }

    for (NSRunningApplication *other in others) {
        kill(other.processIdentifier, SIGTERM);
    }

    // The timeout is measured from here and is shared by all applications.
    NSDate *waitStarted = [NSDate date];
    for (NSRunningApplication *other in others) {
        while (!other.terminated) {
            NSTimeInterval elapsed = -[waitStarted timeIntervalSinceNow];
            if (elapsed >= 5) {
                kill(other.processIdentifier, SIGKILL);
                break;
            }

            NSDate *nextPoll = [NSDate dateWithTimeIntervalSinceNow:0.1];
            [[NSRunLoop currentRunLoop] runUntilDate:nextPoll];
        }
    }
}
// askToMoveToApplications offers to move the app bundle into /Applications.
//
// Returns false when the bundle already lives inside /Applications, when the
// user declines, or when removing an existing /Applications/Ollama.app or
// moving the bundle fails. Returns true once the bundle has been moved and a
// relaunch from the new location has been requested; the caller is expected to
// stop running in that case.
bool askToMoveToApplications() {
    NSString *bundlePath = [[NSBundle mainBundle] bundlePath];
    // The trailing slash restricts the match to bundles inside /Applications;
    // a bare "/Applications" prefix would also match e.g. /ApplicationsOld/Ollama.app.
    if ([bundlePath hasPrefix:@"/Applications/"]) {
        return false;
    }

    NSAlert *alert = [[NSAlert alloc] init];
    [alert setMessageText:@"Move to Applications?"];
    [alert setInformativeText:@"Ollama works best when run from the Applications directory."];
    [alert addButtonWithTitle:@"Move to Applications"];
    [alert addButtonWithTitle:@"Don't move"];

    [NSApp activateIgnoringOtherApps:YES];

    if ([alert runModal] != NSAlertFirstButtonReturn) {
        return false;
    }

    // move to applications
    NSString *applicationsPath = @"/Applications";
    NSString *newPath = [applicationsPath stringByAppendingPathComponent:@"Ollama.app"];
    NSFileManager *fileManager = [NSFileManager defaultManager];

    // Replace a bundle that already exists at the destination. Success is
    // judged by the method's return value; the NSError is only meaningful
    // when the call reports failure.
    if ([fileManager fileExistsAtPath:newPath]) {
        NSError *removeError = nil;
        if (![fileManager removeItemAtPath:newPath error:&removeError]) {
            NSLog(@"Error removing file at %@: %@", newPath, removeError);
            return false;
        }
    }

    NSError *moveError = nil;
    if (![fileManager moveItemAtPath:bundlePath toPath:newPath error:&moveError]) {
        NSLog(@"Error moving file from %@ to %@: %@", bundlePath, newPath, moveError);
        return false;
    }

    NSLog(@"Opening %@", newPath);
    NSError *launchError = nil;
    NSWorkspace *workspace = [NSWorkspace sharedWorkspace];
    // Silence the deprecation warning for this one call only, so the rest of
    // the file still gets deprecation diagnostics.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
    NSRunningApplication *launched = [workspace launchApplicationAtURL:[NSURL fileURLWithPath:newPath]
                                                               options:NSWorkspaceLaunchNewInstance | NSWorkspaceLaunchDefault
                                                         configuration:@{}
                                                                 error:&launchError];
#pragma clang diagnostic pop
    if (launched == nil) {
        // The bundle has already been moved, so still report "moving"; the
        // failure is logged so that a missing relaunch can be diagnosed.
        NSLog(@"Error launching %@: %@", newPath, launchError);
    }
    return true;
}
// authorizeAndLinkCLI obtains the admin right on `auth` and then runs
// `/bin/ln -s -F <resPath> /usr/local/bin/ollama` with privileges.
// Returns 0 on success and -1 on any failure. The caller owns `auth` and is
// responsible for freeing it.
static int authorizeAndLinkCLI(AuthorizationRef auth, NSString *resPath) {
    NSString *authorizationPrompt = @"Ollama is trying to install its command line interface (CLI) tool.";

    NSString *bundleIdentifier = [[NSBundle mainBundle] bundleIdentifier];
    NSString *rightNameString = [NSString stringWithFormat:@"%@.%@", bundleIdentifier, @"auth3"];
    const char *rightName = rightNameString.UTF8String;

    // Register the right the first time it is needed: AuthorizationRightGet
    // reports errAuthorizationDenied when the right is not defined yet.
    OSStatus getRightResult = AuthorizationRightGet(rightName, NULL);
    if (getRightResult == errAuthorizationDenied) {
        if (AuthorizationRightSet(auth, rightName, (__bridge CFTypeRef _Nonnull)(@(kAuthorizationRuleAuthenticateAsAdmin)), (__bridge CFStringRef _Nullable)(authorizationPrompt), NULL, NULL) != errAuthorizationSuccess) {
            NSLog(@"Failed to set right");
            return -1;
        }
    }

    AuthorizationItem right = { .name = rightName, .valueLength = 0, .value = NULL, .flags = 0 };
    AuthorizationRights rights = { .count = 1, .items = &right };
    AuthorizationFlags flags = (AuthorizationFlags)(kAuthorizationFlagExtendRights | kAuthorizationFlagInteractionAllowed);
    AuthorizationEnvironment authorizationEnvironment = {.count = 0, .items = NULL};

    OSStatus copyStatus = AuthorizationCopyRights(auth, &rights, &authorizationEnvironment, flags, NULL);
    if (copyStatus == errAuthorizationCanceled) {
        NSLog(@"User cancelled authorization");
        return -1;
    }
    if (copyStatus != errAuthorizationSuccess) {
        NSLog(@"Failed copying system domain rights: %d", (int)copyStatus);
        return -1;
    }

    const char *toolPath = "/bin/ln";
    const char *args[] = {"-s", "-F", [resPath UTF8String], "/usr/local/bin/ollama", NULL};

    // No communications pipe is requested (NULL): the tool's output is not
    // read, and asking for a pipe would hand back a FILE * that must be closed.
    // The deprecation warning is silenced for this one call only.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
    OSStatus status = AuthorizationExecuteWithPrivileges(auth, toolPath, kAuthorizationFlagDefaults, (char *const *)args, NULL);
#pragma clang diagnostic pop
    if (status != errAuthorizationSuccess) {
        NSLog(@"Failed to create symlink");
        return -1;
    }

    return 0;
}

// installSymlink makes /usr/local/bin/ollama a symbolic link to the `ollama`
// resource inside the app bundle, prompting for administrator authorization
// when the link is missing or points somewhere else.
//
// Returns 0 when the link is already correct or was created, and -1 when the
// bundled resource cannot be found, authorization fails or is cancelled, or the
// privileged `ln` could not be started. The authorization reference is released
// on every path.
int installSymlink() {
    NSString *linkPath = @"/usr/local/bin/ollama";
    NSFileManager *fileManager = [NSFileManager defaultManager];

    NSString *resPath = [[NSBundle mainBundle] pathForResource:@"ollama" ofType:nil];
    if (resPath == nil) {
        // Without a target, the NULL C string would cut the ln argument list short.
        NSLog(@"Could not locate the ollama binary in the app bundle");
        return -1;
    }

    // A failure here just means there is no usable link yet, so the error is
    // not needed: symlinkPath is nil and the comparison below is false.
    NSString *symlinkPath = [fileManager destinationOfSymbolicLinkAtPath:linkPath error:NULL];

    // if the symlink already exists and points to the right place, don't prompt
    if ([symlinkPath isEqualToString:resPath]) {
        NSLog(@"symbolic link already exists and points to the right place");
        return 0;
    }

    AuthorizationRef auth = NULL;
    OSStatus createStatus = AuthorizationCreate(NULL, kAuthorizationEmptyEnvironment, kAuthorizationFlagDefaults, &auth);
    if (createStatus != errAuthorizationSuccess) {
        NSLog(@"Error creating authorization");
        return -1;
    }

    int result = authorizeAndLinkCLI(auth, resPath);

    // Release the authorization whether or not the link was created.
    AuthorizationFree(auth, kAuthorizationFlagDestroyRights);
    return result;
}

166
app/app_windows.go Normal file
View File

@@ -0,0 +1,166 @@
package main
import (
"context"
"errors"
"fmt"
"log"
"log/slog"
"os"
"os/exec"
"os/signal"
"path/filepath"
"strings"
"syscall"
"github.com/ollama/ollama/app/lifecycle"
"github.com/ollama/ollama/app/store"
"github.com/ollama/ollama/app/tray"
"github.com/ollama/ollama/app/updater"
)
// init prepares the Windows-specific names and directories used by the app.
//
// It appends ".exe" to AppName and CLIName, derives AppDataDir, AppLogFile,
// ServerLogFile and AppDir from %LOCALAPPDATA%, appends AppDir to PATH when no
// existing PATH entry resolves to it, and creates AppDataDir if it is missing.
// Failures to update PATH or create the directory are logged, not fatal.
//
// NOTE(review): AppName and CLIName are declared in main.go without visible
// initial values — confirm they are set before this runs, otherwise they end
// up as just ".exe".
func init() {
	AppName += ".exe"
	CLIName += ".exe"

	// Logs and configuration live under %LOCALAPPDATA%\Ollama, executables
	// under %LOCALAPPDATA%\Programs\Ollama.
	base := os.Getenv("LOCALAPPDATA")
	AppDataDir = filepath.Join(base, "Ollama")
	AppLogFile = filepath.Join(AppDataDir, "app.log")
	ServerLogFile = filepath.Join(AppDataDir, "server.log")
	AppDir = filepath.Join(base, "Programs", "Ollama")

	// Spawned children must be able to find our executables, so make sure
	// AppDir is on PATH (compared case-insensitively after absolutizing).
	entries := strings.Split(os.Getenv("PATH"), ";")
	onPath := false
	for _, entry := range entries {
		abs, err := filepath.Abs(entry)
		if err == nil && strings.EqualFold(AppDir, abs) {
			onPath = true
			break
		}
	}
	if !onPath {
		entries = append(entries, AppDir)
		joined := strings.Join(entries, ";")
		slog.Debug("setting PATH=" + joined)
		if err := os.Setenv("PATH", joined); err != nil {
			slog.Error(fmt.Sprintf("failed to update PATH: %s", err))
		}
	}

	// Make sure our logging dir exists
	if _, err := os.Stat(AppDataDir); errors.Is(err, os.ErrNotExist) {
		if mkErr := os.MkdirAll(AppDataDir, 0o755); mkErr != nil {
			slog.Error(fmt.Sprintf("create ollama dir %s: %v", AppDataDir, mkErr))
		}
	}
}
// ShowLogs opens AppDataDir (the directory holding the log files) by running
// `cmd.exe /c start <dir>`. The command is started but not waited for; a
// failure to start it is logged.
func ShowLogs() {
	const (
		shellPath = "c:\\Windows\\system32\\cmd.exe"
		// Windows CREATE_NO_WINDOW process creation flag: keeps the
		// intermediate cmd.exe from flashing a console window.
		createNoWindow = 0x08000000
	)

	slog.Debug(fmt.Sprintf("viewing logs with start %s", AppDataDir))

	opener := exec.Command(shellPath, "/c", "start", AppDataDir)
	opener.SysProcAttr = &syscall.SysProcAttr{HideWindow: false, CreationFlags: createNoWindow}
	if err := opener.Start(); err != nil {
		slog.Error(fmt.Sprintf("Failed to open log dir: %s", err))
	}
}
func Start() {
cmd_path := "c:\\Windows\\system32\\cmd.exe"
slog.Debug(fmt.Sprintf("viewing logs with start %s", AppDataDir))
cmd := exec.Command(cmd_path, "/c", "start", AppDataDir)
cmd.SysProcAttr = &syscall.SysProcAttr{HideWindow: false, CreationFlags: 0x08000000}
err := cmd.Start()
if err != nil {
slog.Error(fmt.Sprintf("Failed to open log dir: %s", err))
}
}
// run drives the Windows tray app from start to exit.
//
// It sets up logging, creates the tray, starts a goroutine that services tray
// callbacks and SIGINT/SIGTERM, shows the first-use notification when needed,
// exits with status 1 if another ollama server is already running, spawns the
// server, starts the background update checker, and then blocks in the tray's
// Run. When that returns, the server context is cancelled and run waits for
// the server to report that it has stopped.
func run() {
	initLogging()
	slog.Info("ollama windows app started")

	ctx, cancel := context.WithCancel(context.Background())
	var done chan int

	t, err := tray.NewTray()
	if err != nil {
		log.Fatalf("Failed to start: %s", err)
	}
	callbacks := t.GetCallbacks()

	signals := make(chan os.Signal, 1)
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)

	// Service tray callbacks and signals for the lifetime of the process.
	go func() {
		slog.Debug("starting callback loop")
		for {
			select {
			case <-callbacks.Quit:
				slog.Debug("quit called")
				t.Quit()
			case <-signals:
				slog.Debug("shutting down due to signal")
				t.Quit()
			case <-callbacks.Update:
				err := updater.DoUpgrade(cancel, done)
				if err != nil {
					slog.Warn(fmt.Sprintf("upgrade attempt failed: %s", err))
				}
			case <-callbacks.ShowLogs:
				ShowLogs()
			case <-callbacks.DoFirstUse:
				err := lifecycle.GetStarted()
				if err != nil {
					slog.Warn(fmt.Sprintf("Failed to launch getting started shell: %s", err))
				}
			}
		}
	}()

	if !store.GetFirstTimeRun() {
		slog.Debug("First time run")
		err = t.DisplayFirstUseNotification()
		if err != nil {
			slog.Debug(fmt.Sprintf("XXX failed to display first use notification %v", err))
		}
		store.SetFirstTimeRun(true)
	} else {
		slog.Debug("Not first time, skipping first run notification")
	}

	if isServerRunning(ctx) {
		slog.Info("Detected another instance of ollama running, exiting")
		os.Exit(1)
	}

	// SpawnServer requires a ServerOptions argument. The zero value sets no
	// models path and leaves CORS and external exposure off, so no extra
	// environment variables are added for the server.
	done, err = SpawnServer(ctx, CLIName, ServerOptions{})
	if err != nil {
		// TODO - should we retry in a backoff loop?
		// TODO - should we pop up a warning and maybe add a menu item to view application logs?
		slog.Error(fmt.Sprintf("Failed to spawn ollama server %s", err))
		// Pre-fill a buffered channel so the wait during shutdown returns at once.
		done = make(chan int, 1)
		done <- 1
	}

	updater.StartBackgroundUpdaterChecker(ctx, t.UpdateAvailable)

	// Blocks until the tray is asked to quit.
	t.Run()

	cancel()
	slog.Info("Waiting for ollama server to shutdown...")
	if done != nil {
		<-done
	}
	slog.Info("Ollama app exiting")
}

View File

@@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDisplayName</key>
<string>Ollama</string>
<key>CFBundleExecutable</key>
<string>Ollama</string>
<key>CFBundleIconFile</key>
<string>icon.icns</string>
<key>CFBundleIdentifier</key>
<string>com.ollama.ollama</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>Ollama</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>0.0.0</string>
<key>CFBundleVersion</key>
<string>0.0.0</string>
<key>DTCompiler</key>
<string>com.apple.compilers.llvm.clang.1_0</string>
<key>DTSDKBuild</key>
<string>22E245</string>
<key>DTSDKName</key>
<string>macosx13.3</string>
<key>DTXcode</key>
<string>1431</string>
<key>DTXcodeBuild</key>
<string>14E300c</string>
<key>LSApplicationCategoryType</key>
<string>public.app-category.developer-tools</string>
<key>LSMinimumSystemVersion</key>
<string>11.0</string>
<key>LSUIElement</key>
<true/>
</dict>
</plist>

View File

Binary file not shown.

After

Width:  |  Height:  |  Size: 382 B

View File

Binary file not shown.

After

Width:  |  Height:  |  Size: 691 B

View File

Binary file not shown.

After

Width:  |  Height:  |  Size: 382 B

View File

Binary file not shown.

After

Width:  |  Height:  |  Size: 721 B

View File

@@ -1,5 +1,3 @@
//go:build !windows
package lifecycle
import "fmt"

View File

@@ -1,92 +0,0 @@
package lifecycle
import (
"context"
"fmt"
"log"
"log/slog"
"os"
"os/signal"
"syscall"
"github.com/ollama/ollama/app/store"
"github.com/ollama/ollama/app/tray"
)
// Run is the tray app entry point: it initializes logging, creates the tray,
// services tray callbacks and SIGINT/SIGTERM in a goroutine, shows the
// first-use notification when needed, spawns the ollama server unless one is
// already running (in which case the process exits with status 1), starts the
// background update checker, and blocks in the tray's Run. On return it
// cancels the server context and waits for the server to stop.
func Run() {
InitLogging()
ctx, cancel := context.WithCancel(context.Background())
var done chan int
t, err := tray.NewTray()
if err != nil {
log.Fatalf("Failed to start: %s", err)
}
callbacks := t.GetCallbacks()
signals := make(chan os.Signal, 1)
signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
// Service tray callbacks and signals; the loop has no exit condition.
go func() {
slog.Debug("starting callback loop")
for {
select {
case <-callbacks.Quit:
slog.Debug("quit called")
t.Quit()
case <-signals:
slog.Debug("shutting down due to signal")
t.Quit()
case <-callbacks.Update:
err := DoUpgrade(cancel, done)
if err != nil {
slog.Warn(fmt.Sprintf("upgrade attempt failed: %s", err))
}
case <-callbacks.ShowLogs:
ShowLogs()
case <-callbacks.DoFirstUse:
err := GetStarted()
if err != nil {
slog.Warn(fmt.Sprintf("Failed to launch getting started shell: %s", err))
}
}
}
}()
// Are we first use?
if !store.GetFirstTimeRun() {
slog.Debug("First time run")
err = t.DisplayFirstUseNotification()
if err != nil {
slog.Debug(fmt.Sprintf("XXX failed to display first use notification %v", err))
}
store.SetFirstTimeRun(true)
} else {
slog.Debug("Not first time, skipping first run notification")
}
if IsServerRunning(ctx) {
slog.Info("Detected another instance of ollama running, exiting")
os.Exit(1)
} else {
done, err = SpawnServer(ctx, CLIName)
if err != nil {
// TODO - should we retry in a backoff loop?
// TODO - should we pop up a warning and maybe add a menu item to view application logs?
slog.Error(fmt.Sprintf("Failed to spawn ollama server %s", err))
// Pre-fill a buffered channel so the wait during shutdown returns at once.
done = make(chan int, 1)
done <- 1
}
}
StartBackgroundUpdaterChecker(ctx, t.UpdateAvailable)
// Blocks until the tray is asked to quit.
t.Run()
cancel()
slog.Info("Waiting for ollama server to shutdown...")
if done != nil {
<-done
}
slog.Info("Ollama app exiting")
}

View File

@@ -1,9 +0,0 @@
//go:build !windows
package lifecycle
import "log/slog"
// ShowLogs is the non-Windows placeholder: it only logs a warning that log
// viewing is not implemented.
func ShowLogs() {
slog.Warn("ShowLogs not yet implemented")
}

View File

@@ -1,19 +0,0 @@
package lifecycle
import (
"fmt"
"log/slog"
"os/exec"
"syscall"
)
// ShowLogs opens AppDataDir (the directory holding the log files) by running
// `cmd.exe /c start <dir>`. The command is started but not waited for; a
// failure to start it is logged.
func ShowLogs() {
	const (
		shellPath = "c:\\Windows\\system32\\cmd.exe"
		// Windows CREATE_NO_WINDOW process creation flag: keeps the
		// intermediate cmd.exe from flashing a console window.
		createNoWindow = 0x08000000
	)

	slog.Debug(fmt.Sprintf("viewing logs with start %s", AppDataDir))

	opener := exec.Command(shellPath, "/c", "start", AppDataDir)
	opener.SysProcAttr = &syscall.SysProcAttr{HideWindow: false, CreationFlags: createNoWindow}
	if err := opener.Start(); err != nil {
		slog.Error(fmt.Sprintf("Failed to open log dir: %s", err))
	}
}

View File

@@ -70,10 +70,5 @@ func init() {
}
}
} else if runtime.GOOS == "darwin" {
// TODO
AppName += ".app"
// } else if runtime.GOOS == "linux" {
// TODO
}
}

View File

@@ -1,4 +1,4 @@
package lifecycle
package main
import (
"fmt"
@@ -7,7 +7,7 @@ import (
"path/filepath"
)
func InitLogging() {
func initLogging() {
level := slog.LevelInfo
if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
@@ -41,6 +41,4 @@ func InitLogging() {
})
slog.SetDefault(slog.New(handler))
slog.Info("ollama app started")
}

View File

@@ -2,11 +2,15 @@ package main
// Compile with the following to get rid of the cmd pop up on windows
// go build -ldflags="-H windowsgui" .
import (
"github.com/ollama/ollama/app/lifecycle"
var (
AppName string
CLIName string
AppDir string
AppDataDir string
AppLogFile string
ServerLogFile string
)
func main() {
lifecycle.Run()
run()
}

View File

@@ -1,4 +1,4 @@
package lifecycle
package main
import (
"context"
@@ -14,65 +14,41 @@ import (
"github.com/ollama/ollama/api"
)
func getCLIFullPath(command string) string {
cmdPath := ""
appExe, err := os.Executable()
if err == nil {
cmdPath = filepath.Join(filepath.Dir(appExe), command)
_, err := os.Stat(cmdPath)
if err == nil {
return cmdPath
}
}
cmdPath, err = exec.LookPath(command)
if err == nil {
_, err := os.Stat(cmdPath)
if err == nil {
return cmdPath
}
}
pwd, err := os.Getwd()
if err == nil {
cmdPath = filepath.Join(pwd, command)
_, err = os.Stat(cmdPath)
if err == nil {
return cmdPath
}
}
return command
type ServerOptions struct {
Cors bool
Expose bool
ModelsPath string
}
func SpawnServer(ctx context.Context, command string) (chan int, error) {
done := make(chan int)
func start(ctx context.Context, command string, options ServerOptions) (*exec.Cmd, error) {
cmd := getCmd(ctx, command)
logDir := filepath.Dir(ServerLogFile)
_, err := os.Stat(logDir)
if errors.Is(err, os.ErrNotExist) {
if err := os.MkdirAll(logDir, 0o755); err != nil {
return done, fmt.Errorf("create ollama server log dir %s: %v", logDir, err)
}
// set environment variables
if options.ModelsPath != "" {
cmd.Env = append(cmd.Env, fmt.Sprintf("OLLAMA_MODELS=%s", options.ModelsPath))
}
if options.Cors {
cmd.Env = append(cmd.Env, "OLLAMA_ORIGINS=*")
}
if options.Expose {
cmd.Env = append(cmd.Env, "OLLAMA_HOST=0.0.0.0")
}
cmd := getCmd(ctx, getCLIFullPath(command))
// send stdout and stderr to a file
stdout, err := cmd.StdoutPipe()
if err != nil {
return done, fmt.Errorf("failed to spawn server stdout pipe %s", err)
return nil, fmt.Errorf("failed to spawn server stdout pipe: %w", err)
}
stderr, err := cmd.StderrPipe()
if err != nil {
return done, fmt.Errorf("failed to spawn server stderr pipe %s", err)
}
stdin, err := cmd.StdinPipe()
if err != nil {
return done, fmt.Errorf("failed to spawn server stdin pipe %s", err)
return nil, fmt.Errorf("failed to spawn server stderr pipe: %w", err)
}
// TODO - rotation
logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
if err != nil {
return done, fmt.Errorf("failed to create server log %w", err)
return nil, fmt.Errorf("failed to create server log: %w", err)
}
go func() {
defer logFile.Close()
@@ -117,19 +93,38 @@ func SpawnServer(ctx context.Context, command string) (chan int, error) {
// run the command and wait for it to finish
if err := cmd.Start(); err != nil {
return done, fmt.Errorf("failed to start server %w", err)
return nil, fmt.Errorf("failed to start server %w", err)
}
if cmd.Process != nil {
slog.Info(fmt.Sprintf("started ollama server with pid %d", cmd.Process.Pid))
}
slog.Info(fmt.Sprintf("ollama server logs %s", ServerLogFile))
return cmd, nil
}
func SpawnServer(ctx context.Context, command string, options ServerOptions) (chan int, error) {
logDir := filepath.Dir(ServerLogFile)
_, err := os.Stat(logDir)
if errors.Is(err, os.ErrNotExist) {
if err := os.MkdirAll(logDir, 0o755); err != nil {
return nil, fmt.Errorf("create ollama server log dir %s: %v", logDir, err)
}
}
done := make(chan int)
go func() {
// Keep the server running unless we're shutting down the app
crashCount := 0
for {
slog.Info(fmt.Sprintf("starting server..."))
cmd, err := start(ctx, command, options)
if err != nil {
slog.Error(fmt.Sprintf("failed to start server %s", err))
}
cmd.Wait() //nolint:errcheck
stdin.Close()
var code int
if cmd.ProcessState != nil {
code = cmd.ProcessState.ExitCode()
@@ -143,19 +138,16 @@ func SpawnServer(ctx context.Context, command string) (chan int, error) {
default:
crashCount++
slog.Warn(fmt.Sprintf("server crash %d - exit code %d - respawning", crashCount, code))
time.Sleep(500 * time.Millisecond)
if err := cmd.Start(); err != nil {
slog.Error(fmt.Sprintf("failed to restart server %s", err))
// Keep trying, but back off if we keep failing
time.Sleep(time.Duration(crashCount) * time.Second)
}
time.Sleep(500 * time.Millisecond * time.Duration(crashCount))
break
}
}
}()
return done, nil
}
func IsServerRunning(ctx context.Context) bool {
func isServerRunning(ctx context.Context) bool {
client, err := api.ClientFromEnvironment()
if err != nil {
slog.Info("unable to connect to server")

View File

@@ -1,6 +1,4 @@
//go:build !windows
package lifecycle
package main
import (
"context"

View File

@@ -1,4 +1,4 @@
package lifecycle
package main
import (
"context"

View File

@@ -1,5 +1,3 @@
//go:build !windows
package tray
import (

View File

@@ -1,4 +1,4 @@
package lifecycle
package updater
import (
"context"
@@ -22,6 +22,10 @@ import (
"github.com/ollama/ollama/version"
)
var (
UpdateStageDir string
)
var (
UpdateCheckURLBase = "https://ollama.com/api/update"
UpdateDownloaded = false
@@ -123,7 +127,7 @@ func DownloadNewRelease(ctx context.Context, updateResp UpdateResponse) error {
slog.Debug("no etag detected, falling back to filename based dedup")
etag = "_"
}
filename := Installer
filename := "OllamaSetup.exe"
_, params, err := mime.ParseMediaType(resp.Header.Get("content-disposition"))
if err == nil {
filename = params["filename"]

View File

@@ -1,6 +1,4 @@
//go:build !windows
package lifecycle
package updater
import (
"context"

View File

@@ -1,4 +1,4 @@
package lifecycle
package updater
import (
"context"
@@ -9,7 +9,13 @@ import (
"path/filepath"
)
// init sets UpdateStageDir to the "Ollama/updates" subdirectory of the
// directory named by the LOCALAPPDATA environment variable.
func init() {
UpdateStageDir = filepath.Join(os.Getenv("LOCALAPPDATA"), "Ollama", "updates")
}
func DoUpgrade(cancel context.CancelFunc, done chan int) error {
logFile := filepath.Join(os.Getenv("LOCALAPPDATA"), "Ollama", "upgrade.log")
files, err := filepath.Glob(filepath.Join(UpdateStageDir, "*", "*.exe")) // TODO generalize for multiplatform
if err != nil {
return fmt.Errorf("failed to lookup downloads: %s", err)
@@ -23,13 +29,13 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
installerExe := files[0]
slog.Info("starting upgrade with " + installerExe)
slog.Info("upgrade log file " + UpgradeLogFile)
slog.Info("upgrade log file " + logFile)
// When running in debug mode, we'll be "verbose" and let the installer pop up and prompt
installArgs := []string{
"/CLOSEAPPLICATIONS", // Quit the tray app if it's still running
"/LOG=" + filepath.Base(UpgradeLogFile), // Only relative seems reliable, so set pwd
"/FORCECLOSEAPPLICATIONS", // Force close the tray app - might be needed
"/CLOSEAPPLICATIONS", // Quit the tray app if it's still running
"/LOG=" + filepath.Base(logFile), // Only relative seems reliable, so set pwd
"/FORCECLOSEAPPLICATIONS", // Force close the tray app - might be needed
}
// When we're not in debug mode, make the upgrade as quiet as possible (no GUI, no prompts)
// TODO - temporarily disable since we're pinning in debug mode for the preview
@@ -53,7 +59,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
}
slog.Debug(fmt.Sprintf("starting installer: %s %v", installerExe, installArgs))
os.Chdir(filepath.Dir(UpgradeLogFile)) //nolint:errcheck
os.Chdir(filepath.Dir(logFile)) //nolint:errcheck
cmd := exec.Command(installerExe, installArgs...)
if err := cmd.Start(); err != nil {

View File

@@ -92,12 +92,8 @@ Source: "..\dist\windows-amd64\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64
Source: "..\dist\windows-amd64\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
; Assumes v5.7, may need adjustments for v6
#if GetEnv("HIP_PATH") != ""
Source: "{#GetEnv('HIP_PATH')}\bin\hipblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
Source: "{#GetEnv('HIP_PATH')}\bin\rocblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
; amdhip64.dll dependency comes from the driver and must be installed already
Source: "{#GetEnv('HIP_PATH')}\bin\rocblas\library\*"; DestDir: "{app}\rocm\rocblas\library\"; Flags: ignoreversion
#if DirExists("..\dist\windows-amd64\rocm")
Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
#endif
@@ -133,7 +129,7 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi
;FinishedHeadingLabel=Run your first model
;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama2
;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama3
;ClickFinish=%n
[Registry]

View File

View File

@@ -17,6 +17,7 @@ import (
"os"
"os/signal"
"path/filepath"
"regexp"
"runtime"
"strings"
"syscall"
@@ -53,8 +54,6 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
p := progress.NewProgress(os.Stderr)
defer p.Stop()
bars := make(map[string]*progress.Bar)
modelfile, err := os.ReadFile(filename)
if err != nil {
return err
@@ -95,95 +94,16 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return err
}
// TODO make this work w/ adapters
if fi.IsDir() {
tf, err := os.CreateTemp("", "ollama-tf")
// this is likely a safetensors or pytorch directory
// TODO make this work w/ adapters
tempfile, err := tempZipFiles(path)
if err != nil {
return err
}
defer os.RemoveAll(tf.Name())
defer os.RemoveAll(tempfile)
zf := zip.NewWriter(tf)
files := []string{}
tfiles, err := filepath.Glob(filepath.Join(path, "pytorch_model-*.bin"))
if err != nil {
return err
} else if len(tfiles) == 0 {
tfiles, err = filepath.Glob(filepath.Join(path, "model-*.safetensors"))
if err != nil {
return err
}
}
files = append(files, tfiles...)
if len(files) == 0 {
return fmt.Errorf("no models were found in '%s'", path)
}
// add the safetensor/torch config file + tokenizer
files = append(files, filepath.Join(path, "config.json"))
files = append(files, filepath.Join(path, "params.json"))
files = append(files, filepath.Join(path, "added_tokens.json"))
files = append(files, filepath.Join(path, "tokenizer.model"))
for _, fn := range files {
f, err := os.Open(fn)
// just skip whatever files aren't there
if os.IsNotExist(err) {
if strings.HasSuffix(fn, "tokenizer.model") {
// try the parent dir before giving up
parentDir := filepath.Dir(path)
newFn := filepath.Join(parentDir, "tokenizer.model")
f, err = os.Open(newFn)
if os.IsNotExist(err) {
continue
} else if err != nil {
return err
}
} else {
continue
}
} else if err != nil {
return err
}
fi, err := f.Stat()
if err != nil {
return err
}
h, err := zip.FileInfoHeader(fi)
if err != nil {
return err
}
h.Name = filepath.Base(fn)
h.Method = zip.Store
w, err := zf.CreateHeader(h)
if err != nil {
return err
}
_, err = io.Copy(w, f)
if err != nil {
return err
}
}
if err := zf.Close(); err != nil {
return err
}
if err := tf.Close(); err != nil {
return err
}
path = tf.Name()
path = tempfile
}
digest, err := createBlob(cmd, client, path)
@@ -191,10 +111,17 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return err
}
modelfile = bytes.ReplaceAll(modelfile, []byte(c.Args), []byte("@"+digest))
name := c.Name
if c.Name == "model" {
name = "from"
}
re := regexp.MustCompile(fmt.Sprintf(`(?im)^(%s)\s+%s\s*$`, name, c.Args))
modelfile = re.ReplaceAll(modelfile, []byte("$1 @"+digest))
}
}
bars := make(map[string]*progress.Bar)
fn := func(resp api.ProgressResponse) error {
if resp.Digest != "" {
spinner.Stop()
@@ -228,6 +155,114 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return nil
}
// tempZipFiles gathers the model files found in the directory path and writes
// them into a zip archive stored in a newly created temporary file.
//
// Exactly one family of weight files is picked, in this order of preference:
// model*.safetensors, pytorch_model*.bin, consolidated*.pth. Every *.json file
// in path is added as well, plus tokenizer.model when one is found. Candidate
// files are sniffed with http.DetectContentType; a weight or tokenizer family
// containing a file of an unexpected type (for example an unresolved git lfs
// pointer, which is plain text) is skipped as a whole, whereas a *.json file
// of an unexpected type is reported as an error.
//
// On success it returns the name of the temporary file; the caller is
// responsible for removing it. On failure the temporary file is removed and a
// non-nil error is returned.
func tempZipFiles(path string) (_ string, err error) {
	tempfile, err := os.CreateTemp("", "ollama-tf")
	if err != nil {
		return "", err
	}
	// Never leave a partially written archive behind when something fails.
	defer func() {
		if err != nil {
			_ = tempfile.Close()
			_ = os.Remove(tempfile.Name())
		}
	}()

	// detectContentType sniffs the media type of a file from its first 512
	// bytes and strips any parameters such as "; charset=utf-8".
	detectContentType := func(path string) (string, error) {
		f, err := os.Open(path)
		if err != nil {
			return "", err
		}
		defer f.Close()

		var b bytes.Buffer
		b.Grow(512)

		// Files shorter than 512 bytes end with io.EOF, which is not a failure.
		if _, err := io.CopyN(&b, f, 512); err != nil && !errors.Is(err, io.EOF) {
			return "", err
		}

		contentType, _, _ := strings.Cut(http.DetectContentType(b.Bytes()), ";")
		return contentType, nil
	}

	// glob returns the files matching pattern, or an error if any of them does
	// not have the expected content type.
	glob := func(pattern, contentType string) ([]string, error) {
		matches, err := filepath.Glob(pattern)
		if err != nil {
			return nil, err
		}

		for _, match := range matches {
			ct, err := detectContentType(match)
			if err != nil {
				return nil, err
			}
			if ct != contentType {
				return nil, fmt.Errorf("invalid content type: expected %s, got %s for %s", contentType, ct, match)
			}
		}

		return matches, nil
	}

	// Weight file families in order of preference. Errors are deliberately
	// ignored here: a family that fails validation is skipped and the next one
	// is tried.
	weights := []struct{ pattern, contentType string }{
		// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
		{"model*.safetensors", "application/octet-stream"},
		// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
		{"pytorch_model*.bin", "application/zip"},
		// covers consolidated.x.pth, consolidated.pth
		{"consolidated*.pth", "application/octet-stream"},
	}

	var files []string
	for _, w := range weights {
		if matches, _ := glob(filepath.Join(path, w.pattern), w.contentType); len(matches) > 0 {
			files = append(files, matches...)
			break
		}
	}
	if len(files) == 0 {
		return "", errors.New("no safetensors or torch files found")
	}

	// add configuration files, json files are detected as text/plain
	js, err := glob(filepath.Join(path, "*.json"), "text/plain")
	if err != nil {
		return "", err
	}
	files = append(files, js...)

	// Add tokenizer.model if it exists; tokenizer.json is already picked up by
	// the *.json glob above. A tokenizer.model of an unexpected type is skipped.
	if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
		files = append(files, tks...)
	} else if tks, _ := glob(filepath.Join(path, "**/tokenizer.model"), "text/plain"); len(tks) > 0 {
		// some times tokenizer.model is in a subdirectory (e.g. meta-llama/Meta-Llama-3-8B)
		files = append(files, tks...)
	}

	zipfile := zip.NewWriter(tempfile)

	// addFile copies one file into the archive. It is a separate function so
	// that each source file is closed as soon as it has been copied instead of
	// staying open until tempZipFiles returns.
	addFile := func(file string) error {
		f, err := os.Open(file)
		if err != nil {
			return err
		}
		defer f.Close()

		fi, err := f.Stat()
		if err != nil {
			return err
		}

		zfi, err := zip.FileInfoHeader(fi)
		if err != nil {
			return err
		}

		zf, err := zipfile.CreateHeader(zfi)
		if err != nil {
			return err
		}

		_, err = io.Copy(zf, f)
		return err
	}

	for _, file := range files {
		if err := addFile(file); err != nil {
			return "", err
		}
	}

	// Closing the zip writer writes the central directory; a failure here
	// means the archive is unusable, so it must not be swallowed.
	if err := zipfile.Close(); err != nil {
		return "", err
	}

	if err := tempfile.Close(); err != nil {
		return "", err
	}

	return tempfile.Name(), nil
}
func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
bin, err := os.Open(path)
if err != nil {

View File

@@ -14,7 +14,7 @@ As this is a preview release, you should expect a few bugs here and there. If
you run into a problem you can reach out on
[Discord](https://discord.gg/ollama), or file an
[issue](https://github.com/ollama/ollama/issues).
Logs will often be helpful in dianosing the problem (see
Logs will often be helpful in diagnosing the problem (see
[Troubleshooting](#troubleshooting) below)
## System Requirements

View File

@@ -32,9 +32,25 @@ func PayloadsDir() (string, error) {
slog.Error("failed to lookup executable path", "error", err)
return "", err
}
cwd, err := os.Getwd()
if err != nil {
slog.Error("failed to lookup working directory", "error", err)
return "", err
}
var paths []string
for _, root := range []string{appExe, cwd} {
paths = append(paths,
filepath.Join(root),
filepath.Join(root, "windows-"+runtime.GOARCH),
filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
)
}
// Try a few variations to improve developer experience when building from source in the local tree
for _, d := range []string{".", "windows-" + runtime.GOARCH, "dist\\windows-" + runtime.GOARCH} {
candidate := filepath.Join(filepath.Dir(appExe), d, "ollama_runners")
for _, p := range paths {
candidate := filepath.Join(p, "ollama_runners")
_, err := os.Stat(candidate)
if err == nil {
runnersDir = candidate

View File

@@ -1,5 +1,3 @@
//go:build darwin
package gpu
/*

View File

@@ -21,7 +21,7 @@ init_vars() {
# TODO - add additional optimization flags...
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off ${CMAKE_DEFS}"
fi
case $(uname -s) in
case $(uname -s) in
"Darwin")
LIB_EXT="dylib"
WHOLE_ARCHIVE="-Wl,-force_load"

View File

@@ -165,11 +165,11 @@ if [ -d "${CUDA_LIB_DIR}" ]; then
fi
if [ "${ARCH}" == "arm64" ]; then
echo "ARM CPU detected - disabling unsupported AVX instructions"
# ARM-based CPUs such as M1 and Tegra do not support AVX extensions.
#
# CUDA compute < 6.0 lacks proper FP16 support on ARM.
# Disabling has minimal performance effect while maintaining compatibility.
# CUDA compute < 6.0 lacks proper FP16 support on ARM.
# Disabling has minimal performance effect while maintaining compatibility.
ARM64_DEFS="-DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_CUDA_F16=off"
fi
# Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp

View File

@@ -26,16 +26,25 @@ function amdGPUs {
$GPU_LIST -join ';'
}
function init_vars {
$script:SRC_DIR = $(resolve-path "..\..\")
$script:llamacppDir = "../llama.cpp"
if (!$script:SRC_DIR) {
$script:SRC_DIR = $(resolve-path "..\..\")
}
if (!$script:llamacppDir) {
$script:llamacppDir = "../llama.cpp"
}
if (!$script:cmakeTargets) {
$script:cmakeTargets = @("ollama_llama_server")
}
$script:cmakeDefs = @(
"-DBUILD_SHARED_LIBS=on",
"-DLLAMA_NATIVE=off"
)
$script:cmakeTargets = @("ollama_llama_server")
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
$script:ARCH = "amd64" # arm not yet supported.
$script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_runners"
md "$script:DIST_BASE" -ea 0 > $null
if ($env:CGO_CFLAGS -contains "-g") {
$script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
$script:config = "RelWithDebInfo"
@@ -166,137 +175,191 @@ function cleanup {
}
}
init_vars
git_module_setup
apply_patches
# -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
# -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
# -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
if ($null -eq ${env:OLLAMA_SKIP_CPU_GENERATE}) {
# build_static builds the static "llama" and "ggml" libraries with MinGW/GCC so
# they can be linked directly into the Go binary.
#
# The step runs unless OLLAMA_SKIP_STATIC_GENERATE is set, or OLLAMA_CPU_TARGET
# is set to something other than "static". The previous $script:cmakeTargets
# value is restored after the build.
function build_static() {
    if ((-not "${env:OLLAMA_SKIP_STATIC_GENERATE}") -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "static"))) {
        # GCC build for direct linking into the Go binary
        init_vars
        # cmake will silently fallback to msvc compilers if mingw isn't in the path, so detect and fail fast
        # as we need this to be compiled by gcc for golang to be able to link with it
        write-host "Checking for MinGW..."
        # error action ensures we exit on failure
        get-command gcc
        get-command mingw32-make
        # Remember the current targets so they can be restored once the static build is done
        $oldTargets = $script:cmakeTargets
        $script:cmakeTargets = @("llama", "ggml")
        $script:cmakeDefs = @(
            "-G", "MinGW Makefiles",
            "-DCMAKE_C_COMPILER=gcc.exe",
            "-DCMAKE_CXX_COMPILER=g++.exe",
            "-DBUILD_SHARED_LIBS=off",
            "-DLLAMA_NATIVE=off",
            "-DLLAMA_AVX=off",
            "-DLLAMA_AVX2=off",
            "-DLLAMA_AVX512=off",
            "-DLLAMA_F16C=off",
            "-DLLAMA_FMA=off")
        $script:buildDir="../build/windows/${script:ARCH}_static"
        write-host "Building static library"
        build
        $script:cmakeTargets = $oldTargets
    } else {
        # Report the step that is actually being skipped (this is the static build, not the CPU runner)
        write-host "Skipping static generation step as requested"
    }
}
# build_cpu builds, signs and installs the baseline CPU runner with every
# optional vector extension (AVX, AVX2, AVX512, FMA, F16C) switched off.
# It is skipped when OLLAMA_SKIP_CPU_GENERATE is set, or when
# OLLAMA_CPU_TARGET is set to anything other than "cpu".
function build_cpu() {
    if ("${env:OLLAMA_SKIP_CPU_GENERATE}" -or (("${env:OLLAMA_CPU_TARGET}") -and ("${env:OLLAMA_CPU_TARGET}" -ne "cpu"))) {
        write-host "Skipping CPU generation step as requested"
        return
    }

    # remaining llama.cpp builds use MSVC
    init_vars
    $script:cmakeDefs = $script:commonCpuDefs + @(
        "-A", "x64",
        "-DLLAMA_AVX=off",
        "-DLLAMA_AVX2=off",
        "-DLLAMA_AVX512=off",
        "-DLLAMA_FMA=off",
        "-DLLAMA_F16C=off"
    ) + $script:cmakeDefs
    $script:buildDir = "../build/windows/${script:ARCH}/cpu"
    $script:distDir = "$script:DIST_BASE\cpu"

    write-host "Building LCD CPU"
    build
    sign
    install
}
# build_cpu_avx builds, signs and installs the CPU runner with AVX enabled and
# AVX2, AVX512, FMA and F16C disabled.
# It is skipped when OLLAMA_SKIP_CPU_GENERATE is set, or when
# OLLAMA_CPU_TARGET is set to anything other than "cpu_avx".
function build_cpu_avx() {
    if ("${env:OLLAMA_SKIP_CPU_GENERATE}" -or (("${env:OLLAMA_CPU_TARGET}") -and ("${env:OLLAMA_CPU_TARGET}" -ne "cpu_avx"))) {
        write-host "Skipping CPU AVX generation step as requested"
        return
    }

    init_vars
    $script:cmakeDefs = $script:commonCpuDefs + @(
        "-A", "x64",
        "-DLLAMA_AVX=on",
        "-DLLAMA_AVX2=off",
        "-DLLAMA_AVX512=off",
        "-DLLAMA_FMA=off",
        "-DLLAMA_F16C=off"
    ) + $script:cmakeDefs
    $script:buildDir = "../build/windows/${script:ARCH}/cpu_avx"
    $script:distDir = "$script:DIST_BASE\cpu_avx"

    write-host "Building AVX CPU"
    build
    sign
    install
}
# build_cpu_avx2 builds, signs and installs the CPU runner with AVX, AVX2, FMA
# and F16C enabled and AVX512 disabled.
# It is skipped when OLLAMA_SKIP_CPU_GENERATE is set, or when
# OLLAMA_CPU_TARGET is set to anything other than "cpu_avx2".
function build_cpu_avx2() {
    if ("${env:OLLAMA_SKIP_CPU_GENERATE}" -or (("${env:OLLAMA_CPU_TARGET}") -and ("${env:OLLAMA_CPU_TARGET}" -ne "cpu_avx2"))) {
        write-host "Skipping CPU AVX2 generation step as requested"
        return
    }

    init_vars
    $script:cmakeDefs = $script:commonCpuDefs + @(
        "-A", "x64",
        "-DLLAMA_AVX=on",
        "-DLLAMA_AVX2=on",
        "-DLLAMA_AVX512=off",
        "-DLLAMA_FMA=on",
        "-DLLAMA_F16C=on"
    ) + $script:cmakeDefs
    $script:buildDir = "../build/windows/${script:ARCH}/cpu_avx2"
    $script:distDir = "$script:DIST_BASE\cpu_avx2"

    write-host "Building AVX2 CPU"
    build
    sign
    install
}
# build_cuda builds, signs and installs the CUDA runner, then copies the CUDA
# runtime DLLs (cudart, cublas, cublasLt) into the dist\windows-<arch> directory.
# The step is skipped when OLLAMA_SKIP_CUDA_GENERATE is set or when
# $script:CUDA_LIB_DIR is empty.
function build_cuda() {
if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${script:CUDA_LIB_DIR}")) {
# Then build cuda as a dynamically loaded library
$nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
# The version label is the name of the directory that contains CUDA_LIB_DIR
$script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
if ($null -ne $script:CUDA_VERSION) {
$script:CUDA_VARIANT="_"+$script:CUDA_VERSION
}
init_vars
# Build and dist directories carry the variant suffix (e.g. cuda_<version>)
$script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
$script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT"
$script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUDA=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
# Optional extra cmake definitions supplied by the person building from source
if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
$script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
write-host "building custom CUDA GPU"
}
build
sign
install
# Ship the CUDA runtime libraries alongside the distribution binaries
write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\"
cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\"
} else {
write-host "Skipping CUDA generation step"
}
}
# build_rocm builds, signs and installs the ROCm (AMD GPU) runner using Ninja
# and clang, then stages hipblas.dll, rocblas.dll and the rocblas library
# directory under dist\windows-<arch>\rocm.
# The step is skipped when OLLAMA_SKIP_ROCM_GENERATE is set or HIP_PATH is empty.
# Note that it modifies $env:PATH and clears $env:LIB for the rest of the session.
function build_rocm() {
if ((-not "${env:OLLAMA_SKIP_ROCM_GENERATE}") -and ("${env:HIP_PATH}")) {
# The version label is the name of the HIP_PATH directory itself
$script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
if ($null -ne $script:ROCM_VERSION) {
$script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
}
init_vars
$script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
$script:distDir="$script:DIST_BASE\rocm$script:ROCM_VARIANT"
$script:cmakeDefs += @(
"-G", "Ninja",
"-DCMAKE_C_COMPILER=clang.exe",
"-DCMAKE_CXX_COMPILER=clang++.exe",
"-DLLAMA_HIPBLAS=on",
"-DHIP_PLATFORM=amd",
"-DLLAMA_AVX=on",
"-DLLAMA_AVX2=off",
"-DCMAKE_POSITION_INDEPENDENT_CODE=on",
"-DAMDGPU_TARGETS=$(amdGPUs)",
"-DGPU_TARGETS=$(amdGPUs)"
)
# Make sure the ROCm binary dir is first in the path
$env:PATH="$env:HIP_PATH\bin;$env:PATH"
# We have to clobber the LIB var from the developer shell for clang to work properly
$env:LIB=""
# Optional extra cmake definitions supplied by the person building from source
if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
$script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
write-host "building custom ROCM GPU"
}
write-host "Building ROCm"
build
# Ninja doesn't prefix with config name
${script:config}=""
# When dumpbin is available, list the DLLs the built server executable depends on
if ($null -ne $script:DUMPBIN) {
& "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
}
sign
install
# Assumes v5.7, may need adjustments for v6
# Start from a clean rocm staging directory before copying the libraries
rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
md "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" -ea 0 > $null
cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
# amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\"
} else {
write-host "Skipping ROCm generation step"
}
}
# GCC build for direct linking into the Go binary
init_vars
# cmake will silently fallback to msvc compilers if mingw isn't in the path, so detect and fail fast
# as we need this to be compiled by gcc for golang to be able to link with itx
write-host "Checking for MinGW..."
# error action ensures we exit on failure
get-command gcc
get-command mingw32-make
$script:cmakeTargets = @("llama", "ggml")
$script:cmakeDefs = @(
"-G", "MinGW Makefiles"
"-DCMAKE_C_COMPILER=gcc.exe",
"-DCMAKE_CXX_COMPILER=g++.exe",
"-DBUILD_SHARED_LIBS=off",
"-DLLAMA_NATIVE=off",
"-DLLAMA_AVX=off",
"-DLLAMA_AVX2=off",
"-DLLAMA_AVX512=off",
"-DLLAMA_F16C=off",
"-DLLAMA_FMA=off")
$script:buildDir="../build/windows/${script:ARCH}_static"
write-host "Building static library"
build
if ($($args.count) -eq 0) {
git_module_setup
apply_patches
build_static
build_cpu
build_cpu_avx
build_cpu_avx2
build_cuda
build_rocm
# remaining llama.cpp builds use MSVC
init_vars
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
$script:buildDir="../build/windows/${script:ARCH}/cpu"
$script:distDir="$script:DIST_BASE\cpu"
write-host "Building LCD CPU"
build
sign
install
init_vars
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
$script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
$script:distDir="$script:DIST_BASE\cpu_avx"
write-host "Building AVX CPU"
build
sign
install
init_vars
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
$script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
$script:distDir="$script:DIST_BASE\cpu_avx2"
write-host "Building AVX2 CPU"
build
sign
install
cleanup
write-host "`ngo generate completed. LLM runners: $(get-childitem -path $script:DIST_BASE)"
} else {
write-host "Skipping CPU generation step as requested"
}
if ($null -ne $script:CUDA_LIB_DIR) {
# Then build cuda as a dynamically loaded library
$nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
$script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
if ($null -ne $script:CUDA_VERSION) {
$script:CUDA_VARIANT="_"+$script:CUDA_VERSION
}
init_vars
$script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
$script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT"
$script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUDA=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
$script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
write-host "building custom CUDA GPU"
}
build
sign
install
}
if ($null -ne $env:HIP_PATH) {
$script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
if ($null -ne $script:ROCM_VERSION) {
$script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
}
init_vars
$script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
$script:distDir="$script:DIST_BASE\rocm$script:ROCM_VARIANT"
$script:cmakeDefs += @(
"-G", "Ninja",
"-DCMAKE_C_COMPILER=clang.exe",
"-DCMAKE_CXX_COMPILER=clang++.exe",
"-DLLAMA_HIPBLAS=on",
"-DHIP_PLATFORM=amd",
"-DLLAMA_AVX=on",
"-DLLAMA_AVX2=off",
"-DCMAKE_POSITION_INDEPENDENT_CODE=on",
"-DAMDGPU_TARGETS=$(amdGPUs)",
"-DGPU_TARGETS=$(amdGPUs)"
)
# Make sure the ROCm binary dir is first in the path
$env:PATH="$env:HIP_PATH\bin;$env:PATH"
# We have to clobber the LIB var from the developer shell for clang to work properly
$env:LIB=""
if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
$script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
write-host "building custom ROCM GPU"
}
write-host "Building ROCm"
build
# Ninja doesn't prefix with config name
${script:config}=""
if ($null -ne $script:DUMPBIN) {
& "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
}
sign
install
}
cleanup
write-host "`ngo generate completed. LLM runners: $(get-childitem -path $script:DIST_BASE)"
for ( $i = 0; $i -lt $args.count; $i++ ) {
write-host "performing $($args[$i])"
& $($args[$i])
}
}

View File

@@ -5,7 +5,6 @@ import (
"log/slog"
"os"
"strconv"
"strings"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/format"
@@ -100,8 +99,26 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
return 0, 0
}
var layerCount int
layers := ggml.Tensors().Layers()
var memoryLayerOutput uint64
if layer, ok := layers["output_norm"]; ok {
memoryLayerOutput += layer.size()
}
if layer, ok := layers["output"]; ok {
memoryLayerOutput += layer.size()
} else if layer, ok := layers["token_embd"]; ok {
memoryLayerOutput += layer.size()
}
if gpus[0].Library == "metal" && opts.UseMMap {
// memory is preallocated for output tensors
memoryRequiredTotal += memoryLayerOutput
memoryRequiredPartial += memoryLayerOutput
}
var layerCount int
for i := 0; i < int(ggml.KV().BlockCount()); i++ {
memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size()
@@ -115,15 +132,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
}
}
var memoryLayerOutput uint64
for k, v := range layers {
if !strings.HasPrefix(k, "blk.") {
memoryLayerOutput += v.size()
}
if gpus[0].Library != "metal" || !opts.UseMMap {
// memory was not preallocated for output tensors
memoryRequiredTotal += memoryLayerOutput
}
memoryRequiredTotal += memoryLayerOutput
if memoryAvailable > memoryRequiredTotal {
layerCount = int(ggml.KV().BlockCount()) + 1
memoryRequiredPartial = memoryRequiredTotal

View File

@@ -0,0 +1,12 @@
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index e431c7f7..f077e688 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -3,6 +3,7 @@
// I'll gradually clean and extend it
// Note: Even when using identical normalized image inputs (see normalize_image_u8_to_f32()) we have a significant difference in resulting embeddings compared to pytorch
#include "clip.h"
+#include "common.h"
#include "log.h"
#include "ggml.h"
#include "ggml-alloc.h"

45
llm/patches/04-metal.diff Normal file
View File

@@ -0,0 +1,45 @@
diff --git a/ggml-metal.m b/ggml-metal.m
index 0207b787..b5e9884b 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -1396,27 +1396,23 @@ static enum ggml_status ggml_metal_graph_compute(
// to the matrix-vector kernel
int ne11_mm_min = 1;
-#if 0
// the numbers below are measured on M2 Ultra for 7B and 13B models
// these numbers do not translate to other devices or model sizes
// TODO: need to find a better approach
- if ([ctx->device.name isEqualToString:@"Apple M2 Ultra"]) {
- switch (src0t) {
- case GGML_TYPE_F16: ne11_mm_min = 2; break;
- case GGML_TYPE_Q8_0: ne11_mm_min = 7; break;
- case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
- case GGML_TYPE_Q3_K: ne11_mm_min = 7; break;
- case GGML_TYPE_Q4_0:
- case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
- case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
- case GGML_TYPE_Q5_0: // not tested yet
- case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
- case GGML_TYPE_Q5_K: ne11_mm_min = 7; break;
- case GGML_TYPE_Q6_K: ne11_mm_min = 7; break;
- default: ne11_mm_min = 1; break;
- }
+ switch (src0t) {
+ case GGML_TYPE_F16: ne11_mm_min = 2; break;
+ case GGML_TYPE_Q8_0: ne11_mm_min = 7; break;
+ case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
+ case GGML_TYPE_Q3_K: ne11_mm_min = 7; break;
+ case GGML_TYPE_Q4_0:
+ case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
+ case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
+ case GGML_TYPE_Q5_0: // not tested yet
+ case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
+ case GGML_TYPE_Q5_K: ne11_mm_min = 7; break;
+ case GGML_TYPE_Q6_K: ne11_mm_min = 7; break;
+ default: ne11_mm_min = 1; break;
}
-#endif
// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
// AMD GPU and older A-chips will reuse matrix-vector multiplication kernel

View File

@@ -442,7 +442,7 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
select {
case <-ctx.Done():
slog.Info("context expired before server started")
return fmt.Errorf("timed out waiting for llama runner to start")
return fmt.Errorf("timed out waiting for llama runner to start: %w", ctx.Err())
case err := <-s.done:
msg := ""
if s.status != nil && s.status.LastErrMsg != "" {
@@ -560,6 +560,13 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
return err
}
defer s.sem.Release(1)
// only allow maximum 10 "context shifts" to avoid infinite generation
if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
req.Options.NumPredict = 10 * s.options.NumCtx
slog.Debug("setting token limit to 10x num_ctx", "num_ctx", s.options.NumCtx, "num_predict", req.Options.NumPredict)
}
request := map[string]any{
"prompt": req.Prompt,
"stream": true,

View File

@@ -1,16 +0,0 @@
{
"env": {
"browser": true,
"es6": true,
"node": true
},
"extends": [
"eslint:recommended",
"plugin:@typescript-eslint/eslint-recommended",
"plugin:@typescript-eslint/recommended",
"plugin:import/recommended",
"plugin:import/electron",
"plugin:import/typescript"
],
"parser": "@typescript-eslint/parser"
}

92
macapp/.gitignore vendored
View File

@@ -1,92 +0,0 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# Runtime data
pids
*.pid
*.seed
*.pid.lock
.DS_Store
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# TypeScript v1 declaration files
typings/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variables file
.env
.env.test
# parcel-bundler cache (https://parceljs.org/)
.cache
# next.js build output
.next
# nuxt.js build output
.nuxt
# vuepress build output
.vuepress/dist
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# Webpack
.webpack/
# Vite
.vite/
# Electron-Forge
out/

View File

@@ -1,21 +0,0 @@
# Desktop
This app builds upon Ollama to provide a desktop experience for running models.
## Developing
First, build the `ollama` binary:
```
cd ..
go build .
```
Then run the desktop app with `npm start`:
```
cd macapp
npm install
npm start
```

View File

Binary file not shown.

Before

Width:  |  Height:  |  Size: 402 B

View File

Binary file not shown.

Before

Width:  |  Height:  |  Size: 741 B

View File

Binary file not shown.

Before

Width:  |  Height:  |  Size: 440 B

View File

Binary file not shown.

Before

Width:  |  Height:  |  Size: 763 B

View File

Binary file not shown.

Before

Width:  |  Height:  |  Size: 447 B

View File

Binary file not shown.

Before

Width:  |  Height:  |  Size: 891 B

View File

Binary file not shown.

Before

Width:  |  Height:  |  Size: 443 B

View File

Binary file not shown.

Before

Width:  |  Height:  |  Size: 844 B

View File

@@ -1,78 +0,0 @@
import type { ForgeConfig } from '@electron-forge/shared-types'
import { MakerSquirrel } from '@electron-forge/maker-squirrel'
import { MakerZIP } from '@electron-forge/maker-zip'
import { PublisherGithub } from '@electron-forge/publisher-github'
import { AutoUnpackNativesPlugin } from '@electron-forge/plugin-auto-unpack-natives'
import { WebpackPlugin } from '@electron-forge/plugin-webpack'
import * as path from 'path'
import * as fs from 'fs'
import { mainConfig } from './webpack.main.config'
import { rendererConfig } from './webpack.renderer.config'
const packageJson = JSON.parse(fs.readFileSync(path.resolve(__dirname, './package.json'), 'utf8'))
const config: ForgeConfig = {
packagerConfig: {
appVersion: process.env.VERSION || packageJson.version,
asar: true,
icon: './assets/icon.icns',
extraResource: [
'../dist/ollama',
path.join(__dirname, './assets/iconTemplate.png'),
path.join(__dirname, './assets/iconTemplate@2x.png'),
path.join(__dirname, './assets/iconUpdateTemplate.png'),
path.join(__dirname, './assets/iconUpdateTemplate@2x.png'),
path.join(__dirname, './assets/iconDarkTemplate.png'),
path.join(__dirname, './assets/iconDarkTemplate@2x.png'),
path.join(__dirname, './assets/iconDarkUpdateTemplate.png'),
path.join(__dirname, './assets/iconDarkUpdateTemplate@2x.png'),
],
...(process.env.SIGN
? {
osxSign: {
identity: process.env.APPLE_IDENTITY,
},
osxNotarize: {
tool: 'notarytool',
appleId: process.env.APPLE_ID || '',
appleIdPassword: process.env.APPLE_PASSWORD || '',
teamId: process.env.APPLE_TEAM_ID || '',
},
}
: {}),
osxUniversal: {
x64ArchFiles: '**/ollama',
},
},
rebuildConfig: {},
makers: [new MakerSquirrel({}), new MakerZIP({}, ['darwin'])],
hooks: {
readPackageJson: async (_, packageJson) => {
return { ...packageJson, version: process.env.VERSION || packageJson.version }
},
},
plugins: [
new AutoUnpackNativesPlugin({}),
new WebpackPlugin({
mainConfig,
devContentSecurityPolicy: `default-src * 'unsafe-eval' 'unsafe-inline'; img-src data: 'self'`,
renderer: {
config: rendererConfig,
nodeIntegration: true,
entryPoints: [
{
html: './src/index.html',
js: './src/renderer.tsx',
name: 'main_window',
preload: {
js: './src/preload.ts',
},
},
],
},
}),
],
}
export default config

16695
macapp/package-lock.json generated
View File

File diff suppressed because it is too large Load Diff

View File

@@ -1,84 +0,0 @@
{
"name": "ollama",
"productName": "Ollama",
"version": "0.0.0",
"description": "ollama",
"main": ".webpack/main",
"scripts": {
"start": "electron-forge start",
"package": "electron-forge package --arch universal",
"package:sign": "SIGN=1 electron-forge package --arch universal",
"make": "electron-forge make --arch universal",
"make:sign": "SIGN=1 electron-forge make --arch universal",
"publish": "SIGN=1 electron-forge publish",
"lint": "eslint --ext .ts,.tsx .",
"format": "prettier --check . --ignore-path .gitignore",
"format:fix": "prettier --write . --ignore-path .gitignore"
},
"keywords": [],
"author": {
"name": "Jeffrey Morgan",
"email": "jmorganca@gmail.com"
},
"license": "MIT",
"devDependencies": {
"@babel/core": "^7.22.5",
"@babel/preset-react": "^7.22.5",
"@electron-forge/cli": "^6.2.1",
"@electron-forge/maker-deb": "^6.2.1",
"@electron-forge/maker-rpm": "^6.2.1",
"@electron-forge/maker-squirrel": "^6.2.1",
"@electron-forge/maker-zip": "^6.2.1",
"@electron-forge/plugin-auto-unpack-natives": "^6.2.1",
"@electron-forge/plugin-webpack": "^6.2.1",
"@electron-forge/publisher-github": "^6.2.1",
"@electron/universal": "^1.4.1",
"@svgr/webpack": "^8.0.1",
"@types/chmodr": "^1.0.0",
"@types/node": "^20.4.0",
"@types/react": "^18.2.14",
"@types/react-dom": "^18.2.6",
"@types/uuid": "^9.0.2",
"@typescript-eslint/eslint-plugin": "^5.60.0",
"@typescript-eslint/parser": "^5.60.0",
"@vercel/webpack-asset-relocator-loader": "^1.7.3",
"babel-loader": "^9.1.2",
"chmodr": "^1.2.0",
"copy-webpack-plugin": "^11.0.0",
"css-loader": "^6.8.1",
"electron": "25.9.2",
"eslint": "^8.43.0",
"eslint-plugin-import": "^2.27.5",
"fork-ts-checker-webpack-plugin": "^7.3.0",
"node-loader": "^2.0.0",
"postcss": "^8.4.24",
"postcss-import": "^15.1.0",
"postcss-loader": "^7.3.3",
"postcss-preset-env": "^8.5.1",
"prettier": "^2.8.8",
"prettier-plugin-tailwindcss": "^0.3.0",
"style-loader": "^3.3.3",
"svg-inline-loader": "^0.8.2",
"tailwindcss": "^3.3.2",
"ts-loader": "^9.4.3",
"ts-node": "^10.9.1",
"typescript": "~4.5.4",
"url-loader": "^4.1.1",
"webpack": "^5.88.0",
"webpack-cli": "^5.1.4",
"webpack-dev-server": "^4.15.1"
},
"dependencies": {
"@electron/remote": "^2.0.10",
"@heroicons/react": "^2.0.18",
"@segment/analytics-node": "^1.0.0",
"copy-to-clipboard": "^3.3.3",
"electron-squirrel-startup": "^1.0.0",
"electron-store": "^8.1.0",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"uuid": "^9.0.0",
"winston": "^3.10.0",
"winston-daily-rotate-file": "^4.7.1"
}
}

View File

@@ -1,7 +0,0 @@
module.exports = {
plugins: {
'postcss-import': {},
tailwindcss: {},
autoprefixer: {},
},
}

View File

@@ -1,34 +0,0 @@
@tailwind base;
@tailwind components;
@tailwind utilities;
html,
body {
background: transparent;
}
.drag {
-webkit-app-region: drag;
}
.no-drag {
-webkit-app-region: no-drag;
}
.blink {
-webkit-animation: 1s blink step-end infinite;
-moz-animation: 1s blink step-end infinite;
-ms-animation: 1s blink step-end infinite;
-o-animation: 1s blink step-end infinite;
animation: 1s blink step-end infinite;
}
@keyframes blink {
from,
to {
color: transparent;
}
50% {
color: black;
}
}

View File

@@ -1,122 +0,0 @@
import { useState } from 'react'
import copy from 'copy-to-clipboard'
import { CheckIcon, DocumentDuplicateIcon } from '@heroicons/react/24/outline'
import Store from 'electron-store'
import { getCurrentWindow, app } from '@electron/remote'
import { install } from './install'
import OllamaIcon from './ollama.svg'
const store = new Store()
enum Step {
WELCOME = 0,
CLI,
FINISH,
}
// Welcome flow rendered in the first-run window: a three-step wizard
// (Step.WELCOME -> Step.CLI -> Step.FINISH) that installs the command line
// tool and then shows the command for running a first model.
export default function () {
// Current wizard step, and whether the command was just copied to the clipboard.
const [step, setStep] = useState<Step>(Step.WELCOME)
const [commandCopied, setCommandCopied] = useState<boolean>(false)
// Command displayed (and copied) on the final step.
const command = 'ollama run llama2'
return (
<div className='drag'>
<div className='mx-auto flex min-h-screen w-full flex-col justify-between bg-white px-4 pt-16'>
{step === Step.WELCOME && (
<>
<div className='mx-auto text-center'>
<h1 className='mb-6 mt-4 text-2xl tracking-tight text-gray-900'>Welcome to Ollama</h1>
<p className='mx-auto w-[65%] text-sm text-gray-400'>
Let's get you up and running with your own large language models.
</p>
<button
onClick={() => setStep(Step.CLI)}
className='no-drag rounded-dm mx-auto my-8 w-[40%] rounded-md bg-black px-4 py-2 text-sm text-white hover:brightness-110'
>
Next
</button>
</div>
<div className='mx-auto'>
<OllamaIcon />
</div>
</>
)}
{step === Step.CLI && (
<>
<div className='mx-auto flex flex-col space-y-28 text-center'>
<h1 className='mt-4 text-2xl tracking-tight text-gray-900'>Install the command line</h1>
<pre className='mx-auto text-4xl text-gray-400'>&gt; ollama</pre>
<div className='mx-auto'>
<button
onClick={async () => {
// Run the CLI install; only advance to the final step when it succeeds.
try {
await install()
setStep(Step.FINISH)
} catch (e) {
console.error('could not install: ', e)
} finally {
// Show and focus this window again whether or not the install succeeded.
getCurrentWindow().show()
getCurrentWindow().focus()
}
}}
className='no-drag rounded-dm mx-auto w-[60%] rounded-md bg-black px-4 py-2 text-sm text-white hover:brightness-110'
>
Install
</button>
<p className='mx-auto my-4 w-[70%] text-xs text-gray-400'>
You will be prompted for administrator access
</p>
</div>
</div>
</>
)}
{step === Step.FINISH && (
<>
<div className='mx-auto flex flex-col space-y-20 text-center'>
<h1 className='mt-4 text-2xl tracking-tight text-gray-900'>Run your first model</h1>
<div className='flex flex-col'>
<div className='group relative flex items-center'>
<pre className='language-none text-2xs w-full rounded-md bg-gray-100 px-4 py-3 text-start leading-normal'>
{command}
</pre>
<button
className={`no-drag absolute right-[5px] px-2 py-2 ${
commandCopied
? 'text-gray-900 opacity-100 hover:cursor-auto'
: 'text-gray-200 opacity-50 hover:cursor-pointer'
} hover:font-bold hover:text-gray-900 group-hover:opacity-100`}
onClick={() => {
// Copy the command and show the check icon for three seconds.
copy(command)
setCommandCopied(true)
setTimeout(() => setCommandCopied(false), 3000)
}}
>
{commandCopied ? (
<CheckIcon className='h-4 w-4 font-bold text-gray-500' />
) : (
<DocumentDuplicateIcon className='h-4 w-4 text-gray-500' />
)}
</button>
</div>
<p className='mx-auto my-4 w-[70%] text-xs text-gray-400'>
Run this command in your favorite terminal.
</p>
</div>
<button
onClick={() => {
// Persist the 'first-time-run' flag in the store, then close the window.
store.set('first-time-run', true)
window.close()
}}
className='no-drag rounded-dm mx-auto w-[60%] rounded-md bg-black px-4 py-2 text-sm text-white hover:brightness-110'
>
Finish
</button>
</div>
</>
)}
</div>
</div>
)
}

View File

@@ -1,4 +0,0 @@
declare module '*.svg' {
const content: string
export default content
}

View File

@@ -1,9 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8" />
</head>
<body>
<div id="app"></div>
</body>
</html>

View File

@@ -1,302 +0,0 @@
import { spawn, ChildProcess } from 'child_process'
import { app, autoUpdater, dialog, Tray, Menu, BrowserWindow, MenuItemConstructorOptions, nativeTheme } from 'electron'
import Store from 'electron-store'
import winston from 'winston'
import 'winston-daily-rotate-file'
import * as path from 'path'
import { v4 as uuidv4 } from 'uuid'
import { installed } from './install'
require('@electron/remote/main').initialize()
if (require('electron-squirrel-startup')) {
app.quit()
}
const store = new Store()
let welcomeWindow: BrowserWindow | null = null
declare const MAIN_WINDOW_WEBPACK_ENTRY: string
const logger = winston.createLogger({
transports: [
new winston.transports.Console(),
new winston.transports.File({
filename: path.join(app.getPath('home'), '.ollama', 'logs', 'server.log'),
maxsize: 1024 * 1024 * 20,
maxFiles: 5,
}),
],
format: winston.format.printf(info => info.message),
})
// Once Electron is ready, claim the single-instance lock and start the app.
app.on('ready', () => {
// If the lock could not be acquired, exit right away.
const gotTheLock = app.requestSingleInstanceLock()
if (!gotTheLock) {
app.exit(0)
return
}
// On 'second-instance', this instance releases the lock, stops the server
// child process and exits.
app.on('second-instance', () => {
if (app.hasSingleInstanceLock()) {
app.releaseSingleInstanceLock()
}
if (proc) {
// Remove the 'exit' listener first so killing the server does not schedule a restart.
proc.off('exit', restart)
proc.kill()
}
app.exit(0)
})
app.focus({ steal: true })
init()
})
/**
 * Creates the small, frameless welcome window, stores it in `welcomeWindow`,
 * and loads the webpack-built renderer entry into it.
 *
 * The window starts hidden and is shown once it reports 'ready-to-show'.
 * When it is closed on macOS, the dock icon is hidden.
 */
function firstRunWindow() {
  const remoteMain = require('@electron/remote/main')

  welcomeWindow = new BrowserWindow({
    width: 400,
    height: 500,
    frame: false,
    fullscreenable: false,
    resizable: false,
    movable: true,
    show: false,
    webPreferences: {
      nodeIntegration: true,
      contextIsolation: false,
    },
  })

  // Allow the renderer to use @electron/remote (app.tsx relies on it).
  remoteMain.enable(welcomeWindow.webContents)
  welcomeWindow.loadURL(MAIN_WINDOW_WEBPACK_ENTRY)

  welcomeWindow.on('ready-to-show', () => {
    welcomeWindow.show()
  })

  welcomeWindow.on('closed', () => {
    if (process.platform !== 'darwin') {
      return
    }
    app.dock.hide()
  })
}
let tray: Tray | null = null
let updateAvailable = false
const assetPath = app.isPackaged ? process.resourcesPath : path.join(__dirname, '..', '..', 'assets')
/**
 * Returns the path of the tray icon matching the current state: dark or light
 * variant depending on `nativeTheme.shouldUseDarkColors`, and the "update"
 * variant when `updateAvailable` is set.
 */
function trayIconPath() {
  const dark = nativeTheme.shouldUseDarkColors

  if (dark && updateAvailable) {
    return path.join(assetPath, 'iconDarkUpdateTemplate.png')
  }
  if (dark) {
    return path.join(assetPath, 'iconDarkTemplate.png')
  }
  if (updateAvailable) {
    return path.join(assetPath, 'iconUpdateTemplate.png')
  }
  return path.join(assetPath, 'iconTemplate.png')
}
/** Refreshes the tray image from the current state; does nothing if no tray exists yet. */
function updateTrayIcon() {
  tray?.setImage(trayIconPath())
}
/**
 * Builds (or rebuilds) the tray: context menu, tooltip and icon.
 *
 * When `updateAvailable` is set, the menu is prefixed with an update notice
 * and a "Restart to update" action. The tray itself is created on first call.
 */
function updateTray() {
  const template: MenuItemConstructorOptions[] = []

  if (updateAvailable) {
    template.push(
      { label: 'An update is available', enabled: false },
      {
        label: 'Restart to update',
        click: () => autoUpdater.quitAndInstall(),
      },
      { type: 'separator' }
    )
  }
  template.push({ role: 'quit', label: 'Quit Ollama', accelerator: 'Command+Q' })

  const menu = Menu.buildFromTemplate(template)

  if (!tray) {
    tray = new Tray(trayIconPath())
  }

  const tooltip = updateAvailable ? 'An update is available' : 'Ollama'
  tray.setToolTip(tooltip)
  tray.setContextMenu(menu)
  tray.setImage(trayIconPath())

  // Remove before adding so repeated calls keep a single theme listener.
  nativeTheme.off('updated', updateTrayIcon)
  nativeTheme.on('updated', updateTrayIcon)
}
let proc: ChildProcess = null
/**
 * Spawns `ollama serve` as a child process and stores it in `proc`.
 *
 * The binary is taken from the app resources when packaged, otherwise from the
 * parent of the current working directory. stdout is logged at info level,
 * stderr at error level, and `restart` runs when the process exits.
 */
function server() {
  let binary: string
  if (app.isPackaged) {
    binary = path.join(process.resourcesPath, 'ollama')
  } else {
    binary = path.resolve(process.cwd(), '..', 'ollama')
  }

  proc = spawn(binary, ['serve'])

  proc.stdout.on('data', chunk => {
    const text = chunk.toString().trim()
    logger.info(text)
  })

  proc.stderr.on('data', chunk => {
    const text = chunk.toString().trim()
    logger.error(text)
  })

  proc.on('exit', restart)
}
/** Schedules the server to be started again after a one-second delay. */
function restart() {
  const delayMs = 1000
  setTimeout(() => server(), delayMs)
}
app.on('before-quit', () => {
if (proc) {
proc.off('exit', restart)
proc.kill('SIGINT') // send SIGINT signal to the server, which also stops any loaded llms
}
})
const updateURL = `https://ollama.ai/api/update?os=${process.platform}&arch=${
process.arch
}&version=${app.getVersion()}&id=${id()}`
let latest = ''
/**
 * Asks the update endpoint whether a release is available.
 *
 * Resolves to true only when the response is successful, is not a 204, and
 * carries a `url` different from the last one seen (remembered in `latest`).
 * Any failure is logged and reported as false.
 */
async function isNewReleaseAvailable() {
  try {
    const res = await fetch(updateURL)
    if (!res.ok || res.status === 204) {
      return false
    }

    const body = await res.json()
    const releaseURL = body?.url
    if (!releaseURL || releaseURL === latest) {
      return false
    }

    // Remember this release so the same URL is not reported again.
    latest = releaseURL
    return true
  } catch (error) {
    logger.error(`update check failed - ${error}`)
    return false
  }
}
/** Triggers the auto-updater, but only when a new release is reported. */
async function checkUpdate() {
  if (!(await isNewReleaseAvailable())) {
    return
  }
  logger.info('checking for update')
  autoUpdater.checkForUpdates()
}
/**
 * Application start-up sequence, called once the app is ready.
 *
 * 1. In packaged builds, checks for updates now and then hourly.
 * 2. Creates the tray.
 * 3. In packaged macOS builds running outside the Applications folder, offers
 *    to move the app there; if the move is attempted without throwing, init
 *    returns early.
 * 4. Starts the server process.
 * 5. Shows the welcome window unless the first-run flag is stored and the CLI
 *    is installed.
 */
function init() {
  if (app.isPackaged) {
    checkUpdate()
    setInterval(() => {
      checkUpdate()
    }, 60 * 60 * 1000)
  }

  updateTray()

  if (process.platform === 'darwin' && app.isPackaged && !app.isInApplicationsFolder()) {
    const chosen = dialog.showMessageBoxSync({
      type: 'question',
      buttons: ['Move to Applications', 'Do Not Move'],
      message: 'Ollama works best when run from the Applications directory.',
      defaultId: 0,
      cancelId: 1,
    })

    if (chosen === 0) {
      try {
        app.moveToApplicationsFolder({
          conflictHandler: conflictType => {
            if (conflictType === 'existsAndRunning') {
              dialog.showMessageBoxSync({
                type: 'info',
                message: 'Cannot move to Applications directory',
                detail:
                  'Another version of Ollama is currently running from your Applications directory. Close it first and try again.',
              })
            }
            return true
          },
        })
        return
      } catch (e) {
        // The original message ended with a stray '}' after the interpolation.
        logger.error(`[Move to Applications] Failed to move to applications folder - ${e.message}`)
      }
    }
  }

  server()

  if (store.get('first-time-run') && installed()) {
    if (process.platform === 'darwin') {
      app.dock.hide()
    }
    // Re-apply the current login-item setting unchanged.
    app.setLoginItemSettings({ openAtLogin: app.getLoginItemSettings().openAtLogin })
    return
  }

  // This is the first run or the CLI is no longer installed
  app.setLoginItemSettings({ openAtLogin: true })
  firstRunWindow()
}
// Quit when all windows are closed, except on macOS. There, it's common
// for applications and their menu bar to stay active until the user quits
// explicitly with Cmd + Q.
app.on('window-all-closed', () => {
if (process.platform !== 'darwin') {
app.quit()
}
})
/**
 * Returns the identifier stored under the 'id' key, generating and persisting
 * a new UUID v4 the first time it is requested.
 */
function id(): string {
  const stored = store.get('id') as string
  if (!stored) {
    const generated = uuidv4()
    store.set('id', generated)
    return generated
  }
  return stored
}
// Point the auto-updater at the same URL used for the manual update check.
autoUpdater.setFeedURL({ url: updateURL })
// Report updater errors to both the log file and the console.
autoUpdater.on('error', e => {
logger.error(`update check failed - ${e.message}`)
console.error(`update check failed - ${e.message}`)
})
// Once an update has been downloaded, flag it and rebuild the tray so the
// update menu items and icon appear.
autoUpdater.on('update-downloaded', () => {
updateAvailable = true
updateTray()
})

View File

@@ -1,21 +0,0 @@
import * as fs from 'fs'
import { exec as cbExec } from 'child_process'
import * as path from 'path'
import { promisify } from 'util'
const app = process && process.type === 'renderer' ? require('@electron/remote').app : require('electron').app
const ollama = app.isPackaged ? path.join(process.resourcesPath, 'ollama') : path.resolve(process.cwd(), '..', 'ollama')
const exec = promisify(cbExec)
const symlinkPath = '/usr/local/bin/ollama'
/**
 * Reports whether the CLI symlink at `symlinkPath` exists and points at this
 * app's `ollama` binary.
 *
 * Returns false instead of throwing when the path cannot be read as a symlink,
 * for example when a regular file occupies it: `fs.readlinkSync` raises in that
 * case, which previously propagated to the caller.
 */
export function installed() {
  try {
    return fs.existsSync(symlinkPath) && fs.readlinkSync(symlinkPath) === ollama
  } catch {
    // Not a symlink (or unreadable): treat the CLI as not installed.
    return false
  }
}
/**
 * Installs the CLI by symlinking the `ollama` binary to `symlinkPath`.
 *
 * The shell command is run through AppleScript "with administrator
 * privileges", executed via `osascript`. Rejects if `osascript` fails.
 */
export async function install() {
  const binDir = path.dirname(symlinkPath)
  // Inner shell command: ensure the target directory exists, then force-create the symlink.
  const shellCommand = `mkdir -p ${binDir} && ln -F -s \\"${ollama}\\" \\"${symlinkPath}\\"`
  const appleScript = `do shell script "${shellCommand}" with administrator privileges`
  await exec(`osascript -e '${appleScript}'`)
}

View File

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 17 KiB

View File

View File

@@ -1,7 +0,0 @@
import App from './app'
import './app.css'
import { createRoot } from 'react-dom/client'
const container = document.getElementById('app')
const root = createRoot(container)
root.render(<App />)

View File

@@ -1,6 +0,0 @@
/** @type {import('tailwindcss').Config} */
module.exports = {
content: ['./src/**/*.{js,ts,jsx,tsx,mdx}'],
theme: {},
plugins: [],
}

View File

@@ -1,20 +0,0 @@
{
"compilerOptions": {
"target": "ES6",
"allowJs": true,
"module": "commonjs",
"skipLibCheck": true,
"esModuleInterop": true,
"noImplicitAny": true,
"sourceMap": true,
"baseUrl": ".",
"outDir": "dist",
"moduleResolution": "node",
"resolveJsonModule": true,
"paths": {
"*": ["node_modules/*"]
},
"jsx": "react-jsx"
},
"include": ["src/**/*"]
}

View File

@@ -1,20 +0,0 @@
import type { Configuration } from 'webpack'
import { rules } from './webpack.rules'
import { plugins } from './webpack.plugins'
export const mainConfig: Configuration = {
/**
* This is the main entry point for your application, it's the first file
* that runs in the main process.
*/
entry: './src/index.ts',
// Put your normal webpack config below here
module: {
rules,
},
plugins,
resolve: {
extensions: ['.js', '.ts', '.jsx', '.tsx', '.css', '.json'],
},
}

View File

@@ -1,14 +0,0 @@
import type IForkTsCheckerWebpackPlugin from 'fork-ts-checker-webpack-plugin'
import { DefinePlugin } from 'webpack'
// eslint-disable-next-line @typescript-eslint/no-var-requires
const ForkTsCheckerWebpackPlugin: typeof IForkTsCheckerWebpackPlugin = require('fork-ts-checker-webpack-plugin')
export const plugins = [
new ForkTsCheckerWebpackPlugin({
logger: 'webpack-infrastructure',
}),
new DefinePlugin({
'process.env.TELEMETRY_WRITE_KEY': JSON.stringify(process.env.TELEMETRY_WRITE_KEY),
}),
]

View File

@@ -1,19 +0,0 @@
import type { Configuration } from 'webpack'
import { rules } from './webpack.rules'
import { plugins } from './webpack.plugins'
rules.push({
test: /\.css$/,
use: [{ loader: 'style-loader' }, { loader: 'css-loader' }, { loader: 'postcss-loader' }],
})
export const rendererConfig: Configuration = {
module: {
rules,
},
plugins,
resolve: {
extensions: ['.js', '.ts', '.jsx', '.tsx', '.css'],
},
}

View File

@@ -1,35 +0,0 @@
import type { ModuleOptions } from 'webpack'
export const rules: Required<ModuleOptions>['rules'] = [
// Add support for native node modules
{
// We're specifying native_modules in the test because the asset relocator loader generates a
// "fake" .node file which is really a cjs file.
test: /native_modules[/\\].+\.node$/,
use: 'node-loader',
},
{
test: /[/\\]node_modules[/\\].+\.(m?js|node)$/,
parser: { amd: false },
use: {
loader: '@vercel/webpack-asset-relocator-loader',
options: {
outputAssetBase: 'native_modules',
},
},
},
{
test: /\.tsx?$/,
exclude: /(node_modules|\.webpack)/,
use: {
loader: 'ts-loader',
options: {
transpileOnly: true,
},
},
},
{
test: /\.svg$/,
use: ['@svgr/webpack'],
},
]

View File

@@ -11,26 +11,37 @@ for TARGETARCH in arm64 amd64; do
rm -rf llm/llama.cpp/build
GOOS=darwin GOARCH=$TARGETARCH go generate ./...
CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -trimpath -o dist/ollama-darwin-$TARGETARCH
CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -trimpath -cover -o dist/ollama-darwin-$TARGETARCH-cov
CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -C app -trimpath -o ../dist/ollama-app-darwin-$TARGETARCH
done
lipo -create -output dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
rm -f dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
lipo -create -output dist/ollama-app dist/ollama-app-darwin-arm64 dist/ollama-app-darwin-amd64
rm -f dist/ollama-darwin-* dist/ollama-app-darwin-*
# create the mac app
rm -rf dist/Ollama.app
cp -R app/darwin/Ollama.app dist/
/usr/libexec/PlistBuddy -c "Set :CFBundleShortVersionString $VERSION" dist/Ollama.app/Contents/Info.plist
mkdir -p dist/Ollama.app/Contents/MacOS
mv dist/ollama-app dist/Ollama.app/Contents/MacOS/Ollama
cp dist/ollama dist/Ollama.app/Contents/Resources/ollama
# sign and notarize the app
if [ -n "$APPLE_IDENTITY" ]; then
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
codesign -f --timestamp --options=runtime --sign "$APPLE_IDENTITY" --identifier ai.ollama.ollama dist/Ollama.app/Contents/MacOS/Ollama
codesign -f --timestamp --options=runtime --sign "$APPLE_IDENTITY" --identifier ai.ollama.ollama dist/Ollama.app/Contents/Resources/ollama
codesign -f --timestamp --options=runtime --sign "$APPLE_IDENTITY" --identifier ai.ollama.ollama dist/Ollama.app
ditto -c -k --keepParent dist/Ollama.app dist/Ollama-darwin.zip
rm -rf dist/Ollama.app
xcrun notarytool submit dist/Ollama-darwin.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
unzip dist/Ollama-darwin.zip -d dist
rm -f dist/Ollama-darwin.zip
xcrun stapler staple "dist/Ollama.app"
ditto -c -k --keepParent dist/Ollama.app dist/Ollama-darwin.zip
rm -rf dist/Ollama.app
else
echo "Skipping code signing - set APPLE_IDENTITY"
fi
chmod +x dist/ollama
# build and optionally sign the mac app
npm install --prefix macapp
if [ -n "$APPLE_IDENTITY" ]; then
npm run --prefix macapp make:sign
else
npm run --prefix macapp make
fi
cp macapp/out/make/zip/darwin/universal/Ollama-darwin-universal-$VERSION.zip dist/Ollama-darwin.zip
# sign the binary and rename it
if [ -n "$APPLE_IDENTITY" ]; then

View File

@@ -27,7 +27,7 @@ function checkEnv() {
} else {
$script:NVIDIA_DIR=$env:NVIDIA_DIR
}
$script:INNO_SETUP_DIR=(get-item "C:\Program Files*\Inno Setup*\")[0]
$script:DEPS_DIR="${script:SRC_DIR}\dist\windows-amd64"
@@ -70,7 +70,7 @@ function buildOllama() {
write-host "Building ollama CLI"
if ($null -eq ${env:OLLAMA_SKIP_GENERATE}) {
& go generate ./...
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
} else {
write-host "Skipping generate step with OLLAMA_SKIP_GENERATE set"
}
@@ -82,14 +82,14 @@ function buildOllama() {
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
New-Item -ItemType Directory -Path .\dist\windows-amd64\ -Force
cp .\ollama.exe .\dist\windows-amd64\ollama-windows-amd64.exe
cp .\ollama.exe .\dist\windows-amd64\
}
function buildApp() {
write-host "Building Ollama App"
cd "${script:SRC_DIR}\app"
& windres -l 0 -o ollama.syso ollama.rc
& go build -trimpath -ldflags "-s -w -H windowsgui -X=github.com/ollama/ollama/version.Version=$script:VERSION -X=github.com/ollama/ollama/server.mode=release" .
& windres -l 0 -o ollama.syso windows\ollama.rc
& go build -trimpath -ldflags "-s -w -H windowsgui -X=github.com/jmorganca/ollama/version.Version=$script:VERSION -X=github.com/jmorganca/ollama/server.mode=release" .
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
if ("${env:KEY_CONTAINER}") {
& "${script:SignTool}" sign /v /fd sha256 /t http://timestamp.digicert.com /f "${script:OLLAMA_CERT}" `
@@ -109,11 +109,8 @@ function gatherDependencies() {
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\"
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\"
cp "${script:NVIDIA_DIR}\cudart64_*.dll" "${script:DEPS_DIR}\"
cp "${script:NVIDIA_DIR}\cublas64_*.dll" "${script:DEPS_DIR}\"
cp "${script:NVIDIA_DIR}\cublasLt64_*.dll" "${script:DEPS_DIR}\"
cp "${script:SRC_DIR}\app\ollama_welcome.ps1" "${script:SRC_DIR}\dist\"
cp "${script:SRC_DIR}\app\windows\ollama_welcome.ps1" "${script:SRC_DIR}\dist\"
if ("${env:KEY_CONTAINER}") {
write-host "about to sign"
foreach ($file in (get-childitem "${script:DEPS_DIR}/cu*.dll") + @("${script:SRC_DIR}\dist\ollama_welcome.ps1")){
@@ -123,15 +120,6 @@ function gatherDependencies() {
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
}
if ($null -ne $env:HIP_PATH) {
# Assumes v5.7, may need adjustments for v6
rm -ea 0 -recurse -force -path "${script:DEPS_DIR}\rocm\"
md "${script:DEPS_DIR}\rocm\rocblas\library\" -ea 0 > $null
cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:DEPS_DIR}\rocm\"
cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:DEPS_DIR}\rocm\"
# amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:DEPS_DIR}\rocm\rocblas\library\"
}
}
function buildInstaller() {
@@ -139,9 +127,9 @@ function buildInstaller() {
cd "${script:SRC_DIR}\app"
$env:PKG_VERSION=$script:PKG_VERSION
if ("${env:KEY_CONTAINER}") {
& "${script:INNO_SETUP_DIR}\ISCC.exe" /SMySignTool="${script:SignTool} sign /fd sha256 /t http://timestamp.digicert.com /f ${script:OLLAMA_CERT} /csp `$qGoogle Cloud KMS Provider`$q /kc ${env:KEY_CONTAINER} `$f" .\ollama.iss
& "${script:INNO_SETUP_DIR}\ISCC.exe" /SMySignTool="${script:SignTool} sign /fd sha256 /t http://timestamp.digicert.com /f ${script:OLLAMA_CERT} /csp `$qGoogle Cloud KMS Provider`$q /kc ${env:KEY_CONTAINER} `$f" .\windows\ollama.iss
} else {
& "${script:INNO_SETUP_DIR}\ISCC.exe" .\ollama.iss
& "${script:INNO_SETUP_DIR}\ISCC.exe" .\windows\ollama.iss
}
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}

View File

@@ -1,25 +0,0 @@
# Builds the project for the host OS, tags v$VERSION, and publishes a GitHub
# pre-release with everything in ./dist attached.
# Set your variables here.
REPO="jmorganca/ollama"
# Check if VERSION is set
if [[ -z "${VERSION}" ]]; then
    echo "VERSION is not set. Please set the VERSION environment variable."
    exit 1
fi
OS=$(go env GOOS)
# Stop here if the build fails so a broken build is never tagged or published.
if ! "./script/build_${OS}.sh"; then
    echo "Build failed; aborting release." >&2
    exit 1
fi
# Create a new tag if it doesn't exist.
if ! git rev-parse "v$VERSION" >/dev/null 2>&1; then
    git tag "v$VERSION"
fi
git push origin "v$VERSION"
# Create a new release.
gh release create -p "v$VERSION" -t "v$VERSION"
# Upload the build artifacts, overwriting any assets with the same name.
gh release upload "v$VERSION" ./dist/* --clobber

10
scripts/run_darwin.sh Executable file
View File

@@ -0,0 +1,10 @@
#!/bin/bash
# Assembles a throwaway Ollama.app in the temp directory from the bundle
# skeleton in app/darwin, builds the server and app binaries into it, and
# launches the app binary.
set -e
# Fall back to /tmp when TMPDIR is unset so the rm below can never resolve to
# "/Ollama.app"; strip a trailing slash to avoid a doubled separator.
TMP_ROOT="${TMPDIR:-/tmp}"
APP_DIR="${TMP_ROOT%/}/Ollama.app"
rm -rf "$APP_DIR"
cp -R app/darwin/Ollama.app "$APP_DIR"
mkdir -p "$APP_DIR/Contents/Resources" "$APP_DIR/Contents/MacOS"
go build -o "$APP_DIR/Contents/Resources/ollama" .
go build -C app -o "$APP_DIR/Contents/MacOS/Ollama" .
"$APP_DIR/Contents/MacOS/Ollama"

View File

@@ -29,6 +29,7 @@ import (
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/parser"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
)
@@ -701,36 +702,39 @@ func convertModel(name, path string, fn func(resp api.ProgressResponse)) (string
return path, nil
}
func CopyModel(src, dest string) error {
srcModelPath := ParseModelPath(src)
srcPath, err := srcModelPath.GetManifestPath()
func CopyModel(src, dst model.Name) error {
if !dst.IsFullyQualified() {
return model.Unqualified(dst)
}
if !src.IsFullyQualified() {
return model.Unqualified(src)
}
manifests, err := GetManifestPath()
if err != nil {
return err
}
destModelPath := ParseModelPath(dest)
destPath, err := destModelPath.GetManifestPath()
if err != nil {
return err
}
if err := os.MkdirAll(filepath.Dir(destPath), 0o755); err != nil {
dstpath := filepath.Join(manifests, dst.Filepath())
if err := os.MkdirAll(filepath.Dir(dstpath), 0o755); err != nil {
return err
}
// copy the file
input, err := os.ReadFile(srcPath)
srcpath := filepath.Join(manifests, src.Filepath())
srcfile, err := os.Open(srcpath)
if err != nil {
fmt.Println("Error reading file:", err)
return err
}
defer srcfile.Close()
err = os.WriteFile(destPath, input, 0o644)
dstfile, err := os.Create(dstpath)
if err != nil {
fmt.Println("Error reading file:", err)
return err
}
defer dstfile.Close()
return nil
_, err = io.Copy(dstfile, srcfile)
return err
}
func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{}, dryRun bool) error {

View File

@@ -29,6 +29,7 @@ import (
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/openai"
"github.com/ollama/ollama/parser"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
)
@@ -145,6 +146,11 @@ func (s *Server) GenerateHandler(c *gin.Context) {
select {
case runner = <-rCh:
case err = <-eCh:
if errors.Is(err, context.Canceled) {
c.JSON(499, gin.H{"error": "request canceled"})
return
}
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
@@ -388,6 +394,11 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
select {
case runner = <-rCh:
case err = <-eCh:
if errors.Is(err, context.Canceled) {
c.JSON(499, gin.H{"error": "request canceled"})
return
}
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
@@ -788,34 +799,34 @@ func (s *Server) ListModelsHandler(c *gin.Context) {
}
func (s *Server) CopyModelHandler(c *gin.Context) {
var req api.CopyRequest
err := c.ShouldBindJSON(&req)
switch {
case errors.Is(err, io.EOF):
var r api.CopyRequest
if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
return
case err != nil:
} else if err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
if req.Source == "" || req.Destination == "" {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "source add destination are required"})
src := model.ParseName(r.Source)
if !src.IsValid() {
_ = c.Error(fmt.Errorf("source %q is invalid", r.Source))
}
dst := model.ParseName(r.Destination)
if !dst.IsValid() {
_ = c.Error(fmt.Errorf("destination %q is invalid", r.Destination))
}
if len(c.Errors) > 0 {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": c.Errors.Errors()})
return
}
if err := ParseModelPath(req.Destination).Validate(); err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
if err := CopyModel(req.Source, req.Destination); err != nil {
if os.IsNotExist(err) {
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
} else {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
}
return
if err := CopyModel(src, dst); errors.Is(err, os.ErrNotExist) {
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found", r.Source)})
} else if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
}
}
@@ -1215,6 +1226,11 @@ func (s *Server) ChatHandler(c *gin.Context) {
select {
case runner = <-rCh:
case err = <-eCh:
if errors.Is(err, context.Canceled) {
c.JSON(499, gin.H{"error": "request canceled"})
return
}
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}

View File

@@ -23,7 +23,6 @@ import (
type LlmRequest struct {
ctx context.Context //nolint:containedctx
model *Model
ggml *llm.GGML // TODO - how large is this, and do we need to free it after we've finished loading?
opts api.Options
sessionDuration time.Duration
successCh chan *runnerRef
@@ -39,7 +38,7 @@ type Scheduler struct {
loaded map[string]*runnerRef
loadedMu sync.Mutex
loadFn func(req *LlmRequest, gpus gpu.GpuInfoList)
loadFn func(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList)
newServerFn func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error)
getGpuFn func() gpu.GpuInfoList
}
@@ -47,6 +46,7 @@ type Scheduler struct {
// TODO set this to zero after a release or two, to enable multiple models by default
var loadedMax = 1 // Maximum runners; < 1 maps to as many as will fit in VRAM (unlimited for CPU runners)
var maxQueuedRequests = 10 // TODO configurable
var numParallel = 1
func InitScheduler(ctx context.Context) *Scheduler {
maxRunners := os.Getenv("OLLAMA_MAX_LOADED_MODELS")
@@ -58,6 +58,14 @@ func InitScheduler(ctx context.Context) *Scheduler {
loadedMax = m
}
}
if onp := os.Getenv("OLLAMA_NUM_PARALLEL"); onp != "" {
p, err := strconv.Atoi(onp)
if err != nil || p <= 0 {
slog.Error("invalid parallel setting, must be greater than zero", "OLLAMA_NUM_PARALLEL", onp, "error", err)
} else {
numParallel = p
}
}
sched := &Scheduler{
pendingReqCh: make(chan *LlmRequest, maxQueuedRequests),
@@ -74,20 +82,16 @@ func InitScheduler(ctx context.Context) *Scheduler {
// context must be canceled to decrement ref count and release the runner
func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options, sessionDuration time.Duration) (chan *runnerRef, chan error) {
ggml, err := llm.LoadModel(model.ModelPath)
req := &LlmRequest{
ctx: c,
model: model,
ggml: ggml,
opts: opts,
sessionDuration: sessionDuration,
successCh: make(chan *runnerRef),
errCh: make(chan error, 1),
}
if err != nil {
req.errCh <- err
return req.successCh, req.errCh
}
// context split across parallel threads
opts.NumCtx = opts.NumCtx * numParallel
select {
case s.pendingReqCh <- req:
default:
@@ -130,28 +134,39 @@ func (s *Scheduler) processPending(ctx context.Context) {
pending.useLoadedRunner(runner, s.finishedReqCh)
break
}
} else if loadedCount == 0 {
slog.Debug("loading first model", "model", pending.model.ModelPath)
gpus := s.getGpuFn()
g := pickBestFitGPUs(pending, gpus)
if g != nil {
gpus = g
}
s.loadFn(pending, gpus)
break
} else if loadedMax > 0 && loadedCount >= loadedMax {
slog.Debug("max runners achieved, unloading one to make room", "runner_count", loadedCount)
runnerToExpire = s.findRunnerToUnload(pending)
} else {
// More than one loaded model, so we have to see if the new one fits
// Either no models are loaded or below loadedMax
// Get a refreshed GPU list
gpus := s.getGpuFn()
// Load model for fitting
ggml, err := llm.LoadModel(pending.model.ModelPath)
if err != nil {
pending.errCh <- err
break
}
// No models loaded. Load the model but prefer the best fit.
if loadedCount == 0 {
slog.Debug("loading first model", "model", pending.model.ModelPath)
g := pickBestFitGPUs(pending, ggml, gpus)
if g != nil {
gpus = g
}
s.loadFn(pending, ggml, gpus)
break
}
// More than one loaded model, so we have to see if the new one fits
// Update free memory from currently loaded models
s.updateFreeSpace(gpus)
gpus = pickBestFitGPUs(pending, gpus)
gpus = pickBestFitGPUs(pending, ggml, gpus)
if gpus != nil {
slog.Debug("new model fits with existing models, loading")
s.loadFn(pending, gpus)
s.loadFn(pending, ggml, gpus)
break
}
runnerToExpire = s.findRunnerToUnload(pending)
@@ -282,8 +297,8 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm
}()
}
func (s *Scheduler) load(req *LlmRequest, gpus gpu.GpuInfoList) {
llama, err := s.newServerFn(gpus, req.model.ModelPath, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts)
func (s *Scheduler) load(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) {
llama, err := s.newServerFn(gpus, req.model.ModelPath, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts)
if err != nil {
// some older models are not compatible with newer versions of llama.cpp
// show a generalized compatibility error until there is a better way to
@@ -417,16 +432,21 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
slog.Debug("evaluating already loaded", "model", req.model.ModelPath)
runner.refMu.Lock()
defer runner.refMu.Unlock()
// Ignore the NumGPU settings for comparison
optsExisting := runner.Options.Runner
optsExisting.NumGPU = -1
optsNew := req.opts.Runner
optsNew.NumGPU = -1
timeout := 10 * time.Second
if runner.loading {
timeout = 2 * time.Minute // Initial load can take a long time for big models on slow systems...
}
ctx, cancel := context.WithTimeout(ctx, timeout) // BUG -
// Don't reload runner if num_gpu=-1 was provided
optsExisting := runner.Options.Runner
optsNew := req.opts.Runner
if optsNew.NumGPU < 0 {
optsExisting.NumGPU = -1
optsNew.NumGPU = -1
}
ctx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
if !reflect.DeepEqual(runner.adapters, req.model.AdapterPaths) || // have the adapters changed?
!reflect.DeepEqual(runner.projectors, req.model.ProjectorPaths) || // have the projectors changed?
@@ -434,6 +454,7 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
runner.llama.Ping(ctx) != nil {
return true
}
return false
}
@@ -454,7 +475,7 @@ func (a ByDuration) Less(i, j int) bool {
// pickBestFitGPUs will try to find the optimal placement of the model in the available GPUs where the model fully fits
// If the model can not be fit fully within the available GPU(s) nil is returned
func pickBestFitGPUs(req *LlmRequest, gpus gpu.GpuInfoList) gpu.GpuInfoList {
func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu.GpuInfoList {
var estimatedVRAM uint64
for _, gl := range gpus.ByLibrary() {
var ok bool
@@ -466,7 +487,7 @@ func pickBestFitGPUs(req *LlmRequest, gpus gpu.GpuInfoList) gpu.GpuInfoList {
// First attempt to fit the model into a single GPU
for _, g := range sgl {
if ok, estimatedVRAM = llm.PredictServerFit([]gpu.GpuInfo{g}, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
if ok, estimatedVRAM = llm.PredictServerFit([]gpu.GpuInfo{g}, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
slog.Debug("new model will fit in available VRAM in single GPU, loading", "model", req.model.ModelPath, "gpu", g.ID, "available", g.FreeMemory, "required", format.HumanBytes2(estimatedVRAM))
return []gpu.GpuInfo{g}
}
@@ -477,7 +498,7 @@ func pickBestFitGPUs(req *LlmRequest, gpus gpu.GpuInfoList) gpu.GpuInfoList {
// - try subsets of GPUs instead of just falling back to 1 or all in a family
// Now try all the GPUs
if ok, estimatedVRAM = llm.PredictServerFit(gl, req.ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
if ok, estimatedVRAM = llm.PredictServerFit(gl, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
slog.Debug("new model will fit in available VRAM, loading", "model", req.model.ModelPath, "library", gl[0].Library, "required", format.HumanBytes2(estimatedVRAM))
return gl
}

View File

@@ -47,6 +47,7 @@ func TestLoad(t *testing.T) {
ctx, done := context.WithTimeout(context.Background(), 5*time.Millisecond)
defer done()
s := InitScheduler(ctx)
var ggml *llm.GGML // value not used in tests
req := &LlmRequest{
ctx: ctx,
model: &Model{ModelPath: "foo"},
@@ -59,7 +60,7 @@ func TestLoad(t *testing.T) {
return nil, fmt.Errorf("something failed to load model blah")
}
gpus := gpu.GpuInfoList{}
s.load(req, gpus)
s.load(req, ggml, gpus)
require.Len(t, req.successCh, 0)
require.Len(t, req.errCh, 1)
require.Len(t, s.loaded, 0)
@@ -70,7 +71,7 @@ func TestLoad(t *testing.T) {
s.newServerFn = func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error) {
return server, nil
}
s.load(req, gpus)
s.load(req, ggml, gpus)
select {
case err := <-req.errCh:
require.NoError(t, err)
@@ -82,7 +83,7 @@ func TestLoad(t *testing.T) {
req.model.ModelPath = "dummy_model_path"
server.waitResp = fmt.Errorf("wait failure")
s.load(req, gpus)
s.load(req, ggml, gpus)
select {
case err := <-req.errCh:
require.Contains(t, err.Error(), "wait failure")
@@ -101,6 +102,7 @@ type bundle struct {
ctxDone func()
srv *mockLlm
req *LlmRequest
ggml *llm.GGML
}
func (scenario *bundle) newServer(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options) (llm.LlamaServer, error) {
@@ -132,14 +134,15 @@ func newScenario(t *testing.T, ctx context.Context, modelName string, estimatedV
{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: &bytes.Reader{}},
})
assert.Nil(t, err)
fname := f.Name()
model := &Model{Name: modelName, ModelPath: fname}
ggml, err := llm.LoadModel(model.ModelPath)
scenario.ggml, err = llm.LoadModel(model.ModelPath)
require.NoError(t, err)
scenario.req = &LlmRequest{
ctx: scenario.ctx,
model: model,
ggml: ggml,
sessionDuration: 5 * time.Millisecond,
successCh: make(chan *runnerRef, 1),
errCh: make(chan error, 1),
@@ -157,13 +160,13 @@ func TestRequests(t *testing.T) {
scenario1a.req.sessionDuration = 0
scenario1b := newScenario(t, ctx, "ollama-model-1", 11)
scenario1b.req.model = scenario1a.req.model
scenario1b.req.ggml = scenario1a.req.ggml
scenario1b.ggml = scenario1a.ggml
scenario1b.req.sessionDuration = 0
// simple reload of same model
scenario2a := newScenario(t, ctx, "ollama-model-1", 20)
scenario2a.req.model = scenario1a.req.model
scenario2a.req.ggml = scenario1a.req.ggml
scenario2a.ggml = scenario1a.ggml
// Multiple loaded models
scenario3a := newScenario(t, ctx, "ollama-model-3a", 1*format.GigaByte)
@@ -322,13 +325,14 @@ func TestGetRunner(t *testing.T) {
successCh1c, errCh1c := s.GetRunner(scenario1c.ctx, scenario1c.req.model, scenario1c.req.opts, scenario1c.req.sessionDuration)
require.Len(t, s.pendingReqCh, 0)
require.Len(t, successCh1c, 0)
require.Len(t, errCh1c, 0)
time.Sleep(5 * time.Millisecond)
require.Len(t, s.loaded, 0)
require.Len(t, errCh1c, 1)
err = <-errCh1c
require.Contains(t, err.Error(), "bad path")
scenario1b.ctxDone()
time.Sleep(5 * time.Millisecond)
require.Len(t, s.loaded, 0)
}
// TODO - add one scenario that triggers the bogus finished event with positive ref count
@@ -366,7 +370,9 @@ func TestPrematureExpired(t *testing.T) {
require.LessOrEqual(t, len(s.finishedReqCh), 1)
time.Sleep(10 * time.Millisecond)
require.Len(t, s.finishedReqCh, 0)
s.loadedMu.Lock()
require.Len(t, s.loaded, 0)
s.loadedMu.Unlock()
// also shouldn't happen in real life
s.finishedReqCh <- scenario1a.req
@@ -426,7 +432,6 @@ func TestUpdateFreeSpace(t *testing.T) {
s.updateFreeSpace(gpus)
require.Equal(t, uint64(850), gpus[0].FreeMemory)
require.Equal(t, uint64(1850), gpus[1].FreeMemory)
}
func TestFindRunnerToUnload(t *testing.T) {
@@ -485,6 +490,9 @@ func TestNeedsReload(t *testing.T) {
require.False(t, resp)
req.opts.NumGPU = 99
resp = runner.needsReload(ctx, req)
require.True(t, resp)
req.opts.NumGPU = -1
resp = runner.needsReload(ctx, req)
require.False(t, resp)
}

View File

@@ -1,87 +0,0 @@
package model
import (
"fmt"
"log/slog"
"strings"
"unicode"
)
// Digest is the digest of a model Manifest, stored in its canonical
// "<digest-type>-<digest>" string form. It is a comparable, immutable
// value type.
//
// The zero Digest is not a valid digest.
type Digest struct {
	s string
}

// Split breaks the digest at its first "-" and returns the digest type and
// the digest value. If there is no "-", the whole string is returned as typ
// and digest is empty.
func (d Digest) Split() (typ, digest string) {
	before, after, _ := strings.Cut(d.s, "-")
	return before, after
}

// String returns the digest in the form "<digest-type>-<digest>", or the
// empty string for the zero Digest.
func (d Digest) String() string { return d.s }

// IsValid reports whether the digest is non-zero.
//
// A valid digest may be created only by ParseDigest, or
// ParseName(name).Digest().
func (d Digest) IsValid() bool { return len(d.s) > 0 }

// LogValue implements slog.LogValuer by logging the digest as its string
// form.
func (d Digest) LogValue() slog.Value { return slog.StringValue(d.s) }

// Compile-time check that Digest satisfies slog.LogValuer.
var _ slog.LogValuer = Digest{}
// ParseDigest parses s into a Digest. The input is split at its first "-";
// if it contains no "-", it is split at its first ":" instead. The result is
// always stored in the "<digest-type>-<digest>" form.
//
// The zero Digest is returned when no separator is found, when the type or
// the hex value fails validation, or when the hex value is shorter than two
// characters.
func ParseDigest(s string) Digest {
	typ, digest, found := strings.Cut(s, "-")
	if !found {
		typ, digest, found = strings.Cut(s, ":")
	}
	switch {
	case !found, !isValidDigestType(typ), !isValidHex(digest), len(digest) < 2:
		return Digest{}
	}
	return Digest{s: fmt.Sprintf("%s-%s", typ, digest)}
}
// MustParseDigest is like ParseDigest but panics if s does not parse to a
// valid Digest.
func MustParseDigest(s string) Digest {
	if d := ParseDigest(s); d.IsValid() {
		return d
	}
	panic(fmt.Sprintf("invalid digest: %q", s))
}
// isValidDigestType reports whether s is a non-empty string made up only of
// runes that unicode classifies as lowercase letters or digits.
func isValidDigestType(s string) bool {
	for _, r := range s {
		if unicode.IsLower(r) || unicode.IsDigit(r) {
			continue
		}
		return false
	}
	// An empty string never enters the loop and must be rejected.
	return s != ""
}
// isValidHex reports whether s is non-empty and every byte is a lowercase
// hexadecimal digit ('0'-'9' or 'a'-'f'). Uppercase hex digits are rejected.
func isValidHex(s string) bool {
	if s == "" {
		return false
	}
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case '0' <= c && c <= '9':
		case 'a' <= c && c <= 'f':
		default:
			return false
		}
	}
	return true
}

View File

@@ -1,46 +0,0 @@
package model
import "testing"
// testDigests maps an input string to the Digest that ParseDigest is
// expected to return for it. The zero Digest ({}) marks inputs that must be
// rejected.
var testDigests = map[string]Digest{
// Empty input has no separator and is rejected.
"": {},
"sha256-1234": {s: "sha256-1234"},
"sha256-5678": {s: "sha256-5678"},
"blake2-9abc": {s: "blake2-9abc"},
// Missing digest type, or missing digest value.
"-1234": {},
"sha256-": {},
// Input is split at the first "-", so a second "-" lands in the hex part
// and fails hex validation.
"sha256-1234-5678": {},
"sha256-P": {}, // invalid hex
"sha256-1234P": {},
"---": {},
}
// TestDigestParse checks that ParseDigest returns the expected Digest for
// every entry in testDigests.
func TestDigestParse(t *testing.T) {
	for input, expected := range testDigests {
		actual := ParseDigest(input)
		t.Logf("ParseDigest(%q) = %#v", input, actual)
		if actual == expected {
			continue
		}
		t.Errorf("ParseDigest(%q) = %q; want %q", input, actual, expected)
	}
}
// TestDigestString checks, for every entry in testDigests, that the expected
// Digest stringifies back to its input (or to "" when it is invalid), and
// that parsing the input and stringifying the result gives the same answer.
func TestDigestString(t *testing.T) {
	for input, d := range testDigests {
		// Invalid digests are expected to stringify to the empty string.
		want := ""
		if d.IsValid() {
			want = input
		}

		if got := d.String(); got != want {
			t.Errorf("ParseDigest(%q).String() = %q; want %q", input, got, want)
		}

		if got := ParseDigest(input).String(); got != want {
			t.Errorf("roundtrip ParseDigest(%q).String() = %q; want %q", input, got, want)
		}
	}
}

View File

@@ -1,718 +1,313 @@
// Package model contains types and utilities for parsing, validating, and
// working with model names and digests.
package model
import (
"cmp"
"errors"
"fmt"
"hash/maphash"
"io"
"log/slog"
"path"
"path/filepath"
"slices"
"strings"
"sync"
"github.com/ollama/ollama/types/structs"
)
// Errors
var (
// ErrInvalidName, ErrIncompleteName, and ErrInvalidDigest are not
// used by this package, but are exported so that other packages can
// use them, instead of defining their own errors for them.
ErrInvalidName = errors.New("invalid model name")
ErrIncompleteName = errors.New("incomplete model name")
ErrInvalidDigest = errors.New("invalid digest")
// ErrUnqualifiedName represents an error where a name is not fully
// qualified. It is not used directly in this package, but is here
// to avoid other packages inventing their own error type.
// Additionally, it can be conveniently used via [Unqualified].
ErrUnqualifiedName = errors.New("unqualified name")
)
// Defaults
//
// In both masks and fills, a part written as "?" is a placeholder: it is
// treated as empty when the mask or fill string is parsed.
const (
// MaskDefault is the default mask used by [Name.DisplayShortest].
MaskDefault = "registry.ollama.ai/library/?:latest"
// MaskNothing is a mask that masks nothing.
MaskNothing = "?/?/?:?"
// FillDefault is the default fill used by [ParseName].
FillDefault = "registry.ollama.ai/library/?:latest+Q4_0"
// FillNothing is a fill that fills nothing.
FillNothing = "?/?/?:?+?"
)
const MaxNamePartLen = 128
// PartKind identifies which part of a model name a string belongs to.
type PartKind int
// Levels of concreteness
const (
// Each value aligns with its index in the Name.parts array.
PartHost PartKind = iota
PartNamespace
PartModel
PartTag
PartBuild
PartDigest
// NumParts is the number of parts in a Name. In this list, it must
// follow the final part.
NumParts
// PartExtraneous is given an explicit value, so it is an untyped constant
// outside the iota sequence and is never a valid index into Name.parts.
PartExtraneous = -1
)
var kindNames = map[PartKind]string{
PartHost: "Host",
PartNamespace: "Namespace",
PartModel: "Name",
PartTag: "Tag",
PartBuild: "Build",
PartDigest: "Digest",
// Unqualified is a helper function that returns an error with
// ErrUnqualifiedName as the cause and the name as the message.
func Unqualified(n Name) error {
return fmt.Errorf("%w: %s", ErrUnqualifiedName, n)
}
func (k PartKind) String() string {
return cmp.Or(kindNames[k], "Unknown")
// MissingPart is used to indicate any part of a name that was "promised" by
// the presence of a separator, but is missing.
//
// The value was chosen because it is deemed unlikely to be set by a user,
// not a valid part name when checked by [Name.IsValid], and easy to
// spot in logs.
const MissingPart = "!MISSING!"
// DefaultName returns a Name carrying only the default host, namespace, and
// tag. Every other field is left at its zero value.
//
//   - Host defaults to "registry.ollama.ai"
//   - Namespace defaults to "library"
//   - Tag defaults to "latest"
func DefaultName() Name {
	var n Name
	n.Host = "registry.ollama.ai"
	n.Namespace = "library"
	n.Tag = "latest"
	return n
}
// Name is an opaque reference to a model. It holds the parts of a model
// with the case preserved, but is not directly comparable with other Names
// since model names can be represented with different casing depending on
// the use case. For instance, "Mistral" and "mistral" are the same model
// but each version may have come from different sources (e.g. copied from a
// Web page, or from a file path).
// partKind identifies one of the parts of a model name.
type partKind int

const (
	kindHost partKind = iota
	kindNamespace
	kindModel
	kindTag
	kindDigest
)

// String returns the lowercase name of the part kind, or "unknown" for any
// value that is not one of the declared kinds.
func (k partKind) String() string {
	names := [...]string{
		kindHost:      "host",
		kindNamespace: "namespace",
		kindModel:     "model",
		kindTag:       "tag",
		kindDigest:    "digest",
	}
	if k < 0 || int(k) >= len(names) {
		return "unknown"
	}
	return names[k]
}
// Name is a structured representation of a model name string, as defined by
// [ParseNameNoDefaults].
//
// Valid Names can ONLY be constructed by calling [ParseName].
// It is not guaranteed to be valid. Use [Name.IsValid] to check if the name
// is valid.
//
// A Name is valid if and only if it has a valid Model part. The other parts
// are optional.
//
// A Name is considered "complete" if it has all parts present. To check if a
// Name is complete, use [Name.IsComplete].
//
// To compare two names in a case-insensitive manner, use [Name.EqualFold].
//
// The parts of a Name are:
//
// - Host: the domain of the model (optional)
// - Namespace: the namespace of the model (optional)
// - Model: the name of the model (required)
// - Tag: the tag of the model (optional)
// - Build: the build of the model; usually the quantization or "file type" (optional)
//
// The parts can be obtained in their original form by calling [Name.Parts].
//
// To check if a Name has at minimum a valid model part, use [Name.IsValid].
// It is not directly comparable with other Names. Use [Name.Equal] and
// [Name.MapHash] for determining equality and using as a map key.
type Name struct {
_ structs.Incomparable
parts [NumParts]string // host, namespace, model, tag, build, digest
// TODO(bmizerany): track offsets and hold s (raw string) here? We
// could pack the offsets all into a single uint64 since the first
// parts take less bits since their max offset is less than the max
// offset of the next part. This would save a ton of bytes per Name
// and mean zero allocations for String.
Host string
Namespace string
Model string
Tag string
RawDigest string
}
// ParseName parses s into a Name, and returns the result of filling it with
// defaults. The input string must be a valid string
// representation of a model name in the form:
// ParseName parses and assembles a Name from a name string. The
// format of a valid name string is:
//
// [host/][namespace/]<model>[:tag][+build][@<digest-type>-<digest>]
// s:
// { host } "/" { namespace } "/" { model } ":" { tag } "@" { digest }
// { host } "/" { namespace } "/" { model } ":" { tag }
// { host } "/" { namespace } "/" { model } "@" { digest }
// { host } "/" { namespace } "/" { model }
// { namespace } "/" { model } ":" { tag } "@" { digest }
// { namespace } "/" { model } ":" { tag }
// { namespace } "/" { model } "@" { digest }
// { namespace } "/" { model }
// { model } ":" { tag } "@" { digest }
// { model } ":" { tag }
// { model } "@" { digest }
// { model }
// "@" { digest }
// host:
// pattern: alphanum { alphanum | "-" | "_" | "." | ":" }*
// length: [1, 350]
// namespace:
// pattern: alphanum { alphanum | "-" | "_" }*
// length: [2, 80]
// model:
// pattern: alphanum { alphanum | "-" | "_" | "." }*
// length: [2, 80]
// tag:
// pattern: alphanum { alphanum | "-" | "_" | "." }*
// length: [1, 80]
// digest:
// pattern: alphanum { alphanum | "-" | ":" }*
// length: [2, 80]
//
// The name part is required, all others are optional. If a part is missing,
// it is left empty in the returned Name. If a part is invalid, the zero Ref
// value is returned.
// Most users should use [ParseName] instead, unless they need to support
// different defaults than DefaultName.
//
// The build part is normalized to uppercase.
//
// Examples of valid paths:
//
// "example.com/library/mistral:7b+x"
// "example.com/eva/mistral:7b+Q4_0"
// "mistral:7b+x"
// "example.com/mike/mistral:latest+Q4_0"
// "example.com/bruce/mistral:latest"
// "example.com/pdevine/thisisfine:7b+Q4_0@sha256-1234567890abcdef"
//
// Examples of invalid paths:
//
// "example.com/mistral:7b+"
// "example.com/mistral:7b+Q4_0+"
// "x/y/z/z:8n+I"
// ""
//
// It returns the zero value if any part is invalid.
//
// # Fills
//
// For any valid s, the fill string is used to fill in missing parts of the
// Name. The fill string must be a valid Name with the exception that any part
// may be the string ("?"), which will not be considered for filling.
// ParseNameFill walks the parts of s, storing each one in a Name, and then
// fills the result from fill via fillName.
//
// The zero Name is returned if any part is rejected (an invalid digest, an
// extraneous part, or a part failing IsValidNamePart), or if the parsed name
// is neither valid nor resolved.
func ParseNameFill(s, fill string) Name {
	var parsed Name
	parts(s)(func(kind PartKind, part string) bool {
		rejected := (kind == PartDigest && !ParseDigest(part).IsValid()) ||
			kind == PartExtraneous ||
			!IsValidNamePart(kind, part)
		if rejected {
			// Discard anything collected so far and stop iterating.
			parsed = Name{}
			return false
		}
		parsed.parts[kind] = part
		return true
	})
	if !parsed.IsValid() && !parsed.IsResolved() {
		return Name{}
	}
	return fillName(parsed, fill)
}
// ParseName parses s into a Name, and returns the result of filling it
// with FillDefault. The input string must be a valid string representation
// of a model
// The name returned is not guaranteed to be valid. If it is not valid, the
// field values are left in an undefined state. Use [Name.IsValid] to check
// if the name is valid.
func ParseName(s string) Name {
return ParseNameFill(s, "")
return Merge(ParseNameBare(s), DefaultName())
}
// parseMask parses a mask or fill string into a Name. A part equal to "?" is
// accepted and left empty. Any other part that fails IsValidNamePart causes
// a panic.
func parseMask(s string) Name {
	var mask Name
	parts(s)(func(kind PartKind, part string) bool {
		switch {
		case part == "?":
			// mask part; treat as empty but valid
		case !IsValidNamePart(kind, part):
			panic(fmt.Errorf("invalid mask part %s: %q", kind, part))
		default:
			mask.parts[kind] = part
		}
		return true
	})
	return mask
}
// ParseNameBare parses s as a name string and returns a Name. No merge with
// [DefaultName] is performed.
func ParseNameBare(s string) Name {
var n Name
var promised bool
func MustParseName(s, fill string) Name {
r := ParseNameFill(s, fill)
if !r.IsValid() {
panic("invalid Name: " + s)
s, n.RawDigest, promised = cutLast(s, "@")
if promised && n.RawDigest == "" {
n.RawDigest = MissingPart
}
return r
}
// fillName fills in the missing parts of dst with the parts of src.
//
// The returned Name will only be valid if dst is valid.
//
// It skips fill parts that are "?".
func fillName(r Name, fill string) Name {
fill = cmp.Or(fill, FillDefault)
f := parseMask(fill)
if fill != FillNothing && f.IsZero() {
panic("invalid fill")
s, n.Tag, _ = cutPromised(s, ":")
s, n.Model, promised = cutPromised(s, "/")
if !promised {
n.Model = s
return n
}
for i := range r.parts {
if f.parts[i] == "?" {
continue
}
r.parts[i] = cmp.Or(r.parts[i], f.parts[i])
s, n.Namespace, promised = cutPromised(s, "/")
if !promised {
n.Namespace = s
return n
}
return r
n.Host = s
return n
}
// WithBuild returns a copy of r with the build set to the given string.
func (r Name) WithBuild(build string) Name {
r.parts[PartBuild] = build
return r
// Merge merges the host, namespace, and tag parts of the two names,
// preferring the non-empty parts of a.
func Merge(a, b Name) Name {
a.Host = cmp.Or(a.Host, b.Host)
a.Namespace = cmp.Or(a.Namespace, b.Namespace)
a.Tag = cmp.Or(a.Tag, b.Tag)
return a
}
func (r Name) WithDigest(digest Digest) Name {
r.parts[PartDigest] = digest.String()
return r
}
var mapHashSeed = maphash.MakeSeed()
// MapHash returns a case insensitive hash for use in maps and equality
// checks. For a convenient way to compare names, use [Name.EqualFold].
//
//nolint:errcheck
func (r Name) MapHash() uint64 {
// correctly hash the parts with case insensitive comparison
var h maphash.Hash
h.SetSeed(mapHashSeed)
for _, part := range r.parts {
// downcase the part for hashing
for i := range part {
c := part[i]
if c >= 'A' && c <= 'Z' {
c = c - 'A' + 'a'
}
h.WriteByte(c)
}
// String returns the name string, in the format that [ParseNameNoDefaults]
// accepts as valid, if [Name.IsValid] reports true; otherwise the empty
// string is returned.
func (n Name) String() string {
var b strings.Builder
if n.Host != "" {
b.WriteString(n.Host)
b.WriteByte('/')
}
return h.Sum64()
}
func (r Name) slice(from, to PartKind) Name {
var v Name
copy(v.parts[from:to+1], r.parts[from:to+1])
return v
}
// DisplayShortest returns the shortest possible, masked display string in form:
//
// [host/][<namespace>/]<model>[:<tag>]
//
// # Masks
//
// The mask is a string that specifies which parts of the name to omit based
// on case-insensitive comparison. [Name.DisplayShortest] omits parts of the name
// that are the same as the mask, moving from left to right until the first
// unequal part is found. It then moves right to left until the first unequal
// part is found. The result is the shortest possible display string.
//
// Unlike a [Name] the mask can contain "?" characters which are treated as
// wildcards. A "?" will never match a part of the name, since a valid name
// can never contain a "?" character.
//
// For example: Given a Name ("registry.ollama.ai/library/mistral:latest") masked
// with ("registry.ollama.ai/library/?:latest") will produce the display string
// ("mistral").
//
// If mask is the empty string, then [MaskDefault] is used.
//
// DisplayShortest panics if the mask is not the empty string, MaskNothing, and
// invalid.
//
// # Builds
//
// For now, DisplayShortest does not consider the build or return one in the
// result. We can lift this restriction when needed.
func (r Name) DisplayShortest(mask string) string {
mask = cmp.Or(mask, MaskDefault)
d := parseMask(mask)
if mask != MaskNothing && r.IsZero() {
panic("invalid Name")
if n.Namespace != "" {
b.WriteString(n.Namespace)
b.WriteByte('/')
}
for i := range PartTag {
if !strings.EqualFold(r.parts[i], d.parts[i]) {
break
}
r.parts[i] = ""
b.WriteString(n.Model)
if n.Tag != "" {
b.WriteByte(':')
b.WriteString(n.Tag)
}
for i := PartTag; i >= 0; i-- {
if !strings.EqualFold(r.parts[i], d.parts[i]) {
break
}
r.parts[i] = ""
if n.RawDigest != "" {
b.WriteByte('@')
b.WriteString(n.RawDigest)
}
return r.slice(PartHost, PartTag).DisplayLong()
}
// DisplayLongest returns the result of r.DisplayShortest(MaskNothing).
func (r Name) DisplayLongest() string {
return r.DisplayShortest(MaskNothing)
}
var seps = [...]string{
PartHost: "/",
PartNamespace: "/",
PartModel: ":",
PartTag: "+",
PartBuild: "@",
PartDigest: "",
}
// writeTo writes the fullest possible display string to w, in the form:
//
//	<host>/<namespace>/<model>:<tag>+<build>@<digest-type>-<digest>
//
// Empty parts are skipped. A non-empty part is preceded by the separator of
// the part slot just before it (seps[i-1]) whenever an earlier part has
// already been written.
//
// The digest is always prefixed with "@", even when it is the only part, so
// a name with nothing but a digest is written as "@<digest-type>-<digest>".
//
// The first error returned by w.WriteString stops the write and is returned.
func (r Name) writeTo(w io.StringWriter) error {
	wroteAny := false
	for i, part := range r.parts {
		if part == "" {
			continue
		}
		needsSep := wroteAny || i == int(PartDigest)
		if needsSep {
			if _, err := w.WriteString(seps[i-1]); err != nil {
				return err
			}
		}
		if _, err := w.WriteString(part); err != nil {
			return err
		}
		wroteAny = true
	}
	return nil
}
// builderPool hands out *strings.Builder values. A fresh, empty builder is
// created whenever the pool has none to reuse.
var builderPool = sync.Pool{
	New: func() any { return new(strings.Builder) },
}
// DisplayLong returns whatever writeTo produces for r: every non-empty part
// joined by its separator, in the order
//
//	<host>/<namespace>/<model>:<tag>+<build>
//
// followed by the digest if r has one. Empty parts are left out.
func (r Name) DisplayLong() string {
	buf := builderPool.Get().(*strings.Builder)
	buf.Reset()
	defer builderPool.Put(buf)

	const sizeHint = 50 // arbitrarily long enough for most names
	buf.Grow(sizeHint)

	// The error is deliberately ignored, as in the rest of this method's
	// contract: the result is whatever was written.
	_ = r.writeTo(buf)
	return buf.String()
}
// GoString implements fmt.GoStringer. It returns a string for debugging and
// logging: the same output as [Name.DisplayLong], but with every empty part
// shown as "?" so that all parts are always present.
func (r Name) GoString() string {
	// r is a copy (value receiver), so the caller's Name is not modified.
	for i, part := range r.parts {
		if part == "" {
			r.parts[i] = "?"
		}
	}
	return r.DisplayLong()
}
// LogValue implements slog.LogValuer.
func (r Name) LogValue() slog.Value {
return slog.StringValue(r.GoString())
}
// IsComplete reports whether the Name is fully qualified. That is it has a
// domain, namespace, name, tag, and build.
func (r Name) IsComplete() bool {
return !slices.Contains(r.parts[:PartDigest], "")
}
// IsCompleteNoBuild is like [Name.IsComplete] but it does not require the
// build part to be present.
func (r Name) IsCompleteNoBuild() bool {
return !slices.Contains(r.parts[:PartBuild], "")
}
// IsResolved reports true if the Name has a valid digest.
//
// It is possible to have a valid Name, or a complete Name that is not
// resolved.
func (r Name) IsResolved() bool {
return r.Digest().IsValid()
}
// Digest returns the digest part of the Name, if any.
//
// If Digest returns a non-empty string, then [Name.IsResolved] will return
// true, and digest is considered valid.
func (r Name) Digest() Digest {
// This was already validated by ParseName, so we can just return it.
return Digest{r.parts[PartDigest]}
}
// EqualFold reports whether r and o are equivalent model names, ignoring
// case.
func (r Name) EqualFold(o Name) bool {
return r.CompareFold(o) == 0
}
// CompareFold performs a case-insensitive cmp.Compare on r and o.
//
// This can be used with [slices.SortFunc].
//
// For simple equality checks, use [Name.EqualFold].
func (r Name) CompareFold(o Name) int {
return slices.CompareFunc(r.parts[:], o.parts[:], compareFold)
}
// compareFold compares a and b rune by rune after lowering ASCII uppercase
// letters. It returns a negative number, zero, or a positive number when a
// sorts before, equal to, or after b.
func compareFold(a, b string) int {
	left, right := []rune(a), []rune(b)
	foldCmp := func(x, y rune) int {
		return cmp.Compare(downcase(x), downcase(y))
	}
	return slices.CompareFunc(left, right, foldCmp)
}

// downcase maps the ASCII letters 'A'-'Z' to 'a'-'z' and returns every other
// rune unchanged.
func downcase(r rune) rune {
	if 'A' <= r && r <= 'Z' {
		return r + ('a' - 'A')
	}
	return r
}
func (r Name) Host() string { return r.parts[PartHost] }
func (r Name) Namespace() string { return r.parts[PartNamespace] }
func (r Name) Model() string { return r.parts[PartModel] }
func (r Name) Build() string { return r.parts[PartBuild] }
func (r Name) Tag() string { return r.parts[PartTag] }
// iter_Seq2 is a iter.Seq2 defined here to avoid the current build
// restrictions in the go1.22 iter package requiring the
// goexperiment.rangefunc tag to be set via the GOEXPERIMENT=rangefunc flag,
// which we are not yet ready to support.
//
// Once we are ready to support rangefunc, this can be removed and replaced
// with the iter.Seq2 type.
type iter_Seq2[A, B any] func(func(A, B) bool)
// parts returns a sequence of the parts of a Name string from most specific
// to least specific: s is scanned from its last byte to its first, and each
// part is yielded as soon as the separator in front of it is reached.
//
// It normalizes the input string by removing "http://" and "https://" only.
// No other normalizations are performed.
//
// Nothing is yielded when s (after prefix removal) is empty or longer than
// MaxNamePartLen. Iteration stops as soon as yield returns false. A
// separator met in a state where it is not expected is reported once as
// PartExtraneous, after which iteration stops.
func parts(s string) iter_Seq2[PartKind, string] {
return func(yield func(PartKind, string) bool) {
if strings.HasPrefix(s, "http://") {
s = strings.TrimPrefix(s, "http://")
} else {
s = strings.TrimPrefix(s, "https://")
}
// This guard bounds the length of the whole string, not of one part.
if len(s) > MaxNamePartLen || len(s) == 0 {
return
}
numConsecutiveDots := 0
partLen := 0
// state is the kind of part currently being scanned; j is the exclusive
// end index of that part within s.
state, j := PartDigest, len(s)
for i := len(s) - 1; i >= 0; i-- {
// NOTE(review): partLen grows by one per iteration and is only ever
// reset to 0, and len(s) <= MaxNamePartLen is enforced above, so this
// branch looks unreachable - confirm before relying on it.
if partLen++; partLen > MaxNamePartLen {
// catch a part that is too long early, so
// we don't keep spinning on it, waiting for
// an isInValidPart check which would scan
// over it again.
yield(state, s[i+1:j])
return
}
switch s[i] {
case '@':
// "@" is only accepted before any other separator has been seen.
switch state {
case PartDigest:
if !yield(PartDigest, s[i+1:j]) {
return
}
if i == 0 {
// This is the form
// "@<digest>" which is valid.
//
// We're done.
return
}
state, j, partLen = PartBuild, i, 0
default:
yield(PartExtraneous, s[i+1:j])
return
}
case '+':
// Text after "+" is the build.
switch state {
case PartBuild, PartDigest:
if !yield(PartBuild, s[i+1:j]) {
return
}
state, j, partLen = PartTag, i, 0
default:
yield(PartExtraneous, s[i+1:j])
return
}
case ':':
// Text after ":" is the tag, unless the host is being scanned.
switch state {
case PartTag, PartBuild, PartDigest:
if !yield(PartTag, s[i+1:j]) {
return
}
state, j, partLen = PartModel, i, 0
case PartHost:
// noop: support for host:port
default:
yield(PartExtraneous, s[i+1:j])
return
}
case '/':
// The first "/" reached ends the model; the second ends the namespace.
switch state {
case PartModel, PartTag, PartBuild, PartDigest:
if !yield(PartModel, s[i+1:j]) {
return
}
// NOTE(review): unlike the other transitions, partLen is not reset
// here - confirm whether that is intentional.
state, j = PartNamespace, i
case PartNamespace:
if !yield(PartNamespace, s[i+1:j]) {
return
}
state, j, partLen = PartHost, i, 0
default:
yield(PartExtraneous, s[i+1:j])
return
}
default:
// Two '.' bytes in a row are reported as an empty part of the current
// kind and end the iteration. The counter is only cleared by a byte that
// is neither '.' nor one of the separators handled above.
if s[i] == '.' {
if numConsecutiveDots++; numConsecutiveDots > 1 {
yield(state, "")
return
}
} else {
numConsecutiveDots = 0
}
}
}
// Whatever remains at the front of s is the host or namespace if the scan
// got that far; otherwise it is reported as the model.
if state <= PartNamespace {
yield(state, s[:j])
} else {
yield(PartModel, s[:j])
}
}
}
// IsZero reports whether every part of r is the empty string.
func (r Name) IsZero() bool {
	var zero [NumParts]string
	return r.parts == zero
}
// IsValid reports whether r has a non-empty model part.
//
// The parts themselves are not re-validated here; per the original author's
// note, parsing only ever stores valid parts, so the presence of a model is
// the only thing left to check.
func (r Name) IsValid() bool {
	model := r.parts[PartModel]
	return len(model) > 0
}
// ParseNameFromURLPath parses a URL path form of a name into a Name. It
// removes one leading "/" from s, if present, and passes the remainder to
// [ParseNameFill] together with fill.
func ParseNameFromURLPath(s, fill string) Name {
	trimmed := strings.TrimPrefix(s, "/")
	return ParseNameFill(trimmed, fill)
}
// ParseNameFromURLPathFill passes s and fill to [ParseNameFill] unchanged
// and returns the result.
//
// NOTE(review): unlike ParseNameFromURLPath, this does not trim a leading
// "/" from s — confirm whether that difference is intended.
func ParseNameFromURLPathFill(s, fill string) Name {
	name := ParseNameFill(s, fill)
	return name
}
// DisplayURLPath returns the display form of r produced by
// [Name.DisplayShortest] with the MaskNothing mask.
//
// The parts maintain their original case.
//
// Example (expectation taken from the package tests):
//
//	ParseNameFill("example.com/library/mistral:latest+Q4_0", FillNothing).DisplayURLPath()
//	// returns "example.com/library/mistral:latest"
func (r Name) DisplayURLPath() string {
	shown := r.DisplayShortest(MaskNothing)
	return shown
}
// URLPath joins every part of r that precedes the build part with "/" and
// lower-cases the result. For a complete Name this has the form:
//
//	<host>/<namespace>/<model>/<tag>
//
// Joining is done with path.Join, so empty parts are dropped and the result
// is cleaned.
func (r Name) URLPath() string {
	joined := path.Join(r.parts[:PartBuild]...)
	return strings.ToLower(joined)
}
// ParseNameFromFilepath parses a file path into a Name. The input must be a
// file path representation of a model name, using the platform's path
// separator, in the form:
//
//	host/namespace/model/tag/build
//
// Path elements are assigned to parts in that order. The zero value is
// returned if any element is not a valid part for its kind, if elements
// remain after the build part, or if the path ends before a model part has
// been seen.
//
// The fill string is used to fill in missing parts of the constructed Name.
// See [ParseNameFill] for more information on the fill string.
func ParseNameFromFilepath(s, fill string) Name {
	sep := string(filepath.Separator)
	remaining := s

	var name Name
	for kind := PartKind(0); kind <= PartBuild; kind++ {
		elem, tail, _ := strings.Cut(remaining, sep)
		if !IsValidNamePart(kind, elem) {
			return Name{}
		}
		name.parts[kind] = elem
		remaining = tail
		if remaining == "" {
			break
		}
	}

	// Leftover input means too many path elements; an invalid name means
	// the path stopped short of the model part.
	if remaining != "" || !name.IsValid() {
		return Name{}
	}
	return fillName(name, fill)
}
// Filepath returns a canonicalized, relative file path built from all parts
// of r, joined with the platform's path separator.
//
// Every part is lower-cased except the build part, which is upper-cased.
// Joining is done with filepath.Join, so empty parts are dropped.
//
// Example:
//
//	ParseName("example.com/namespace/model:tag+build").Filepath() // returns "example.com/namespace/model/tag/BUILD"
func (r Name) Filepath() string {
	var elems [NumParts]string
	for i, part := range r.parts {
		if PartKind(i) == PartBuild {
			elems[i] = strings.ToUpper(part)
			continue
		}
		elems[i] = strings.ToLower(part)
	}
	return filepath.Join(elems[:]...)
}
// FilepathNoBuild returns a canonicalized, relative file path built from
// the parts of r that precede the build part. Each of those parts is
// lower-cased and they are joined with the platform's path separator.
func (r Name) FilepathNoBuild() string {
	// r is a copy (value receiver), so lower-casing in place is safe.
	elems := r.parts[:PartBuild]
	for i, part := range elems {
		elems[i] = strings.ToLower(part)
	}
	return filepath.Join(elems...)
}
// IsValidNamePart reports if s contains all valid characters for the given
// part kind and is under MaxNamePartLen bytes.
func IsValidNamePart(kind PartKind, s string) bool {
if len(s) > MaxNamePartLen {
// IsValid reports whether all parts of the name are present and valid. The
// digest is a special case, and is checked for validity only if present.
func (n Name) IsValid() bool {
if n.RawDigest != "" && !isValidPart(kindDigest, n.RawDigest) {
return false
}
if s == "" {
return false
return n.IsFullyQualified()
}
// IsFullyQualified returns true if all parts of the name are present and
// valid without the digest.
func (n Name) IsFullyQualified() bool {
var parts = []string{
n.Host,
n.Namespace,
n.Model,
n.Tag,
}
var consecutiveDots int
for _, c := range []byte(s) {
if c == '.' {
if consecutiveDots++; consecutiveDots >= 2 {
return false
}
} else {
consecutiveDots = 0
}
if !isValidByteFor(kind, c) {
for i, part := range parts {
if !isValidPart(partKind(i), part) {
return false
}
}
return true
}
func isValidByteFor(kind PartKind, c byte) bool {
if kind == PartNamespace && c == '.' {
// Filepath returns a canonical file path for the name in which host,
// namespace, model and tag each become one lower-cased directory element:
//
//	{host}/{namespace}/{model}/{tag}
//
// The elements are joined with filepath.Join, which uses the system's path
// separator and cleans the result.
//
// It panics if the name is not fully qualified. Use [Name.IsFullyQualified]
// to check first.
func (n Name) Filepath() string {
	if !n.IsFullyQualified() {
		panic("illegal attempt to get filepath of invalid name")
	}
	elems := [...]string{n.Host, n.Namespace, n.Model, n.Tag}
	for i := range elems {
		elems[i] = strings.ToLower(elems[i])
	}
	return filepath.Join(elems[:]...)
}
// LogValue returns a string-kind slog.Value holding n.String().
func (n Name) LogValue() slog.Value {
	text := n.String()
	return slog.StringValue(text)
}
// isValidLen reports whether the byte length of s is within the bounds
// allowed for kind: 1-350 for a host, 1-80 for a tag, and 2-80 for every
// other kind.
func isValidLen(kind partKind, s string) bool {
	lo, hi := 2, 80
	switch kind {
	case kindHost:
		lo, hi = 1, 350
	case kindTag:
		lo = 1
	}
	n := len(s)
	return lo <= n && n <= hi
}
func isValidPart(kind partKind, s string) bool {
if !isValidLen(kind, s) {
return false
}
if kind == PartHost && c == ':' {
return true
for i := range s {
if i == 0 {
if !isAlphanumeric(s[i]) {
return false
}
continue
}
switch s[i] {
case '_', '-':
case '.':
if kind == kindNamespace {
return false
}
case ':':
if kind != kindHost && kind != kindDigest {
return false
}
default:
if !isAlphanumeric(s[i]) {
return false
}
}
}
if c == '.' || c == '-' {
return true
}
if c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '_' {
return true
}
return false
return true
}
// isAlphanumeric reports whether c is an ASCII letter or an ASCII digit.
func isAlphanumeric(c byte) bool {
	switch {
	case 'A' <= c && c <= 'Z',
		'a' <= c && c <= 'z',
		'0' <= c && c <= '9':
		return true
	}
	return false
}
// cutLast splits s around the last occurrence of sep. It returns the text
// before and after that occurrence and ok=true. If sep does not occur in s,
// it returns s, "", false.
func cutLast(s, sep string) (before, after string, ok bool) {
	idx := strings.LastIndex(s, sep)
	if idx < 0 {
		return s, "", false
	}
	return s[:idx], s[idx+len(sep):], true
}
// cutPromised splits s at the last occurrence of sep, like cutLast. When
// sep is found, an empty before or after is replaced with MissingPart; the
// original author notes this will cause [Name.IsValid] to return false.
// When sep is not found, the result of cutLast is returned unchanged.
func cutPromised(s, sep string) (before, after string, ok bool) {
	head, tail, found := cutLast(s, sep)
	if found {
		// The separator "promised" a part on each side of it.
		head = cmp.Or(head, MissingPart)
		tail = cmp.Or(tail, MissingPart)
	}
	return head, tail, found
}

View File

@@ -1,715 +1,237 @@
package model
import (
"bytes"
"cmp"
"fmt"
"log/slog"
"path/filepath"
"slices"
"strings"
"reflect"
"testing"
)
// fields is a flat, comparable view of the parts of a Name, used by the
// tests to state expected parse results.
type fields struct {
	host      string
	namespace string
	model     string
	tag       string
	build     string
	digest    string
}
const (
part80 = "88888888888888888888888888888888888888888888888888888888888888888888888888888888"
part350 = "33333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333"
)
// fieldsFromName copies each part of p into the matching field of a fields
// value.
func fieldsFromName(p Name) fields {
	var f fields
	f.host = p.parts[PartHost]
	f.namespace = p.parts[PartNamespace]
	f.model = p.parts[PartModel]
	f.tag = p.parts[PartTag]
	f.build = p.parts[PartBuild]
	f.digest = p.parts[PartDigest]
	return f
}
var testNames = map[string]fields{
"mistral:latest": {model: "mistral", tag: "latest"},
"mistral": {model: "mistral"},
"mistral:30B": {model: "mistral", tag: "30B"},
"mistral:7b": {model: "mistral", tag: "7b"},
"mistral:7b+Q4_0": {model: "mistral", tag: "7b", build: "Q4_0"},
"mistral+KQED": {model: "mistral", build: "KQED"},
"mistral.x-3:7b+Q4_0": {model: "mistral.x-3", tag: "7b", build: "Q4_0"},
"mistral:7b+q4_0": {model: "mistral", tag: "7b", build: "q4_0"},
"llama2": {model: "llama2"},
"user/model": {namespace: "user", model: "model"},
"example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"},
"example.com/ns/mistral:7b+X": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"},
"localhost:5000/ns/mistral": {host: "localhost:5000", namespace: "ns", model: "mistral"},
// invalid digest
"mistral:latest@invalid256-": {},
"mistral:latest@-123": {},
"mistral:latest@!-123": {},
"mistral:latest@1-!": {},
"mistral:latest@": {},
// resolved
"x@sha123-12": {model: "x", digest: "sha123-12"},
"@sha456-22": {digest: "sha456-22"},
"@sha456-1": {},
"@@sha123-22": {},
// preserves case for build
"x+b": {model: "x", build: "b"},
// invalid (includes fuzzing trophies)
" / / : + ": {},
" / : + ": {},
" : + ": {},
" + ": {},
" : ": {},
" / ": {},
" /": {},
"/ ": {},
"/": {},
":": {},
"+": {},
// (".") in namespace is not allowed
"invalid.com/7b+x": {},
"invalid:7b+Q4_0:latest": {},
"in valid": {},
"invalid/y/z/foo": {},
"/0": {},
"0 /0": {},
"0 /": {},
"0/": {},
":/0": {},
"+0/00000": {},
"0+.\xf2\x80\xf6\x9d00000\xe5\x99\xe6\xd900\xd90\xa60\x91\xdc0\xff\xbf\x99\xe800\xb9\xdc\xd6\xc300\x970\xfb\xfd0\xe0\x8a\xe1\xad\xd40\x9700\xa80\x980\xdd0000\xb00\x91000\xfe0\x89\x9b\x90\x93\x9f0\xe60\xf7\x84\xb0\x87\xa5\xff0\xa000\x9a\x85\xf6\x85\xfe\xa9\xf9\xe9\xde00\xf4\xe0\x8f\x81\xad\xde00\xd700\xaa\xe000000\xb1\xee0\x91": {},
"0//0": {},
"m+^^^": {},
"file:///etc/passwd": {},
"file:///etc/passwd:latest": {},
"file:///etc/passwd:latest+u": {},
":x": {},
"+x": {},
"x+": {},
// Disallow ("\.+") in any part to prevent path traversal anywhere
// we convert the name to a path.
"../etc/passwd": {},
".../etc/passwd": {},
"./../passwd": {},
"./0+..": {},
strings.Repeat("a", MaxNamePartLen): {model: strings.Repeat("a", MaxNamePartLen)},
strings.Repeat("a", MaxNamePartLen+1): {},
}
func TestIsValidNameLen(t *testing.T) {
if IsValidNamePart(PartNamespace, strings.Repeat("a", MaxNamePartLen+1)) {
t.Errorf("unexpectedly valid long name")
}
}
// TestNameConsecutiveDots checks that names made only of dots parse to an
// empty display string as soon as they contain two or more dots, while a
// single dot round-trips unchanged. Consecutive dots are rejected to avoid
// path traversal; testNames has further cases, but this test is more
// exhaustive and exists to emphasize that concern.
func TestNameConsecutiveDots(t *testing.T) {
	for n := 1; n < 10; n++ {
		dots := strings.Repeat(".", n)
		got := ParseNameFill(dots, FillNothing).DisplayLong()
		switch {
		case n > 1 && got != "":
			t.Errorf("ParseName(%q) = %q; want empty string", dots, got)
		case n == 1 && got != dots:
			t.Errorf("ParseName(%q) = %q; want %q", dots, got, dots)
		}
	}
}
// TestNameParts checks that the parts array of a Name has exactly NumParts
// elements.
func TestNameParts(t *testing.T) {
	var zero Name
	want := int(NumParts)
	got := len(zero.parts)
	if want != got {
		t.Errorf("Parts() = %d; want %d", got, want)
	}
}
// TestNamePartString checks that PartKind.String returns "Unknown" for an
// out-of-range kind and the kindNames entry for every known kind.
func TestNamePartString(t *testing.T) {
	const unknown = "Unknown"
	if got := PartKind(-2).String(); got != unknown {
		t.Errorf("Unknown part = %q; want %q", got, unknown)
	}
	for kind, want := range kindNames {
		got := kind.String()
		if got == want {
			continue
		}
		t.Errorf("%s = %q; want %q", kind, got, want)
	}
}
// TestParseName parses every entry of testNames, bare and with an http(s)
// prefix, and checks both the resulting parts and that the display form
// parses back to an equal name.
func TestParseName(t *testing.T) {
	// We should get the same results with or without the http(s) prefixes.
	prefixes := []string{"", "https://", "http://"}
	for baseName, want := range testNames {
		for _, prefix := range prefixes {
			input := prefix + baseName
			t.Run(input, func(t *testing.T) {
				name := ParseNameFill(input, FillNothing)
				if got := fieldsFromName(name); got != want {
					t.Errorf("ParseName(%q) = %q; want %q", input, got, want)
				}

				// Round trip through the long display form.
				reparsed := ParseNameFill(name.DisplayLong(), FillNothing)
				if !reparsed.EqualFold(name) {
					t.Errorf("ParseName(%q).String() = %s; want %s", input, name.DisplayLong(), baseName)
				}
			})
		}
	}
}
// TestParseNameFill checks that missing parts are taken from the fill
// string, that invalid input yields an empty display string, and that an
// invalid fill string makes ParseNameFill panic.
func TestParseNameFill(t *testing.T) {
	tests := []struct {
		in   string
		fill string
		want string
	}{
		{in: "mistral", fill: "example.com/library/?:latest+Q4_0", want: "example.com/library/mistral:latest+Q4_0"},
		{in: "mistral", fill: "example.com/library/?:latest", want: "example.com/library/mistral:latest"},
		{in: "llama2:x", fill: "example.com/library/?:latest+Q4_0", want: "example.com/library/llama2:x+Q4_0"},

		// Invalid
		{in: "", fill: "example.com/library/?:latest+Q4_0", want: ""},
		{in: "llama2:?", fill: "example.com/library/?:latest+Q4_0", want: ""},
	}
	for _, tc := range tests {
		t.Run(tc.in, func(t *testing.T) {
			got := ParseNameFill(tc.in, tc.fill).DisplayLong()
			if got != tc.want {
				t.Errorf("ParseName(%q, %q) = %q; want %q", tc.in, tc.fill, got, tc.want)
			}
		})
	}

	t.Run("invalid fill", func(t *testing.T) {
		defer func() {
			if r := recover(); r == nil {
				t.Fatal("expected panic")
			}
		}()
		ParseNameFill("x", "^")
	})
}
// TestParseNameHTTPDoublePrefixStrip checks that a name carrying two
// stacked scheme prefixes does not parse to a valid Name.
func TestParseNameHTTPDoublePrefixStrip(t *testing.T) {
	for _, input := range []string{
		"http://https://valid.com/valid/valid:latest",
		"https://http://valid.com/valid/valid:latest",
	} {
		t.Run(input, func(t *testing.T) {
			name := ParseNameFill(input, FillNothing)
			if !name.IsValid() {
				return
			}
			t.Errorf("expected invalid path; got %#v", name)
		})
	}
}
func TestCompleteWithAndWithoutBuild(t *testing.T) {
func TestParseNameParts(t *testing.T) {
cases := []struct {
in string
complete bool
completeNoBuild bool
want Name
wantValidDigest bool
}{
{"", false, false},
{"incomplete/mistral:7b+x", false, false},
{"incomplete/mistral:7b+Q4_0", false, false},
{"incomplete:7b+x", false, false},
{"complete.com/x/mistral:latest+Q4_0", true, true},
{"complete.com/x/mistral:latest", false, true},
{
in: "host/namespace/model:tag",
want: Name{
Host: "host",
Namespace: "namespace",
Model: "model",
Tag: "tag",
},
},
{
in: "host/namespace/model",
want: Name{
Host: "host",
Namespace: "namespace",
Model: "model",
},
},
{
in: "namespace/model",
want: Name{
Namespace: "namespace",
Model: "model",
},
},
{
in: "model",
want: Name{
Model: "model",
},
},
{
in: "h/nn/mm:t",
want: Name{
Host: "h",
Namespace: "nn",
Model: "mm",
Tag: "t",
},
},
{
in: part80 + "/" + part80 + "/" + part80 + ":" + part80,
want: Name{
Host: part80,
Namespace: part80,
Model: part80,
Tag: part80,
},
},
{
in: part350 + "/" + part80 + "/" + part80 + ":" + part80,
want: Name{
Host: part350,
Namespace: part80,
Model: part80,
Tag: part80,
},
},
{
in: "@digest",
want: Name{
RawDigest: "digest",
},
wantValidDigest: false,
},
{
in: "model@sha256:123",
want: Name{
Model: "model",
RawDigest: "sha256:123",
},
wantValidDigest: true,
},
}
for _, tt := range cases {
t.Run(tt.in, func(t *testing.T) {
p := ParseNameFill(tt.in, FillNothing)
t.Logf("ParseName(%q) = %#v", tt.in, p)
if g := p.IsComplete(); g != tt.complete {
t.Errorf("Complete(%q) = %v; want %v", tt.in, g, tt.complete)
}
if g := p.IsCompleteNoBuild(); g != tt.completeNoBuild {
t.Errorf("CompleteNoBuild(%q) = %v; want %v", tt.in, g, tt.completeNoBuild)
}
})
}
// Complete uses Parts which returns a slice, but it should be
// inlined when used in Complete, preventing any allocations or
// escaping to the heap.
allocs := testing.AllocsPerRun(1000, func() {
keep(ParseNameFill("complete.com/x/mistral:latest+Q4_0", FillNothing).IsComplete())
})
if allocs > 0 {
t.Errorf("Complete allocs = %v; want 0", allocs)
}
}
func TestNameLogValue(t *testing.T) {
cases := []string{
"example.com/library/mistral:latest+Q4_0",
"mistral:latest",
"mistral:7b+Q4_0",
}
for _, s := range cases {
t.Run(s, func(t *testing.T) {
var b bytes.Buffer
log := slog.New(slog.NewTextHandler(&b, nil))
name := ParseNameFill(s, FillNothing)
log.Info("", "name", name)
want := fmt.Sprintf("name=%s", name.GoString())
got := b.String()
if !strings.Contains(got, want) {
t.Errorf("expected log output to contain %q; got %q", want, got)
got := ParseNameBare(tt.in)
if !reflect.DeepEqual(got, tt.want) {
t.Errorf("parseName(%q) = %v; want %v", tt.in, got, tt.want)
}
})
}
}
func TestNameGoString(t *testing.T) {
var testCases = map[string]bool{ // name -> valid
"host/namespace/model:tag": true,
"host/namespace/model": false,
"namespace/model": false,
"model": false,
"@sha256-1000000000000000000000000000000000000000000000000000000000000000": false,
"model@sha256-1000000000000000000000000000000000000000000000000000000000000000": false,
"model@sha256:1000000000000000000000000000000000000000000000000000000000000000": false,
// long (but valid)
part80 + "/" + part80 + "/" + part80 + ":" + part80: true,
part350 + "/" + part80 + "/" + part80 + ":" + part80: true,
"h/nn/mm:t@sha256-1000000000000000000000000000000000000000000000000000000000000000": true, // bare minimum part sizes
"h/nn/mm:t@sha256:1000000000000000000000000000000000000000000000000000000000000000": true, // bare minimum part sizes
"m": false, // model too short
"n/mm:": false, // namespace too short
"h/n/mm:t": false, // namespace too short
"@t": false, // digest too short
"mm@d": false, // digest too short
// invalids
"^": false,
"mm:": false,
"/nn/mm": false,
"//": false,
"//mm": false,
"hh//": false,
"//mm:@": false,
"00@": false,
"@": false,
// not starting with alphanum
"-hh/nn/mm:tt@dd": false,
"hh/-nn/mm:tt@dd": false,
"hh/nn/-mm:tt@dd": false,
"hh/nn/mm:-tt@dd": false,
"hh/nn/mm:tt@-dd": false,
"": false,
// hosts
"host:https/namespace/model:tag": true,
// colon in non-host part before tag
"host/name:space/model:tag": false,
}
// TestNameparseNameDefault checks that ParseName fills in the default
// host, namespace and tag for a bare model name.
func TestNameparseNameDefault(t *testing.T) {
	const (
		name = "xx"
		want = "registry.ollama.ai/library/xx:latest"
	)
	if got := ParseName(name).String(); got != want {
		t.Errorf("parseName(%q).String() = %q; want %q", name, got, want)
	}
}
// TestNameIsValid checks Name.IsValid against every entry of testCases and,
// for names that are valid, that String reproduces the input exactly. It
// fails if no entry exercised the String round trip.
func TestNameIsValid(t *testing.T) {
	roundTrips := 0
	for input, want := range testCases {
		n := ParseNameBare(input)
		t.Logf("n: %#v", n)

		valid := n.IsValid()
		if valid != want {
			t.Errorf("parseName(%q).IsValid() = %v; want %v", input, valid, want)
		}
		if !valid {
			continue
		}

		// Test roundtrip with String
		if str := ParseNameBare(input).String(); str != input {
			t.Errorf("parseName(%q).String() = %q; want %q", input, str, input)
		}
		roundTrips++
	}

	if roundTrips == 0 {
		t.Errorf("no tests for Name.String")
	}
}
func TestNameIsValidPart(t *testing.T) {
cases := []struct {
name string
in string
wantString string
wantGoString string // default is tt.in
kind partKind
s string
want bool
}{
{
name: "Complete Name",
in: "example.com/library/mistral:latest+Q4_0",
wantGoString: "example.com/library/mistral:latest+Q4_0@?",
},
{
name: "Short Name",
in: "mistral:latest",
wantGoString: "?/?/mistral:latest+?@?",
},
{
name: "Long Name",
in: "library/mistral:latest",
wantGoString: "?/library/mistral:latest+?@?",
},
{
name: "Case Preserved",
in: "Library/Mistral:Latest",
wantGoString: "?/Library/Mistral:Latest+?@?",
},
{
name: "With digest",
in: "Library/Mistral:Latest@sha256-123456",
wantGoString: "?/Library/Mistral:Latest+?@sha256-123456",
},
{kind: kindHost, s: "", want: false},
{kind: kindHost, s: "a", want: true},
{kind: kindHost, s: "a.", want: true},
{kind: kindHost, s: "a.b", want: true},
{kind: kindHost, s: "a:123", want: true},
{kind: kindHost, s: "a:123/aa/bb", want: false},
{kind: kindNamespace, s: "bb", want: true},
{kind: kindNamespace, s: "a.", want: false},
{kind: kindModel, s: "-h", want: false},
{kind: kindDigest, s: "sha256-1000000000000000000000000000000000000000000000000000000000000000", want: true},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
p := ParseNameFill(tt.in, FillNothing)
tt.wantGoString = cmp.Or(tt.wantGoString, tt.in)
if g := fmt.Sprintf("%#v", p); g != tt.wantGoString {
t.Errorf("GoString() = %q; want %q", g, tt.wantGoString)
t.Run(tt.s, func(t *testing.T) {
got := isValidPart(tt.kind, tt.s)
if got != tt.want {
t.Errorf("isValidPart(%s, %q) = %v; want %v", tt.kind, tt.s, got, tt.want)
}
})
}
}
func TestDisplayLongest(t *testing.T) {
g := ParseNameFill("example.com/library/mistral:latest+Q4_0", FillNothing).DisplayLongest()
if g != "example.com/library/mistral:latest" {
t.Errorf("got = %q; want %q", g, "example.com/library/mistral:latest")
func FuzzName(f *testing.F) {
for s := range testCases {
f.Add(s)
}
}
func TestDisplayShortest(t *testing.T) {
cases := []struct {
in string
mask string
want string
wantPanic bool
}{
{"example.com/library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
{"example.com/library/mistral:latest+Q4_0", "example.com/_/_:latest", "library/mistral", false},
{"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false},
{"example.com/library/mistral:latest+Q4_0", "", "example.com/library/mistral", false},
// case-insensitive
{"Example.com/library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
{"example.com/Library/mistral:latest+Q4_0", "example.com/library/_:latest", "mistral", false},
{"example.com/library/Mistral:latest+Q4_0", "example.com/library/_:latest", "Mistral", false},
{"example.com/library/mistral:Latest+Q4_0", "example.com/library/_:latest", "mistral", false},
{"example.com/library/mistral:Latest+q4_0", "example.com/library/_:latest", "mistral", false},
// zero value
{"", MaskDefault, "", true},
// invalid mask
{"example.com/library/mistral:latest+Q4_0", "example.com/mistral", "", true},
// DefaultMask
{"registry.ollama.ai/library/mistral:latest+Q4_0", MaskDefault, "mistral", false},
// Auto-Fill
{"x", "example.com/library/_:latest", "x", false},
{"x", "example.com/library/_:latest+Q4_0", "x", false},
{"x/y:z", "a.com/library/_:latest+Q4_0", "x/y:z", false},
{"x/y:z", "a.com/library/_:latest+Q4_0", "x/y:z", false},
}
for _, tt := range cases {
t.Run("", func(t *testing.T) {
defer func() {
if tt.wantPanic {
if recover() == nil {
t.Errorf("expected panic")
}
f.Fuzz(func(t *testing.T, s string) {
n := ParseNameBare(s)
if n.IsValid() {
parts := [...]string{n.Host, n.Namespace, n.Model, n.Tag, n.RawDigest}
for _, part := range parts {
if part == ".." {
t.Errorf("unexpected .. as valid part")
}
if len(part) > 350 {
t.Errorf("part too long: %q", part)
}
}()
p := ParseNameFill(tt.in, FillNothing)
t.Logf("ParseName(%q) = %#v", tt.in, p)
if g := p.DisplayShortest(tt.mask); g != tt.want {
t.Errorf("got = %q; want %q", g, tt.want)
}
})
}
}
// TestParseNameAllocs checks that parsing a name performs no heap
// allocations.
func TestParseNameAllocs(t *testing.T) {
	parse := func() {
		keep(ParseNameFill("example.com/mistral:7b+Q4_0", FillNothing))
	}
	if allocs := testing.AllocsPerRun(1000, parse); allocs > 0 {
		t.Errorf("ParseName allocs = %v; want 0", allocs)
	}
}
func BenchmarkParseName(b *testing.B) {
b.ReportAllocs()
for range b.N {
keep(ParseNameFill("example.com/mistral:7b+Q4_0", FillNothing))
}
}
func FuzzParseNameFromFilepath(f *testing.F) {
f.Add("example.com/library/mistral/7b/Q4_0")
f.Add("example.com/../mistral/7b/Q4_0")
f.Add("example.com/x/../7b/Q4_0")
f.Add("example.com/x/../7b")
f.Fuzz(func(t *testing.T, s string) {
name := ParseNameFromFilepath(s, FillNothing)
if strings.Contains(s, "..") && !name.IsZero() {
t.Fatalf("non-zero value for path with '..': %q", s)
}
if name.IsValid() == name.IsZero() {
t.Errorf("expected valid path to be non-zero value; got %#v", name)
if n.String() != s {
t.Errorf("String() = %q; want %q", n.String(), s)
}
}
})
}
// FuzzParseName fuzzes ParseNameFill and checks these invariants:
//   - input containing ".." never yields a non-zero Name;
//   - a Name that is neither valid nor resolved equals the zero Name;
//   - no part is longer than MaxNamePartLen;
//   - the long display form equals the input ignoring case, and parses
//     back to an equal Name.
func FuzzParseName(f *testing.F) {
	seeds := []string{
		"example.com/mistral:7b+Q4_0",
		"example.com/mistral:7b+q4_0",
		"example.com/mistral:7b+x",
		"x/y/z:8n+I",
		":x",
		"@sha256-123456",
		"example.com/mistral:latest+Q4_0@sha256-123456",
		":@!@",
		"...",
	}
	for _, seed := range seeds {
		f.Add(seed)
	}

	f.Fuzz(func(t *testing.T, s string) {
		parsed := ParseNameFill(s, FillNothing)
		if strings.Contains(s, "..") && !parsed.IsZero() {
			t.Fatalf("non-zero value for path with '..': %q", s)
		}

		if !parsed.IsValid() && !parsed.IsResolved() {
			if !parsed.EqualFold(Name{}) {
				t.Errorf("expected invalid path to be zero value; got %#v", parsed)
			}
			t.Skipf("invalid path: %q", s)
		}

		for _, part := range parsed.parts {
			if len(part) > MaxNamePartLen {
				t.Errorf("part too long: %q", part)
			}
		}

		long := parsed.DisplayLong()
		if !strings.EqualFold(long, s) {
			t.Errorf("String() did not round-trip with case insensitivity: %q\ngot = %q\nwant = %q", s, long, s)
		}

		reparsed := ParseNameFill(long, FillNothing)
		if !parsed.EqualFold(reparsed) {
			t.Errorf("round-trip mismatch: %+v != %+v", parsed, reparsed)
		}
	})
}
// TestNameStringAllocs checks that Name.DisplayLong performs at most one
// heap allocation per call, measured with testing.AllocsPerRun.
func TestNameStringAllocs(t *testing.T) {
	name := ParseNameFill("example.com/ns/mistral:latest+Q4_0", FillNothing)
	allocs := testing.AllocsPerRun(1000, func() {
		keep(name.DisplayLong())
	})
	// One allocation is tolerated, so the failure message must state that
	// limit rather than claim zero allocations are expected.
	if allocs > 1 {
		t.Errorf("DisplayLong allocs = %v; want <= 1", allocs)
	}
}
// TestNamePath checks the output of Name.DisplayURLPath for complete,
// incomplete and empty names.
func TestNamePath(t *testing.T) {
	tests := []struct {
		in   string
		want string
	}{
		{in: "example.com/library/mistral:latest+Q4_0", want: "example.com/library/mistral:latest"},

		// incomplete
		{in: "example.com/library/mistral:latest", want: "example.com/library/mistral:latest"},
		{in: "", want: ""},
	}
	for _, tc := range tests {
		t.Run(tc.in, func(t *testing.T) {
			name := ParseNameFill(tc.in, FillNothing)
			t.Logf("ParseName(%q) = %#v", tc.in, name)
			got := name.DisplayURLPath()
			if got != tc.want {
				t.Errorf("got = %q; want %q", got, tc.want)
			}
		})
	}
}
// TestNameFilepath checks Name.Filepath and Name.FilepathNoBuild, comparing
// with forward slashes so the expectations hold on every platform.
func TestNameFilepath(t *testing.T) {
	type filepathCase struct {
		in          string
		want        string
		wantNoBuild string
	}
	tests := []filepathCase{
		{
			in:          "example.com/library/mistral:latest+Q4_0",
			want:        "example.com/library/mistral/latest/Q4_0",
			wantNoBuild: "example.com/library/mistral/latest",
		},
		{
			in:          "Example.Com/Library/Mistral:Latest+Q4_0",
			want:        "example.com/library/mistral/latest/Q4_0",
			wantNoBuild: "example.com/library/mistral/latest",
		},
		{
			in:          "Example.Com/Library/Mistral:Latest+Q4_0",
			want:        "example.com/library/mistral/latest/Q4_0",
			wantNoBuild: "example.com/library/mistral/latest",
		},
		{
			in:          "example.com/library/mistral:latest",
			want:        "example.com/library/mistral/latest",
			wantNoBuild: "example.com/library/mistral/latest",
		},
		{
			in:          "",
			want:        "",
			wantNoBuild: "",
		},
	}
	for _, tc := range tests {
		t.Run(tc.in, func(t *testing.T) {
			name := ParseNameFill(tc.in, FillNothing)
			t.Logf("ParseName(%q) = %#v", tc.in, name)

			full := filepath.ToSlash(name.Filepath())
			if full != tc.want {
				t.Errorf("got = %q; want %q", full, tc.want)
			}

			noBuild := filepath.ToSlash(name.FilepathNoBuild())
			if noBuild != tc.wantNoBuild {
				t.Errorf("got = %q; want %q", noBuild, tc.wantNoBuild)
			}
		})
	}
}
// TestParseNameFilepath checks ParseNameFromFilepath. Inputs are written
// with "/" and converted to the platform separator before parsing; the
// result is compared, ignoring case, with the Name parsed from want.
func TestParseNameFilepath(t *testing.T) {
	tests := []struct {
		in   string
		fill string // default is FillNothing
		want string
	}{
		{
			in:   "example.com/library/mistral/latest/Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "example.com/library/mistral/latest",
			fill: "?/?/?:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "example.com/library/mistral",
			fill: "?/?/?:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "example.com/library",
			want: "",
		},
		{
			in:   "example.com/",
			want: "",
		},
		{
			in:   "example.com/^/mistral/latest/Q4_0",
			want: "",
		},
		{
			in:   "example.com/library/mistral/../Q4_0",
			want: "",
		},
		{
			in:   "example.com/library/mistral/latest/Q4_0/extra",
			want: "",
		},
	}
	sep := string(filepath.Separator)
	for _, tc := range tests {
		t.Run(tc.in, func(t *testing.T) {
			path := strings.ReplaceAll(tc.in, "/", sep)
			fill := cmp.Or(tc.fill, FillNothing)
			want := ParseNameFill(tc.want, fill)
			got := ParseNameFromFilepath(path, fill)
			if !got.EqualFold(want) {
				t.Errorf("got = %q; want %q", got.DisplayLong(), tc.want)
			}
		})
	}
}
// TestParseNameFromPath checks ParseNameFromURLPath with and without a
// leading "/", with and without a fill string, and for paths that must be
// rejected.
func TestParseNameFromPath(t *testing.T) {
	tests := []struct {
		in   string
		want string
		fill string // default is FillNothing
	}{
		{
			in:   "example.com/library/mistral:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "/example.com/library/mistral:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "/example.com/library/mistral",
			want: "example.com/library/mistral",
		},
		{
			in:   "/example.com/library/mistral",
			fill: "?/?/?:latest+Q4_0",
			want: "example.com/library/mistral:latest+Q4_0",
		},
		{
			in:   "/example.com/library",
			want: "",
		},
		{
			in:   "/example.com/",
			want: "",
		},
		{
			in:   "/example.com/^/mistral/latest",
			want: "",
		},
	}
	for _, tc := range tests {
		t.Run(tc.in, func(t *testing.T) {
			fill := cmp.Or(tc.fill, FillNothing)
			name := ParseNameFromURLPath(tc.in, fill)
			if name.DisplayLong() != tc.want {
				t.Errorf("got = %q; want %q", name.DisplayLong(), tc.want)
			}
		})
	}
}
// ExampleName_MapHash shows that names differing only in case hash to the
// same key, while a name lacking the build part hashes to a different one.
func ExampleName_MapHash() {
	seen := map[uint64]bool{}
	for _, s := range []string{
		// key 1
		"mistral:latest+q4",
		"miSTRal:latest+Q4",
		"mistral:LATest+Q4",
		// key 2
		"mistral:LATest",
	} {
		seen[ParseNameFill(s, FillNothing).MapHash()] = true
	}

	fmt.Println(len(seen))

	// Output:
	// 2
}
// ExampleName_CompareFold_sort sorts names case-insensitively by passing
// Name.CompareFold to slices.SortFunc.
func ExampleName_CompareFold_sort() {
	inputs := []string{
		"mistral:latest",
		"mistRal:7b+q4",
		"MIstral:7b",
	}
	names := make([]Name, 0, len(inputs))
	for _, in := range inputs {
		names = append(names, ParseNameFill(in, FillNothing))
	}

	slices.SortFunc(names, Name.CompareFold)

	for i := range names {
		fmt.Println(names[i].DisplayLong())
	}

	// Output:
	// MIstral:7b
	// mistRal:7b+q4
	// mistral:latest
}
// ExampleName_completeAndResolved prints, for three names, whether the
// name is complete, whether it is resolved, and its digest.
func ExampleName_completeAndResolved() {
	inputs := []string{
		"x/y/z:latest+q4_0@sha123-abc",
		"x/y/z:latest+q4_0",
		"@sha123-abc",
	}
	for _, in := range inputs {
		name := ParseNameFill(in, FillNothing)
		complete, resolved := name.IsComplete(), name.IsResolved()
		fmt.Printf("complete:%v resolved:%v digest:%s\n", complete, resolved, name.Digest())
	}

	// Output:
	// complete:true resolved:true digest:sha123-abc
	// complete:true resolved:false digest:
	// complete:false resolved:true digest:sha123-abc
}
// ExampleName_DisplayShortest prints one name under progressively less
// specific masks, then a second name under the empty mask.
func ExampleName_DisplayShortest() {
	name := ParseNameFill("example.com/jmorganca/mistral:latest+Q4_0", FillNothing)
	for _, mask := range []string{
		"example.com/jmorganca/_:latest",
		"example.com/_/_:latest",
		"example.com/_/_:_",
		"_/_/_:_",
	} {
		fmt.Println(name.DisplayShortest(mask))
	}

	// Default
	name = ParseNameFill("registry.ollama.ai/library/mistral:latest+Q4_0", FillNothing)
	fmt.Println(name.DisplayShortest(""))

	// Output:
	// mistral
	// jmorganca/mistral
	// jmorganca/mistral:latest
	// example.com/jmorganca/mistral:latest
	// mistral
}
// keep returns v unchanged. The tests and benchmarks wrap results in it so
// that a computed value is consumed by a call.
func keep[T any](v T) T {
	return v
}

View File

@@ -1,2 +1,2 @@
go test fuzz v1
string("/0")
string("00@")

View File

@@ -1,2 +0,0 @@
go test fuzz v1
string("0//0")

View File

@@ -1,2 +0,0 @@
go test fuzz v1
string("0 /0")

View File

@@ -1,2 +0,0 @@
go test fuzz v1
string("+0/00000")

View File

@@ -1,2 +0,0 @@
go test fuzz v1
string(":")

View File

@@ -1,2 +0,0 @@
go test fuzz v1
string("0+.\xf2\x80\xf6\x9d00000\xe5\x99\xe6\xd900\xd90\xa60\x91\xdc0\xff\xbf\x99\xe800\xb9\xdc\xd6\xc300\x970\xfb\xfd0\xe0\x8a\xe1\xad\xd40\x9700\xa80\x980\xdd0000\xb00\x91000\xfe0\x89\x9b\x90\x93\x9f0\xe60\xf7\x84\xb0\x87\xa5\xff0\xa000\x9a\x85\xf6\x85\xfe\xa9\xf9\xe9\xde00\xf4\xe0\x8f\x81\xad\xde00\xd700\xaa\xe000000\xb1\xee0\x91")