cmd: handle sigint globally

This change also updates both client.do and client.stream to return ctx.Err(). Previously this error was skipped, so canceled contexts were silently ignored.
cmd: fix hide cursor
2026-02-28 04:56:37 -05:00 · 2025-02-19 10:46:25 -08:00 · 2025-02-19 09:43:44 -08:00
15 changed files with 117 additions and 723 deletions
--- a/README.md
+++ b/README.md
@@ -382,8 +382,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
 - [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)
 - [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivent endpoint with Ollama support for running locally)
- [AntSK](https://github.com/AIDotNet/AntSK) (Out-of-the-box & Adaptable RAG Chatbot)
- [MaxKB](https://github.com/1Panel-dev/MaxKB/) (Ready-to-use & flexible RAG Chatbot)

 ### Cloud

--- a/api/client.go
+++ b/api/client.go
@@ -126,7 +126,8 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
 			return err
 		}
 	}
-	return nil
+
+	return ctx.Err()
 }

 const maxBufferSize = 512 * format.KiloByte
@@ -189,7 +190,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 		}
 	}

-	return nil
+	return ctx.Err()
 }

 // GenerateResponseFunc is a function that [Client.Generate] invokes every time
--- a/benchmark/ggml_backend_benchmark_test.go
+++ b/benchmark/ggml_backend_benchmark_test.go
@@ -1,86 +0,0 @@
-package backend
-
-import (
-	"flag"
-	"fmt"
-	"io"
-	"log"
-	"os"
-	"testing"
-
-	"github.com/ollama/ollama/ml"
-	"github.com/ollama/ollama/model"
-	"github.com/ollama/ollama/server"
-
-	_ "github.com/ollama/ollama/model/models/llama"
-)
-
-var modelName = flag.String("m", "", "Name of the model to benchmark")
-
-func suppressOutput() (cleanup func()) {
-	oldStdout, oldStderr := os.Stdout, os.Stderr
-	os.Stdout, os.Stderr = nil, nil
-	log.SetOutput(io.Discard)
-
-	return func() {
-		os.Stdout, os.Stderr = oldStdout, oldStderr
-		log.SetOutput(os.Stderr)
-	}
-}
-
-func setupModel(b *testing.B) model.Model {
-	if *modelName == "" {
-		b.Fatal("Error: -m flag is required for benchmark tests")
-	}
-
-	sm, err := server.GetModel(*modelName)
-	if err != nil {
-		b.Fatal(err)
-	}
-
-	m, err := model.New(sm.ModelPath)
-	if err != nil {
-		b.Fatal(err)
-	}
-
-	m.Config().Cache.Init(m.Backend(), ml.DTypeF32, 2048)
-	return m
-}
-
-func BenchmarkGGMLOperations(b *testing.B) {
-	// loading the GGML back-end logs to standard out and makes the bench output messy
-	cleanup := suppressOutput()
-	defer cleanup()
-
-	b.Setenv("OLLAMA_BENCHMARK", "1")
-	b.Setenv("OLLAMA_BACKEND", "ggml")
-
-	m := setupModel(b)
-
-	// Sample input data
-	inputIDs := []int32{1, 2, 3, 4, 5}
-	options := model.Options{
-		Inputs:    inputIDs,
-		Positions: []int32{1, 2, 3, 4, 5},
-		Sequences: []int{1, 1, 1, 1, 1},
-		Outputs:   []int32{int32(len(inputIDs) - 1)},
-	}
-
-	b.ResetTimer()
-
-	for range b.N {
-		ctx := m.Backend().NewContext()
-		defer ctx.Close()
-
-		modelOutput, err := model.Forward(ctx, m, options)
-		if err != nil {
-			b.Fatal(fmt.Errorf("forward pass failed: %v", err))
-		}
-
-		ctx.Compute(modelOutput)
-
-		for _, op := range ctx.Timing() {
-			b.ReportMetric(op.Duration, fmt.Sprintf("%s_ms", op.Type))
-		}
-	}
-}
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -15,13 +15,11 @@ import (
 	"net"
 	"net/http"
 	"os"
-	"os/signal"
 	"path/filepath"
 	"runtime"
 	"strconv"
 	"strings"
 	"sync/atomic"
-	"syscall"
 	"time"

 	"github.com/containerd/console"
@@ -330,6 +328,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {
 			if err := PullHandler(cmd, []string{name}); err != nil {
 				return nil, err
 			}
+
 			return client.Show(cmd.Context(), &api.ShowRequest{Name: name})
 		}
 		return info, err
@@ -858,17 +857,6 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
 	spinner := progress.NewSpinner("")
 	p.Add("", spinner)

-	cancelCtx, cancel := context.WithCancel(cmd.Context())
-	defer cancel()
-
-	sigChan := make(chan os.Signal, 1)
-	signal.Notify(sigChan, syscall.SIGINT)
-
-	go func() {
-		<-sigChan
-		cancel()
-	}()
-
 	var state *displayResponseState = &displayResponseState{}
 	var latest api.ChatResponse
 	var fullResponse strings.Builder
@@ -903,10 +891,7 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
 		req.KeepAlive = opts.KeepAlive
 	}

-	if err := client.Chat(cancelCtx, req, fn); err != nil {
-		if errors.Is(err, context.Canceled) {
-			return nil, nil
-		}
+	if err := client.Chat(cmd.Context(), req, fn); err != nil {
 		return nil, err
 	}

@@ -946,17 +931,6 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 		generateContext = []int{}
 	}

-	ctx, cancel := context.WithCancel(cmd.Context())
-	defer cancel()
-
-	sigChan := make(chan os.Signal, 1)
-	signal.Notify(sigChan, syscall.SIGINT)
-
-	go func() {
-		<-sigChan
-		cancel()
-	}()
-
 	var state *displayResponseState = &displayResponseState{}

 	fn := func(response api.GenerateResponse) error {
@@ -992,10 +966,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 		KeepAlive: opts.KeepAlive,
 	}

-	if err := client.Generate(ctx, &request, fn); err != nil {
-		if errors.Is(err, context.Canceled) {
-			return nil
-		}
+	if err := client.Generate(cmd.Context(), &request, fn); err != nil {
 		return err
 	}

@@ -1017,8 +988,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 		latest.Summary()
 	}

-	ctx = context.WithValue(cmd.Context(), generateContextKey("context"), latest.Context)
-	cmd.SetContext(ctx)
+	cmd.SetContext(context.WithValue(cmd.Context(), generateContextKey("context"), latest.Context))

 	return nil
 }
--- a/cmd/cmd_test.go
+++ b/cmd/cmd_test.go
@@ -10,7 +10,6 @@ import (
 	"os"
 	"strings"
 	"testing"
-	"time"

 	"github.com/google/go-cmp/cmp"
 	"github.com/spf13/cobra"
@@ -491,96 +490,6 @@ func TestPushHandler(t *testing.T) {
 	}
 }

-func TestListHandler(t *testing.T) {
-	tests := []struct {
-		name           string
-		args           []string
-		serverResponse []api.ListModelResponse
-		expectedError  string
-		expectedOutput string
-	}{
-		{
-			name: "list all models",
-			args: []string{},
-			serverResponse: []api.ListModelResponse{
-				{Name: "model1", Digest: "sha256:abc123", Size: 1024, ModifiedAt: time.Now().Add(-24 * time.Hour)},
-				{Name: "model2", Digest: "sha256:def456", Size: 2048, ModifiedAt: time.Now().Add(-48 * time.Hour)},
-			},
-			expectedOutput: "NAME      ID              SIZE      MODIFIED     \n" +
-				"model1    sha256:abc12    1.0 KB    24 hours ago    \n" +
-				"model2    sha256:def45    2.0 KB    2 days ago      \n",
-		},
-		{
-			name: "filter models by prefix",
-			args: []string{"model1"},
-			serverResponse: []api.ListModelResponse{
-				{Name: "model1", Digest: "sha256:abc123", Size: 1024, ModifiedAt: time.Now().Add(-24 * time.Hour)},
-				{Name: "model2", Digest: "sha256:def456", Size: 2048, ModifiedAt: time.Now().Add(-24 * time.Hour)},
-			},
-			expectedOutput: "NAME      ID              SIZE      MODIFIED     \n" +
-				"model1    sha256:abc12    1.0 KB    24 hours ago    \n",
-		},
-		{
-			name:          "server error",
-			args:          []string{},
-			expectedError: "server error",
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-				if r.URL.Path != "/api/tags" || r.Method != http.MethodGet {
-					t.Errorf("unexpected request to %s %s", r.Method, r.URL.Path)
-					http.Error(w, "not found", http.StatusNotFound)
-					return
-				}
-
-				if tt.expectedError != "" {
-					http.Error(w, tt.expectedError, http.StatusInternalServerError)
-					return
-				}
-
-				response := api.ListResponse{Models: tt.serverResponse}
-				if err := json.NewEncoder(w).Encode(response); err != nil {
-					t.Fatal(err)
-				}
-			}))
-			defer mockServer.Close()
-
-			t.Setenv("OLLAMA_HOST", mockServer.URL)
-
-			cmd := &cobra.Command{}
-			cmd.SetContext(context.TODO())
-
-			// Capture stdout
-			oldStdout := os.Stdout
-			r, w, _ := os.Pipe()
-			os.Stdout = w
-
-			err := ListHandler(cmd, tt.args)
-
-			// Restore stdout and get output
-			w.Close()
-			os.Stdout = oldStdout
-			output, _ := io.ReadAll(r)
-
-			if tt.expectedError == "" {
-				if err != nil {
-					t.Errorf("expected no error, got %v", err)
-				}
-				if got := string(output); got != tt.expectedOutput {
-					t.Errorf("expected output:\n%s\ngot:\n%s", tt.expectedOutput, got)
-				}
-			} else {
-				if err == nil || !strings.Contains(err.Error(), tt.expectedError) {
-					t.Errorf("expected error containing %q, got %v", tt.expectedError, err)
-				}
-			}
-		})
-	}
-}
-
 func TestCreateHandler(t *testing.T) {
 	tests := []struct {
 		name           string
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -167,8 +167,6 @@ var (
 	MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
 	// Enable the new Ollama engine
 	NewEngine = Bool("OLLAMA_NEW_ENGINE")
-	// Ollama is running in a benchmark context, additional timing data will be collected.
-	Benchmark = Bool("OLLAMA_BENCHMARK")
 )

 func String(s string) func() string {
--- a/kvcache/causal_test.go
+++ b/kvcache/causal_test.go
@@ -352,10 +352,6 @@ func (c *testContext) MaxTensors() int {
 	return 10
 }

-func (c *testContext) Timing() []ml.OpTiming {
-	return []ml.OpTiming{}
-}
-
 func (c *testContext) Close() {}

 type testTensor struct {
--- a/llama/patches/0018-remove-amx.patch
+++ b/llama/patches/0018-remove-amx.patch
@@ -1,24 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Michael Yang <mxyng@pm.me>
-Date: Tue, 18 Feb 2025 14:47:21 -0800
-Subject: [PATCH] remove amx
-
---
- ggml/src/CMakeLists.txt | 4 ----
- 1 file changed, 4 deletions(-)
-
-diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
-index 72b488dd..50828717 100644
--- a/ggml/src/CMakeLists.txt
-+++ b/ggml/src/CMakeLists.txt
-@@ -293,10 +293,6 @@ if (GGML_CPU_ALL_VARIANTS)
-     ggml_add_cpu_backend_variant(skylakex       AVX F16C AVX2 FMA AVX512)
-     ggml_add_cpu_backend_variant(icelake        AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
-     ggml_add_cpu_backend_variant(alderlake      AVX F16C AVX2 FMA AVX_VNNI)
-    if (NOT MSVC)
-        # MSVC doesn't support AMX
-        ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
-    endif()
- else ()
-     ggml_add_cpu_backend_variant_impl("")
- endif()
--- a/llama/patches/0018-use-std-filesystem-path-instead-of-wstring.patch
+++ b/llama/patches/0018-use-std-filesystem-path-instead-of-wstring.patch
@@ -1,285 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: jmorganca <jmorganca@gmail.com>
-Date: Sun, 16 Feb 2025 20:00:22 -0500
-Subject: [PATCH] use std::filesystem::path instead of wstring
-
---
- ggml/src/ggml-backend-reg.cpp | 116 ++++++++++++----------------------
- 1 file changed, 40 insertions(+), 76 deletions(-)
-
-diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
-index 84b21dd8..de78feae 100644
--- a/ggml/src/ggml-backend-reg.cpp
-+++ b/ggml/src/ggml-backend-reg.cpp
-@@ -72,16 +72,6 @@
- #    pragma clang diagnostic ignored "-Wdeprecated-declarations"
- #endif
- 
-static std::wstring utf8_to_utf16(const std::string & str) {
-    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
-    return converter.from_bytes(str);
-}
-
-static std::string utf16_to_utf8(const std::wstring & str) {
-    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
-    return converter.to_bytes(str);
-}
-
- #if defined(__clang__)
- #    pragma clang diagnostic pop
- #endif
-@@ -96,12 +86,12 @@ struct dl_handle_deleter {
-     }
- };
- 
-static dl_handle * dl_load_library(const std::wstring & path) {
-+static dl_handle * dl_load_library(const std::filesystem::path & path) {
-     // suppress error dialogs for missing DLLs
-     DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
-     SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
- 
-    HMODULE handle = LoadLibraryW(path.c_str());
-+    HMODULE handle = LoadLibraryW(path.wstring().c_str());
- 
-     SetErrorMode(old_mode);
- 
-@@ -129,8 +119,8 @@ struct dl_handle_deleter {
-     }
- };
- 
-static void * dl_load_library(const std::wstring & path) {
-    dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);
-+static void * dl_load_library(const std::filesystem::path & path) {
-+    dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
- 
-     return handle;
- }
-@@ -222,11 +212,11 @@ struct ggml_backend_registry {
-         );
-     }
- 
-    ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
-+    ggml_backend_reg_t load_backend(const std::filesystem::path & path, bool silent) {
-         dl_handle_ptr handle { dl_load_library(path) };
-         if (!handle) {
-             if (!silent) {
-                GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
-+                GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path.string().c_str());
-             }
-             return nullptr;
-         }
-@@ -234,7 +224,7 @@ struct ggml_backend_registry {
-         auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
-         if (score_fn && score_fn() == 0) {
-             if (!silent) {
-                GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
-+                GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path.string().c_str());
-             }
-             return nullptr;
-         }
-@@ -242,7 +232,7 @@ struct ggml_backend_registry {
-         auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
-         if (!backend_init_fn) {
-             if (!silent) {
-                GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
-+                GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path.string().c_str());
-             }
-             return nullptr;
-         }
-@@ -251,16 +241,16 @@ struct ggml_backend_registry {
-         if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
-             if (!silent) {
-                 if (!reg) {
-                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
-+                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path.string().c_str());
-                 } else {
-                     GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
-                        __func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
-+                        __func__, path.string().c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
-                 }
-             }
-             return nullptr;
-         }
- 
-        GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
-+        GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path.string().c_str());
- 
-         register_backend(reg, score_fn ? score_fn() : -1, std::move(handle));
- 
-@@ -396,14 +386,14 @@ ggml_backend_t ggml_backend_init_best(void) {
- 
- // Dynamic loading
- ggml_backend_reg_t ggml_backend_load(const char * path) {
-    return get_reg().load_backend(utf8_to_utf16(path), false);
-+    return get_reg().load_backend(path, false);
- }
- 
- void ggml_backend_unload(ggml_backend_reg_t reg) {
-     get_reg().unload_backend(reg, true);
- }
- 
-static std::wstring get_executable_path() {
-+static std::filesystem::path get_executable_path() {
- #if defined(__APPLE__)
-     // get executable path
-     std::vector<char> path;
-@@ -415,15 +405,9 @@ static std::wstring get_executable_path() {
-         }
-         path.resize(size);
-     }
-    std::string base_path(path.data(), size);
-    // remove executable name
-    auto last_slash = base_path.find_last_of('/');
-    if (last_slash != std::string::npos) {
-        base_path = base_path.substr(0, last_slash);
-    }
-    return utf8_to_utf16(base_path + "/");
-+
-+    return std::filesystem::path(path.data()).parent_path();
- #elif defined(__linux__) || defined(__FreeBSD__)
-    std::string base_path = ".";
-     std::vector<char> path(1024);
-     while (true) {
-         // get executable path
-@@ -436,76 +420,56 @@ static std::wstring get_executable_path() {
-             break;
-         }
-         if (len < (ssize_t) path.size()) {
-            base_path = std::string(path.data(), len);
-            // remove executable name
-            auto last_slash = base_path.find_last_of('/');
-            if (last_slash != std::string::npos) {
-                base_path = base_path.substr(0, last_slash);
-            }
-            break;
-+            return std::filesystem::path(path.data()).parent_path();
-         }
-         path.resize(path.size() * 2);
-     }
-
-    return utf8_to_utf16(base_path + "/");
- #elif defined(_WIN32)
-     std::vector<wchar_t> path(MAX_PATH);
-     DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
-     if (len == 0) {
-         return {};
-     }
-    std::wstring base_path(path.data(), len);
-    // remove executable name
-    auto last_slash = base_path.find_last_of('\\');
-    if (last_slash != std::string::npos) {
-        base_path = base_path.substr(0, last_slash);
-    }
-    return base_path + L"\\";
-#else
-    return {};
-#endif
-}
- 
-static std::wstring backend_filename_prefix() {
-#ifdef _WIN32
-    return L"ggml-";
-+    return std::filesystem::path(path.data()).parent_path();
- #else
-    return L"libggml-";
-+    return {};
- #endif
- }
- 
-static std::wstring backend_filename_suffix() {
-+static std::string backend_filename_prefix() {
- #ifdef _WIN32
-    return L".dll";
-+    return "ggml-";
- #else
-    return L".so";
-+    return "libggml-";
- #endif
- }
- 
-static std::wstring path_separator() {
-+static std::string backend_filename_suffix() {
- #ifdef _WIN32
-    return L"\\";
-+    return ".dll";
- #else
-    return L"/";
-+    return ".so";
- #endif
- }
- 
- static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
-     // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
-      // TODO: search system paths
-    std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
-    std::vector<std::wstring> search_paths;
-+    namespace fs = std::filesystem;
-+    std::string file_prefix = backend_filename_prefix() + name + "-";
-+    std::vector<fs::path> search_paths;
-+
-     if (user_search_path == nullptr) {
-        search_paths.push_back(L"." + path_separator());
-+        search_paths.push_back(fs::current_path());
-         search_paths.push_back(get_executable_path());
-     } else {
-        search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
-+        search_paths.push_back(fs::u8path(user_search_path));
-     }
- 
-     int best_score = 0;
-    std::wstring best_path;
-+    fs::path best_path;
- 
-    namespace fs = std::filesystem;
-     for (const auto & search_path : search_paths) {
-         if (!fs::exists(search_path)) {
-             continue;
-@@ -514,31 +478,31 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
-         for (const auto & entry : dir_it) {
-             try {
-                 if (entry.is_regular_file()) {
-                    std::wstring filename = entry.path().filename().wstring();
-                    std::wstring ext = entry.path().extension().wstring();
-+                    std::string filename = entry.path().filename().string();
-+                    std::string ext = entry.path().extension().string();
-                     if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
-                        dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
-+                        dl_handle_ptr handle { dl_load_library(entry.path()) };
-                         if (!handle) {
-                            GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
-+                            GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
-                             continue;
-                         }
- 
-                         auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
-                         if (!score_fn) {
-                            GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
-+                            GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
-                             continue;
-                         }
- 
-                         int s = score_fn();
-                        GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
-+                        GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
-                         if (s > best_score) {
-                             best_score = s;
-                            best_path = entry.path().wstring();
-+                            best_path = entry.path();
-                         }
-                     }
-                 }
-             } catch (const std::exception & e) {
-                GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), e.what());
-+                GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, entry.path().string().c_str(), e.what());
-             }
-         }
-     }
-@@ -546,7 +510,7 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
-     if (best_score == 0) {
-         // try to load the base backend
-         for (const auto & search_path : search_paths) {
-            std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
-+            fs::path path = fs::path(search_path) / (backend_filename_prefix() + name + backend_filename_suffix());
-             if (fs::exists(path)) {
-                 return get_reg().load_backend(path, silent);
-             }
--- a/main.go
+++ b/main.go
@@ -2,6 +2,8 @@ package main

 import (
 	"context"
+	"os"
+	"os/signal"

 	"github.com/spf13/cobra"

@@ -9,5 +11,15 @@ import (
 )

 func main() {
-	cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background()))
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	sigChan := make(chan os.Signal, 1)
+	signal.Notify(sigChan, os.Interrupt)
+	go func() {
+		<-sigChan
+		cancel()
+	}()
+
+	cobra.CheckErr(cmd.NewCLI().ExecuteContext(ctx))
 }
--- a/ml/backend.go
+++ b/ml/backend.go
@@ -2,7 +2,6 @@ package ml

 import (
 	"bytes"
-	"cmp"
 	"encoding/binary"
 	"fmt"
 	"os"
@@ -38,7 +37,7 @@ func RegisterBackend(name string, f func(*os.File) (Backend, error)) {
 }

 func NewBackend(f *os.File) (Backend, error) {
-	if backend, ok := backends[cmp.Or(os.Getenv("OLLAMA_BACKEND"), "ggml")]; ok {
+	if backend, ok := backends["ggml"]; ok {
 		return backend(f)
 	}

@@ -54,30 +53,6 @@ type Context interface {
 	Compute(...Tensor)
 	MaxTensors() int
 	Close()
-
-	Timing() []OpTiming
-}
-
-// OpType is the type of operation performed during a forward pass.
-type OpType string
-
-const (
-	View       OpType = "View"
-	Copy       OpType = "Copy"
-	Reshape    OpType = "Reshape"
-	Permute    OpType = "Permute"
-	Contiguous OpType = "Contiguous"
-	Input      OpType = "Input"
-	ComputeOp  OpType = "Compute"
-	Transpose  OpType = "Transpose"
-)
-
-// OpTiming stores the timing information for a single operation.
-type OpTiming struct {
-	Type      OpType
-	Operation string
-	Duration  float64
-	Order     int
 }

 type Tensor interface {
--- a/ml/backend/ggml/ggml.go
+++ b/ml/backend/ggml/ggml.go
@@ -4,8 +4,6 @@ package ggml
 #cgo CPPFLAGS: -I${SRCDIR}/ggml/include
 #include <stdlib.h>
 #include <stdint.h>
-#include <time.h>
-#include <string.h>
 #include "ggml.h"
 #include "ggml-cpu.h"
 #include "ggml-backend.h"
@@ -23,54 +21,6 @@ COMPILER inline get_compiler() {
 #endif
 }

-// Define a fixed-size struct to store timing data
-#define MAX_TENSOR_NAME 256
-#define MAX_TIMINGS 1000
-
-typedef struct {
-    char tensor_name[MAX_TENSOR_NAME];
-    double duration_ms;
-} timing_entry;
-
-typedef struct {
-    timing_entry entries[MAX_TIMINGS];
-    int count;
-} timing_data;
-
-// Global timing data structure
-timing_data g_timings = {0};
-
-double get_time_ms() {
-    struct timespec ts;
-    clock_gettime(CLOCK_MONOTONIC, &ts);
-    return ts.tv_sec * 1000.0 + ts.tv_nsec / 1000000.0;
-}
-
-bool debug_callback(struct ggml_tensor * t, bool ask, void * user_data) {
-    static double start_time;
-    static char current_tensor[MAX_TENSOR_NAME];
-
-    if (ask) {
-        start_time = get_time_ms();
-        strncpy(current_tensor, t->name, MAX_TENSOR_NAME - 1);
-        current_tensor[MAX_TENSOR_NAME - 1] = '\0';
-    } else {
-        double end_time = get_time_ms();
-        double duration = end_time - start_time;
-
-        if (g_timings.count < MAX_TIMINGS) {
-            strncpy(g_timings.entries[g_timings.count].tensor_name, current_tensor, MAX_TENSOR_NAME - 1);
-            g_timings.entries[g_timings.count].duration_ms = duration;
-            g_timings.count++;
-        }
-    }
-    return true;
-}
-
-void clear_timings() {
-    g_timings.count = 0;
-}
-
 */
 import "C"

@@ -79,11 +29,9 @@ import (
 	"io"
 	"log/slog"
 	"os"
-	"strings"
 	"sync"
 	"unsafe"

-	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 	fs "github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/ml"
@@ -308,62 +256,7 @@ func (c *Context) Forward(t ml.Tensor) {
 	C.ggml_build_forward_expand(c.graph, t.(*Tensor).t)
 }

-// Timing retrieves the collected timing data
-func (c *Context) Timing() []ml.OpTiming {
-	sequence := make([]ml.OpTiming, C.g_timings.count)
-
-	for i := range int(C.g_timings.count) {
-		entry := C.g_timings.entries[i]
-		tensorName := C.GoString(&entry.tensor_name[0])
-
-		// Determine operation type and description based on tensor name
-		var opType ml.OpType
-		var opDesc string
-
-		switch {
-		case strings.Contains(tensorName, "(view)"):
-			opType, opDesc = ml.View, "Memory view"
-		case strings.Contains(tensorName, "(copy)") || strings.Contains(tensorName, "(copy of"):
-			opType, opDesc = ml.Copy, "Memory copy"
-		case strings.Contains(tensorName, "(reshaped)"):
-			opType, opDesc = ml.Reshape, "Reshape"
-		case strings.Contains(tensorName, "(permuted)"):
-			opType, opDesc = ml.Permute, "Permute dimensions"
-		case strings.Contains(tensorName, "(cont)"):
-			opType, opDesc = ml.Contiguous, "Make contiguous"
-		case strings.Contains(tensorName, "(transposed)"):
-			opType, opDesc = ml.Transpose, "Transpose"
-		case strings.HasPrefix(tensorName, "leaf_"):
-			opType, opDesc = ml.Input, fmt.Sprintf("Input tensor %s", tensorName)
-		case strings.HasPrefix(tensorName, "node_"):
-			opType, opDesc = ml.ComputeOp, fmt.Sprintf("Computation %s", tensorName)
-		default:
-			opType, opDesc = "Unknown", tensorName
-		}
-
-		sequence[i] = ml.OpTiming{
-			Type:      opType,
-			Operation: opDesc,
-			Duration:  float64(entry.duration_ms),
-			Order:     i,
-		}
-	}
-
-	return sequence
-}
-
 func (c *Context) Compute(tensors ...ml.Tensor) {
-	if envconfig.Benchmark() {
-		// Clear previous timings before new computation
-		C.clear_timings()
-
-		C.ggml_backend_sched_set_eval_callback(
-			c.sched,
-			C.ggml_backend_eval_callback(C.debug_callback),
-			nil,
-		)
-	}
-
 	C.ggml_backend_sched_graph_compute_async(c.sched, c.graph)

 	needSync := true
--- a/ml/backend/ggml/ggml/src/CMakeLists.txt
+++ b/ml/backend/ggml/ggml/src/CMakeLists.txt
@@ -293,6 +293,10 @@ if (GGML_CPU_ALL_VARIANTS)
    ggml_add_cpu_backend_variant(skylakex       AVX F16C AVX2 FMA AVX512)
    ggml_add_cpu_backend_variant(icelake        AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
    ggml_add_cpu_backend_variant(alderlake      AVX F16C AVX2 FMA AVX_VNNI)
+    if (NOT MSVC)
+        # MSVC doesn't support AMX
+        ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
+    endif()
 else ()
    ggml_add_cpu_backend_variant_impl("")
 endif()
--- a/ml/backend/ggml/ggml/src/ggml-backend-reg.cpp
+++ b/ml/backend/ggml/ggml/src/ggml-backend-reg.cpp
@@ -72,6 +72,16 @@
 #    pragma clang diagnostic ignored "-Wdeprecated-declarations"
 #endif

+static std::wstring utf8_to_utf16(const std::string & str) {
+    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
+    return converter.from_bytes(str);
+}
+
+static std::string utf16_to_utf8(const std::wstring & str) {
+    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
+    return converter.to_bytes(str);
+}
+
 #if defined(__clang__)
 #    pragma clang diagnostic pop
 #endif
@@ -86,12 +96,12 @@ struct dl_handle_deleter {
    }
 };

-static dl_handle * dl_load_library(const std::filesystem::path & path) {
+static dl_handle * dl_load_library(const std::wstring & path) {
    // suppress error dialogs for missing DLLs
    DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
    SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);

-    HMODULE handle = LoadLibraryW(path.wstring().c_str());
+    HMODULE handle = LoadLibraryW(path.c_str());

    SetErrorMode(old_mode);

@@ -119,8 +129,8 @@ struct dl_handle_deleter {
    }
 };

-static void * dl_load_library(const std::filesystem::path & path) {
-    dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
+static void * dl_load_library(const std::wstring & path) {
+    dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);

    return handle;
 }
@@ -212,11 +222,11 @@ struct ggml_backend_registry {
        );
    }

-    ggml_backend_reg_t load_backend(const std::filesystem::path & path, bool silent) {
+    ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
        dl_handle_ptr handle { dl_load_library(path) };
        if (!handle) {
            if (!silent) {
-                GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path.string().c_str());
+                GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
            }
            return nullptr;
        }
@@ -224,7 +234,7 @@ struct ggml_backend_registry {
        auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
        if (score_fn && score_fn() == 0) {
            if (!silent) {
-                GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path.string().c_str());
+                GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
            }
            return nullptr;
        }
@@ -232,7 +242,7 @@ struct ggml_backend_registry {
        auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
        if (!backend_init_fn) {
            if (!silent) {
-                GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path.string().c_str());
+                GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
            }
            return nullptr;
        }
@@ -241,16 +251,16 @@ struct ggml_backend_registry {
        if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
            if (!silent) {
                if (!reg) {
-                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path.string().c_str());
+                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
                } else {
                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
-                        __func__, path.string().c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
+                        __func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
                }
            }
            return nullptr;
        }

-        GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path.string().c_str());
+        GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());

        register_backend(reg, score_fn ? score_fn() : -1, std::move(handle));

@@ -386,14 +396,14 @@ ggml_backend_t ggml_backend_init_best(void) {

 // Dynamic loading
 ggml_backend_reg_t ggml_backend_load(const char * path) {
-    return get_reg().load_backend(path, false);
+    return get_reg().load_backend(utf8_to_utf16(path), false);
 }

 void ggml_backend_unload(ggml_backend_reg_t reg) {
    get_reg().unload_backend(reg, true);
 }

-static std::filesystem::path get_executable_path() {
+static std::wstring get_executable_path() {
 #if defined(__APPLE__)
    // get executable path
    std::vector<char> path;
@@ -405,9 +415,15 @@ static std::filesystem::path get_executable_path() {
        }
        path.resize(size);
    }
-
-    return std::filesystem::path(path.data()).parent_path();
+    std::string base_path(path.data(), size);
+    // remove executable name
+    auto last_slash = base_path.find_last_of('/');
+    if (last_slash != std::string::npos) {
+        base_path = base_path.substr(0, last_slash);
+    }
+    return utf8_to_utf16(base_path + "/");
 #elif defined(__linux__) || defined(__FreeBSD__)
+    std::string base_path = ".";
    std::vector<char> path(1024);
    while (true) {
        // get executable path
@@ -420,56 +436,76 @@ static std::filesystem::path get_executable_path() {
            break;
        }
        if (len < (ssize_t) path.size()) {
-            return std::filesystem::path(path.data()).parent_path();
+            base_path = std::string(path.data(), len);
+            // remove executable name
+            auto last_slash = base_path.find_last_of('/');
+            if (last_slash != std::string::npos) {
+                base_path = base_path.substr(0, last_slash);
+            }
+            break;
        }
        path.resize(path.size() * 2);
    }
+
+    return utf8_to_utf16(base_path + "/");
 #elif defined(_WIN32)
    std::vector<wchar_t> path(MAX_PATH);
    DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
    if (len == 0) {
        return {};
    }
-
-    return std::filesystem::path(path.data()).parent_path();
+    std::wstring base_path(path.data(), len);
+    // remove executable name
+    auto last_slash = base_path.find_last_of('\\');
+    if (last_slash != std::string::npos) {
+        base_path = base_path.substr(0, last_slash);
+    }
+    return base_path + L"\\";
 #else
    return {};
 #endif
 }

-static std::string backend_filename_prefix() {
+static std::wstring backend_filename_prefix() {
 #ifdef _WIN32
-    return "ggml-";
+    return L"ggml-";
 #else
-    return "libggml-";
+    return L"libggml-";
 #endif
 }

-static std::string backend_filename_suffix() {
+static std::wstring backend_filename_suffix() {
 #ifdef _WIN32
-    return ".dll";
+    return L".dll";
 #else
-    return ".so";
+    return L".so";
+#endif
+}
+
+static std::wstring path_separator() {
+#ifdef _WIN32
+    return L"\\";
+#else
+    return L"/";
 #endif
 }

 static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
    // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
     // TODO: search system paths
-    namespace fs = std::filesystem;
-    std::string file_prefix = backend_filename_prefix() + name + "-";
-    std::vector<fs::path> search_paths;
-
+    std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
+    std::vector<std::wstring> search_paths;
    if (user_search_path == nullptr) {
-        search_paths.push_back(fs::current_path());
+        search_paths.push_back(L"." + path_separator());
        search_paths.push_back(get_executable_path());
    } else {
-        search_paths.push_back(fs::u8path(user_search_path));
+        search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
    }

    int best_score = 0;
-    fs::path best_path;
+    std::wstring best_path;

+    namespace fs = std::filesystem;
    for (const auto & search_path : search_paths) {
        if (!fs::exists(search_path)) {
            continue;
@@ -478,31 +514,31 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
        for (const auto & entry : dir_it) {
            try {
                if (entry.is_regular_file()) {
-                    std::string filename = entry.path().filename().string();
-                    std::string ext = entry.path().extension().string();
+                    std::wstring filename = entry.path().filename().wstring();
+                    std::wstring ext = entry.path().extension().wstring();
                    if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
-                        dl_handle_ptr handle { dl_load_library(entry.path()) };
+                        dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
                        if (!handle) {
-                            GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
+                            GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
                            continue;
                        }

                        auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
                        if (!score_fn) {
-                            GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
+                            GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
                            continue;
                        }

                        int s = score_fn();
-                        GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
+                        GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
                        if (s > best_score) {
                            best_score = s;
-                            best_path = entry.path();
+                            best_path = entry.path().wstring();
                        }
                    }
                }
            } catch (const std::exception & e) {
-                GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, entry.path().string().c_str(), e.what());
+                GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), e.what());
            }
        }
    }
@@ -510,7 +546,7 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
    if (best_score == 0) {
        // try to load the base backend
        for (const auto & search_path : search_paths) {
-            fs::path path = fs::path(search_path) / (backend_filename_prefix() + name + backend_filename_suffix());
+            std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
            if (fs::exists(path)) {
                return get_reg().load_backend(path, silent);
            }
--- a/progress/progress.go
+++ b/progress/progress.go
@@ -49,29 +49,37 @@ func (p *Progress) stop() bool {
 func (p *Progress) Stop() bool {
 	stopped := p.stop()
 	if stopped {
-		fmt.Fprint(p.w, "\n")
-		p.w.Flush()
+		fmt.Fprintln(p.w)
 	}
+
+	// show cursor
+	fmt.Fprint(p.w, "\033[?25h")
+	p.w.Flush()
 	return stopped
 }

+// StopAndClear stops the progress display via p.stop and, when that call
+// reports true, erases p.pos lines, working upward from the line the cursor
+// is on. It then re-enables the cursor, flushes p.w, and returns the value
+// reported by p.stop.
 func (p *Progress) StopAndClear() bool {
-	defer p.w.Flush()
-
-	fmt.Fprint(p.w, "\033[?25l")
-	defer fmt.Fprint(p.w, "\033[?25h")
-
 	stopped := p.stop()
 	if stopped {
 		// clear all progress lines
+		// "\033[A" steps up one line; "\033[2K\033[1G" erases the line and
+		// returns to column 1. The erase must run on every iteration:
+		// erasing only after the loop would leave the lower lines on screen.
 		for i := range p.pos {
 			if i > 0 {
 				fmt.Fprint(p.w, "\033[A")
 			}
 			fmt.Fprint(p.w, "\033[2K\033[1G")
 		}
 	}

+	// show cursor
+	fmt.Fprint(p.w, "\033[?25h")
+	p.w.Flush()
 	return stopped
 }

@@ -86,19 +86,13 @@ func (p *Progress) render() {
 	p.mu.Lock()
 	defer p.mu.Unlock()

-	defer p.w.Flush()
-
-	// eliminate flickering on terminals that support synchronized output
 	fmt.Fprint(p.w, "\033[?2026h")
 	defer fmt.Fprint(p.w, "\033[?2026l")

-	fmt.Fprint(p.w, "\033[?25l")
-	defer fmt.Fprint(p.w, "\033[?25h")
-
-	// move the cursor back to the beginning
 	for range p.pos - 1 {
 		fmt.Fprint(p.w, "\033[A")
 	}
+
 	fmt.Fprint(p.w, "\033[1G")

 	// render progress lines
@@ -110,10 +104,13 @@ func (p *Progress) render() {
 	}

 	p.pos = len(p.states)
+	p.w.Flush()
 }

 func (p *Progress) start() {
 	p.ticker = time.NewTicker(100 * time.Millisecond)
+	// hide cursor
+	fmt.Fprint(p.w, "\033[?25l")
 	for range p.ticker.C {
 		p.render()
 	}