Compare commits

..

2 Commits

Author SHA1 Message Date
Michael Yang
fcfbb06f1b cmd: handle sigint globally
This change also updates both client.do and client.stream to return
ctx.Err(). Previously this error is skipped so canceled contexts are
silently ignored
2025-02-19 10:46:25 -08:00
Michael Yang
e8d35d0de0 cmd: fix hide cursor
hides the cursor for the entire progress rather than each render cycle
2025-02-19 09:43:44 -08:00
15 changed files with 117 additions and 723 deletions

View File

@@ -382,8 +382,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
- [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)
- [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivent endpoint with Ollama support for running locally)
- [AntSK](https://github.com/AIDotNet/AntSK) (Out-of-the-box & Adaptable RAG Chatbot)
- [MaxKB](https://github.com/1Panel-dev/MaxKB/) (Ready-to-use & flexible RAG Chatbot)
### Cloud

View File

@@ -126,7 +126,8 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
return err
}
}
return nil
return ctx.Err()
}
const maxBufferSize = 512 * format.KiloByte
@@ -189,7 +190,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
}
}
return nil
return ctx.Err()
}
// GenerateResponseFunc is a function that [Client.Generate] invokes every time

View File

@@ -1,86 +0,0 @@
package backend
import (
"flag"
"fmt"
"io"
"log"
"os"
"testing"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/model"
"github.com/ollama/ollama/server"
_ "github.com/ollama/ollama/model/models/llama"
)
var modelName = flag.String("m", "", "Name of the model to benchmark")
func suppressOutput() (cleanup func()) {
oldStdout, oldStderr := os.Stdout, os.Stderr
os.Stdout, os.Stderr = nil, nil
log.SetOutput(io.Discard)
return func() {
os.Stdout, os.Stderr = oldStdout, oldStderr
log.SetOutput(os.Stderr)
}
}
func setupModel(b *testing.B) model.Model {
if *modelName == "" {
b.Fatal("Error: -m flag is required for benchmark tests")
}
sm, err := server.GetModel(*modelName)
if err != nil {
b.Fatal(err)
}
m, err := model.New(sm.ModelPath)
if err != nil {
b.Fatal(err)
}
m.Config().Cache.Init(m.Backend(), ml.DTypeF32, 2048)
return m
}
func BenchmarkGGMLOperations(b *testing.B) {
// loading the GGML back-end logs to standard out and makes the bench output messy
cleanup := suppressOutput()
defer cleanup()
b.Setenv("OLLAMA_BENCHMARK", "1")
b.Setenv("OLLAMA_BACKEND", "ggml")
m := setupModel(b)
// Sample input data
inputIDs := []int32{1, 2, 3, 4, 5}
options := model.Options{
Inputs: inputIDs,
Positions: []int32{1, 2, 3, 4, 5},
Sequences: []int{1, 1, 1, 1, 1},
Outputs: []int32{int32(len(inputIDs) - 1)},
}
b.ResetTimer()
for range b.N {
ctx := m.Backend().NewContext()
defer ctx.Close()
modelOutput, err := model.Forward(ctx, m, options)
if err != nil {
b.Fatal(fmt.Errorf("forward pass failed: %v", err))
}
ctx.Compute(modelOutput)
for _, op := range ctx.Timing() {
b.ReportMetric(op.Duration, fmt.Sprintf("%s_ms", op.Type))
}
}
}

View File

@@ -15,13 +15,11 @@ import (
"net"
"net/http"
"os"
"os/signal"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync/atomic"
"syscall"
"time"
"github.com/containerd/console"
@@ -330,6 +328,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {
if err := PullHandler(cmd, []string{name}); err != nil {
return nil, err
}
return client.Show(cmd.Context(), &api.ShowRequest{Name: name})
}
return info, err
@@ -858,17 +857,6 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
spinner := progress.NewSpinner("")
p.Add("", spinner)
cancelCtx, cancel := context.WithCancel(cmd.Context())
defer cancel()
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT)
go func() {
<-sigChan
cancel()
}()
var state *displayResponseState = &displayResponseState{}
var latest api.ChatResponse
var fullResponse strings.Builder
@@ -903,10 +891,7 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
req.KeepAlive = opts.KeepAlive
}
if err := client.Chat(cancelCtx, req, fn); err != nil {
if errors.Is(err, context.Canceled) {
return nil, nil
}
if err := client.Chat(cmd.Context(), req, fn); err != nil {
return nil, err
}
@@ -946,17 +931,6 @@ func generate(cmd *cobra.Command, opts runOptions) error {
generateContext = []int{}
}
ctx, cancel := context.WithCancel(cmd.Context())
defer cancel()
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT)
go func() {
<-sigChan
cancel()
}()
var state *displayResponseState = &displayResponseState{}
fn := func(response api.GenerateResponse) error {
@@ -992,10 +966,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
KeepAlive: opts.KeepAlive,
}
if err := client.Generate(ctx, &request, fn); err != nil {
if errors.Is(err, context.Canceled) {
return nil
}
if err := client.Generate(cmd.Context(), &request, fn); err != nil {
return err
}
@@ -1017,8 +988,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
latest.Summary()
}
ctx = context.WithValue(cmd.Context(), generateContextKey("context"), latest.Context)
cmd.SetContext(ctx)
cmd.SetContext(context.WithValue(cmd.Context(), generateContextKey("context"), latest.Context))
return nil
}

View File

@@ -10,7 +10,6 @@ import (
"os"
"strings"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/spf13/cobra"
@@ -491,96 +490,6 @@ func TestPushHandler(t *testing.T) {
}
}
func TestListHandler(t *testing.T) {
tests := []struct {
name string
args []string
serverResponse []api.ListModelResponse
expectedError string
expectedOutput string
}{
{
name: "list all models",
args: []string{},
serverResponse: []api.ListModelResponse{
{Name: "model1", Digest: "sha256:abc123", Size: 1024, ModifiedAt: time.Now().Add(-24 * time.Hour)},
{Name: "model2", Digest: "sha256:def456", Size: 2048, ModifiedAt: time.Now().Add(-48 * time.Hour)},
},
expectedOutput: "NAME ID SIZE MODIFIED \n" +
"model1 sha256:abc12 1.0 KB 24 hours ago \n" +
"model2 sha256:def45 2.0 KB 2 days ago \n",
},
{
name: "filter models by prefix",
args: []string{"model1"},
serverResponse: []api.ListModelResponse{
{Name: "model1", Digest: "sha256:abc123", Size: 1024, ModifiedAt: time.Now().Add(-24 * time.Hour)},
{Name: "model2", Digest: "sha256:def456", Size: 2048, ModifiedAt: time.Now().Add(-24 * time.Hour)},
},
expectedOutput: "NAME ID SIZE MODIFIED \n" +
"model1 sha256:abc12 1.0 KB 24 hours ago \n",
},
{
name: "server error",
args: []string{},
expectedError: "server error",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path != "/api/tags" || r.Method != http.MethodGet {
t.Errorf("unexpected request to %s %s", r.Method, r.URL.Path)
http.Error(w, "not found", http.StatusNotFound)
return
}
if tt.expectedError != "" {
http.Error(w, tt.expectedError, http.StatusInternalServerError)
return
}
response := api.ListResponse{Models: tt.serverResponse}
if err := json.NewEncoder(w).Encode(response); err != nil {
t.Fatal(err)
}
}))
defer mockServer.Close()
t.Setenv("OLLAMA_HOST", mockServer.URL)
cmd := &cobra.Command{}
cmd.SetContext(context.TODO())
// Capture stdout
oldStdout := os.Stdout
r, w, _ := os.Pipe()
os.Stdout = w
err := ListHandler(cmd, tt.args)
// Restore stdout and get output
w.Close()
os.Stdout = oldStdout
output, _ := io.ReadAll(r)
if tt.expectedError == "" {
if err != nil {
t.Errorf("expected no error, got %v", err)
}
if got := string(output); got != tt.expectedOutput {
t.Errorf("expected output:\n%s\ngot:\n%s", tt.expectedOutput, got)
}
} else {
if err == nil || !strings.Contains(err.Error(), tt.expectedError) {
t.Errorf("expected error containing %q, got %v", tt.expectedError, err)
}
}
})
}
}
func TestCreateHandler(t *testing.T) {
tests := []struct {
name string

View File

@@ -167,8 +167,6 @@ var (
MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
// Enable the new Ollama engine
NewEngine = Bool("OLLAMA_NEW_ENGINE")
// Ollama is running in a benchmark context, additional timing data will be collected.
Benchmark = Bool("OLLAMA_BENCHMARK")
)
func String(s string) func() string {

View File

@@ -352,10 +352,6 @@ func (c *testContext) MaxTensors() int {
return 10
}
func (c *testContext) Timing() []ml.OpTiming {
return []ml.OpTiming{}
}
func (c *testContext) Close() {}
type testTensor struct {

View File

@@ -1,24 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Tue, 18 Feb 2025 14:47:21 -0800
Subject: [PATCH] remove amx
---
ggml/src/CMakeLists.txt | 4 ----
1 file changed, 4 deletions(-)
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index 72b488dd..50828717 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -293,10 +293,6 @@ if (GGML_CPU_ALL_VARIANTS)
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
- if (NOT MSVC)
- # MSVC doesn't support AMX
- ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
- endif()
else ()
ggml_add_cpu_backend_variant_impl("")
endif()

View File

@@ -1,285 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: jmorganca <jmorganca@gmail.com>
Date: Sun, 16 Feb 2025 20:00:22 -0500
Subject: [PATCH] use std::filesystem::path instead of wstring
---
ggml/src/ggml-backend-reg.cpp | 116 ++++++++++++----------------------
1 file changed, 40 insertions(+), 76 deletions(-)
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index 84b21dd8..de78feae 100644
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@@ -72,16 +72,6 @@
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
#endif
-static std::wstring utf8_to_utf16(const std::string & str) {
- std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
- return converter.from_bytes(str);
-}
-
-static std::string utf16_to_utf8(const std::wstring & str) {
- std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
- return converter.to_bytes(str);
-}
-
#if defined(__clang__)
# pragma clang diagnostic pop
#endif
@@ -96,12 +86,12 @@ struct dl_handle_deleter {
}
};
-static dl_handle * dl_load_library(const std::wstring & path) {
+static dl_handle * dl_load_library(const std::filesystem::path & path) {
// suppress error dialogs for missing DLLs
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
- HMODULE handle = LoadLibraryW(path.c_str());
+ HMODULE handle = LoadLibraryW(path.wstring().c_str());
SetErrorMode(old_mode);
@@ -129,8 +119,8 @@ struct dl_handle_deleter {
}
};
-static void * dl_load_library(const std::wstring & path) {
- dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);
+static void * dl_load_library(const std::filesystem::path & path) {
+ dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
return handle;
}
@@ -222,11 +212,11 @@ struct ggml_backend_registry {
);
}
- ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
+ ggml_backend_reg_t load_backend(const std::filesystem::path & path, bool silent) {
dl_handle_ptr handle { dl_load_library(path) };
if (!handle) {
if (!silent) {
- GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
+ GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path.string().c_str());
}
return nullptr;
}
@@ -234,7 +224,7 @@ struct ggml_backend_registry {
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
if (score_fn && score_fn() == 0) {
if (!silent) {
- GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
+ GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path.string().c_str());
}
return nullptr;
}
@@ -242,7 +232,7 @@ struct ggml_backend_registry {
auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
if (!backend_init_fn) {
if (!silent) {
- GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
+ GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path.string().c_str());
}
return nullptr;
}
@@ -251,16 +241,16 @@ struct ggml_backend_registry {
if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
if (!silent) {
if (!reg) {
- GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
+ GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path.string().c_str());
} else {
GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
- __func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
+ __func__, path.string().c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
}
}
return nullptr;
}
- GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
+ GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path.string().c_str());
register_backend(reg, score_fn ? score_fn() : -1, std::move(handle));
@@ -396,14 +386,14 @@ ggml_backend_t ggml_backend_init_best(void) {
// Dynamic loading
ggml_backend_reg_t ggml_backend_load(const char * path) {
- return get_reg().load_backend(utf8_to_utf16(path), false);
+ return get_reg().load_backend(path, false);
}
void ggml_backend_unload(ggml_backend_reg_t reg) {
get_reg().unload_backend(reg, true);
}
-static std::wstring get_executable_path() {
+static std::filesystem::path get_executable_path() {
#if defined(__APPLE__)
// get executable path
std::vector<char> path;
@@ -415,15 +405,9 @@ static std::wstring get_executable_path() {
}
path.resize(size);
}
- std::string base_path(path.data(), size);
- // remove executable name
- auto last_slash = base_path.find_last_of('/');
- if (last_slash != std::string::npos) {
- base_path = base_path.substr(0, last_slash);
- }
- return utf8_to_utf16(base_path + "/");
+
+ return std::filesystem::path(path.data()).parent_path();
#elif defined(__linux__) || defined(__FreeBSD__)
- std::string base_path = ".";
std::vector<char> path(1024);
while (true) {
// get executable path
@@ -436,76 +420,56 @@ static std::wstring get_executable_path() {
break;
}
if (len < (ssize_t) path.size()) {
- base_path = std::string(path.data(), len);
- // remove executable name
- auto last_slash = base_path.find_last_of('/');
- if (last_slash != std::string::npos) {
- base_path = base_path.substr(0, last_slash);
- }
- break;
+ return std::filesystem::path(path.data()).parent_path();
}
path.resize(path.size() * 2);
}
-
- return utf8_to_utf16(base_path + "/");
#elif defined(_WIN32)
std::vector<wchar_t> path(MAX_PATH);
DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
if (len == 0) {
return {};
}
- std::wstring base_path(path.data(), len);
- // remove executable name
- auto last_slash = base_path.find_last_of('\\');
- if (last_slash != std::string::npos) {
- base_path = base_path.substr(0, last_slash);
- }
- return base_path + L"\\";
-#else
- return {};
-#endif
-}
-static std::wstring backend_filename_prefix() {
-#ifdef _WIN32
- return L"ggml-";
+ return std::filesystem::path(path.data()).parent_path();
#else
- return L"libggml-";
+ return {};
#endif
}
-static std::wstring backend_filename_suffix() {
+static std::string backend_filename_prefix() {
#ifdef _WIN32
- return L".dll";
+ return "ggml-";
#else
- return L".so";
+ return "libggml-";
#endif
}
-static std::wstring path_separator() {
+static std::string backend_filename_suffix() {
#ifdef _WIN32
- return L"\\";
+ return ".dll";
#else
- return L"/";
+ return ".so";
#endif
}
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
// TODO: search system paths
- std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
- std::vector<std::wstring> search_paths;
+ namespace fs = std::filesystem;
+ std::string file_prefix = backend_filename_prefix() + name + "-";
+ std::vector<fs::path> search_paths;
+
if (user_search_path == nullptr) {
- search_paths.push_back(L"." + path_separator());
+ search_paths.push_back(fs::current_path());
search_paths.push_back(get_executable_path());
} else {
- search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
+ search_paths.push_back(fs::u8path(user_search_path));
}
int best_score = 0;
- std::wstring best_path;
+ fs::path best_path;
- namespace fs = std::filesystem;
for (const auto & search_path : search_paths) {
if (!fs::exists(search_path)) {
continue;
@@ -514,31 +478,31 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
for (const auto & entry : dir_it) {
try {
if (entry.is_regular_file()) {
- std::wstring filename = entry.path().filename().wstring();
- std::wstring ext = entry.path().extension().wstring();
+ std::string filename = entry.path().filename().string();
+ std::string ext = entry.path().extension().string();
if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
- dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
+ dl_handle_ptr handle { dl_load_library(entry.path()) };
if (!handle) {
- GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
+ GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
continue;
}
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
if (!score_fn) {
- GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
+ GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
continue;
}
int s = score_fn();
- GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
+ GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
if (s > best_score) {
best_score = s;
- best_path = entry.path().wstring();
+ best_path = entry.path();
}
}
}
} catch (const std::exception & e) {
- GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), e.what());
+ GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, entry.path().string().c_str(), e.what());
}
}
}
@@ -546,7 +510,7 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
if (best_score == 0) {
// try to load the base backend
for (const auto & search_path : search_paths) {
- std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
+ fs::path path = fs::path(search_path) / (backend_filename_prefix() + name + backend_filename_suffix());
if (fs::exists(path)) {
return get_reg().load_backend(path, silent);
}

14
main.go
View File

@@ -2,6 +2,8 @@ package main
import (
"context"
"os"
"os/signal"
"github.com/spf13/cobra"
@@ -9,5 +11,15 @@ import (
)
func main() {
cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background()))
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, os.Interrupt)
go func() {
<-sigChan
cancel()
}()
cobra.CheckErr(cmd.NewCLI().ExecuteContext(ctx))
}

View File

@@ -2,7 +2,6 @@ package ml
import (
"bytes"
"cmp"
"encoding/binary"
"fmt"
"os"
@@ -38,7 +37,7 @@ func RegisterBackend(name string, f func(*os.File) (Backend, error)) {
}
func NewBackend(f *os.File) (Backend, error) {
if backend, ok := backends[cmp.Or(os.Getenv("OLLAMA_BACKEND"), "ggml")]; ok {
if backend, ok := backends["ggml"]; ok {
return backend(f)
}
@@ -54,30 +53,6 @@ type Context interface {
Compute(...Tensor)
MaxTensors() int
Close()
Timing() []OpTiming
}
// OpType is the type of operation performed during a forward pass.
type OpType string
const (
View OpType = "View"
Copy OpType = "Copy"
Reshape OpType = "Reshape"
Permute OpType = "Permute"
Contiguous OpType = "Contiguous"
Input OpType = "Input"
ComputeOp OpType = "Compute"
Transpose OpType = "Transpose"
)
// OpTiming stores the timing information for a single operation.
type OpTiming struct {
Type OpType
Operation string
Duration float64
Order int
}
type Tensor interface {

View File

@@ -4,8 +4,6 @@ package ggml
#cgo CPPFLAGS: -I${SRCDIR}/ggml/include
#include <stdlib.h>
#include <stdint.h>
#include <time.h>
#include <string.h>
#include "ggml.h"
#include "ggml-cpu.h"
#include "ggml-backend.h"
@@ -23,54 +21,6 @@ COMPILER inline get_compiler() {
#endif
}
// Define a fixed-size struct to store timing data
#define MAX_TENSOR_NAME 256
#define MAX_TIMINGS 1000
typedef struct {
char tensor_name[MAX_TENSOR_NAME];
double duration_ms;
} timing_entry;
typedef struct {
timing_entry entries[MAX_TIMINGS];
int count;
} timing_data;
// Global timing data structure
timing_data g_timings = {0};
double get_time_ms() {
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return ts.tv_sec * 1000.0 + ts.tv_nsec / 1000000.0;
}
bool debug_callback(struct ggml_tensor * t, bool ask, void * user_data) {
static double start_time;
static char current_tensor[MAX_TENSOR_NAME];
if (ask) {
start_time = get_time_ms();
strncpy(current_tensor, t->name, MAX_TENSOR_NAME - 1);
current_tensor[MAX_TENSOR_NAME - 1] = '\0';
} else {
double end_time = get_time_ms();
double duration = end_time - start_time;
if (g_timings.count < MAX_TIMINGS) {
strncpy(g_timings.entries[g_timings.count].tensor_name, current_tensor, MAX_TENSOR_NAME - 1);
g_timings.entries[g_timings.count].duration_ms = duration;
g_timings.count++;
}
}
return true;
}
void clear_timings() {
g_timings.count = 0;
}
*/
import "C"
@@ -79,11 +29,9 @@ import (
"io"
"log/slog"
"os"
"strings"
"sync"
"unsafe"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format"
fs "github.com/ollama/ollama/fs/ggml"
"github.com/ollama/ollama/ml"
@@ -308,62 +256,7 @@ func (c *Context) Forward(t ml.Tensor) {
C.ggml_build_forward_expand(c.graph, t.(*Tensor).t)
}
// Timing retrieves the collected timing data
func (c *Context) Timing() []ml.OpTiming {
sequence := make([]ml.OpTiming, C.g_timings.count)
for i := range int(C.g_timings.count) {
entry := C.g_timings.entries[i]
tensorName := C.GoString(&entry.tensor_name[0])
// Determine operation type and description based on tensor name
var opType ml.OpType
var opDesc string
switch {
case strings.Contains(tensorName, "(view)"):
opType, opDesc = ml.View, "Memory view"
case strings.Contains(tensorName, "(copy)") || strings.Contains(tensorName, "(copy of"):
opType, opDesc = ml.Copy, "Memory copy"
case strings.Contains(tensorName, "(reshaped)"):
opType, opDesc = ml.Reshape, "Reshape"
case strings.Contains(tensorName, "(permuted)"):
opType, opDesc = ml.Permute, "Permute dimensions"
case strings.Contains(tensorName, "(cont)"):
opType, opDesc = ml.Contiguous, "Make contiguous"
case strings.Contains(tensorName, "(transposed)"):
opType, opDesc = ml.Transpose, "Transpose"
case strings.HasPrefix(tensorName, "leaf_"):
opType, opDesc = ml.Input, fmt.Sprintf("Input tensor %s", tensorName)
case strings.HasPrefix(tensorName, "node_"):
opType, opDesc = ml.ComputeOp, fmt.Sprintf("Computation %s", tensorName)
default:
opType, opDesc = "Unknown", tensorName
}
sequence[i] = ml.OpTiming{
Type: opType,
Operation: opDesc,
Duration: float64(entry.duration_ms),
Order: i,
}
}
return sequence
}
func (c *Context) Compute(tensors ...ml.Tensor) {
if envconfig.Benchmark() {
// Clear previous timings before new computation
C.clear_timings()
C.ggml_backend_sched_set_eval_callback(
c.sched,
C.ggml_backend_eval_callback(C.debug_callback),
nil,
)
}
C.ggml_backend_sched_graph_compute_async(c.sched, c.graph)
needSync := true

View File

@@ -293,6 +293,10 @@ if (GGML_CPU_ALL_VARIANTS)
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
if (NOT MSVC)
# MSVC doesn't support AMX
ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
endif()
else ()
ggml_add_cpu_backend_variant_impl("")
endif()

View File

@@ -72,6 +72,16 @@
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
#endif
static std::wstring utf8_to_utf16(const std::string & str) {
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
return converter.from_bytes(str);
}
static std::string utf16_to_utf8(const std::wstring & str) {
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
return converter.to_bytes(str);
}
#if defined(__clang__)
# pragma clang diagnostic pop
#endif
@@ -86,12 +96,12 @@ struct dl_handle_deleter {
}
};
static dl_handle * dl_load_library(const std::filesystem::path & path) {
static dl_handle * dl_load_library(const std::wstring & path) {
// suppress error dialogs for missing DLLs
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
HMODULE handle = LoadLibraryW(path.wstring().c_str());
HMODULE handle = LoadLibraryW(path.c_str());
SetErrorMode(old_mode);
@@ -119,8 +129,8 @@ struct dl_handle_deleter {
}
};
static void * dl_load_library(const std::filesystem::path & path) {
dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
static void * dl_load_library(const std::wstring & path) {
dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);
return handle;
}
@@ -212,11 +222,11 @@ struct ggml_backend_registry {
);
}
ggml_backend_reg_t load_backend(const std::filesystem::path & path, bool silent) {
ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
dl_handle_ptr handle { dl_load_library(path) };
if (!handle) {
if (!silent) {
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path.string().c_str());
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
}
return nullptr;
}
@@ -224,7 +234,7 @@ struct ggml_backend_registry {
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
if (score_fn && score_fn() == 0) {
if (!silent) {
GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path.string().c_str());
GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
}
return nullptr;
}
@@ -232,7 +242,7 @@ struct ggml_backend_registry {
auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
if (!backend_init_fn) {
if (!silent) {
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path.string().c_str());
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
}
return nullptr;
}
@@ -241,16 +251,16 @@ struct ggml_backend_registry {
if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
if (!silent) {
if (!reg) {
GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path.string().c_str());
GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
} else {
GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
__func__, path.string().c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
__func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
}
}
return nullptr;
}
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path.string().c_str());
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
register_backend(reg, score_fn ? score_fn() : -1, std::move(handle));
@@ -386,14 +396,14 @@ ggml_backend_t ggml_backend_init_best(void) {
// Dynamic loading
ggml_backend_reg_t ggml_backend_load(const char * path) {
return get_reg().load_backend(path, false);
return get_reg().load_backend(utf8_to_utf16(path), false);
}
void ggml_backend_unload(ggml_backend_reg_t reg) {
get_reg().unload_backend(reg, true);
}
static std::filesystem::path get_executable_path() {
static std::wstring get_executable_path() {
#if defined(__APPLE__)
// get executable path
std::vector<char> path;
@@ -405,9 +415,15 @@ static std::filesystem::path get_executable_path() {
}
path.resize(size);
}
return std::filesystem::path(path.data()).parent_path();
std::string base_path(path.data(), size);
// remove executable name
auto last_slash = base_path.find_last_of('/');
if (last_slash != std::string::npos) {
base_path = base_path.substr(0, last_slash);
}
return utf8_to_utf16(base_path + "/");
#elif defined(__linux__) || defined(__FreeBSD__)
std::string base_path = ".";
std::vector<char> path(1024);
while (true) {
// get executable path
@@ -420,56 +436,76 @@ static std::filesystem::path get_executable_path() {
break;
}
if (len < (ssize_t) path.size()) {
return std::filesystem::path(path.data()).parent_path();
base_path = std::string(path.data(), len);
// remove executable name
auto last_slash = base_path.find_last_of('/');
if (last_slash != std::string::npos) {
base_path = base_path.substr(0, last_slash);
}
break;
}
path.resize(path.size() * 2);
}
return utf8_to_utf16(base_path + "/");
#elif defined(_WIN32)
std::vector<wchar_t> path(MAX_PATH);
DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
if (len == 0) {
return {};
}
return std::filesystem::path(path.data()).parent_path();
std::wstring base_path(path.data(), len);
// remove executable name
auto last_slash = base_path.find_last_of('\\');
if (last_slash != std::string::npos) {
base_path = base_path.substr(0, last_slash);
}
return base_path + L"\\";
#else
return {};
#endif
}
static std::string backend_filename_prefix() {
static std::wstring backend_filename_prefix() {
#ifdef _WIN32
return "ggml-";
return L"ggml-";
#else
return "libggml-";
return L"libggml-";
#endif
}
static std::string backend_filename_suffix() {
static std::wstring backend_filename_suffix() {
#ifdef _WIN32
return ".dll";
return L".dll";
#else
return ".so";
return L".so";
#endif
}
static std::wstring path_separator() {
#ifdef _WIN32
return L"\\";
#else
return L"/";
#endif
}
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
// TODO: search system paths
namespace fs = std::filesystem;
std::string file_prefix = backend_filename_prefix() + name + "-";
std::vector<fs::path> search_paths;
std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
std::vector<std::wstring> search_paths;
if (user_search_path == nullptr) {
search_paths.push_back(fs::current_path());
search_paths.push_back(L"." + path_separator());
search_paths.push_back(get_executable_path());
} else {
search_paths.push_back(fs::u8path(user_search_path));
search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
}
int best_score = 0;
fs::path best_path;
std::wstring best_path;
namespace fs = std::filesystem;
for (const auto & search_path : search_paths) {
if (!fs::exists(search_path)) {
continue;
@@ -478,31 +514,31 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
for (const auto & entry : dir_it) {
try {
if (entry.is_regular_file()) {
std::string filename = entry.path().filename().string();
std::string ext = entry.path().extension().string();
std::wstring filename = entry.path().filename().wstring();
std::wstring ext = entry.path().extension().wstring();
if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
dl_handle_ptr handle { dl_load_library(entry.path()) };
dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
if (!handle) {
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
continue;
}
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
if (!score_fn) {
GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
continue;
}
int s = score_fn();
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
if (s > best_score) {
best_score = s;
best_path = entry.path();
best_path = entry.path().wstring();
}
}
}
} catch (const std::exception & e) {
GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, entry.path().string().c_str(), e.what());
GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), e.what());
}
}
}
@@ -510,7 +546,7 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
if (best_score == 0) {
// try to load the base backend
for (const auto & search_path : search_paths) {
fs::path path = fs::path(search_path) / (backend_filename_prefix() + name + backend_filename_suffix());
std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
if (fs::exists(path)) {
return get_reg().load_backend(path, silent);
}

View File

@@ -49,29 +49,29 @@ func (p *Progress) stop() bool {
func (p *Progress) Stop() bool {
stopped := p.stop()
if stopped {
fmt.Fprint(p.w, "\n")
p.w.Flush()
fmt.Fprintln(p.w)
}
// show cursor
fmt.Fprint(p.w, "\033[?25h")
p.w.Flush()
return stopped
}
func (p *Progress) StopAndClear() bool {
defer p.w.Flush()
fmt.Fprint(p.w, "\033[?25l")
defer fmt.Fprint(p.w, "\033[?25h")
stopped := p.stop()
if stopped {
// clear all progress lines
for i := range p.pos {
if i > 0 {
fmt.Fprint(p.w, "\033[A")
}
fmt.Fprint(p.w, "\033[2K\033[1G")
for range p.pos - 1 {
fmt.Fprint(p.w, "\033[A")
}
fmt.Fprint(p.w, "\033[2K", "\033[1G")
}
// show cursor
fmt.Fprint(p.w, "\033[?25h")
p.w.Flush()
return stopped
}
@@ -86,19 +86,13 @@ func (p *Progress) render() {
p.mu.Lock()
defer p.mu.Unlock()
defer p.w.Flush()
// eliminate flickering on terminals that support synchronized output
fmt.Fprint(p.w, "\033[?2026h")
defer fmt.Fprint(p.w, "\033[?2026l")
fmt.Fprint(p.w, "\033[?25l")
defer fmt.Fprint(p.w, "\033[?25h")
// move the cursor back to the beginning
for range p.pos - 1 {
fmt.Fprint(p.w, "\033[A")
}
fmt.Fprint(p.w, "\033[1G")
// render progress lines
@@ -110,10 +104,13 @@ func (p *Progress) render() {
}
p.pos = len(p.states)
p.w.Flush()
}
func (p *Progress) start() {
p.ticker = time.NewTicker(100 * time.Millisecond)
// hide cursor
fmt.Fprint(p.w, "\033[?25l")
for range p.ticker.C {
p.render()
}