Compare commits


7 Commits

Author SHA1 Message Date
Bruce MacDonald
057cc54b66 benchmark: compare backend graph computation times
Track execution time of individual tensor operations (views, copies, reshapes, etc.)
during LLM forward passes using cgo bindings to the native graph runtime. This
helps identify performance bottlenecks in the computation graph and optimize memory
operations that can significantly impact inference latency.
2025-02-19 15:22:53 -08:00
Michael Yang
1e438b237c Merge pull request #9203 from ollama/mxyng/sapphirerapids
build: remove backend build for sapphirerapids
2025-02-19 21:42:00 +00:00
yuiseki
d721a02e7d test: add test cases for ListHandler (#9146) 2025-02-19 13:24:27 -08:00
zyxucp
778603a818 docs: Add AntSK to Community Integrations (#9214) 2025-02-19 13:22:48 -08:00
maninhill
3c874df46e docs: Add MaxKB to Community Integrations (#9212) 2025-02-19 13:20:09 -08:00
Jeffrey Morgan
d2eb226c91 llama: add patch to fix ggml backend reg on Linux with utf-8 characters in the path (#9159) 2025-02-18 22:46:17 -05:00
Michael Yang
5f8c03189e build: remove backend build for sapphirerapids
sapphire rapids has amx support, but it ends up having a negative
performance impact.

emerald rapids also has amx support with a positive performance impact;
however, there's no reasonable way in ggml to differentiate between the
two. the impact is small (~6%), so disable amx entirely for simplicity
2025-02-18 14:47:58 -08:00
11 changed files with 667 additions and 81 deletions

View File

@@ -382,6 +382,8 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
 - [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)
 - [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivalent endpoint with Ollama support for running locally)
+- [AntSK](https://github.com/AIDotNet/AntSK) (Out-of-the-box & Adaptable RAG Chatbot)
+- [MaxKB](https://github.com/1Panel-dev/MaxKB/) (Ready-to-use & flexible RAG Chatbot)

 ### Cloud

View File

@@ -0,0 +1,86 @@
package backend

import (
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"testing"

	"github.com/ollama/ollama/ml"
	"github.com/ollama/ollama/model"
	"github.com/ollama/ollama/server"

	_ "github.com/ollama/ollama/model/models/llama"
)

var modelName = flag.String("m", "", "Name of the model to benchmark")

func suppressOutput() (cleanup func()) {
	oldStdout, oldStderr := os.Stdout, os.Stderr
	os.Stdout, os.Stderr = nil, nil
	log.SetOutput(io.Discard)
	return func() {
		os.Stdout, os.Stderr = oldStdout, oldStderr
		log.SetOutput(os.Stderr)
	}
}

func setupModel(b *testing.B) model.Model {
	if *modelName == "" {
		b.Fatal("Error: -m flag is required for benchmark tests")
	}
	sm, err := server.GetModel(*modelName)
	if err != nil {
		b.Fatal(err)
	}
	m, err := model.New(sm.ModelPath)
	if err != nil {
		b.Fatal(err)
	}
	m.Config().Cache.Init(m.Backend(), ml.DTypeF32, 2048)
	return m
}

func BenchmarkGGMLOperations(b *testing.B) {
	// loading the GGML back-end logs to standard out and makes the bench output messy
	cleanup := suppressOutput()
	defer cleanup()

	b.Setenv("OLLAMA_BENCHMARK", "1")
	b.Setenv("OLLAMA_BACKEND", "ggml")

	m := setupModel(b)

	// Sample input data
	inputIDs := []int32{1, 2, 3, 4, 5}
	options := model.Options{
		Inputs:    inputIDs,
		Positions: []int32{1, 2, 3, 4, 5},
		Sequences: []int{1, 1, 1, 1, 1},
		Outputs:   []int32{int32(len(inputIDs) - 1)},
	}

	b.ResetTimer()
	for range b.N {
		ctx := m.Backend().NewContext()
		defer ctx.Close()

		modelOutput, err := model.Forward(ctx, m, options)
		if err != nil {
			b.Fatal(fmt.Errorf("forward pass failed: %v", err))
		}
		ctx.Compute(modelOutput)

		for _, op := range ctx.Timing() {
			b.ReportMetric(op.Duration, fmt.Sprintf("%s_ms", op.Type))
		}
	}
}
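A note on running the benchmark above: it needs a locally available model, passed through the -m flag the file defines. A minimal invocation sketch — the package path ./benchmark and the model name llama3.2 are assumptions, since the compare view truncates file locations:

go test -bench=BenchmarkGGMLOperations ./benchmark -args -m llama3.2

The -args separator hands everything after it to the compiled test binary, which parses -m via the flag package.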

View File

@@ -10,6 +10,7 @@ import (
"os"
"strings"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/spf13/cobra"
@@ -490,6 +491,96 @@ func TestPushHandler(t *testing.T) {
 	}
 }

+func TestListHandler(t *testing.T) {
+	tests := []struct {
+		name           string
+		args           []string
+		serverResponse []api.ListModelResponse
+		expectedError  string
+		expectedOutput string
+	}{
+		{
+			name: "list all models",
+			args: []string{},
+			serverResponse: []api.ListModelResponse{
+				{Name: "model1", Digest: "sha256:abc123", Size: 1024, ModifiedAt: time.Now().Add(-24 * time.Hour)},
+				{Name: "model2", Digest: "sha256:def456", Size: 2048, ModifiedAt: time.Now().Add(-48 * time.Hour)},
+			},
+			expectedOutput: "NAME ID SIZE MODIFIED \n" +
+				"model1 sha256:abc12 1.0 KB 24 hours ago \n" +
+				"model2 sha256:def45 2.0 KB 2 days ago \n",
+		},
+		{
+			name: "filter models by prefix",
+			args: []string{"model1"},
+			serverResponse: []api.ListModelResponse{
+				{Name: "model1", Digest: "sha256:abc123", Size: 1024, ModifiedAt: time.Now().Add(-24 * time.Hour)},
+				{Name: "model2", Digest: "sha256:def456", Size: 2048, ModifiedAt: time.Now().Add(-24 * time.Hour)},
+			},
+			expectedOutput: "NAME ID SIZE MODIFIED \n" +
+				"model1 sha256:abc12 1.0 KB 24 hours ago \n",
+		},
+		{
+			name:          "server error",
+			args:          []string{},
+			expectedError: "server error",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+				if r.URL.Path != "/api/tags" || r.Method != http.MethodGet {
+					t.Errorf("unexpected request to %s %s", r.Method, r.URL.Path)
+					http.Error(w, "not found", http.StatusNotFound)
+					return
+				}
+				if tt.expectedError != "" {
+					http.Error(w, tt.expectedError, http.StatusInternalServerError)
+					return
+				}
+				response := api.ListResponse{Models: tt.serverResponse}
+				if err := json.NewEncoder(w).Encode(response); err != nil {
+					t.Fatal(err)
+				}
+			}))
+			defer mockServer.Close()
+
+			t.Setenv("OLLAMA_HOST", mockServer.URL)
+
+			cmd := &cobra.Command{}
+			cmd.SetContext(context.TODO())
+
+			// Capture stdout
+			oldStdout := os.Stdout
+			r, w, _ := os.Pipe()
+			os.Stdout = w
+
+			err := ListHandler(cmd, tt.args)
+
+			// Restore stdout and get output
+			w.Close()
+			os.Stdout = oldStdout
+			output, _ := io.ReadAll(r)
+
+			if tt.expectedError == "" {
+				if err != nil {
+					t.Errorf("expected no error, got %v", err)
+				}
+				if got := string(output); got != tt.expectedOutput {
+					t.Errorf("expected output:\n%s\ngot:\n%s", tt.expectedOutput, got)
+				}
+			} else {
+				if err == nil || !strings.Contains(err.Error(), tt.expectedError) {
+					t.Errorf("expected error containing %q, got %v", tt.expectedError, err)
+				}
+			}
+		})
+	}
+}
+
 func TestCreateHandler(t *testing.T) {
 	tests := []struct {
 		name string
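The new cases are table-driven, so they can be run in isolation with go test's -run filter. A sketch, assuming the handlers live in the cmd package (the compare view does not show the file path):

go test ./cmd -run TestListHandler -v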

View File

@@ -167,6 +167,8 @@ var (
 	MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
 	// Enable the new Ollama engine
 	NewEngine = Bool("OLLAMA_NEW_ENGINE")
+	// Ollama is running in a benchmark context, additional timing data will be collected.
+	Benchmark = Bool("OLLAMA_BENCHMARK")
 )
func String(s string) func() string {

View File

@@ -352,6 +352,10 @@ func (c *testContext) MaxTensors() int {
 	return 10
 }

+func (c *testContext) Timing() []ml.OpTiming {
+	return []ml.OpTiming{}
+}
+
 func (c *testContext) Close() {}
type testTensor struct {

View File

@@ -0,0 +1,24 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Tue, 18 Feb 2025 14:47:21 -0800
Subject: [PATCH] remove amx
---
ggml/src/CMakeLists.txt | 4 ----
1 file changed, 4 deletions(-)
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index 72b488dd..50828717 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -293,10 +293,6 @@ if (GGML_CPU_ALL_VARIANTS)
     ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
     ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
     ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
-    if (NOT MSVC)
-        # MSVC doesn't support AMX
-        ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
-    endif()
 else ()
     ggml_add_cpu_backend_variant_impl("")
 endif()

View File

@@ -0,0 +1,285 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: jmorganca <jmorganca@gmail.com>
Date: Sun, 16 Feb 2025 20:00:22 -0500
Subject: [PATCH] use std::filesystem::path instead of wstring
---
ggml/src/ggml-backend-reg.cpp | 116 ++++++++++++----------------------
1 file changed, 40 insertions(+), 76 deletions(-)
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index 84b21dd8..de78feae 100644
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@@ -72,16 +72,6 @@
 #    pragma clang diagnostic ignored "-Wdeprecated-declarations"
 #endif

-static std::wstring utf8_to_utf16(const std::string & str) {
-    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
-    return converter.from_bytes(str);
-}
-
-static std::string utf16_to_utf8(const std::wstring & str) {
-    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
-    return converter.to_bytes(str);
-}
-
 #if defined(__clang__)
 #    pragma clang diagnostic pop
 #endif
@@ -96,12 +86,12 @@ struct dl_handle_deleter {
     }
 };

-static dl_handle * dl_load_library(const std::wstring & path) {
+static dl_handle * dl_load_library(const std::filesystem::path & path) {
     // suppress error dialogs for missing DLLs
     DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
     SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);

-    HMODULE handle = LoadLibraryW(path.c_str());
+    HMODULE handle = LoadLibraryW(path.wstring().c_str());

     SetErrorMode(old_mode);
@@ -129,8 +119,8 @@ struct dl_handle_deleter {
     }
 };

-static void * dl_load_library(const std::wstring & path) {
-    dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);
+static void * dl_load_library(const std::filesystem::path & path) {
+    dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);

     return handle;
 }
@@ -222,11 +212,11 @@ struct ggml_backend_registry {
         );
     }

-    ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
+    ggml_backend_reg_t load_backend(const std::filesystem::path & path, bool silent) {
         dl_handle_ptr handle { dl_load_library(path) };
         if (!handle) {
             if (!silent) {
-                GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
+                GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path.string().c_str());
             }
             return nullptr;
         }
@@ -234,7 +224,7 @@ struct ggml_backend_registry {
         auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
         if (score_fn && score_fn() == 0) {
             if (!silent) {
-                GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
+                GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path.string().c_str());
             }
             return nullptr;
         }
@@ -242,7 +232,7 @@ struct ggml_backend_registry {
         auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
         if (!backend_init_fn) {
             if (!silent) {
-                GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
+                GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path.string().c_str());
             }
             return nullptr;
         }
@@ -251,16 +241,16 @@ struct ggml_backend_registry {
         if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
             if (!silent) {
                 if (!reg) {
-                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
+                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path.string().c_str());
                 } else {
                     GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
-                        __func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
+                        __func__, path.string().c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
                 }
             }
             return nullptr;
         }

-        GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
+        GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path.string().c_str());

         register_backend(reg, score_fn ? score_fn() : -1, std::move(handle));
@@ -396,14 +386,14 @@ ggml_backend_t ggml_backend_init_best(void) {
 // Dynamic loading

 ggml_backend_reg_t ggml_backend_load(const char * path) {
-    return get_reg().load_backend(utf8_to_utf16(path), false);
+    return get_reg().load_backend(path, false);
 }

 void ggml_backend_unload(ggml_backend_reg_t reg) {
     get_reg().unload_backend(reg, true);
 }

-static std::wstring get_executable_path() {
+static std::filesystem::path get_executable_path() {
 #if defined(__APPLE__)
     // get executable path
     std::vector<char> path;
@@ -415,15 +405,9 @@ static std::wstring get_executable_path() {
         }
         path.resize(size);
     }
-    std::string base_path(path.data(), size);
-    // remove executable name
-    auto last_slash = base_path.find_last_of('/');
-    if (last_slash != std::string::npos) {
-        base_path = base_path.substr(0, last_slash);
-    }
-    return utf8_to_utf16(base_path + "/");
+
+    return std::filesystem::path(path.data()).parent_path();
 #elif defined(__linux__) || defined(__FreeBSD__)
-    std::string base_path = ".";
     std::vector<char> path(1024);
     while (true) {
         // get executable path
@@ -436,76 +420,56 @@ static std::wstring get_executable_path() {
             break;
         }
         if (len < (ssize_t) path.size()) {
-            base_path = std::string(path.data(), len);
-            // remove executable name
-            auto last_slash = base_path.find_last_of('/');
-            if (last_slash != std::string::npos) {
-                base_path = base_path.substr(0, last_slash);
-            }
-            break;
+            return std::filesystem::path(path.data()).parent_path();
         }
         path.resize(path.size() * 2);
     }
-
-    return utf8_to_utf16(base_path + "/");
 #elif defined(_WIN32)
     std::vector<wchar_t> path(MAX_PATH);
     DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
     if (len == 0) {
         return {};
     }
-    std::wstring base_path(path.data(), len);
-    // remove executable name
-    auto last_slash = base_path.find_last_of('\\');
-    if (last_slash != std::string::npos) {
-        base_path = base_path.substr(0, last_slash);
-    }
-    return base_path + L"\\";
-#else
-    return {};
-#endif
-}
-
-static std::wstring backend_filename_prefix() {
-#ifdef _WIN32
-    return L"ggml-";
+    return std::filesystem::path(path.data()).parent_path();
 #else
-    return L"libggml-";
+    return {};
 #endif
 }

-static std::wstring backend_filename_suffix() {
+static std::string backend_filename_prefix() {
 #ifdef _WIN32
-    return L".dll";
+    return "ggml-";
 #else
-    return L".so";
+    return "libggml-";
 #endif
 }

-static std::wstring path_separator() {
+static std::string backend_filename_suffix() {
 #ifdef _WIN32
-    return L"\\";
+    return ".dll";
 #else
-    return L"/";
+    return ".so";
 #endif
 }

 static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
     // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
     // TODO: search system paths
-    std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
-    std::vector<std::wstring> search_paths;
+    namespace fs = std::filesystem;
+    std::string file_prefix = backend_filename_prefix() + name + "-";
+    std::vector<fs::path> search_paths;
+
     if (user_search_path == nullptr) {
-        search_paths.push_back(L"." + path_separator());
+        search_paths.push_back(fs::current_path());
         search_paths.push_back(get_executable_path());
     } else {
-        search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
+        search_paths.push_back(fs::u8path(user_search_path));
     }

     int best_score = 0;
-    std::wstring best_path;
+    fs::path best_path;

-    namespace fs = std::filesystem;
     for (const auto & search_path : search_paths) {
         if (!fs::exists(search_path)) {
             continue;
@@ -514,31 +478,31 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
         for (const auto & entry : dir_it) {
             try {
                 if (entry.is_regular_file()) {
-                    std::wstring filename = entry.path().filename().wstring();
-                    std::wstring ext = entry.path().extension().wstring();
+                    std::string filename = entry.path().filename().string();
+                    std::string ext = entry.path().extension().string();
                     if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
-                        dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
+                        dl_handle_ptr handle { dl_load_library(entry.path()) };
                         if (!handle) {
-                            GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
+                            GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
                             continue;
                        }

                         auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
                         if (!score_fn) {
-                            GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
+                            GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
                             continue;
                         }

                         int s = score_fn();
-                        GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
+                        GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
                         if (s > best_score) {
                             best_score = s;
-                            best_path = entry.path().wstring();
+                            best_path = entry.path();
                         }
                     }
                 }
             } catch (const std::exception & e) {
-                GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), e.what());
+                GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, entry.path().string().c_str(), e.what());
             }
         }
     }
@@ -546,7 +510,7 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
     if (best_score == 0) {
         // try to load the base backend
         for (const auto & search_path : search_paths) {
-            std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
+            fs::path path = fs::path(search_path) / (backend_filename_prefix() + name + backend_filename_suffix());
             if (fs::exists(path)) {
                 return get_reg().load_backend(path, silent);
             }

View File

@@ -2,6 +2,7 @@ package ml
 import (
 	"bytes"
+	"cmp"
 	"encoding/binary"
 	"fmt"
 	"os"
@@ -37,7 +38,7 @@ func RegisterBackend(name string, f func(*os.File) (Backend, error)) {
 }

 func NewBackend(f *os.File) (Backend, error) {
-	if backend, ok := backends["ggml"]; ok {
+	if backend, ok := backends[cmp.Or(os.Getenv("OLLAMA_BACKEND"), "ggml")]; ok {
 		return backend(f)
 	}
@@ -53,6 +54,30 @@ type Context interface {
 	Compute(...Tensor)
 	MaxTensors() int
 	Close()
+	Timing() []OpTiming
 }

+// OpType is the type of operation performed during a forward pass.
+type OpType string
+
+const (
+	View       OpType = "View"
+	Copy       OpType = "Copy"
+	Reshape    OpType = "Reshape"
+	Permute    OpType = "Permute"
+	Contiguous OpType = "Contiguous"
+	Input      OpType = "Input"
+	ComputeOp  OpType = "Compute"
+	Transpose  OpType = "Transpose"
+)
+
+// OpTiming stores the timing information for a single operation.
+type OpTiming struct {
+	Type      OpType
+	Operation string
+	Duration  float64
+	Order     int
+}
+
 type Tensor interface {
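Timing() returns one OpTiming per recorded operation, so a consumer can reduce the slice however it likes. A minimal sketch of aggregating total duration per operation type (summarize is a hypothetical helper for illustration, not part of this changeset; Duration is in milliseconds, per the ggml implementation further down):

// summarize totals Timing() durations by operation type.
// Hypothetical helper, not part of this diff.
func summarize(timings []OpTiming) map[OpType]float64 {
	totals := make(map[OpType]float64)
	for _, op := range timings {
		totals[op.Type] += op.Duration // milliseconds
	}
	return totals
}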

View File

@@ -4,6 +4,8 @@ package ggml
 #cgo CPPFLAGS: -I${SRCDIR}/ggml/include
 #include <stdlib.h>
 #include <stdint.h>
+#include <time.h>
+#include <string.h>
 #include "ggml.h"
 #include "ggml-cpu.h"
 #include "ggml-backend.h"
@@ -21,6 +23,54 @@ COMPILER inline get_compiler() {
 #endif
 }

+// Define a fixed-size struct to store timing data
+#define MAX_TENSOR_NAME 256
+#define MAX_TIMINGS 1000
+
+typedef struct {
+	char tensor_name[MAX_TENSOR_NAME];
+	double duration_ms;
+} timing_entry;
+
+typedef struct {
+	timing_entry entries[MAX_TIMINGS];
+	int count;
+} timing_data;
+
+// Global timing data structure
+timing_data g_timings = {0};
+
+double get_time_ms() {
+	struct timespec ts;
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+	return ts.tv_sec * 1000.0 + ts.tv_nsec / 1000000.0;
+}
+
+bool debug_callback(struct ggml_tensor * t, bool ask, void * user_data) {
+	static double start_time;
+	static char current_tensor[MAX_TENSOR_NAME];
+
+	if (ask) {
+		start_time = get_time_ms();
+		strncpy(current_tensor, t->name, MAX_TENSOR_NAME - 1);
+		current_tensor[MAX_TENSOR_NAME - 1] = '\0';
+	} else {
+		double end_time = get_time_ms();
+		double duration = end_time - start_time;
+		if (g_timings.count < MAX_TIMINGS) {
+			strncpy(g_timings.entries[g_timings.count].tensor_name, current_tensor, MAX_TENSOR_NAME - 1);
+			g_timings.entries[g_timings.count].duration_ms = duration;
+			g_timings.count++;
+		}
+	}
+	return true;
+}
+
+void clear_timings() {
+	g_timings.count = 0;
+}
 */
 import "C"
@@ -29,9 +79,11 @@ import (
 	"io"
 	"log/slog"
 	"os"
+	"strings"
 	"sync"
 	"unsafe"

+	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 	fs "github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/ml"
@@ -256,7 +308,62 @@ func (c *Context) Forward(t ml.Tensor) {
 	C.ggml_build_forward_expand(c.graph, t.(*Tensor).t)
 }

+// Timing retrieves the collected timing data
+func (c *Context) Timing() []ml.OpTiming {
+	sequence := make([]ml.OpTiming, C.g_timings.count)
+	for i := range int(C.g_timings.count) {
+		entry := C.g_timings.entries[i]
+		tensorName := C.GoString(&entry.tensor_name[0])
+
+		// Determine operation type and description based on tensor name
+		var opType ml.OpType
+		var opDesc string
+		switch {
+		case strings.Contains(tensorName, "(view)"):
+			opType, opDesc = ml.View, "Memory view"
+		case strings.Contains(tensorName, "(copy)") || strings.Contains(tensorName, "(copy of"):
+			opType, opDesc = ml.Copy, "Memory copy"
+		case strings.Contains(tensorName, "(reshaped)"):
+			opType, opDesc = ml.Reshape, "Reshape"
+		case strings.Contains(tensorName, "(permuted)"):
+			opType, opDesc = ml.Permute, "Permute dimensions"
+		case strings.Contains(tensorName, "(cont)"):
+			opType, opDesc = ml.Contiguous, "Make contiguous"
+		case strings.Contains(tensorName, "(transposed)"):
+			opType, opDesc = ml.Transpose, "Transpose"
+		case strings.HasPrefix(tensorName, "leaf_"):
+			opType, opDesc = ml.Input, fmt.Sprintf("Input tensor %s", tensorName)
+		case strings.HasPrefix(tensorName, "node_"):
+			opType, opDesc = ml.ComputeOp, fmt.Sprintf("Computation %s", tensorName)
+		default:
+			opType, opDesc = "Unknown", tensorName
+		}
+
+		sequence[i] = ml.OpTiming{
+			Type:      opType,
+			Operation: opDesc,
+			Duration:  float64(entry.duration_ms),
+			Order:     i,
+		}
+	}
+	return sequence
+}
+
 func (c *Context) Compute(tensors ...ml.Tensor) {
+	if envconfig.Benchmark() {
+		// Clear previous timings before new computation
+		C.clear_timings()
+		C.ggml_backend_sched_set_eval_callback(
+			c.sched,
+			C.ggml_backend_eval_callback(C.debug_callback),
+			nil,
+		)
+	}
+
 	C.ggml_backend_sched_graph_compute_async(c.sched, c.graph)
 	needSync := true
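How the pieces above fit together: when OLLAMA_BENCHMARK is set, Compute registers debug_callback as the scheduler's eval callback. ggml invokes that callback twice per observed node — once with ask=true before the node runs (where the start timestamp is taken) and once with ask=false after it has run (where the elapsed time is appended to g_timings). Timing() then converts the C buffer into ml.OpTiming values, classifying entries by ggml's tensor-name markers such as "(view)" and "(reshaped)". A minimal end-to-end sketch on the Go side (model setup elided; m and opts follow the benchmark file above):

ctx := m.Backend().NewContext()
defer ctx.Close()

out, err := model.Forward(ctx, m, opts) // builds the graph
if err != nil {
	log.Fatal(err)
}
ctx.Compute(out) // with OLLAMA_BENCHMARK=1, per-op timings are recorded

for _, op := range ctx.Timing() {
	fmt.Printf("%-10s %8.3f ms  %s\n", op.Type, op.Duration, op.Operation)
}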

View File

@@ -293,10 +293,6 @@ if (GGML_CPU_ALL_VARIANTS)
     ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
     ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
     ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
-    if (NOT MSVC)
-        # MSVC doesn't support AMX
-        ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
-    endif()
 else ()
     ggml_add_cpu_backend_variant_impl("")
 endif()

View File

@@ -72,16 +72,6 @@
 #    pragma clang diagnostic ignored "-Wdeprecated-declarations"
 #endif

-static std::wstring utf8_to_utf16(const std::string & str) {
-    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
-    return converter.from_bytes(str);
-}
-
-static std::string utf16_to_utf8(const std::wstring & str) {
-    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
-    return converter.to_bytes(str);
-}
-
 #if defined(__clang__)
 #    pragma clang diagnostic pop
 #endif
@@ -96,12 +86,12 @@ struct dl_handle_deleter {
     }
 };

-static dl_handle * dl_load_library(const std::wstring & path) {
+static dl_handle * dl_load_library(const std::filesystem::path & path) {
     // suppress error dialogs for missing DLLs
     DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
     SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);

-    HMODULE handle = LoadLibraryW(path.c_str());
+    HMODULE handle = LoadLibraryW(path.wstring().c_str());

     SetErrorMode(old_mode);
@@ -129,8 +119,8 @@ struct dl_handle_deleter {
     }
 };

-static void * dl_load_library(const std::wstring & path) {
-    dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);
+static void * dl_load_library(const std::filesystem::path & path) {
+    dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);

     return handle;
 }
@@ -222,11 +212,11 @@ struct ggml_backend_registry {
         );
     }

-    ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
+    ggml_backend_reg_t load_backend(const std::filesystem::path & path, bool silent) {
         dl_handle_ptr handle { dl_load_library(path) };
         if (!handle) {
             if (!silent) {
-                GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
+                GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path.string().c_str());
             }
             return nullptr;
         }
@@ -234,7 +224,7 @@ struct ggml_backend_registry {
         auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
         if (score_fn && score_fn() == 0) {
             if (!silent) {
-                GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
+                GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path.string().c_str());
             }
             return nullptr;
         }
@@ -242,7 +232,7 @@ struct ggml_backend_registry {
         auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
         if (!backend_init_fn) {
             if (!silent) {
-                GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
+                GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path.string().c_str());
             }
             return nullptr;
         }
@@ -251,16 +241,16 @@ struct ggml_backend_registry {
         if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
             if (!silent) {
                 if (!reg) {
-                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
+                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path.string().c_str());
                 } else {
                     GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
-                        __func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
+                        __func__, path.string().c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
                 }
             }
             return nullptr;
         }

-        GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
+        GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path.string().c_str());

         register_backend(reg, score_fn ? score_fn() : -1, std::move(handle));
@@ -396,14 +386,14 @@ ggml_backend_t ggml_backend_init_best(void) {
 // Dynamic loading

 ggml_backend_reg_t ggml_backend_load(const char * path) {
-    return get_reg().load_backend(utf8_to_utf16(path), false);
+    return get_reg().load_backend(path, false);
 }

 void ggml_backend_unload(ggml_backend_reg_t reg) {
     get_reg().unload_backend(reg, true);
 }

-static std::wstring get_executable_path() {
+static std::filesystem::path get_executable_path() {
 #if defined(__APPLE__)
     // get executable path
     std::vector<char> path;
@@ -415,15 +405,9 @@ static std::wstring get_executable_path() {
         }
         path.resize(size);
     }
-    std::string base_path(path.data(), size);
-    // remove executable name
-    auto last_slash = base_path.find_last_of('/');
-    if (last_slash != std::string::npos) {
-        base_path = base_path.substr(0, last_slash);
-    }
-    return utf8_to_utf16(base_path + "/");
+
+    return std::filesystem::path(path.data()).parent_path();
 #elif defined(__linux__) || defined(__FreeBSD__)
-    std::string base_path = ".";
     std::vector<char> path(1024);
     while (true) {
         // get executable path
@@ -436,76 +420,56 @@ static std::wstring get_executable_path() {
             break;
         }
         if (len < (ssize_t) path.size()) {
-            base_path = std::string(path.data(), len);
-            // remove executable name
-            auto last_slash = base_path.find_last_of('/');
-            if (last_slash != std::string::npos) {
-                base_path = base_path.substr(0, last_slash);
-            }
-            break;
+            return std::filesystem::path(path.data()).parent_path();
         }
         path.resize(path.size() * 2);
     }
-
-    return utf8_to_utf16(base_path + "/");
 #elif defined(_WIN32)
     std::vector<wchar_t> path(MAX_PATH);
     DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
     if (len == 0) {
         return {};
     }
-    std::wstring base_path(path.data(), len);
-    // remove executable name
-    auto last_slash = base_path.find_last_of('\\');
-    if (last_slash != std::string::npos) {
-        base_path = base_path.substr(0, last_slash);
-    }
-    return base_path + L"\\";
-#else
-    return {};
-#endif
-}
-
-static std::wstring backend_filename_prefix() {
-#ifdef _WIN32
-    return L"ggml-";
+    return std::filesystem::path(path.data()).parent_path();
 #else
-    return L"libggml-";
+    return {};
 #endif
 }

-static std::wstring backend_filename_suffix() {
+static std::string backend_filename_prefix() {
 #ifdef _WIN32
-    return L".dll";
+    return "ggml-";
 #else
-    return L".so";
+    return "libggml-";
 #endif
 }

-static std::wstring path_separator() {
+static std::string backend_filename_suffix() {
 #ifdef _WIN32
-    return L"\\";
+    return ".dll";
 #else
-    return L"/";
+    return ".so";
 #endif
 }

 static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
     // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
     // TODO: search system paths
-    std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
-    std::vector<std::wstring> search_paths;
+    namespace fs = std::filesystem;
+    std::string file_prefix = backend_filename_prefix() + name + "-";
+    std::vector<fs::path> search_paths;
+
     if (user_search_path == nullptr) {
-        search_paths.push_back(L"." + path_separator());
+        search_paths.push_back(fs::current_path());
         search_paths.push_back(get_executable_path());
     } else {
-        search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
+        search_paths.push_back(fs::u8path(user_search_path));
     }

     int best_score = 0;
-    std::wstring best_path;
+    fs::path best_path;

-    namespace fs = std::filesystem;
     for (const auto & search_path : search_paths) {
         if (!fs::exists(search_path)) {
             continue;
@@ -514,31 +478,31 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
         for (const auto & entry : dir_it) {
             try {
                 if (entry.is_regular_file()) {
-                    std::wstring filename = entry.path().filename().wstring();
-                    std::wstring ext = entry.path().extension().wstring();
+                    std::string filename = entry.path().filename().string();
+                    std::string ext = entry.path().extension().string();
                     if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
-                        dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
+                        dl_handle_ptr handle { dl_load_library(entry.path()) };
                         if (!handle) {
-                            GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
+                            GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
                             continue;
                         }

                         auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
                         if (!score_fn) {
-                            GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
+                            GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
                             continue;
                         }

                         int s = score_fn();
-                        GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
+                        GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
                         if (s > best_score) {
                             best_score = s;
-                            best_path = entry.path().wstring();
+                            best_path = entry.path();
                         }
                     }
                 }
             } catch (const std::exception & e) {
-                GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), e.what());
+                GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, entry.path().string().c_str(), e.what());
             }
         }
     }
@@ -546,7 +510,7 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
     if (best_score == 0) {
         // try to load the base backend
         for (const auto & search_path : search_paths) {
-            std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
+            fs::path path = fs::path(search_path) / (backend_filename_prefix() + name + backend_filename_suffix());
             if (fs::exists(path)) {
                 return get_reg().load_backend(path, silent);
             }