mirror of
https://github.com/ollama/ollama.git
synced 2026-01-04 13:39:28 -05:00
Compare commits
2 Commits
brucemacd/
...
progress-f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fcfbb06f1b | ||
|
|
e8d35d0de0 |
@@ -382,8 +382,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
|
||||
- [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)
|
||||
- [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivalent endpoint with Ollama support for running locally)
|
||||
- [AntSK](https://github.com/AIDotNet/AntSK) (Out-of-the-box & Adaptable RAG Chatbot)
|
||||
- [MaxKB](https://github.com/1Panel-dev/MaxKB/) (Ready-to-use & flexible RAG Chatbot)
|
||||
|
||||
### Cloud
|
||||
|
||||
|
||||
@@ -126,7 +126,8 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
const maxBufferSize = 512 * format.KiloByte
|
||||
@@ -189,7 +190,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
// GenerateResponseFunc is a function that [Client.Generate] invokes every time
|
||||
|
||||
@@ -1,86 +0,0 @@
|
||||
package backend
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/ollama/ollama/ml"
|
||||
"github.com/ollama/ollama/model"
|
||||
"github.com/ollama/ollama/server"
|
||||
|
||||
_ "github.com/ollama/ollama/model/models/llama"
|
||||
)
|
||||
|
||||
var modelName = flag.String("m", "", "Name of the model to benchmark")
|
||||
|
||||
// suppressOutput silences os.Stdout, os.Stderr and the standard logger, and
// returns a cleanup function that puts all three back.
//
// os.Stdout and os.Stderr are set to nil and the standard logger is pointed
// at io.Discard. The cleanup function restores the exact writer the standard
// logger had before the call (rather than assuming it was os.Stderr), so a
// custom log destination installed by the caller survives the round trip.
func suppressOutput() (cleanup func()) {
	oldStdout, oldStderr := os.Stdout, os.Stderr
	oldLogWriter := log.Writer()

	os.Stdout, os.Stderr = nil, nil
	log.SetOutput(io.Discard)

	return func() {
		os.Stdout, os.Stderr = oldStdout, oldStderr
		log.SetOutput(oldLogWriter)
	}
}
|
||||
|
||||
func setupModel(b *testing.B) model.Model {
|
||||
if *modelName == "" {
|
||||
b.Fatal("Error: -m flag is required for benchmark tests")
|
||||
}
|
||||
|
||||
sm, err := server.GetModel(*modelName)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
m, err := model.New(sm.ModelPath)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
m.Config().Cache.Init(m.Backend(), ml.DTypeF32, 2048)
|
||||
return m
|
||||
}
|
||||
|
||||
func BenchmarkGGMLOperations(b *testing.B) {
|
||||
// loading the GGML back-end logs to standard out and makes the bench output messy
|
||||
cleanup := suppressOutput()
|
||||
defer cleanup()
|
||||
|
||||
b.Setenv("OLLAMA_BENCHMARK", "1")
|
||||
b.Setenv("OLLAMA_BACKEND", "ggml")
|
||||
|
||||
m := setupModel(b)
|
||||
|
||||
// Sample input data
|
||||
inputIDs := []int32{1, 2, 3, 4, 5}
|
||||
options := model.Options{
|
||||
Inputs: inputIDs,
|
||||
Positions: []int32{1, 2, 3, 4, 5},
|
||||
Sequences: []int{1, 1, 1, 1, 1},
|
||||
Outputs: []int32{int32(len(inputIDs) - 1)},
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
for range b.N {
|
||||
ctx := m.Backend().NewContext()
|
||||
defer ctx.Close()
|
||||
|
||||
modelOutput, err := model.Forward(ctx, m, options)
|
||||
if err != nil {
|
||||
b.Fatal(fmt.Errorf("forward pass failed: %v", err))
|
||||
}
|
||||
|
||||
ctx.Compute(modelOutput)
|
||||
|
||||
for _, op := range ctx.Timing() {
|
||||
b.ReportMetric(op.Duration, fmt.Sprintf("%s_ms", op.Type))
|
||||
}
|
||||
}
|
||||
}
|
||||
38
cmd/cmd.go
38
cmd/cmd.go
@@ -15,13 +15,11 @@ import (
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/console"
|
||||
@@ -330,6 +328,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {
|
||||
if err := PullHandler(cmd, []string{name}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return client.Show(cmd.Context(), &api.ShowRequest{Name: name})
|
||||
}
|
||||
return info, err
|
||||
@@ -858,17 +857,6 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
|
||||
spinner := progress.NewSpinner("")
|
||||
p.Add("", spinner)
|
||||
|
||||
cancelCtx, cancel := context.WithCancel(cmd.Context())
|
||||
defer cancel()
|
||||
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigChan, syscall.SIGINT)
|
||||
|
||||
go func() {
|
||||
<-sigChan
|
||||
cancel()
|
||||
}()
|
||||
|
||||
var state *displayResponseState = &displayResponseState{}
|
||||
var latest api.ChatResponse
|
||||
var fullResponse strings.Builder
|
||||
@@ -903,10 +891,7 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
|
||||
req.KeepAlive = opts.KeepAlive
|
||||
}
|
||||
|
||||
if err := client.Chat(cancelCtx, req, fn); err != nil {
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return nil, nil
|
||||
}
|
||||
if err := client.Chat(cmd.Context(), req, fn); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -946,17 +931,6 @@ func generate(cmd *cobra.Command, opts runOptions) error {
|
||||
generateContext = []int{}
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(cmd.Context())
|
||||
defer cancel()
|
||||
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigChan, syscall.SIGINT)
|
||||
|
||||
go func() {
|
||||
<-sigChan
|
||||
cancel()
|
||||
}()
|
||||
|
||||
var state *displayResponseState = &displayResponseState{}
|
||||
|
||||
fn := func(response api.GenerateResponse) error {
|
||||
@@ -992,10 +966,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
|
||||
KeepAlive: opts.KeepAlive,
|
||||
}
|
||||
|
||||
if err := client.Generate(ctx, &request, fn); err != nil {
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return nil
|
||||
}
|
||||
if err := client.Generate(cmd.Context(), &request, fn); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -1017,8 +988,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
|
||||
latest.Summary()
|
||||
}
|
||||
|
||||
ctx = context.WithValue(cmd.Context(), generateContextKey("context"), latest.Context)
|
||||
cmd.SetContext(ctx)
|
||||
cmd.SetContext(context.WithValue(cmd.Context(), generateContextKey("context"), latest.Context))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -10,7 +10,6 @@ import (
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/spf13/cobra"
|
||||
@@ -491,96 +490,6 @@ func TestPushHandler(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestListHandler(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
args []string
|
||||
serverResponse []api.ListModelResponse
|
||||
expectedError string
|
||||
expectedOutput string
|
||||
}{
|
||||
{
|
||||
name: "list all models",
|
||||
args: []string{},
|
||||
serverResponse: []api.ListModelResponse{
|
||||
{Name: "model1", Digest: "sha256:abc123", Size: 1024, ModifiedAt: time.Now().Add(-24 * time.Hour)},
|
||||
{Name: "model2", Digest: "sha256:def456", Size: 2048, ModifiedAt: time.Now().Add(-48 * time.Hour)},
|
||||
},
|
||||
expectedOutput: "NAME ID SIZE MODIFIED \n" +
|
||||
"model1 sha256:abc12 1.0 KB 24 hours ago \n" +
|
||||
"model2 sha256:def45 2.0 KB 2 days ago \n",
|
||||
},
|
||||
{
|
||||
name: "filter models by prefix",
|
||||
args: []string{"model1"},
|
||||
serverResponse: []api.ListModelResponse{
|
||||
{Name: "model1", Digest: "sha256:abc123", Size: 1024, ModifiedAt: time.Now().Add(-24 * time.Hour)},
|
||||
{Name: "model2", Digest: "sha256:def456", Size: 2048, ModifiedAt: time.Now().Add(-24 * time.Hour)},
|
||||
},
|
||||
expectedOutput: "NAME ID SIZE MODIFIED \n" +
|
||||
"model1 sha256:abc12 1.0 KB 24 hours ago \n",
|
||||
},
|
||||
{
|
||||
name: "server error",
|
||||
args: []string{},
|
||||
expectedError: "server error",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/api/tags" || r.Method != http.MethodGet {
|
||||
t.Errorf("unexpected request to %s %s", r.Method, r.URL.Path)
|
||||
http.Error(w, "not found", http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
|
||||
if tt.expectedError != "" {
|
||||
http.Error(w, tt.expectedError, http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
response := api.ListResponse{Models: tt.serverResponse}
|
||||
if err := json.NewEncoder(w).Encode(response); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}))
|
||||
defer mockServer.Close()
|
||||
|
||||
t.Setenv("OLLAMA_HOST", mockServer.URL)
|
||||
|
||||
cmd := &cobra.Command{}
|
||||
cmd.SetContext(context.TODO())
|
||||
|
||||
// Capture stdout
|
||||
oldStdout := os.Stdout
|
||||
r, w, _ := os.Pipe()
|
||||
os.Stdout = w
|
||||
|
||||
err := ListHandler(cmd, tt.args)
|
||||
|
||||
// Restore stdout and get output
|
||||
w.Close()
|
||||
os.Stdout = oldStdout
|
||||
output, _ := io.ReadAll(r)
|
||||
|
||||
if tt.expectedError == "" {
|
||||
if err != nil {
|
||||
t.Errorf("expected no error, got %v", err)
|
||||
}
|
||||
if got := string(output); got != tt.expectedOutput {
|
||||
t.Errorf("expected output:\n%s\ngot:\n%s", tt.expectedOutput, got)
|
||||
}
|
||||
} else {
|
||||
if err == nil || !strings.Contains(err.Error(), tt.expectedError) {
|
||||
t.Errorf("expected error containing %q, got %v", tt.expectedError, err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateHandler(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
|
||||
@@ -167,8 +167,6 @@ var (
|
||||
MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
|
||||
// Enable the new Ollama engine
|
||||
NewEngine = Bool("OLLAMA_NEW_ENGINE")
|
||||
// Ollama is running in a benchmark context, additional timing data will be collected.
|
||||
Benchmark = Bool("OLLAMA_BENCHMARK")
|
||||
)
|
||||
|
||||
func String(s string) func() string {
|
||||
|
||||
@@ -352,10 +352,6 @@ func (c *testContext) MaxTensors() int {
|
||||
return 10
|
||||
}
|
||||
|
||||
func (c *testContext) Timing() []ml.OpTiming {
|
||||
return []ml.OpTiming{}
|
||||
}
|
||||
|
||||
// Close does nothing.
func (c *testContext) Close() {}
|
||||
|
||||
type testTensor struct {
|
||||
|
||||
@@ -1,24 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Yang <mxyng@pm.me>
|
||||
Date: Tue, 18 Feb 2025 14:47:21 -0800
|
||||
Subject: [PATCH] remove amx
|
||||
|
||||
---
|
||||
ggml/src/CMakeLists.txt | 4 ----
|
||||
1 file changed, 4 deletions(-)
|
||||
|
||||
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
|
||||
index 72b488dd..50828717 100644
|
||||
--- a/ggml/src/CMakeLists.txt
|
||||
+++ b/ggml/src/CMakeLists.txt
|
||||
@@ -293,10 +293,6 @@ if (GGML_CPU_ALL_VARIANTS)
|
||||
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
|
||||
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
|
||||
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
|
||||
- if (NOT MSVC)
|
||||
- # MSVC doesn't support AMX
|
||||
- ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
|
||||
- endif()
|
||||
else ()
|
||||
ggml_add_cpu_backend_variant_impl("")
|
||||
endif()
|
||||
@@ -1,285 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: jmorganca <jmorganca@gmail.com>
|
||||
Date: Sun, 16 Feb 2025 20:00:22 -0500
|
||||
Subject: [PATCH] use std::filesystem::path instead of wstring
|
||||
|
||||
---
|
||||
ggml/src/ggml-backend-reg.cpp | 116 ++++++++++++----------------------
|
||||
1 file changed, 40 insertions(+), 76 deletions(-)
|
||||
|
||||
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
|
||||
index 84b21dd8..de78feae 100644
|
||||
--- a/ggml/src/ggml-backend-reg.cpp
|
||||
+++ b/ggml/src/ggml-backend-reg.cpp
|
||||
@@ -72,16 +72,6 @@
|
||||
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
||||
#endif
|
||||
|
||||
-static std::wstring utf8_to_utf16(const std::string & str) {
|
||||
- std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
||||
- return converter.from_bytes(str);
|
||||
-}
|
||||
-
|
||||
-static std::string utf16_to_utf8(const std::wstring & str) {
|
||||
- std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
||||
- return converter.to_bytes(str);
|
||||
-}
|
||||
-
|
||||
#if defined(__clang__)
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
@@ -96,12 +86,12 @@ struct dl_handle_deleter {
|
||||
}
|
||||
};
|
||||
|
||||
-static dl_handle * dl_load_library(const std::wstring & path) {
|
||||
+static dl_handle * dl_load_library(const std::filesystem::path & path) {
|
||||
// suppress error dialogs for missing DLLs
|
||||
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
||||
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
||||
|
||||
- HMODULE handle = LoadLibraryW(path.c_str());
|
||||
+ HMODULE handle = LoadLibraryW(path.wstring().c_str());
|
||||
|
||||
SetErrorMode(old_mode);
|
||||
|
||||
@@ -129,8 +119,8 @@ struct dl_handle_deleter {
|
||||
}
|
||||
};
|
||||
|
||||
-static void * dl_load_library(const std::wstring & path) {
|
||||
- dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);
|
||||
+static void * dl_load_library(const std::filesystem::path & path) {
|
||||
+ dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
|
||||
|
||||
return handle;
|
||||
}
|
||||
@@ -222,11 +212,11 @@ struct ggml_backend_registry {
|
||||
);
|
||||
}
|
||||
|
||||
- ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
|
||||
+ ggml_backend_reg_t load_backend(const std::filesystem::path & path, bool silent) {
|
||||
dl_handle_ptr handle { dl_load_library(path) };
|
||||
if (!handle) {
|
||||
if (!silent) {
|
||||
- GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
|
||||
+ GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path.string().c_str());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@@ -234,7 +224,7 @@ struct ggml_backend_registry {
|
||||
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
||||
if (score_fn && score_fn() == 0) {
|
||||
if (!silent) {
|
||||
- GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
|
||||
+ GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path.string().c_str());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@@ -242,7 +232,7 @@ struct ggml_backend_registry {
|
||||
auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
|
||||
if (!backend_init_fn) {
|
||||
if (!silent) {
|
||||
- GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
|
||||
+ GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path.string().c_str());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@@ -251,16 +241,16 @@ struct ggml_backend_registry {
|
||||
if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
|
||||
if (!silent) {
|
||||
if (!reg) {
|
||||
- GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
|
||||
+ GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path.string().c_str());
|
||||
} else {
|
||||
GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
|
||||
- __func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
|
||||
+ __func__, path.string().c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
- GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
|
||||
+ GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path.string().c_str());
|
||||
|
||||
register_backend(reg, score_fn ? score_fn() : -1, std::move(handle));
|
||||
|
||||
@@ -396,14 +386,14 @@ ggml_backend_t ggml_backend_init_best(void) {
|
||||
|
||||
// Dynamic loading
|
||||
ggml_backend_reg_t ggml_backend_load(const char * path) {
|
||||
- return get_reg().load_backend(utf8_to_utf16(path), false);
|
||||
+ return get_reg().load_backend(path, false);
|
||||
}
|
||||
|
||||
void ggml_backend_unload(ggml_backend_reg_t reg) {
|
||||
get_reg().unload_backend(reg, true);
|
||||
}
|
||||
|
||||
-static std::wstring get_executable_path() {
|
||||
+static std::filesystem::path get_executable_path() {
|
||||
#if defined(__APPLE__)
|
||||
// get executable path
|
||||
std::vector<char> path;
|
||||
@@ -415,15 +405,9 @@ static std::wstring get_executable_path() {
|
||||
}
|
||||
path.resize(size);
|
||||
}
|
||||
- std::string base_path(path.data(), size);
|
||||
- // remove executable name
|
||||
- auto last_slash = base_path.find_last_of('/');
|
||||
- if (last_slash != std::string::npos) {
|
||||
- base_path = base_path.substr(0, last_slash);
|
||||
- }
|
||||
- return utf8_to_utf16(base_path + "/");
|
||||
+
|
||||
+ return std::filesystem::path(path.data()).parent_path();
|
||||
#elif defined(__linux__) || defined(__FreeBSD__)
|
||||
- std::string base_path = ".";
|
||||
std::vector<char> path(1024);
|
||||
while (true) {
|
||||
// get executable path
|
||||
@@ -436,76 +420,56 @@ static std::wstring get_executable_path() {
|
||||
break;
|
||||
}
|
||||
if (len < (ssize_t) path.size()) {
|
||||
- base_path = std::string(path.data(), len);
|
||||
- // remove executable name
|
||||
- auto last_slash = base_path.find_last_of('/');
|
||||
- if (last_slash != std::string::npos) {
|
||||
- base_path = base_path.substr(0, last_slash);
|
||||
- }
|
||||
- break;
|
||||
+ return std::filesystem::path(path.data()).parent_path();
|
||||
}
|
||||
path.resize(path.size() * 2);
|
||||
}
|
||||
-
|
||||
- return utf8_to_utf16(base_path + "/");
|
||||
#elif defined(_WIN32)
|
||||
std::vector<wchar_t> path(MAX_PATH);
|
||||
DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
|
||||
if (len == 0) {
|
||||
return {};
|
||||
}
|
||||
- std::wstring base_path(path.data(), len);
|
||||
- // remove executable name
|
||||
- auto last_slash = base_path.find_last_of('\\');
|
||||
- if (last_slash != std::string::npos) {
|
||||
- base_path = base_path.substr(0, last_slash);
|
||||
- }
|
||||
- return base_path + L"\\";
|
||||
-#else
|
||||
- return {};
|
||||
-#endif
|
||||
-}
|
||||
|
||||
-static std::wstring backend_filename_prefix() {
|
||||
-#ifdef _WIN32
|
||||
- return L"ggml-";
|
||||
+ return std::filesystem::path(path.data()).parent_path();
|
||||
#else
|
||||
- return L"libggml-";
|
||||
+ return {};
|
||||
#endif
|
||||
}
|
||||
|
||||
-static std::wstring backend_filename_suffix() {
|
||||
+static std::string backend_filename_prefix() {
|
||||
#ifdef _WIN32
|
||||
- return L".dll";
|
||||
+ return "ggml-";
|
||||
#else
|
||||
- return L".so";
|
||||
+ return "libggml-";
|
||||
#endif
|
||||
}
|
||||
|
||||
-static std::wstring path_separator() {
|
||||
+static std::string backend_filename_suffix() {
|
||||
#ifdef _WIN32
|
||||
- return L"\\";
|
||||
+ return ".dll";
|
||||
#else
|
||||
- return L"/";
|
||||
+ return ".so";
|
||||
#endif
|
||||
}
|
||||
|
||||
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
|
||||
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
|
||||
// TODO: search system paths
|
||||
- std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
|
||||
- std::vector<std::wstring> search_paths;
|
||||
+ namespace fs = std::filesystem;
|
||||
+ std::string file_prefix = backend_filename_prefix() + name + "-";
|
||||
+ std::vector<fs::path> search_paths;
|
||||
+
|
||||
if (user_search_path == nullptr) {
|
||||
- search_paths.push_back(L"." + path_separator());
|
||||
+ search_paths.push_back(fs::current_path());
|
||||
search_paths.push_back(get_executable_path());
|
||||
} else {
|
||||
- search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
|
||||
+ search_paths.push_back(fs::u8path(user_search_path));
|
||||
}
|
||||
|
||||
int best_score = 0;
|
||||
- std::wstring best_path;
|
||||
+ fs::path best_path;
|
||||
|
||||
- namespace fs = std::filesystem;
|
||||
for (const auto & search_path : search_paths) {
|
||||
if (!fs::exists(search_path)) {
|
||||
continue;
|
||||
@@ -514,31 +478,31 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
|
||||
for (const auto & entry : dir_it) {
|
||||
try {
|
||||
if (entry.is_regular_file()) {
|
||||
- std::wstring filename = entry.path().filename().wstring();
|
||||
- std::wstring ext = entry.path().extension().wstring();
|
||||
+ std::string filename = entry.path().filename().string();
|
||||
+ std::string ext = entry.path().extension().string();
|
||||
if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
|
||||
- dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
|
||||
+ dl_handle_ptr handle { dl_load_library(entry.path()) };
|
||||
if (!handle) {
|
||||
- GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
|
||||
+ GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
||||
if (!score_fn) {
|
||||
- GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
|
||||
+ GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
int s = score_fn();
|
||||
- GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
|
||||
+ GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
|
||||
if (s > best_score) {
|
||||
best_score = s;
|
||||
- best_path = entry.path().wstring();
|
||||
+ best_path = entry.path();
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (const std::exception & e) {
|
||||
- GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), e.what());
|
||||
+ GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, entry.path().string().c_str(), e.what());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -546,7 +510,7 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
|
||||
if (best_score == 0) {
|
||||
// try to load the base backend
|
||||
for (const auto & search_path : search_paths) {
|
||||
- std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
|
||||
+ fs::path path = fs::path(search_path) / (backend_filename_prefix() + name + backend_filename_suffix());
|
||||
if (fs::exists(path)) {
|
||||
return get_reg().load_backend(path, silent);
|
||||
}
|
||||
14
main.go
14
main.go
@@ -2,6 +2,8 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"os/signal"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
@@ -9,5 +11,15 @@ import (
|
||||
)
|
||||
|
||||
func main() {
|
||||
cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background()))
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigChan, os.Interrupt)
|
||||
go func() {
|
||||
<-sigChan
|
||||
cancel()
|
||||
}()
|
||||
|
||||
cobra.CheckErr(cmd.NewCLI().ExecuteContext(ctx))
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ package ml
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"cmp"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"os"
|
||||
@@ -38,7 +37,7 @@ func RegisterBackend(name string, f func(*os.File) (Backend, error)) {
|
||||
}
|
||||
|
||||
func NewBackend(f *os.File) (Backend, error) {
|
||||
if backend, ok := backends[cmp.Or(os.Getenv("OLLAMA_BACKEND"), "ggml")]; ok {
|
||||
if backend, ok := backends["ggml"]; ok {
|
||||
return backend(f)
|
||||
}
|
||||
|
||||
@@ -54,30 +53,6 @@ type Context interface {
|
||||
Compute(...Tensor)
|
||||
MaxTensors() int
|
||||
Close()
|
||||
|
||||
Timing() []OpTiming
|
||||
}
|
||||
|
||||
// OpType labels the kind of operation that was timed during a forward pass.
type OpType string

// Operation kinds attached to timing entries.
const (
	// View marks a memory view.
	View OpType = "View"
	// Copy marks a memory copy.
	Copy OpType = "Copy"
	// Reshape marks a reshape.
	Reshape OpType = "Reshape"
	// Permute marks a permutation of dimensions.
	Permute OpType = "Permute"
	// Contiguous marks an operation that makes a tensor contiguous.
	Contiguous OpType = "Contiguous"
	// Input marks an input tensor.
	Input OpType = "Input"
	// ComputeOp marks a computation node.
	ComputeOp OpType = "Compute"
	// Transpose marks a transpose.
	Transpose OpType = "Transpose"
)

// OpTiming holds the timing record for one operation.
type OpTiming struct {
	// Type is the operation's kind.
	Type OpType
	// Operation is a human-readable description of the operation.
	Operation string
	// Duration is how long the operation took.
	// NOTE(review): the ggml backend fills this from a millisecond value — confirm for other backends.
	Duration float64
	// Order is the entry's position in the recorded sequence.
	Order int
}
|
||||
|
||||
type Tensor interface {
|
||||
|
||||
@@ -4,8 +4,6 @@ package ggml
|
||||
#cgo CPPFLAGS: -I${SRCDIR}/ggml/include
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <time.h>
|
||||
#include <string.h>
|
||||
#include "ggml.h"
|
||||
#include "ggml-cpu.h"
|
||||
#include "ggml-backend.h"
|
||||
@@ -23,54 +21,6 @@ COMPILER inline get_compiler() {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Fixed-capacity storage for per-tensor timing samples.
// MAX_TENSOR_NAME is the size of the name buffer in each sample;
// MAX_TIMINGS is the number of samples the table can hold.
#define MAX_TENSOR_NAME 256
#define MAX_TIMINGS 1000

// One timing sample: a tensor name and a duration in milliseconds.
typedef struct {
    char tensor_name[MAX_TENSOR_NAME];
    double duration_ms;
} timing_entry;

// The sample table: entries[0 .. count-1] are the recorded samples.
typedef struct {
    timing_entry entries[MAX_TIMINGS];
    int count;
} timing_data;

// Global, zero-initialized timing table.
timing_data g_timings = {0};
|
||||
|
||||
double get_time_ms() {
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
return ts.tv_sec * 1000.0 + ts.tv_nsec / 1000000.0;
|
||||
}
|
||||
|
||||
bool debug_callback(struct ggml_tensor * t, bool ask, void * user_data) {
|
||||
static double start_time;
|
||||
static char current_tensor[MAX_TENSOR_NAME];
|
||||
|
||||
if (ask) {
|
||||
start_time = get_time_ms();
|
||||
strncpy(current_tensor, t->name, MAX_TENSOR_NAME - 1);
|
||||
current_tensor[MAX_TENSOR_NAME - 1] = '\0';
|
||||
} else {
|
||||
double end_time = get_time_ms();
|
||||
double duration = end_time - start_time;
|
||||
|
||||
if (g_timings.count < MAX_TIMINGS) {
|
||||
strncpy(g_timings.entries[g_timings.count].tensor_name, current_tensor, MAX_TENSOR_NAME - 1);
|
||||
g_timings.entries[g_timings.count].duration_ms = duration;
|
||||
g_timings.count++;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void clear_timings() {
|
||||
g_timings.count = 0;
|
||||
}
|
||||
|
||||
*/
|
||||
import "C"
|
||||
|
||||
@@ -79,11 +29,9 @@ import (
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ollama/ollama/envconfig"
|
||||
"github.com/ollama/ollama/format"
|
||||
fs "github.com/ollama/ollama/fs/ggml"
|
||||
"github.com/ollama/ollama/ml"
|
||||
@@ -308,62 +256,7 @@ func (c *Context) Forward(t ml.Tensor) {
|
||||
C.ggml_build_forward_expand(c.graph, t.(*Tensor).t)
|
||||
}
|
||||
|
||||
// Timing retrieves the collected timing data
|
||||
func (c *Context) Timing() []ml.OpTiming {
|
||||
sequence := make([]ml.OpTiming, C.g_timings.count)
|
||||
|
||||
for i := range int(C.g_timings.count) {
|
||||
entry := C.g_timings.entries[i]
|
||||
tensorName := C.GoString(&entry.tensor_name[0])
|
||||
|
||||
// Determine operation type and description based on tensor name
|
||||
var opType ml.OpType
|
||||
var opDesc string
|
||||
|
||||
switch {
|
||||
case strings.Contains(tensorName, "(view)"):
|
||||
opType, opDesc = ml.View, "Memory view"
|
||||
case strings.Contains(tensorName, "(copy)") || strings.Contains(tensorName, "(copy of"):
|
||||
opType, opDesc = ml.Copy, "Memory copy"
|
||||
case strings.Contains(tensorName, "(reshaped)"):
|
||||
opType, opDesc = ml.Reshape, "Reshape"
|
||||
case strings.Contains(tensorName, "(permuted)"):
|
||||
opType, opDesc = ml.Permute, "Permute dimensions"
|
||||
case strings.Contains(tensorName, "(cont)"):
|
||||
opType, opDesc = ml.Contiguous, "Make contiguous"
|
||||
case strings.Contains(tensorName, "(transposed)"):
|
||||
opType, opDesc = ml.Transpose, "Transpose"
|
||||
case strings.HasPrefix(tensorName, "leaf_"):
|
||||
opType, opDesc = ml.Input, fmt.Sprintf("Input tensor %s", tensorName)
|
||||
case strings.HasPrefix(tensorName, "node_"):
|
||||
opType, opDesc = ml.ComputeOp, fmt.Sprintf("Computation %s", tensorName)
|
||||
default:
|
||||
opType, opDesc = "Unknown", tensorName
|
||||
}
|
||||
|
||||
sequence[i] = ml.OpTiming{
|
||||
Type: opType,
|
||||
Operation: opDesc,
|
||||
Duration: float64(entry.duration_ms),
|
||||
Order: i,
|
||||
}
|
||||
}
|
||||
|
||||
return sequence
|
||||
}
|
||||
|
||||
func (c *Context) Compute(tensors ...ml.Tensor) {
|
||||
if envconfig.Benchmark() {
|
||||
// Clear previous timings before new computation
|
||||
C.clear_timings()
|
||||
|
||||
C.ggml_backend_sched_set_eval_callback(
|
||||
c.sched,
|
||||
C.ggml_backend_eval_callback(C.debug_callback),
|
||||
nil,
|
||||
)
|
||||
}
|
||||
|
||||
C.ggml_backend_sched_graph_compute_async(c.sched, c.graph)
|
||||
|
||||
needSync := true
|
||||
|
||||
4
ml/backend/ggml/ggml/src/CMakeLists.txt
vendored
4
ml/backend/ggml/ggml/src/CMakeLists.txt
vendored
@@ -293,6 +293,10 @@ if (GGML_CPU_ALL_VARIANTS)
|
||||
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
|
||||
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
|
||||
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
|
||||
if (NOT MSVC)
|
||||
# MSVC doesn't support AMX
|
||||
ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
|
||||
endif()
|
||||
else ()
|
||||
ggml_add_cpu_backend_variant_impl("")
|
||||
endif()
|
||||
|
||||
116
ml/backend/ggml/ggml/src/ggml-backend-reg.cpp
vendored
116
ml/backend/ggml/ggml/src/ggml-backend-reg.cpp
vendored
@@ -72,6 +72,16 @@
|
||||
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
||||
#endif
|
||||
|
||||
// Converts a UTF-8 std::string to a std::wstring using
// std::codecvt_utf8_utf16<wchar_t>.
static std::wstring utf8_to_utf16(const std::string & str) {
    using utf8_utf16_codec = std::codecvt_utf8_utf16<wchar_t>;
    return std::wstring_convert<utf8_utf16_codec>().from_bytes(str);
}
|
||||
|
||||
// Converts a std::wstring to a UTF-8 std::string using
// std::codecvt_utf8_utf16<wchar_t>.
static std::string utf16_to_utf8(const std::wstring & str) {
    using utf8_utf16_codec = std::codecvt_utf8_utf16<wchar_t>;
    return std::wstring_convert<utf8_utf16_codec>().to_bytes(str);
}
|
||||
|
||||
#if defined(__clang__)
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
@@ -86,12 +96,12 @@ struct dl_handle_deleter {
|
||||
}
|
||||
};
|
||||
|
||||
static dl_handle * dl_load_library(const std::filesystem::path & path) {
|
||||
static dl_handle * dl_load_library(const std::wstring & path) {
|
||||
// suppress error dialogs for missing DLLs
|
||||
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
||||
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
||||
|
||||
HMODULE handle = LoadLibraryW(path.wstring().c_str());
|
||||
HMODULE handle = LoadLibraryW(path.c_str());
|
||||
|
||||
SetErrorMode(old_mode);
|
||||
|
||||
@@ -119,8 +129,8 @@ struct dl_handle_deleter {
|
||||
}
|
||||
};
|
||||
|
||||
static void * dl_load_library(const std::filesystem::path & path) {
|
||||
dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
|
||||
static void * dl_load_library(const std::wstring & path) {
|
||||
dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);
|
||||
|
||||
return handle;
|
||||
}
|
||||
@@ -212,11 +222,11 @@ struct ggml_backend_registry {
|
||||
);
|
||||
}
|
||||
|
||||
ggml_backend_reg_t load_backend(const std::filesystem::path & path, bool silent) {
|
||||
ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
|
||||
dl_handle_ptr handle { dl_load_library(path) };
|
||||
if (!handle) {
|
||||
if (!silent) {
|
||||
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path.string().c_str());
|
||||
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@@ -224,7 +234,7 @@ struct ggml_backend_registry {
|
||||
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
||||
if (score_fn && score_fn() == 0) {
|
||||
if (!silent) {
|
||||
GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path.string().c_str());
|
||||
GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@@ -232,7 +242,7 @@ struct ggml_backend_registry {
|
||||
auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
|
||||
if (!backend_init_fn) {
|
||||
if (!silent) {
|
||||
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path.string().c_str());
|
||||
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@@ -241,16 +251,16 @@ struct ggml_backend_registry {
|
||||
if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
|
||||
if (!silent) {
|
||||
if (!reg) {
|
||||
GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path.string().c_str());
|
||||
GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
|
||||
} else {
|
||||
GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
|
||||
__func__, path.string().c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
|
||||
__func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path.string().c_str());
|
||||
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
|
||||
|
||||
register_backend(reg, score_fn ? score_fn() : -1, std::move(handle));
|
||||
|
||||
@@ -386,14 +396,14 @@ ggml_backend_t ggml_backend_init_best(void) {
|
||||
|
||||
// Dynamic loading
|
||||
ggml_backend_reg_t ggml_backend_load(const char * path) {
|
||||
return get_reg().load_backend(path, false);
|
||||
return get_reg().load_backend(utf8_to_utf16(path), false);
|
||||
}
|
||||
|
||||
// Public entry point for unloading a backend: forwards `reg` to the global
// registry's unload_backend().
// NOTE(review): the meaning of the hard-coded `true` second argument is not
// visible here - confirm against ggml_backend_registry::unload_backend.
void ggml_backend_unload(ggml_backend_reg_t reg) {
|
||||
get_reg().unload_backend(reg, true);
|
||||
}
|
||||
|
||||
static std::filesystem::path get_executable_path() {
|
||||
static std::wstring get_executable_path() {
|
||||
#if defined(__APPLE__)
|
||||
// get executable path
|
||||
std::vector<char> path;
|
||||
@@ -405,9 +415,15 @@ static std::filesystem::path get_executable_path() {
|
||||
}
|
||||
path.resize(size);
|
||||
}
|
||||
|
||||
return std::filesystem::path(path.data()).parent_path();
|
||||
std::string base_path(path.data(), size);
|
||||
// remove executable name
|
||||
auto last_slash = base_path.find_last_of('/');
|
||||
if (last_slash != std::string::npos) {
|
||||
base_path = base_path.substr(0, last_slash);
|
||||
}
|
||||
return utf8_to_utf16(base_path + "/");
|
||||
#elif defined(__linux__) || defined(__FreeBSD__)
|
||||
std::string base_path = ".";
|
||||
std::vector<char> path(1024);
|
||||
while (true) {
|
||||
// get executable path
|
||||
@@ -420,56 +436,76 @@ static std::filesystem::path get_executable_path() {
|
||||
break;
|
||||
}
|
||||
if (len < (ssize_t) path.size()) {
|
||||
return std::filesystem::path(path.data()).parent_path();
|
||||
base_path = std::string(path.data(), len);
|
||||
// remove executable name
|
||||
auto last_slash = base_path.find_last_of('/');
|
||||
if (last_slash != std::string::npos) {
|
||||
base_path = base_path.substr(0, last_slash);
|
||||
}
|
||||
break;
|
||||
}
|
||||
path.resize(path.size() * 2);
|
||||
}
|
||||
|
||||
return utf8_to_utf16(base_path + "/");
|
||||
#elif defined(_WIN32)
|
||||
std::vector<wchar_t> path(MAX_PATH);
|
||||
DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
|
||||
if (len == 0) {
|
||||
return {};
|
||||
}
|
||||
|
||||
return std::filesystem::path(path.data()).parent_path();
|
||||
std::wstring base_path(path.data(), len);
|
||||
// remove executable name
|
||||
auto last_slash = base_path.find_last_of('\\');
|
||||
if (last_slash != std::string::npos) {
|
||||
base_path = base_path.substr(0, last_slash);
|
||||
}
|
||||
return base_path + L"\\";
|
||||
#else
|
||||
return {};
|
||||
#endif
|
||||
}
|
||||
|
||||
static std::string backend_filename_prefix() {
|
||||
static std::wstring backend_filename_prefix() {
|
||||
#ifdef _WIN32
|
||||
return "ggml-";
|
||||
return L"ggml-";
|
||||
#else
|
||||
return "libggml-";
|
||||
return L"libggml-";
|
||||
#endif
|
||||
}
|
||||
|
||||
static std::string backend_filename_suffix() {
|
||||
static std::wstring backend_filename_suffix() {
|
||||
#ifdef _WIN32
|
||||
return ".dll";
|
||||
return L".dll";
|
||||
#else
|
||||
return ".so";
|
||||
return L".so";
|
||||
#endif
|
||||
}
|
||||
|
||||
// Returns the directory separator as a wide string: a backslash when _WIN32 is
// defined, a forward slash otherwise. Search paths in this file are built by
// appending this separator so a file name can be concatenated directly after them.
static std::wstring path_separator() {
|
||||
#ifdef _WIN32
|
||||
return L"\\";
|
||||
#else
|
||||
return L"/";
|
||||
#endif
|
||||
}
|
||||
|
||||
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
|
||||
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
|
||||
// TODO: search system paths
|
||||
namespace fs = std::filesystem;
|
||||
std::string file_prefix = backend_filename_prefix() + name + "-";
|
||||
std::vector<fs::path> search_paths;
|
||||
|
||||
std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
|
||||
std::vector<std::wstring> search_paths;
|
||||
if (user_search_path == nullptr) {
|
||||
search_paths.push_back(fs::current_path());
|
||||
search_paths.push_back(L"." + path_separator());
|
||||
search_paths.push_back(get_executable_path());
|
||||
} else {
|
||||
search_paths.push_back(fs::u8path(user_search_path));
|
||||
search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
|
||||
}
|
||||
|
||||
int best_score = 0;
|
||||
fs::path best_path;
|
||||
std::wstring best_path;
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
for (const auto & search_path : search_paths) {
|
||||
if (!fs::exists(search_path)) {
|
||||
continue;
|
||||
@@ -478,31 +514,31 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
|
||||
for (const auto & entry : dir_it) {
|
||||
try {
|
||||
if (entry.is_regular_file()) {
|
||||
std::string filename = entry.path().filename().string();
|
||||
std::string ext = entry.path().extension().string();
|
||||
std::wstring filename = entry.path().filename().wstring();
|
||||
std::wstring ext = entry.path().extension().wstring();
|
||||
if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
|
||||
dl_handle_ptr handle { dl_load_library(entry.path()) };
|
||||
dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
|
||||
if (!handle) {
|
||||
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
|
||||
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
||||
if (!score_fn) {
|
||||
GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
|
||||
GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
int s = score_fn();
|
||||
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
|
||||
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
|
||||
if (s > best_score) {
|
||||
best_score = s;
|
||||
best_path = entry.path();
|
||||
best_path = entry.path().wstring();
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (const std::exception & e) {
|
||||
GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, entry.path().string().c_str(), e.what());
|
||||
GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), e.what());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -510,7 +546,7 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
|
||||
if (best_score == 0) {
|
||||
// try to load the base backend
|
||||
for (const auto & search_path : search_paths) {
|
||||
fs::path path = fs::path(search_path) / (backend_filename_prefix() + name + backend_filename_suffix());
|
||||
std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
|
||||
if (fs::exists(path)) {
|
||||
return get_reg().load_backend(path, silent);
|
||||
}
|
||||
|
||||
@@ -49,29 +49,29 @@ func (p *Progress) stop() bool {
|
||||
func (p *Progress) Stop() bool {
|
||||
stopped := p.stop()
|
||||
if stopped {
|
||||
fmt.Fprint(p.w, "\n")
|
||||
p.w.Flush()
|
||||
fmt.Fprintln(p.w)
|
||||
}
|
||||
|
||||
// show cursor
|
||||
fmt.Fprint(p.w, "\033[?25h")
|
||||
p.w.Flush()
|
||||
return stopped
|
||||
}
|
||||
|
||||
func (p *Progress) StopAndClear() bool {
|
||||
defer p.w.Flush()
|
||||
|
||||
fmt.Fprint(p.w, "\033[?25l")
|
||||
defer fmt.Fprint(p.w, "\033[?25h")
|
||||
|
||||
stopped := p.stop()
|
||||
if stopped {
|
||||
// clear all progress lines
|
||||
for i := range p.pos {
|
||||
if i > 0 {
|
||||
fmt.Fprint(p.w, "\033[A")
|
||||
}
|
||||
fmt.Fprint(p.w, "\033[2K\033[1G")
|
||||
for range p.pos - 1 {
|
||||
fmt.Fprint(p.w, "\033[A")
|
||||
}
|
||||
|
||||
fmt.Fprint(p.w, "\033[2K", "\033[1G")
|
||||
}
|
||||
|
||||
// show cursor
|
||||
fmt.Fprint(p.w, "\033[?25h")
|
||||
p.w.Flush()
|
||||
return stopped
|
||||
}
|
||||
|
||||
@@ -86,19 +86,13 @@ func (p *Progress) render() {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
defer p.w.Flush()
|
||||
|
||||
// eliminate flickering on terminals that support synchronized output
|
||||
fmt.Fprint(p.w, "\033[?2026h")
|
||||
defer fmt.Fprint(p.w, "\033[?2026l")
|
||||
|
||||
fmt.Fprint(p.w, "\033[?25l")
|
||||
defer fmt.Fprint(p.w, "\033[?25h")
|
||||
|
||||
// move the cursor back to the beginning
|
||||
for range p.pos - 1 {
|
||||
fmt.Fprint(p.w, "\033[A")
|
||||
}
|
||||
|
||||
fmt.Fprint(p.w, "\033[1G")
|
||||
|
||||
// render progress lines
|
||||
@@ -110,10 +104,13 @@ func (p *Progress) render() {
|
||||
}
|
||||
|
||||
p.pos = len(p.states)
|
||||
p.w.Flush()
|
||||
}
|
||||
|
||||
func (p *Progress) start() {
|
||||
p.ticker = time.NewTicker(100 * time.Millisecond)
|
||||
// hide cursor
|
||||
fmt.Fprint(p.w, "\033[?25l")
|
||||
for range p.ticker.C {
|
||||
p.render()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user