mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-14 08:31:10 -05:00
Compare commits
1 Commits
master
...
fix/step-f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1f0110368d |
@@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
LLAMA_VERSION?=05a6f0e8946914918758db767f6eb04bc1e38507
|
LLAMA_VERSION?=338085c69e486b7155e5b03d7b5087e02c0e2528
|
||||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||||
|
|
||||||
CMAKE_ARGS?=
|
CMAKE_ARGS?=
|
||||||
|
|||||||
@@ -294,6 +294,76 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, const
|
|||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports whether a chat template source applies the `items` filter to a
// value named `arguments` (e.g. `tool_call.arguments|items`).
//
// The match is purely textual: the word "arguments", optional whitespace, a
// pipe, optional whitespace, then the word "items". Any amount of spaces,
// tabs or line breaks around the pipe is accepted, so spellings such as
// `arguments  |  items` are detected in addition to the single-space forms.
//
// template_src: raw template text to scan.
// Returns true when at least one such occurrence exists, false otherwise
// (including for an empty template).
static bool template_uses_arguments_items_filter(const std::string & template_src) {
    static constexpr char k_value_name[]  = "arguments";
    static constexpr char k_filter_name[] = "items";
    constexpr size_t value_len  = sizeof(k_value_name) - 1;
    constexpr size_t filter_len = sizeof(k_filter_name) - 1;

    // Advances past blanks starting at `i`; never returns more than size().
    const auto skip_blanks = [&template_src](size_t i) {
        while (i < template_src.size()) {
            const char c = template_src[i];
            if (c != ' ' && c != '\t' && c != '\n' && c != '\r') {
                break;
            }
            ++i;
        }
        return i;
    };

    for (size_t pos = template_src.find(k_value_name);
         pos != std::string::npos;
         pos = template_src.find(k_value_name, pos + 1)) {
        size_t cursor = skip_blanks(pos + value_len);
        if (cursor >= template_src.size() || template_src[cursor] != '|') {
            // "arguments" used without a filter here; keep looking.
            continue;
        }

        cursor = skip_blanks(cursor + 1);
        // cursor <= size(), so compare() cannot throw; a short tail simply mismatches.
        if (template_src.compare(cursor, filter_len, k_filter_name) == 0) {
            return true;
        }
    }

    return false;
}
|
||||||
|
|
||||||
|
// Rewrites string-encoded tool call arguments in `messages` into JSON objects
// when the chat template iterates over them with the `items` filter.
//
// messages:      chat messages; left untouched unless it is a JSON array.
// template_src:  chat template source, used only to decide whether the
//                conversion applies at all.
// request_name:  label placed in the log lines to identify the caller.
//
// For every `tool_calls[*].function.arguments` that is a string:
//   - an empty string is replaced with an empty JSON object;
//   - a string that parses to a JSON object is replaced by that object;
//   - a string that parses to some other JSON value is left as-is;
//   - a string that fails to parse is left as-is and counted as a failure.
// Entries that do not have the expected shape are skipped silently.
static void normalize_tool_call_arguments_for_template(
    json & messages,
    const std::string & template_src,
    const char * request_name)
{
    const bool applicable = messages.is_array() && template_uses_arguments_items_filter(template_src);
    if (!applicable) {
        return;
    }

    size_t n_converted = 0;
    size_t n_failed = 0;

    for (auto & msg : messages) {
        if (!msg.is_object()) {
            continue;
        }
        const auto calls_it = msg.find("tool_calls");
        if (calls_it == msg.end() || !calls_it->is_array()) {
            continue;
        }

        for (auto & call : *calls_it) {
            if (!call.is_object()) {
                continue;
            }
            const auto fn_it = call.find("function");
            if (fn_it == call.end() || !fn_it->is_object()) {
                continue;
            }
            const auto args_it = fn_it->find("arguments");
            if (args_it == fn_it->end() || !args_it->is_string()) {
                continue;
            }

            // Copy the text out first: the slot it lives in is overwritten below.
            const std::string raw = args_it->get<std::string>();
            if (raw.empty()) {
                *args_it = json::object();
                ++n_converted;
                continue;
            }

            try {
                json decoded = json::parse(raw);
                if (decoded.is_object()) {
                    *args_it = decoded;
                    ++n_converted;
                }
            } catch (const json::parse_error &) {
                ++n_failed;
            }
        }
    }

    if (n_converted > 0) {
        SRV_INF("[TOOLS DEBUG] %s: Converted %zu tool call argument strings to JSON objects for arguments|items template compatibility\n",
            request_name,
            n_converted);
    }
    if (n_failed > 0) {
        SRV_WRN("[TOOLS DEBUG] %s: Failed to parse %zu tool call argument strings as JSON for arguments|items template compatibility\n",
            request_name,
            n_failed);
    }
}
|
||||||
|
|
||||||
|
|
||||||
const std::vector<ggml_type> kv_cache_types = {
|
const std::vector<ggml_type> kv_cache_types = {
|
||||||
GGML_TYPE_F32,
|
GGML_TYPE_F32,
|
||||||
@@ -417,12 +487,6 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
|||||||
// n_ctx_checkpoints: max context checkpoints per slot (default: 8)
|
// n_ctx_checkpoints: max context checkpoints per slot (default: 8)
|
||||||
params.n_ctx_checkpoints = 8;
|
params.n_ctx_checkpoints = 8;
|
||||||
|
|
||||||
// llama memory fit fails if we don't provide a buffer for tensor overrides
|
|
||||||
const size_t ntbo = llama_max_tensor_buft_overrides();
|
|
||||||
while (params.tensor_buft_overrides.size() < ntbo) {
|
|
||||||
params.tensor_buft_overrides.push_back({nullptr, nullptr});
|
|
||||||
}
|
|
||||||
|
|
||||||
// decode options. Options are in the form optname:optvalue, or, for booleans, just optname.
|
// decode options. Options are in the form optname:optvalue, or, for booleans, just optname.
|
||||||
for (int i = 0; i < request->options_size(); i++) {
|
for (int i = 0; i < request->options_size(); i++) {
|
||||||
std::string opt = request->options(i);
|
std::string opt = request->options(i);
|
||||||
@@ -1261,6 +1325,11 @@ public:
|
|||||||
body_json["add_generation_prompt"] = data["add_generation_prompt"];
|
body_json["add_generation_prompt"] = data["add_generation_prompt"];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (body_json.contains("messages") && ctx_server.impl->chat_params.tmpls) {
|
||||||
|
const auto template_src = common_chat_templates_source(ctx_server.impl->chat_params.tmpls.get());
|
||||||
|
normalize_tool_call_arguments_for_template(body_json["messages"], template_src, "PredictStream");
|
||||||
|
}
|
||||||
|
|
||||||
// Debug: Print full body_json before template processing (includes messages, tools, tool_choice, etc.)
|
// Debug: Print full body_json before template processing (includes messages, tools, tool_choice, etc.)
|
||||||
SRV_DBG("[CONVERSATION DEBUG] PredictStream: Full body_json before oaicompat_chat_params_parse:\n%s\n", body_json.dump(2).c_str());
|
SRV_DBG("[CONVERSATION DEBUG] PredictStream: Full body_json before oaicompat_chat_params_parse:\n%s\n", body_json.dump(2).c_str());
|
||||||
|
|
||||||
@@ -1992,6 +2061,11 @@ public:
|
|||||||
body_json["add_generation_prompt"] = data["add_generation_prompt"];
|
body_json["add_generation_prompt"] = data["add_generation_prompt"];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (body_json.contains("messages") && ctx_server.impl->chat_params.tmpls) {
|
||||||
|
const auto template_src = common_chat_templates_source(ctx_server.impl->chat_params.tmpls.get());
|
||||||
|
normalize_tool_call_arguments_for_template(body_json["messages"], template_src, "Predict");
|
||||||
|
}
|
||||||
|
|
||||||
// Debug: Print full body_json before template processing (includes messages, tools, tool_choice, etc.)
|
// Debug: Print full body_json before template processing (includes messages, tools, tool_choice, etc.)
|
||||||
SRV_DBG("[CONVERSATION DEBUG] Predict: Full body_json before oaicompat_chat_params_parse:\n%s\n", body_json.dump(2).c_str());
|
SRV_DBG("[CONVERSATION DEBUG] Predict: Full body_json before oaicompat_chat_params_parse:\n%s\n", body_json.dump(2).c_str());
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,3 @@ protobuf
|
|||||||
certifi
|
certifi
|
||||||
packaging==24.1
|
packaging==24.1
|
||||||
setuptools
|
setuptools
|
||||||
h11
|
|
||||||
gradio
|
|
||||||
uvicorn
|
|
||||||
@@ -4,6 +4,4 @@ certifi
|
|||||||
packaging==24.1
|
packaging==24.1
|
||||||
soundfile
|
soundfile
|
||||||
setuptools
|
setuptools
|
||||||
six
|
six
|
||||||
scipy
|
|
||||||
librosa
|
|
||||||
@@ -9,12 +9,7 @@ else
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
installRequirements
|
installRequirements
|
||||||
|
|
||||||
if [ "x${USE_PIP}" == "xtrue" ]; then
|
|
||||||
pip install "setuptools<70.0.0"
|
|
||||||
else
|
|
||||||
uv pip install "setuptools<70.0.0"
|
|
||||||
fi
|
|
||||||
# Apply patch to fix PyTorch compatibility issue in voxcpm
|
# Apply patch to fix PyTorch compatibility issue in voxcpm
|
||||||
# This fixes the "Dimension out of range" error in scaled_dot_product_attention
|
# This fixes the "Dimension out of range" error in scaled_dot_product_attention
|
||||||
# by changing .contiguous() to .unsqueeze(0) in the attention module
|
# by changing .contiguous() to .unsqueeze(0) in the attention module
|
||||||
|
|||||||
@@ -83,7 +83,7 @@ type RunCMD struct {
|
|||||||
EnableTracing bool `env:"LOCALAI_ENABLE_TRACING,ENABLE_TRACING" help:"Enable API tracing" group:"api"`
|
EnableTracing bool `env:"LOCALAI_ENABLE_TRACING,ENABLE_TRACING" help:"Enable API tracing" group:"api"`
|
||||||
TracingMaxItems int `env:"LOCALAI_TRACING_MAX_ITEMS" default:"1024" help:"Maximum number of traces to keep" group:"api"`
|
TracingMaxItems int `env:"LOCALAI_TRACING_MAX_ITEMS" default:"1024" help:"Maximum number of traces to keep" group:"api"`
|
||||||
AgentJobRetentionDays int `env:"LOCALAI_AGENT_JOB_RETENTION_DAYS,AGENT_JOB_RETENTION_DAYS" default:"30" help:"Number of days to keep agent job history (default: 30)" group:"api"`
|
AgentJobRetentionDays int `env:"LOCALAI_AGENT_JOB_RETENTION_DAYS,AGENT_JOB_RETENTION_DAYS" default:"30" help:"Number of days to keep agent job history (default: 30)" group:"api"`
|
||||||
OpenResponsesStoreTTL string `env:"LOCALAI_OPEN_RESPONSES_STORE_TTL,OPEN_RESPONSES_STORE_TTL" default:"0" help:"TTL for Open Responses store (e.g., 1h, 30m, 0 = no expiration)" group:"api"`
|
OpenResponsesStoreTTL string `env:"LOCALAI_OPEN_RESPONSES_STORE_TTL,OPEN_RESPONSES_STORE_TTL" default:"0" help:"TTL for Open Responses store (e.g., 1h, 30m, 0 = no expiration)" group:"api"`
|
||||||
|
|
||||||
Version bool
|
Version bool
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,7 +23,6 @@ import (
|
|||||||
"github.com/mudler/LocalAI/core/templates"
|
"github.com/mudler/LocalAI/core/templates"
|
||||||
laudio "github.com/mudler/LocalAI/pkg/audio"
|
laudio "github.com/mudler/LocalAI/pkg/audio"
|
||||||
"github.com/mudler/LocalAI/pkg/functions"
|
"github.com/mudler/LocalAI/pkg/functions"
|
||||||
"github.com/mudler/LocalAI/pkg/utils"
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
model "github.com/mudler/LocalAI/pkg/model"
|
model "github.com/mudler/LocalAI/pkg/model"
|
||||||
"github.com/mudler/LocalAI/pkg/reasoning"
|
"github.com/mudler/LocalAI/pkg/reasoning"
|
||||||
@@ -950,12 +949,7 @@ func triggerResponse(session *Session, conv *Conversation, c *LockedWebsocket, o
|
|||||||
case types.MessageContentTypeInputAudio:
|
case types.MessageContentTypeInputAudio:
|
||||||
textContent += content.Transcript
|
textContent += content.Transcript
|
||||||
case types.MessageContentTypeInputImage:
|
case types.MessageContentTypeInputImage:
|
||||||
img, err := utils.GetContentURIAsBase64(content.ImageURL)
|
msg.StringImages = append(msg.StringImages, content.ImageURL)
|
||||||
if err != nil {
|
|
||||||
xlog.Warn("Failed to process image", "error", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
msg.StringImages = append(msg.StringImages, img)
|
|
||||||
imgIndex++
|
imgIndex++
|
||||||
nrOfImgsInMessage++
|
nrOfImgsInMessage++
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -175,8 +175,8 @@ type ToolFunction struct {
|
|||||||
// The description of the function, including guidance on when and how to call it, and guidance about what to tell the user when calling (if anything).
|
// The description of the function, including guidance on when and how to call it, and guidance about what to tell the user when calling (if anything).
|
||||||
Description string `json:"description"`
|
Description string `json:"description"`
|
||||||
|
|
||||||
// The jsonschema representing the parameters
|
// The jsonschema representing the parameters
|
||||||
Parameters any `json:"parameters,omitempty"`
|
Parameters any `json:"parameters"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t ToolFunction) ToolType() ToolType {
|
func (t ToolFunction) ToolType() ToolType {
|
||||||
|
|||||||
@@ -1,46 +1,4 @@
|
|||||||
---
|
---
|
||||||
- &nanbeige4
|
|
||||||
name: "nanbeige4.1-3b-q8"
|
|
||||||
url: "github:mudler/LocalAI/gallery/nanbeige4.1.yaml@master"
|
|
||||||
urls:
|
|
||||||
- https://huggingface.co/Nanbeige/Nanbeige4.1-3B
|
|
||||||
- https://huggingface.co/Edge-Quant/Nanbeige4.1-3B-Q8_0-GGUF
|
|
||||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/646f0d118ff94af23bc44aab/GXHCollpMRgvYqUXQ2BQ7.png
|
|
||||||
license: apache-2.0
|
|
||||||
description: |
|
|
||||||
Nanbeige4.1-3B is built upon Nanbeige4-3B-Base and represents an enhanced iteration of our previous reasoning model, Nanbeige4-3B-Thinking-2511, achieved through further post-training optimization with supervised fine-tuning (SFT) and reinforcement learning (RL). As a highly competitive open-source model at a small parameter scale, Nanbeige4.1-3B illustrates that compact models can simultaneously achieve robust reasoning, preference alignment, and effective agentic behaviors.
|
|
||||||
|
|
||||||
Key features:
|
|
||||||
Strong Reasoning: Capable of solving complex, multi-step problems through sustained and coherent reasoning within a single forward pass, reliably producing correct answers on benchmarks like LiveCodeBench-Pro, IMO-Answer-Bench, and AIME 2026 I.
|
|
||||||
Robust Preference Alignment: Outperforms same-scale models (e.g., Qwen3-4B-2507, Nanbeige4-3B-2511) and larger models (e.g., Qwen3-30B-A3B, Qwen3-32B) on Arena-Hard-v2 and Multi-Challenge.
|
|
||||||
Agentic Capability: First general small model to natively support deep-search tasks and sustain complex problem-solving with >500 rounds of tool invocations; excels in benchmarks like xBench-DeepSearch (75), Browse-Comp (39), and others.
|
|
||||||
tags:
|
|
||||||
- llm
|
|
||||||
- gguf
|
|
||||||
- gpu
|
|
||||||
- cpu
|
|
||||||
- nanbeige
|
|
||||||
- reasoning
|
|
||||||
- agent
|
|
||||||
overrides:
|
|
||||||
parameters:
|
|
||||||
model: nanbeige4.1-3b-q8_0.gguf
|
|
||||||
files:
|
|
||||||
- filename: nanbeige4.1-3b-q8_0.gguf
|
|
||||||
sha256: a5a4379e50605c5e5a31bb1716a211fb16691fea7e13ede7f88796e1f617d9e0
|
|
||||||
uri: huggingface://Edge-Quant/Nanbeige4.1-3B-Q8_0-GGUF/nanbeige4.1-3b-q8_0.gguf
|
|
||||||
- !!merge <<: *nanbeige4
|
|
||||||
name: "nanbeige4.1-3b-q4"
|
|
||||||
urls:
|
|
||||||
- https://huggingface.co/Nanbeige/Nanbeige4.1-3B
|
|
||||||
- https://huggingface.co/Edge-Quant/Nanbeige4.1-3B-Q4_K_M-GGUF
|
|
||||||
overrides:
|
|
||||||
parameters:
|
|
||||||
model: nanbeige4.1-3b-q4_k_m.gguf
|
|
||||||
files:
|
|
||||||
- filename: nanbeige4.1-3b-q4_k_m.gguf
|
|
||||||
sha256: 043246350c952877b38958a9e35c480419008b6b2d52bedaf2b805ed2447b4df
|
|
||||||
uri: huggingface://Edge-Quant/Nanbeige4.1-3B-Q4_K_M-GGUF/nanbeige4.1-3b-q4_k_m.gguf
|
|
||||||
- name: nemo-parakeet-tdt-0.6b
|
- name: nemo-parakeet-tdt-0.6b
|
||||||
license: apache-2.0
|
license: apache-2.0
|
||||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||||
|
|||||||
@@ -1,16 +0,0 @@
|
|||||||
---
|
|
||||||
name: nanbeige4.1
|
|
||||||
|
|
||||||
config_file: |
|
|
||||||
backend: llama-cpp
|
|
||||||
function:
|
|
||||||
grammar:
|
|
||||||
disable: true
|
|
||||||
known_usecases:
|
|
||||||
- chat
|
|
||||||
options:
|
|
||||||
- use_jinja:true
|
|
||||||
parameters:
|
|
||||||
model: llama-cpp/models/nanbeige4.1-3b-q8_0.gguf
|
|
||||||
template:
|
|
||||||
use_tokenizer_template: true
|
|
||||||
@@ -45,8 +45,9 @@ const (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
cuda13DirExists bool
|
cuda13DirExists bool
|
||||||
cuda12DirExists bool
|
cuda12DirExists bool
|
||||||
|
capabilityLogged bool
|
||||||
)
|
)
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
@@ -71,15 +72,9 @@ func (s *SystemState) Capability(capMap map[string]string) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *SystemState) getSystemCapabilities() string {
|
func (s *SystemState) getSystemCapabilities() string {
|
||||||
|
|
||||||
if s.systemCapabilities != "" {
|
|
||||||
return s.systemCapabilities
|
|
||||||
}
|
|
||||||
|
|
||||||
capability := os.Getenv(capabilityEnv)
|
capability := os.Getenv(capabilityEnv)
|
||||||
if capability != "" {
|
if capability != "" {
|
||||||
xlog.Info("Using forced capability from environment variable", "capability", capability, "env", capabilityEnv)
|
xlog.Info("Using forced capability from environment variable", "capability", capability, "env", capabilityEnv)
|
||||||
s.systemCapabilities = capability
|
|
||||||
return capability
|
return capability
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -96,23 +91,20 @@ func (s *SystemState) getSystemCapabilities() string {
|
|||||||
capability, err := os.ReadFile(capabilityRunFile)
|
capability, err := os.ReadFile(capabilityRunFile)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
xlog.Info("Using forced capability run file", "capabilityRunFile", capabilityRunFile, "capability", string(capability), "env", capabilityRunFileEnv)
|
xlog.Info("Using forced capability run file", "capabilityRunFile", capabilityRunFile, "capability", string(capability), "env", capabilityRunFileEnv)
|
||||||
s.systemCapabilities = strings.Trim(strings.TrimSpace(string(capability)), "\n")
|
return strings.Trim(strings.TrimSpace(string(capability)), "\n")
|
||||||
return s.systemCapabilities
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we are on mac and arm64, we will return metal
|
// If we are on mac and arm64, we will return metal
|
||||||
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
|
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
|
||||||
xlog.Info("Using metal capability (arm64 on mac)", "env", capabilityEnv)
|
xlog.Info("Using metal capability (arm64 on mac)", "env", capabilityEnv)
|
||||||
s.systemCapabilities = metal
|
return metal
|
||||||
return s.systemCapabilities
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we are on mac and x86, we will return darwin-x86
|
// If we are on mac and x86, we will return darwin-x86
|
||||||
if runtime.GOOS == "darwin" && runtime.GOARCH == "amd64" {
|
if runtime.GOOS == "darwin" && runtime.GOARCH == "amd64" {
|
||||||
xlog.Info("Using darwin-x86 capability (amd64 on mac)", "env", capabilityEnv)
|
xlog.Info("Using darwin-x86 capability (amd64 on mac)", "env", capabilityEnv)
|
||||||
s.systemCapabilities = darwinX86
|
return darwinX86
|
||||||
return s.systemCapabilities
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If arm64 on linux and a nvidia gpu is detected, we will return nvidia-l4t
|
// If arm64 on linux and a nvidia gpu is detected, we will return nvidia-l4t
|
||||||
@@ -120,43 +112,39 @@ func (s *SystemState) getSystemCapabilities() string {
|
|||||||
if s.GPUVendor == Nvidia {
|
if s.GPUVendor == Nvidia {
|
||||||
xlog.Info("Using nvidia-l4t capability (arm64 on linux)", "env", capabilityEnv)
|
xlog.Info("Using nvidia-l4t capability (arm64 on linux)", "env", capabilityEnv)
|
||||||
if cuda13DirExists {
|
if cuda13DirExists {
|
||||||
s.systemCapabilities = nvidiaL4TCuda13
|
return nvidiaL4TCuda13
|
||||||
return s.systemCapabilities
|
|
||||||
}
|
}
|
||||||
if cuda12DirExists {
|
if cuda12DirExists {
|
||||||
s.systemCapabilities = nvidiaL4TCuda12
|
return nvidiaL4TCuda12
|
||||||
return s.systemCapabilities
|
|
||||||
}
|
}
|
||||||
s.systemCapabilities = nvidiaL4T
|
return nvidiaL4T
|
||||||
return s.systemCapabilities
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if cuda13DirExists {
|
if cuda13DirExists {
|
||||||
s.systemCapabilities = nvidiaCuda13
|
return nvidiaCuda13
|
||||||
return s.systemCapabilities
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if cuda12DirExists {
|
if cuda12DirExists {
|
||||||
s.systemCapabilities = nvidiaCuda12
|
return nvidiaCuda12
|
||||||
return s.systemCapabilities
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if s.GPUVendor == "" {
|
if s.GPUVendor == "" {
|
||||||
xlog.Info("Default capability (no GPU detected)", "env", capabilityEnv)
|
xlog.Info("Default capability (no GPU detected)", "env", capabilityEnv)
|
||||||
s.systemCapabilities = defaultCapability
|
return defaultCapability
|
||||||
return s.systemCapabilities
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !capabilityLogged {
|
||||||
|
xlog.Info("Capability automatically detected", "capability", s.GPUVendor, "env", capabilityEnv)
|
||||||
|
capabilityLogged = true
|
||||||
|
}
|
||||||
// If vram is less than 4GB, let's default to CPU but warn the user that they can override that via env
|
// If vram is less than 4GB, let's default to CPU but warn the user that they can override that via env
|
||||||
if s.VRAM <= 4*1024*1024*1024 {
|
if s.VRAM <= 4*1024*1024*1024 {
|
||||||
xlog.Warn("VRAM is less than 4GB, defaulting to CPU", "env", capabilityEnv)
|
xlog.Warn("VRAM is less than 4GB, defaulting to CPU", "env", capabilityEnv)
|
||||||
s.systemCapabilities = defaultCapability
|
return defaultCapability
|
||||||
return s.systemCapabilities
|
|
||||||
}
|
}
|
||||||
|
|
||||||
s.systemCapabilities = s.GPUVendor
|
return s.GPUVendor
|
||||||
return s.systemCapabilities
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// BackendPreferenceTokens returns a list of substrings that represent the preferred
|
// BackendPreferenceTokens returns a list of substrings that represent the preferred
|
||||||
|
|||||||
@@ -19,8 +19,6 @@ type SystemState struct {
|
|||||||
Backend Backend
|
Backend Backend
|
||||||
Model Model
|
Model Model
|
||||||
VRAM uint64
|
VRAM uint64
|
||||||
|
|
||||||
systemCapabilities string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type SystemStateOptions func(*SystemState)
|
type SystemStateOptions func(*SystemState)
|
||||||
@@ -55,7 +53,5 @@ func GetSystemState(opts ...SystemStateOptions) (*SystemState, error) {
|
|||||||
state.VRAM, _ = xsysinfo.TotalAvailableVRAM()
|
state.VRAM, _ = xsysinfo.TotalAvailableVRAM()
|
||||||
xlog.Debug("Total available VRAM", "vram", state.VRAM)
|
xlog.Debug("Total available VRAM", "vram", state.VRAM)
|
||||||
|
|
||||||
state.getSystemCapabilities()
|
|
||||||
|
|
||||||
return state, nil
|
return state, nil
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user