mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-13 16:14:24 -05:00
Compare commits
4 Commits
fix/step-f
...
chore/do-n
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
013af9475f | ||
|
|
2e17edd72a | ||
|
|
24aab68b3f | ||
|
|
5bdbb10593 |
@@ -83,7 +83,7 @@ type RunCMD struct {
|
||||
EnableTracing bool `env:"LOCALAI_ENABLE_TRACING,ENABLE_TRACING" help:"Enable API tracing" group:"api"`
|
||||
TracingMaxItems int `env:"LOCALAI_TRACING_MAX_ITEMS" default:"1024" help:"Maximum number of traces to keep" group:"api"`
|
||||
AgentJobRetentionDays int `env:"LOCALAI_AGENT_JOB_RETENTION_DAYS,AGENT_JOB_RETENTION_DAYS" default:"30" help:"Number of days to keep agent job history (default: 30)" group:"api"`
|
||||
OpenResponsesStoreTTL string `env:"LOCALAI_OPEN_RESPONSES_STORE_TTL,OPEN_RESPONSES_STORE_TTL" default:"0" help:"TTL for Open Responses store (e.g., 1h, 30m, 0 = no expiration)" group:"api"`
|
||||
OpenResponsesStoreTTL string `env:"LOCALAI_OPEN_RESPONSES_STORE_TTL,OPEN_RESPONSES_STORE_TTL" default:"0" help:"TTL for Open Responses store (e.g., 1h, 30m, 0 = no expiration)" group:"api"`
|
||||
|
||||
Version bool
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@ import (
|
||||
"github.com/mudler/LocalAI/core/templates"
|
||||
laudio "github.com/mudler/LocalAI/pkg/audio"
|
||||
"github.com/mudler/LocalAI/pkg/functions"
|
||||
"github.com/mudler/LocalAI/pkg/utils"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/reasoning"
|
||||
@@ -949,7 +950,12 @@ func triggerResponse(session *Session, conv *Conversation, c *LockedWebsocket, o
|
||||
case types.MessageContentTypeInputAudio:
|
||||
textContent += content.Transcript
|
||||
case types.MessageContentTypeInputImage:
|
||||
msg.StringImages = append(msg.StringImages, content.ImageURL)
|
||||
img, err := utils.GetContentURIAsBase64(content.ImageURL)
|
||||
if err != nil {
|
||||
xlog.Warn("Failed to process image", "error", err)
|
||||
continue
|
||||
}
|
||||
msg.StringImages = append(msg.StringImages, img)
|
||||
imgIndex++
|
||||
nrOfImgsInMessage++
|
||||
}
|
||||
|
||||
@@ -175,8 +175,8 @@ type ToolFunction struct {
|
||||
// The description of the function, including guidance on when and how to call it, and guidance about what to tell the user when calling (if anything).
|
||||
Description string `json:"description"`
|
||||
|
||||
// The type of the tool, i.e. function.
|
||||
Parameters any `json:"parameters"`
|
||||
// The JSON Schema describing the function's parameters.
|
||||
Parameters any `json:"parameters,omitempty"`
|
||||
}
|
||||
|
||||
func (t ToolFunction) ToolType() ToolType {
|
||||
|
||||
@@ -1,4 +1,46 @@
|
||||
---
|
||||
- &nanbeige4
|
||||
name: "nanbeige4.1-3b-q8"
|
||||
url: "github:mudler/LocalAI/gallery/nanbeige4.1.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/Nanbeige/Nanbeige4.1-3B
|
||||
- https://huggingface.co/Edge-Quant/Nanbeige4.1-3B-Q8_0-GGUF
|
||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/646f0d118ff94af23bc44aab/GXHCollpMRgvYqUXQ2BQ7.png
|
||||
license: apache-2.0
|
||||
description: |
|
||||
Nanbeige4.1-3B is built upon Nanbeige4-3B-Base and represents an enhanced iteration of our previous reasoning model, Nanbeige4-3B-Thinking-2511, achieved through further post-training optimization with supervised fine-tuning (SFT) and reinforcement learning (RL). As a highly competitive open-source model at a small parameter scale, Nanbeige4.1-3B illustrates that compact models can simultaneously achieve robust reasoning, preference alignment, and effective agentic behaviors.
|
||||
|
||||
Key features:
|
||||
Strong Reasoning: Capable of solving complex, multi-step problems through sustained and coherent reasoning within a single forward pass, reliably producing correct answers on benchmarks like LiveCodeBench-Pro, IMO-Answer-Bench, and AIME 2026 I.
|
||||
Robust Preference Alignment: Outperforms same-scale models (e.g., Qwen3-4B-2507, Nanbeige4-3B-2511) and larger models (e.g., Qwen3-30B-A3B, Qwen3-32B) on Arena-Hard-v2 and Multi-Challenge.
|
||||
Agentic Capability: First general small model to natively support deep-search tasks and sustain complex problem-solving with >500 rounds of tool invocations; excels in benchmarks like xBench-DeepSearch (75), Browse-Comp (39), and others.
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
- gpu
|
||||
- cpu
|
||||
- nanbeige
|
||||
- reasoning
|
||||
- agent
|
||||
overrides:
|
||||
parameters:
|
||||
model: nanbeige4.1-3b-q8_0.gguf
|
||||
files:
|
||||
- filename: nanbeige4.1-3b-q8_0.gguf
|
||||
sha256: a5a4379e50605c5e5a31bb1716a211fb16691fea7e13ede7f88796e1f617d9e0
|
||||
uri: huggingface://Edge-Quant/Nanbeige4.1-3B-Q8_0-GGUF/nanbeige4.1-3b-q8_0.gguf
|
||||
- !!merge <<: *nanbeige4
|
||||
name: "nanbeige4.1-3b-q4"
|
||||
urls:
|
||||
- https://huggingface.co/Nanbeige/Nanbeige4.1-3B
|
||||
- https://huggingface.co/Edge-Quant/Nanbeige4.1-3B-Q4_K_M-GGUF
|
||||
overrides:
|
||||
parameters:
|
||||
model: nanbeige4.1-3b-q4_k_m.gguf
|
||||
files:
|
||||
- filename: nanbeige4.1-3b-q4_k_m.gguf
|
||||
sha256: 043246350c952877b38958a9e35c480419008b6b2d52bedaf2b805ed2447b4df
|
||||
uri: huggingface://Edge-Quant/Nanbeige4.1-3B-Q4_K_M-GGUF/nanbeige4.1-3b-q4_k_m.gguf
|
||||
- name: nemo-parakeet-tdt-0.6b
|
||||
license: apache-2.0
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
|
||||
16
gallery/nanbeige4.1.yaml
Normal file
16
gallery/nanbeige4.1.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
---
|
||||
name: nanbeige4.1
|
||||
|
||||
config_file: |
|
||||
backend: llama-cpp
|
||||
function:
|
||||
grammar:
|
||||
disable: true
|
||||
known_usecases:
|
||||
- chat
|
||||
options:
|
||||
- use_jinja:true
|
||||
parameters:
|
||||
model: llama-cpp/models/nanbeige4.1-3b-q8_0.gguf
|
||||
template:
|
||||
use_tokenizer_template: true
|
||||
@@ -45,9 +45,8 @@ const (
|
||||
)
|
||||
|
||||
var (
|
||||
cuda13DirExists bool
|
||||
cuda12DirExists bool
|
||||
capabilityLogged bool
|
||||
cuda13DirExists bool
|
||||
cuda12DirExists bool
|
||||
)
|
||||
|
||||
func init() {
|
||||
@@ -72,9 +71,15 @@ func (s *SystemState) Capability(capMap map[string]string) string {
|
||||
}
|
||||
|
||||
func (s *SystemState) getSystemCapabilities() string {
|
||||
|
||||
if s.systemCapabilities != "" {
|
||||
return s.systemCapabilities
|
||||
}
|
||||
|
||||
capability := os.Getenv(capabilityEnv)
|
||||
if capability != "" {
|
||||
xlog.Info("Using forced capability from environment variable", "capability", capability, "env", capabilityEnv)
|
||||
s.systemCapabilities = capability
|
||||
return capability
|
||||
}
|
||||
|
||||
@@ -91,20 +96,23 @@ func (s *SystemState) getSystemCapabilities() string {
|
||||
capability, err := os.ReadFile(capabilityRunFile)
|
||||
if err == nil {
|
||||
xlog.Info("Using forced capability run file", "capabilityRunFile", capabilityRunFile, "capability", string(capability), "env", capabilityRunFileEnv)
|
||||
return strings.Trim(strings.TrimSpace(string(capability)), "\n")
|
||||
s.systemCapabilities = strings.Trim(strings.TrimSpace(string(capability)), "\n")
|
||||
return s.systemCapabilities
|
||||
}
|
||||
}
|
||||
|
||||
// If we are on mac and arm64, we will return metal
|
||||
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
|
||||
xlog.Info("Using metal capability (arm64 on mac)", "env", capabilityEnv)
|
||||
return metal
|
||||
s.systemCapabilities = metal
|
||||
return s.systemCapabilities
|
||||
}
|
||||
|
||||
// If we are on mac and x86, we will return darwin-x86
|
||||
if runtime.GOOS == "darwin" && runtime.GOARCH == "amd64" {
|
||||
xlog.Info("Using darwin-x86 capability (amd64 on mac)", "env", capabilityEnv)
|
||||
return darwinX86
|
||||
s.systemCapabilities = darwinX86
|
||||
return s.systemCapabilities
|
||||
}
|
||||
|
||||
// If arm64 on Linux and an NVIDIA GPU is detected, we will return nvidia-l4t
|
||||
@@ -112,39 +120,43 @@ func (s *SystemState) getSystemCapabilities() string {
|
||||
if s.GPUVendor == Nvidia {
|
||||
xlog.Info("Using nvidia-l4t capability (arm64 on linux)", "env", capabilityEnv)
|
||||
if cuda13DirExists {
|
||||
return nvidiaL4TCuda13
|
||||
s.systemCapabilities = nvidiaL4TCuda13
|
||||
return s.systemCapabilities
|
||||
}
|
||||
if cuda12DirExists {
|
||||
return nvidiaL4TCuda12
|
||||
s.systemCapabilities = nvidiaL4TCuda12
|
||||
return s.systemCapabilities
|
||||
}
|
||||
return nvidiaL4T
|
||||
s.systemCapabilities = nvidiaL4T
|
||||
return s.systemCapabilities
|
||||
}
|
||||
}
|
||||
|
||||
if cuda13DirExists {
|
||||
return nvidiaCuda13
|
||||
s.systemCapabilities = nvidiaCuda13
|
||||
return s.systemCapabilities
|
||||
}
|
||||
|
||||
if cuda12DirExists {
|
||||
return nvidiaCuda12
|
||||
s.systemCapabilities = nvidiaCuda12
|
||||
return s.systemCapabilities
|
||||
}
|
||||
|
||||
if s.GPUVendor == "" {
|
||||
xlog.Info("Default capability (no GPU detected)", "env", capabilityEnv)
|
||||
return defaultCapability
|
||||
s.systemCapabilities = defaultCapability
|
||||
return s.systemCapabilities
|
||||
}
|
||||
|
||||
if !capabilityLogged {
|
||||
xlog.Info("Capability automatically detected", "capability", s.GPUVendor, "env", capabilityEnv)
|
||||
capabilityLogged = true
|
||||
}
|
||||
// If VRAM is 4GB or less, default to CPU but warn the user that they can override that via env
|
||||
if s.VRAM <= 4*1024*1024*1024 {
|
||||
xlog.Warn("VRAM is less than 4GB, defaulting to CPU", "env", capabilityEnv)
|
||||
return defaultCapability
|
||||
s.systemCapabilities = defaultCapability
|
||||
return s.systemCapabilities
|
||||
}
|
||||
|
||||
return s.GPUVendor
|
||||
s.systemCapabilities = s.GPUVendor
|
||||
return s.systemCapabilities
|
||||
}
|
||||
|
||||
// BackendPreferenceTokens returns a list of substrings that represent the preferred
|
||||
|
||||
@@ -19,6 +19,8 @@ type SystemState struct {
|
||||
Backend Backend
|
||||
Model Model
|
||||
VRAM uint64
|
||||
|
||||
systemCapabilities string
|
||||
}
|
||||
|
||||
type SystemStateOptions func(*SystemState)
|
||||
@@ -53,5 +55,7 @@ func GetSystemState(opts ...SystemStateOptions) (*SystemState, error) {
|
||||
state.VRAM, _ = xsysinfo.TotalAvailableVRAM()
|
||||
xlog.Debug("Total available VRAM", "vram", state.VRAM)
|
||||
|
||||
state.getSystemCapabilities()
|
||||
|
||||
return state, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user