mirror of
https://github.com/mudler/LocalAI.git
synced 2026-07-01 20:07:18 -04:00
Compare commits
10 Commits
v4.5.6
...
dependabot
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e9154d4a3a | ||
|
|
0d8adfc59a | ||
|
|
43f2615e19 | ||
|
|
875c539ad5 | ||
|
|
d641ded194 | ||
|
|
40445fff05 | ||
|
|
057dee956a | ||
|
|
4ec39bb776 | ||
|
|
25ecb9f015 | ||
|
|
2be495f9c0 |
@@ -1,5 +1,5 @@
|
||||
|
||||
IK_LLAMA_VERSION?=f74a6fb87b315b2c3154166e075360e15021a61d
|
||||
IK_LLAMA_VERSION?=29431b31c89e79c10f8736e8f2742485ba1713d6
|
||||
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=6f4f53f2b7da54fcdbbecaaa734337c337ad6176
|
||||
LLAMA_VERSION?=0eca4d490e591d4e93058d07540cf47278a72577
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=3b6c9ca97cfcda8e68e719e6670d06379fcbe943
|
||||
STABLEDIFFUSION_GGML_VERSION?=484baa41e5e006c52dcd4addc38c830b9489745f
|
||||
|
||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||
|
||||
|
||||
@@ -798,6 +798,7 @@ void sd_img_gen_params_set_seed(sd_img_gen_params_t *params, int64_t seed) {
|
||||
int gen_image(sd_img_gen_params_t *p, int steps, char *dst, float cfg_scale, char *src_image, float strength, char *mask_image, char* ref_images[], int ref_images_count) {
|
||||
|
||||
sd_image_t* results;
|
||||
int num_results_out = 0;
|
||||
|
||||
std::vector<int> skip_layers = {7, 8, 9};
|
||||
|
||||
@@ -994,10 +995,14 @@ int gen_image(sd_img_gen_params_t *p, int steps, char *dst, float cfg_scale, cha
|
||||
sd_ctx_params_to_str(&ctx_params),
|
||||
sd_img_gen_params_to_str(p));
|
||||
|
||||
results = generate_image(sd_c, p);
|
||||
bool gen_ok = generate_image(sd_c, p, &results, &num_results_out);
|
||||
|
||||
std::free(p);
|
||||
|
||||
if (!gen_ok || num_results_out == 0) {
|
||||
results = NULL;
|
||||
}
|
||||
|
||||
if (results == NULL) {
|
||||
fprintf (stderr, "NO results\n");
|
||||
if (input_image_buffer) free(input_image_buffer);
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=0ae02cdb2c7317b50991367c165736ce42ed96ac
|
||||
WHISPER_CPP_VERSION?=0874de3e8e8e48361dba85c7fe6d176f008bf158
|
||||
SO_TARGET?=libgowhisper.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -4,7 +4,7 @@ torchaudio
|
||||
torchvision
|
||||
|
||||
# Core dependencies
|
||||
transformers>=4.51.0,<4.58.0
|
||||
transformers>=5.12.1,<5.13.0
|
||||
diffusers
|
||||
gradio
|
||||
matplotlib>=3.7.5
|
||||
|
||||
@@ -4,7 +4,7 @@ torchaudio
|
||||
torchvision
|
||||
|
||||
# Core dependencies
|
||||
transformers>=4.51.0,<4.58.0
|
||||
transformers>=5.12.1,<5.13.0
|
||||
diffusers
|
||||
gradio>=6.5.1
|
||||
matplotlib>=3.7.5
|
||||
|
||||
@@ -4,7 +4,7 @@ torchaudio
|
||||
torchvision
|
||||
|
||||
# Core dependencies
|
||||
transformers>=4.51.0,<4.58.0
|
||||
transformers>=5.12.1,<5.13.0
|
||||
diffusers
|
||||
gradio>=6.5.1
|
||||
matplotlib>=3.7.5
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm7.0
|
||||
torch==2.10.0+rocm7.0
|
||||
torch==2.12.0+cpu
|
||||
torchaudio
|
||||
torchvision
|
||||
|
||||
# Core dependencies
|
||||
transformers>=4.51.0,<4.58.0
|
||||
transformers>=5.12.1,<5.13.0
|
||||
diffusers
|
||||
gradio>=6.5.1
|
||||
matplotlib>=3.7.5
|
||||
|
||||
@@ -4,7 +4,7 @@ torchaudio
|
||||
torchvision
|
||||
|
||||
# Core dependencies
|
||||
transformers>=4.51.0,<4.58.0
|
||||
transformers>=5.12.1,<5.13.0
|
||||
diffusers
|
||||
gradio
|
||||
matplotlib>=3.7.5
|
||||
|
||||
@@ -3,7 +3,7 @@ torch
|
||||
torchaudio
|
||||
torchvision
|
||||
# Core dependencies
|
||||
transformers>=4.51.0,<4.58.0
|
||||
transformers>=5.12.1,<5.13.0
|
||||
diffusers
|
||||
gradio>=6.5.1
|
||||
matplotlib>=3.7.5
|
||||
|
||||
@@ -3,7 +3,7 @@ torchaudio
|
||||
torchvision
|
||||
|
||||
# Core dependencies
|
||||
transformers>=4.51.0,<4.58.0
|
||||
transformers>=5.12.1,<5.13.0
|
||||
diffusers
|
||||
gradio
|
||||
matplotlib>=3.7.5
|
||||
|
||||
@@ -3,5 +3,5 @@ opencv-python
|
||||
accelerate
|
||||
peft
|
||||
inference
|
||||
torch==2.7.1
|
||||
torch==2.12.0+cu130
|
||||
optimum-quanto
|
||||
@@ -1,4 +1,4 @@
|
||||
torch==2.7.1
|
||||
torch==2.12.0+cu130
|
||||
rfdetr
|
||||
opencv-python
|
||||
accelerate
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu130
|
||||
torch==2.9.1
|
||||
torch==2.12.0+cu130
|
||||
rfdetr
|
||||
opencv-python
|
||||
accelerate
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm7.0
|
||||
torch==2.10.0+rocm7.0
|
||||
torch==2.12.0+cu130
|
||||
torchvision==0.25.0+rocm7.0
|
||||
rfdetr
|
||||
opencv-python
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
torch==2.7.1
|
||||
torch==2.12.0+cu130
|
||||
rfdetr
|
||||
opencv-python
|
||||
accelerate
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||
accelerate
|
||||
torch==2.9.0
|
||||
torch==2.12.0+cpu
|
||||
torchvision
|
||||
torchaudio
|
||||
transformers
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# for cublas12 so uv consults this index alongside PyPI.
|
||||
--extra-index-url https://download.pytorch.org/whl/cu128
|
||||
accelerate
|
||||
torch==2.9.1
|
||||
torch==2.12.0+cpu
|
||||
torchvision
|
||||
torchaudio
|
||||
transformers
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||
torch==2.10.0
|
||||
torch==2.12.0+cpu
|
||||
trl
|
||||
peft
|
||||
datasets>=3.0.0
|
||||
transformers>=4.56.2
|
||||
transformers>=5.12.1
|
||||
accelerate>=1.4.0
|
||||
huggingface-hub>=1.3.0
|
||||
sentencepiece
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
torch==2.10.0
|
||||
torch==2.12.0+cpu
|
||||
trl
|
||||
peft
|
||||
datasets>=3.0.0
|
||||
transformers>=4.56.2
|
||||
transformers>=5.12.1
|
||||
accelerate>=1.4.0
|
||||
huggingface-hub>=1.3.0
|
||||
sentencepiece
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
torch==2.10.0
|
||||
torch==2.12.0+cpu
|
||||
trl
|
||||
peft
|
||||
datasets>=3.0.0
|
||||
transformers>=4.56.2
|
||||
transformers>=5.12.1
|
||||
accelerate>=1.4.0
|
||||
huggingface-hub>=1.3.0
|
||||
sentencepiece
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
torch==2.10.0
|
||||
torch==2.12.0+cpu
|
||||
trl
|
||||
peft
|
||||
datasets>=3.0.0
|
||||
transformers>=4.56.2
|
||||
transformers>=5.12.1
|
||||
accelerate>=1.4.0
|
||||
huggingface-hub>=1.3.0
|
||||
sentencepiece
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
accelerate
|
||||
torch==2.7.0
|
||||
torch==2.12.0+cu130
|
||||
transformers
|
||||
bitsandbytes
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
# on a cu130 host. Pull the cu130-flavoured wheel from vLLM's per-tag index
|
||||
# instead — the cublas13 case in install.sh adds --index-strategy=unsafe-best-match
|
||||
# so uv consults this index alongside PyPI.
|
||||
--extra-index-url https://wheels.vllm.ai/0.23.0/cu130
|
||||
--extra-index-url https://wheels.vllm.ai/0.24.0/cu130
|
||||
# VERSION COUPLING: darwin/Apple-Silicon builds use vllm-metal (see install.sh),
|
||||
# which pins this exact vLLM version. Bumping vllm here means coordinating with a
|
||||
# vllm-metal release that supports the new version, or macOS/Metal builds break.
|
||||
vllm==0.23.0
|
||||
vllm==0.24.0
|
||||
|
||||
@@ -351,6 +351,16 @@ impl Backend for KokorosService {
|
||||
Err(Status::unimplemented("Not supported"))
|
||||
}
|
||||
|
||||
type AudioTranscriptionLiveStream =
|
||||
ReceiverStream<Result<backend::TranscriptLiveResponse, Status>>;
|
||||
|
||||
async fn audio_transcription_live(
|
||||
&self,
|
||||
_: Request<tonic::Streaming<backend::TranscriptLiveRequest>>,
|
||||
) -> Result<Response<Self::AudioTranscriptionLiveStream>, Status> {
|
||||
Err(Status::unimplemented("Not supported"))
|
||||
}
|
||||
|
||||
async fn diarize(
|
||||
&self,
|
||||
_: Request<backend::DiarizeRequest>,
|
||||
|
||||
@@ -207,12 +207,20 @@ func (l *Launcher) StartLocalAI() error {
|
||||
}
|
||||
|
||||
// Build command arguments
|
||||
dataPath := l.GetDataPath()
|
||||
args := []string{
|
||||
"run",
|
||||
"--models-path", l.config.ModelsPath,
|
||||
"--backends-path", l.config.BackendsPath,
|
||||
"--address", l.config.Address,
|
||||
"--log-level", l.config.LogLevel,
|
||||
// Keep persistent data and dynamic config under the launcher's data
|
||||
// directory (~/.localai) rather than letting the server resolve them
|
||||
// to ${basepath}/{data,configuration}. ${basepath} expands to the
|
||||
// launcher process's CWD (often the user's home root), which puts
|
||||
// ~/data and ~/configuration outside ~/.localai. See #10610.
|
||||
"--data-path", filepath.Join(dataPath, "data"),
|
||||
"--localai-config-dir", filepath.Join(dataPath, "configuration"),
|
||||
}
|
||||
|
||||
l.localaiCmd = exec.CommandContext(l.ctx, binaryPath, args...)
|
||||
|
||||
@@ -1716,7 +1716,7 @@
|
||||
- use_jinja:true
|
||||
parameters:
|
||||
min_p: 0.15
|
||||
model: llama-cpp/models/LFM2.5-8B-A1B-GGUF/LFM2.5-8B-A1B-Q4_K_M.gguf
|
||||
model: llama-cpp/models/LFM2.5-8B-A1B-GGUF/LFM2.5-8B-A1B-Q8_0.gguf
|
||||
repeat_penalty: 1.05
|
||||
temperature: 0.1
|
||||
top_k: 50
|
||||
@@ -1724,9 +1724,9 @@
|
||||
template:
|
||||
use_tokenizer_template: true
|
||||
files:
|
||||
- filename: llama-cpp/models/LFM2.5-8B-A1B-GGUF/LFM2.5-8B-A1B-Q4_K_M.gguf
|
||||
uri: https://huggingface.co/LiquidAI/LFM2.5-8B-A1B-GGUF/resolve/main/LFM2.5-8B-A1B-Q4_K_M.gguf
|
||||
sha256: 4923ec14f06b968b74d663e5949867d2d9c3bf13a20b8be1a9f9af39989b2bb0
|
||||
- filename: llama-cpp/models/LFM2.5-8B-A1B-GGUF/LFM2.5-8B-A1B-Q8_0.gguf
|
||||
uri: https://huggingface.co/LiquidAI/LFM2.5-8B-A1B-GGUF/resolve/main/LFM2.5-8B-A1B-Q8_0.gguf
|
||||
sha256: 33ab3b8ce6a964fb8ebac89360c9b3cf72c4fa418d5e4c0a94d46883124d5c02
|
||||
- name: "qwopus3.5-9b-coder-mtp"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
|
||||
@@ -58,6 +58,23 @@ func IsLiveTranscriptionUnsupported(err error) bool {
|
||||
return strings.Contains(strings.ToLower(err.Error()), "unimplemented")
|
||||
}
|
||||
|
||||
// IsUnimplemented reports whether err is a gRPC Unimplemented status — the
|
||||
// signal a backend gives for an RPC it does not implement. The generated
|
||||
// UnimplementedBackendServer stub returns exactly this for any RPC a backend
|
||||
// (e.g. a Python or external backend) has not overridden, so callers can treat
|
||||
// an optional RPC as a no-op rather than a failure. Prefers the typed status
|
||||
// code and falls back to the message for paths that lose the status (e.g. errors
|
||||
// wrapped across non-gRPC boundaries).
|
||||
func IsUnimplemented(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
if status.Code(err) == codes.Unimplemented {
|
||||
return true
|
||||
}
|
||||
return strings.Contains(strings.ToLower(err.Error()), "unimplemented")
|
||||
}
|
||||
|
||||
// StreamTranscriptionUnsupported returns the canonical error a backend returns
|
||||
// when it (or the loaded model) cannot serve the server-streaming
|
||||
// AudioTranscriptionStream RPC. It carries codes.Unimplemented like the live
|
||||
|
||||
@@ -55,6 +55,18 @@ var _ = Describe("grpcerrors", func() {
|
||||
Expect(grpcerrors.IsModelNotLoaded(err)).To(BeFalse())
|
||||
})
|
||||
|
||||
DescribeTable("IsUnimplemented",
|
||||
func(err error, want bool) {
|
||||
Expect(grpcerrors.IsUnimplemented(err)).To(Equal(want))
|
||||
},
|
||||
Entry("nil", nil, false),
|
||||
Entry("typed code", status.Error(codes.Unimplemented, "method Free not implemented"), true),
|
||||
Entry("stale stub message (Unknown code)", errors.New("rpc error: code = Unimplemented desc = "), true),
|
||||
Entry("unrelated error", errors.New("context deadline exceeded"), false),
|
||||
Entry("unrelated grpc code", status.Error(codes.Unavailable, "connection refused"), false),
|
||||
Entry("model not loaded is NOT unimplemented", grpcerrors.ModelNotLoaded("parakeet-cpp"), false),
|
||||
)
|
||||
|
||||
It("StreamTranscriptionUnsupported carries Unimplemented and is not ModelNotLoaded", func() {
|
||||
err := grpcerrors.StreamTranscriptionUnsupported("parakeet-cpp", "not a streaming model")
|
||||
Expect(status.Code(err)).To(Equal(codes.Unimplemented))
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/hpcloud/tail"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/grpcerrors"
|
||||
"github.com/mudler/LocalAI/pkg/signals"
|
||||
process "github.com/mudler/go-processmanager"
|
||||
"github.com/mudler/xlog"
|
||||
@@ -52,10 +53,21 @@ func (ml *ModelLoader) deleteProcess(s string) error {
|
||||
hook(s)
|
||||
}
|
||||
|
||||
// Free GPU resources before stopping the process to ensure VRAM is released
|
||||
// Free GPU resources before stopping the process to ensure VRAM is released.
|
||||
// Free is optional: backends that don't override it (the generated stub, many
|
||||
// Python/external backends, or a federation proxy in distributed mode) return
|
||||
// gRPC Unimplemented. That is expected, not a failure — VRAM is reclaimed when
|
||||
// the process is stopped below, or by the remote unloader for remote backends —
|
||||
// so don't surface it as an error.
|
||||
xlog.Debug("Calling Free() to release GPU resources", "model", s)
|
||||
if err := model.GRPC(false, ml.wd).Free(context.Background()); err != nil {
|
||||
xlog.Warn("Error freeing GPU resources", "error", err, "model", s)
|
||||
if grpcerrors.IsUnimplemented(err) {
|
||||
xlog.Debug("Backend does not implement Free(); GPU release handled on process stop", "model", s)
|
||||
} else {
|
||||
// Now that the expected Unimplemented case is filtered out above, a
|
||||
// remaining error is a genuine failure to release VRAM — surface it.
|
||||
xlog.Error("Error freeing GPU resources", "error", err, "model", s)
|
||||
}
|
||||
}
|
||||
|
||||
process := model.Process()
|
||||
|
||||
Reference in New Issue
Block a user