mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-20 22:58:34 -04:00
Compare commits
1 Commits
master
...
worktree-f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
694088ebfe |
@@ -1,10 +1,10 @@
|
||||
# ds4 backend Makefile.
|
||||
#
|
||||
# Upstream pin lives below as DS4_VERSION?=2606543be7a8c125a32cee37f5d1d85dc78f2fcf
|
||||
# Upstream pin lives below as DS4_VERSION?=599e49d253971451f710cb8323344e789906ed6c
|
||||
# (.github/bump_deps.sh) can find and update it - matches the
|
||||
# llama-cpp / ik-llama-cpp / turboquant convention.
|
||||
|
||||
DS4_VERSION?=2606543be7a8c125a32cee37f5d1d85dc78f2fcf
|
||||
DS4_VERSION?=599e49d253971451f710cb8323344e789906ed6c
|
||||
DS4_REPO?=https://github.com/antirez/ds4
|
||||
|
||||
CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
IK_LLAMA_VERSION?=11a1fea9e291f12ce2c803a9d7812c30ca806bcf
|
||||
IK_LLAMA_VERSION?=77413bc900f9a2bfd8a5407f184427bcc0825f6c
|
||||
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# acestep.cpp version
|
||||
ACESTEP_REPO?=https://github.com/ace-step/acestep.cpp
|
||||
ACESTEP_CPP_VERSION?=ed53caf164e4492a5620b2e3f2264629cf66da24
|
||||
ACESTEP_CPP_VERSION?=e0c8d75a672fca5684c88c68dbf6d12f58754258
|
||||
SO_TARGET?=libgoacestepcpp.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -22,11 +22,12 @@
|
||||
#include <vector>
|
||||
|
||||
// Global model contexts (loaded once, reused across requests)
|
||||
static DiTGGML g_dit = {};
|
||||
static VAEGGML g_vae = {};
|
||||
static bool g_dit_loaded = false;
|
||||
static bool g_vae_loaded = false;
|
||||
static bool g_is_turbo = false;
|
||||
static DiTGGML g_dit = {};
|
||||
static DiTGGMLConfig g_dit_cfg;
|
||||
static VAEGGML g_vae = {};
|
||||
static bool g_dit_loaded = false;
|
||||
static bool g_vae_loaded = false;
|
||||
static bool g_is_turbo = false;
|
||||
|
||||
// Silence latent [15000, 64] — read once from DiT GGUF
|
||||
static std::vector<float> g_silence_full;
|
||||
@@ -71,9 +72,10 @@ int load_model(const char * lm_model_path, const char * text_encoder_path,
|
||||
g_text_enc_path = text_encoder_path;
|
||||
g_dit_path = dit_model_path;
|
||||
|
||||
// Load DiT model (backend init + config are handled inside dit_ggml_load)
|
||||
// Load DiT model
|
||||
fprintf(stderr, "[acestep-cpp] Loading DiT from %s\n", dit_model_path);
|
||||
if (!dit_ggml_load(&g_dit, dit_model_path)) {
|
||||
dit_ggml_init_backend(&g_dit);
|
||||
if (!dit_ggml_load(&g_dit, dit_model_path, g_dit_cfg, nullptr, 0.0f)) {
|
||||
fprintf(stderr, "[acestep-cpp] FATAL: failed to load DiT from %s\n", dit_model_path);
|
||||
return 1;
|
||||
}
|
||||
@@ -147,16 +149,16 @@ int generate_music(const char * caption, const char * lyrics, int bpm,
|
||||
|
||||
// Compute T (latent frames at 25Hz)
|
||||
int T = (int)(duration * FRAMES_PER_SECOND);
|
||||
T = ((T + g_dit.cfg.patch_size - 1) / g_dit.cfg.patch_size) * g_dit.cfg.patch_size;
|
||||
int S = T / g_dit.cfg.patch_size;
|
||||
T = ((T + g_dit_cfg.patch_size - 1) / g_dit_cfg.patch_size) * g_dit_cfg.patch_size;
|
||||
int S = T / g_dit_cfg.patch_size;
|
||||
|
||||
if (T > 15000) {
|
||||
fprintf(stderr, "[acestep-cpp] ERROR: T=%d exceeds max 15000\n", T);
|
||||
return 2;
|
||||
}
|
||||
|
||||
int Oc = g_dit.cfg.out_channels; // 64
|
||||
int ctx_ch = g_dit.cfg.in_channels - Oc; // 128
|
||||
int Oc = g_dit_cfg.out_channels; // 64
|
||||
int ctx_ch = g_dit_cfg.in_channels - Oc; // 128
|
||||
|
||||
fprintf(stderr, "[acestep-cpp] T=%d, S=%d, duration=%.1fs, seed=%d\n", T, S, duration, seed);
|
||||
|
||||
@@ -189,8 +191,9 @@ int generate_music(const char * caption, const char * lyrics, int bpm,
|
||||
|
||||
fprintf(stderr, "[acestep-cpp] caption: %d tokens, lyrics: %d tokens\n", S_text, S_lyric);
|
||||
|
||||
// 4. Text encoder forward (backend init handled inside qwen3_load_text_encoder)
|
||||
// 4. Text encoder forward
|
||||
Qwen3GGML text_enc = {};
|
||||
qwen3_init_backend(&text_enc);
|
||||
if (!qwen3_load_text_encoder(&text_enc, g_text_enc_path.c_str())) {
|
||||
fprintf(stderr, "[acestep-cpp] FATAL: failed to load text encoder\n");
|
||||
return 4;
|
||||
@@ -206,8 +209,9 @@ int generate_music(const char * caption, const char * lyrics, int bpm,
|
||||
std::vector<float> lyric_embed(H_text * S_lyric);
|
||||
qwen3_embed_lookup(&text_enc, lyric_ids.data(), S_lyric, lyric_embed.data());
|
||||
|
||||
// 6. Condition encoder (backend init handled inside cond_ggml_load)
|
||||
// 6. Condition encoder
|
||||
CondGGML cond = {};
|
||||
cond_ggml_init_backend(&cond);
|
||||
if (!cond_ggml_load(&cond, g_dit_path.c_str())) {
|
||||
fprintf(stderr, "[acestep-cpp] FATAL: failed to load condition encoder\n");
|
||||
qwen3_free(&text_enc);
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=5b0267e941cade15bd80089d89838795d9f4baa6
|
||||
STABLEDIFFUSION_GGML_VERSION?=bd17f53b7386fb5f60e8587b75e73c4b2fed3426
|
||||
|
||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||
|
||||
|
||||
@@ -1188,9 +1188,6 @@ int gen_video(sd_vid_gen_params_t *p, int steps, char *dst, float cfg_scale, int
|
||||
p->high_noise_sample_params.scheduler = scheduler;
|
||||
p->high_noise_sample_params.flow_shift = flow_shift;
|
||||
|
||||
// Pin output fps in params; upstream uses it for audio sync (and we also mux at this rate).
|
||||
p->fps = fps;
|
||||
|
||||
// Load init/end reference images if provided (resized to output dims).
|
||||
uint8_t* init_buf = nullptr;
|
||||
uint8_t* end_buf = nullptr;
|
||||
@@ -1209,14 +1206,11 @@ int gen_video(sd_vid_gen_params_t *p, int steps, char *dst, float cfg_scale, int
|
||||
|
||||
// Generate
|
||||
int num_frames_out = 0;
|
||||
sd_image_t* frames = nullptr;
|
||||
sd_audio_t* audio = nullptr;
|
||||
bool ok = generate_video(sd_c, p, &frames, &num_frames_out, &audio);
|
||||
sd_image_t* frames = generate_video(sd_c, p, &num_frames_out);
|
||||
std::free(p);
|
||||
|
||||
if (!ok || !frames || num_frames_out == 0) {
|
||||
if (!frames || num_frames_out == 0) {
|
||||
fprintf(stderr, "generate_video produced no frames\n");
|
||||
if (audio) free_sd_audio(audio);
|
||||
if (init_buf) free(init_buf);
|
||||
if (end_buf) free(end_buf);
|
||||
return 1;
|
||||
@@ -1230,7 +1224,6 @@ int gen_video(sd_vid_gen_params_t *p, int steps, char *dst, float cfg_scale, int
|
||||
if (frames[i].data) free(frames[i].data);
|
||||
}
|
||||
free(frames);
|
||||
if (audio) free_sd_audio(audio);
|
||||
if (init_buf) free(init_buf);
|
||||
if (end_buf) free(end_buf);
|
||||
|
||||
|
||||
@@ -2,9 +2,9 @@ torch==2.7.1
|
||||
llvmlite==0.43.0
|
||||
numba==0.60.0
|
||||
accelerate
|
||||
transformers>=5.8.1
|
||||
transformers>=5.8.0
|
||||
bitsandbytes
|
||||
sentence-transformers==5.5.0
|
||||
sentence-transformers==5.4.0
|
||||
diffusers
|
||||
soundfile
|
||||
protobuf==6.33.5
|
||||
@@ -2,9 +2,9 @@ torch==2.7.1
|
||||
accelerate
|
||||
llvmlite==0.43.0
|
||||
numba==0.60.0
|
||||
transformers>=5.8.1
|
||||
transformers>=5.8.0
|
||||
bitsandbytes
|
||||
sentence-transformers==5.5.0
|
||||
sentence-transformers==5.4.0
|
||||
diffusers
|
||||
soundfile
|
||||
protobuf==6.33.5
|
||||
@@ -2,9 +2,9 @@
|
||||
torch==2.9.0
|
||||
llvmlite==0.43.0
|
||||
numba==0.60.0
|
||||
transformers>=5.8.1
|
||||
transformers>=5.8.0
|
||||
bitsandbytes
|
||||
sentence-transformers==5.5.0
|
||||
sentence-transformers==5.4.0
|
||||
diffusers
|
||||
soundfile
|
||||
protobuf==6.33.5
|
||||
@@ -1,11 +1,11 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm7.0
|
||||
torch==2.10.0+rocm7.0
|
||||
accelerate
|
||||
transformers>=5.8.1
|
||||
transformers>=5.8.0
|
||||
llvmlite==0.43.0
|
||||
numba==0.60.0
|
||||
bitsandbytes
|
||||
sentence-transformers==5.5.0
|
||||
sentence-transformers==5.4.0
|
||||
diffusers
|
||||
soundfile
|
||||
protobuf==6.33.5
|
||||
@@ -3,9 +3,9 @@ torch
|
||||
optimum[openvino]
|
||||
llvmlite==0.43.0
|
||||
numba==0.60.0
|
||||
transformers>=5.8.1
|
||||
transformers>=5.8.0
|
||||
bitsandbytes
|
||||
sentence-transformers==5.5.0
|
||||
sentence-transformers==5.4.0
|
||||
diffusers
|
||||
soundfile
|
||||
protobuf==6.33.5
|
||||
@@ -2,9 +2,9 @@ torch==2.7.1
|
||||
llvmlite==0.43.0
|
||||
numba==0.60.0
|
||||
accelerate
|
||||
transformers>=5.8.1
|
||||
transformers>=5.8.0
|
||||
bitsandbytes
|
||||
sentence-transformers==5.5.0
|
||||
sentence-transformers==5.4.0
|
||||
diffusers
|
||||
soundfile
|
||||
protobuf==6.33.5
|
||||
|
||||
115
core/services/skills/skills_mcp_test.go
Normal file
115
core/services/skills/skills_mcp_test.go
Normal file
@@ -0,0 +1,115 @@
|
||||
package skills_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/modelcontextprotocol/go-sdk/mcp"
|
||||
agiSkills "github.com/mudler/LocalAGI/services/skills"
|
||||
localskills "github.com/mudler/LocalAI/core/services/skills"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
func TestSkillsMCP(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "Skills MCP test")
|
||||
}
|
||||
|
||||
// listSkillsResult mirrors the output struct of skillserver's list_skills tool.
|
||||
type listSkillsResult struct {
|
||||
Skills []struct {
|
||||
ID string `json:"id"`
|
||||
Description string `json:"description,omitempty"`
|
||||
} `json:"skills"`
|
||||
}
|
||||
|
||||
// Exercises the same wire the agent uses at runtime: open an in-process
|
||||
// MCP session via LocalAGI's skills.Service, create a skill through the
|
||||
// LocalAI FilesystemManager, then list_skills on the still-open session.
|
||||
// Guards against regressions in the manager <-> MCP session lifecycle
|
||||
// (e.g. cached manager not picking up newly-created skills).
|
||||
var _ = Describe("Skills exposed to agent via MCP", func() {
|
||||
var (
|
||||
stateDir string
|
||||
svc *agiSkills.Service
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
)
|
||||
|
||||
BeforeEach(func() {
|
||||
var err error
|
||||
stateDir, err = os.MkdirTemp("", "skills-mcp-test")
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
// Create the LocalAGI skills service (this is what AgentPoolService wires
|
||||
// into LocalAGI's state.NewAgentPool for MCP session exposure).
|
||||
svc, err = agiSkills.NewService(stateDir)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second)
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
cancel()
|
||||
os.RemoveAll(stateDir)
|
||||
})
|
||||
|
||||
It("returns a skill created after the MCP session was established", func() {
|
||||
// Open the MCP session first — this is what the agent does at startup
|
||||
// with EnableSkills=true, before any skill might exist.
|
||||
session, err := svc.GetMCPSession(ctx)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(session).NotTo(BeNil())
|
||||
|
||||
res, err := session.CallTool(ctx, &mcp.CallToolParams{Name: "list_skills"})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(res.IsError).To(BeFalse())
|
||||
var initial listSkillsResult
|
||||
Expect(decodeMCPText(res, &initial)).To(Succeed())
|
||||
Expect(initial.Skills).To(BeEmpty(), "no skills should exist initially")
|
||||
|
||||
// Create a skill via the LocalAI FilesystemManager — same code path the
|
||||
// /api/agents/skills POST endpoint takes.
|
||||
mgr := localskills.NewFilesystemManager(svc)
|
||||
_, err = mgr.Create("talk-like-pirate", "Talk like a pirate", "Speak in pirate-style.", "", "", "", nil)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
// Re-list via the SAME already-open session: the manager is shared,
|
||||
// so a freshly-created skill must be visible without re-attaching.
|
||||
res, err = session.CallTool(ctx, &mcp.CallToolParams{Name: "list_skills"})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(res.IsError).To(BeFalse())
|
||||
|
||||
var got listSkillsResult
|
||||
Expect(decodeMCPText(res, &got)).To(Succeed())
|
||||
|
||||
ids := make([]string, 0, len(got.Skills))
|
||||
for _, s := range got.Skills {
|
||||
ids = append(ids, s.ID)
|
||||
}
|
||||
Expect(ids).To(ContainElement("talk-like-pirate"))
|
||||
})
|
||||
})
|
||||
|
||||
func mcpText(res *mcp.CallToolResult) string {
|
||||
text := ""
|
||||
for _, c := range res.Content {
|
||||
if tc, ok := c.(*mcp.TextContent); ok {
|
||||
text += tc.Text
|
||||
}
|
||||
}
|
||||
return text
|
||||
}
|
||||
|
||||
func decodeMCPText(res *mcp.CallToolResult, out any) error {
|
||||
text := mcpText(res)
|
||||
if text == "" {
|
||||
return nil
|
||||
}
|
||||
return json.Unmarshal([]byte(text), out)
|
||||
}
|
||||
@@ -1170,8 +1170,8 @@
|
||||
use_tokenizer_template: true
|
||||
files:
|
||||
- filename: llama-cpp/models/Qwen_Qwen3.5-35B-A3B-GGUF/Qwen_Qwen3.5-35B-A3B-Q4_K_M.gguf
|
||||
sha256: ac15eef4c742ff7700cba697238b25832087b67578f213d5ea24c9e3a6dc4457
|
||||
uri: https://huggingface.co/bartowski/Qwen_Qwen3.5-35B-A3B-GGUF/resolve/main/Qwen_Qwen3.5-35B-A3B-Q4_K_M.gguf
|
||||
sha256: 2f2df1e8b2e92b642c1850ea1734b341cc8ca5098c42cc0a8b8c436a8d4751ab
|
||||
- filename: llama-cpp/mmproj/Qwen_Qwen3.5-35B-A3B-GGUF/mmproj-Qwen_Qwen3.5-35B-A3B-f16.gguf
|
||||
sha256: 10cf13cb1f8434f30df8fa7e5bde98d542fbf397550cb489dfa9eb8ac7069035
|
||||
uri: https://huggingface.co/bartowski/Qwen_Qwen3.5-35B-A3B-GGUF/resolve/main/mmproj-Qwen_Qwen3.5-35B-A3B-f16.gguf
|
||||
|
||||
2
go.mod
2
go.mod
@@ -220,7 +220,7 @@ require (
|
||||
github.com/mschoch/smat v0.2.0 // indirect
|
||||
github.com/mudler/LocalAGI v0.0.0-20260508125235-37810d918a87
|
||||
github.com/mudler/localrecall v0.6.1-0.20260507074622-a7724fef6f81 // indirect
|
||||
github.com/mudler/skillserver v0.0.6
|
||||
github.com/mudler/skillserver v0.0.7-0.20260520220837-a7317cbf9145
|
||||
github.com/olekukonko/tablewriter v0.0.5 // indirect
|
||||
github.com/oxffaa/gopher-parse-sitemap v0.0.0-20191021113419-005d2eb1def4 // indirect
|
||||
github.com/philippgille/chromem-go v0.7.0 // indirect
|
||||
|
||||
4
go.sum
4
go.sum
@@ -984,6 +984,10 @@ github.com/mudler/memory v0.0.0-20260406210934-424c1ecf2cf8 h1:Ry8RiWy8fZ6Ff4E7d
|
||||
github.com/mudler/memory v0.0.0-20260406210934-424c1ecf2cf8/go.mod h1:EA8Ashhd56o32qN7ouPKFSRUs/Z+LrRCF4v6R2Oarm8=
|
||||
github.com/mudler/skillserver v0.0.6 h1:ixz6wUekLdTmbnpAavCkTydDF6UdXAG3ncYufSPK9G0=
|
||||
github.com/mudler/skillserver v0.0.6/go.mod h1:z3yFhcL9bSykmmh6xgGu0hyoItd4CnxgtWMEWw8uFJU=
|
||||
github.com/mudler/skillserver v0.0.7-0.20260520212528-3dae7f041b1e h1:ryXE1UEzGhLkDFYuaxJ0fZ6fg4l++TWfMCTJ1E7bYS8=
|
||||
github.com/mudler/skillserver v0.0.7-0.20260520212528-3dae7f041b1e/go.mod h1:z3yFhcL9bSykmmh6xgGu0hyoItd4CnxgtWMEWw8uFJU=
|
||||
github.com/mudler/skillserver v0.0.7-0.20260520220837-a7317cbf9145 h1:z59tA3IDYPt71nzH1jpxeaA1LuDw8aZfpTQFNU43Zb8=
|
||||
github.com/mudler/skillserver v0.0.7-0.20260520220837-a7317cbf9145/go.mod h1:z3yFhcL9bSykmmh6xgGu0hyoItd4CnxgtWMEWw8uFJU=
|
||||
github.com/mudler/water v0.0.0-20250808092830-dd90dcf09025 h1:WFLP5FHInarYGXi6B/Ze204x7Xy6q/I4nCZnWEyPHK0=
|
||||
github.com/mudler/water v0.0.0-20250808092830-dd90dcf09025/go.mod h1:QuIFdRstyGJt+MTTkWY+mtD7U6xwjOR6SwKUjmLZtR4=
|
||||
github.com/mudler/xlog v0.0.6 h1:3nBV4THK8kY0Y8FDXXvWAnuAJoOyO7EAXteJeAoHUC0=
|
||||
|
||||
Reference in New Issue
Block a user