Compare commits

..

1 Commits

Author SHA1 Message Date
Ettore Di Giacinto
694088ebfe refactor(agents): bump skillserver, drop redundant Name from list_skills/search_skills
skillserver's list_skills MCP tool used to ship every entry with name=""
(field was commented out), while search_skills populated it - two tools
with inconsistent shape for the same data. skill.Name and skill.ID are
populated from the same source string anyway (the directory name), so
returning both was pure duplication.

Bumps github.com/mudler/skillserver to a7317cb, which drops the Name
field from both SkillInfo and SearchResult and leaves ID as the single
canonical identifier (already what read_skill consumes).

Adds core/services/skills/skills_mcp_test.go, a regression test that drives
the LocalAI FilesystemManager through an in-process MCP session and
asserts a newly-created skill is visible by ID on the still-open session.

This is a cleanup, not the root cause of #9868 - the reporter likely
sees something deeper than a cosmetic JSON shape issue.

Assisted-by: Claude:claude-opus-4-7 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-05-20 22:10:07 +00:00
16 changed files with 157 additions and 41 deletions

View File

@@ -1,10 +1,10 @@
# ds4 backend Makefile.
#
# Upstream pin lives below as DS4_VERSION?=2606543be7a8c125a32cee37f5d1d85dc78f2fcf
# Upstream pin lives below as DS4_VERSION?=599e49d253971451f710cb8323344e789906ed6c
# (.github/bump_deps.sh) can find and update it - matches the
# llama-cpp / ik-llama-cpp / turboquant convention.
DS4_VERSION?=2606543be7a8c125a32cee37f5d1d85dc78f2fcf
DS4_VERSION?=599e49d253971451f710cb8323344e789906ed6c
DS4_REPO?=https://github.com/antirez/ds4
CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))

View File

@@ -1,5 +1,5 @@
IK_LLAMA_VERSION?=11a1fea9e291f12ce2c803a9d7812c30ca806bcf
IK_LLAMA_VERSION?=77413bc900f9a2bfd8a5407f184427bcc0825f6c
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
CMAKE_ARGS?=

View File

@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# acestep.cpp version
ACESTEP_REPO?=https://github.com/ace-step/acestep.cpp
ACESTEP_CPP_VERSION?=ed53caf164e4492a5620b2e3f2264629cf66da24
ACESTEP_CPP_VERSION?=e0c8d75a672fca5684c88c68dbf6d12f58754258
SO_TARGET?=libgoacestepcpp.so
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF

View File

@@ -22,11 +22,12 @@
#include <vector>
// Global model contexts (loaded once, reused across requests)
static DiTGGML g_dit = {};
static VAEGGML g_vae = {};
static bool g_dit_loaded = false;
static bool g_vae_loaded = false;
static bool g_is_turbo = false;
static DiTGGML g_dit = {};
static DiTGGMLConfig g_dit_cfg;
static VAEGGML g_vae = {};
static bool g_dit_loaded = false;
static bool g_vae_loaded = false;
static bool g_is_turbo = false;
// Silence latent [15000, 64] — read once from DiT GGUF
static std::vector<float> g_silence_full;
@@ -71,9 +72,10 @@ int load_model(const char * lm_model_path, const char * text_encoder_path,
g_text_enc_path = text_encoder_path;
g_dit_path = dit_model_path;
// Load DiT model (backend init + config are handled inside dit_ggml_load)
// Load DiT model
fprintf(stderr, "[acestep-cpp] Loading DiT from %s\n", dit_model_path);
if (!dit_ggml_load(&g_dit, dit_model_path)) {
dit_ggml_init_backend(&g_dit);
if (!dit_ggml_load(&g_dit, dit_model_path, g_dit_cfg, nullptr, 0.0f)) {
fprintf(stderr, "[acestep-cpp] FATAL: failed to load DiT from %s\n", dit_model_path);
return 1;
}
@@ -147,16 +149,16 @@ int generate_music(const char * caption, const char * lyrics, int bpm,
// Compute T (latent frames at 25Hz)
int T = (int)(duration * FRAMES_PER_SECOND);
T = ((T + g_dit.cfg.patch_size - 1) / g_dit.cfg.patch_size) * g_dit.cfg.patch_size;
int S = T / g_dit.cfg.patch_size;
T = ((T + g_dit_cfg.patch_size - 1) / g_dit_cfg.patch_size) * g_dit_cfg.patch_size;
int S = T / g_dit_cfg.patch_size;
if (T > 15000) {
fprintf(stderr, "[acestep-cpp] ERROR: T=%d exceeds max 15000\n", T);
return 2;
}
int Oc = g_dit.cfg.out_channels; // 64
int ctx_ch = g_dit.cfg.in_channels - Oc; // 128
int Oc = g_dit_cfg.out_channels; // 64
int ctx_ch = g_dit_cfg.in_channels - Oc; // 128
fprintf(stderr, "[acestep-cpp] T=%d, S=%d, duration=%.1fs, seed=%d\n", T, S, duration, seed);
@@ -189,8 +191,9 @@ int generate_music(const char * caption, const char * lyrics, int bpm,
fprintf(stderr, "[acestep-cpp] caption: %d tokens, lyrics: %d tokens\n", S_text, S_lyric);
// 4. Text encoder forward (backend init handled inside qwen3_load_text_encoder)
// 4. Text encoder forward
Qwen3GGML text_enc = {};
qwen3_init_backend(&text_enc);
if (!qwen3_load_text_encoder(&text_enc, g_text_enc_path.c_str())) {
fprintf(stderr, "[acestep-cpp] FATAL: failed to load text encoder\n");
return 4;
@@ -206,8 +209,9 @@ int generate_music(const char * caption, const char * lyrics, int bpm,
std::vector<float> lyric_embed(H_text * S_lyric);
qwen3_embed_lookup(&text_enc, lyric_ids.data(), S_lyric, lyric_embed.data());
// 6. Condition encoder (backend init handled inside cond_ggml_load)
// 6. Condition encoder
CondGGML cond = {};
cond_ggml_init_backend(&cond);
if (!cond_ggml_load(&cond, g_dit_path.c_str())) {
fprintf(stderr, "[acestep-cpp] FATAL: failed to load condition encoder\n");
qwen3_free(&text_enc);

View File

@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=5b0267e941cade15bd80089d89838795d9f4baa6
STABLEDIFFUSION_GGML_VERSION?=bd17f53b7386fb5f60e8587b75e73c4b2fed3426
CMAKE_ARGS+=-DGGML_MAX_NAME=128

View File

@@ -1188,9 +1188,6 @@ int gen_video(sd_vid_gen_params_t *p, int steps, char *dst, float cfg_scale, int
p->high_noise_sample_params.scheduler = scheduler;
p->high_noise_sample_params.flow_shift = flow_shift;
// Pin output fps in params; upstream uses it for audio sync (and we also mux at this rate).
p->fps = fps;
// Load init/end reference images if provided (resized to output dims).
uint8_t* init_buf = nullptr;
uint8_t* end_buf = nullptr;
@@ -1209,14 +1206,11 @@ int gen_video(sd_vid_gen_params_t *p, int steps, char *dst, float cfg_scale, int
// Generate
int num_frames_out = 0;
sd_image_t* frames = nullptr;
sd_audio_t* audio = nullptr;
bool ok = generate_video(sd_c, p, &frames, &num_frames_out, &audio);
sd_image_t* frames = generate_video(sd_c, p, &num_frames_out);
std::free(p);
if (!ok || !frames || num_frames_out == 0) {
if (!frames || num_frames_out == 0) {
fprintf(stderr, "generate_video produced no frames\n");
if (audio) free_sd_audio(audio);
if (init_buf) free(init_buf);
if (end_buf) free(end_buf);
return 1;
@@ -1230,7 +1224,6 @@ int gen_video(sd_vid_gen_params_t *p, int steps, char *dst, float cfg_scale, int
if (frames[i].data) free(frames[i].data);
}
free(frames);
if (audio) free_sd_audio(audio);
if (init_buf) free(init_buf);
if (end_buf) free(end_buf);

View File

@@ -2,9 +2,9 @@ torch==2.7.1
llvmlite==0.43.0
numba==0.60.0
accelerate
transformers>=5.8.1
transformers>=5.8.0
bitsandbytes
sentence-transformers==5.5.0
sentence-transformers==5.4.0
diffusers
soundfile
protobuf==6.33.5

View File

@@ -2,9 +2,9 @@ torch==2.7.1
accelerate
llvmlite==0.43.0
numba==0.60.0
transformers>=5.8.1
transformers>=5.8.0
bitsandbytes
sentence-transformers==5.5.0
sentence-transformers==5.4.0
diffusers
soundfile
protobuf==6.33.5

View File

@@ -2,9 +2,9 @@
torch==2.9.0
llvmlite==0.43.0
numba==0.60.0
transformers>=5.8.1
transformers>=5.8.0
bitsandbytes
sentence-transformers==5.5.0
sentence-transformers==5.4.0
diffusers
soundfile
protobuf==6.33.5

View File

@@ -1,11 +1,11 @@
--extra-index-url https://download.pytorch.org/whl/rocm7.0
torch==2.10.0+rocm7.0
accelerate
transformers>=5.8.1
transformers>=5.8.0
llvmlite==0.43.0
numba==0.60.0
bitsandbytes
sentence-transformers==5.5.0
sentence-transformers==5.4.0
diffusers
soundfile
protobuf==6.33.5

View File

@@ -3,9 +3,9 @@ torch
optimum[openvino]
llvmlite==0.43.0
numba==0.60.0
transformers>=5.8.1
transformers>=5.8.0
bitsandbytes
sentence-transformers==5.5.0
sentence-transformers==5.4.0
diffusers
soundfile
protobuf==6.33.5

View File

@@ -2,9 +2,9 @@ torch==2.7.1
llvmlite==0.43.0
numba==0.60.0
accelerate
transformers>=5.8.1
transformers>=5.8.0
bitsandbytes
sentence-transformers==5.5.0
sentence-transformers==5.4.0
diffusers
soundfile
protobuf==6.33.5

View File

@@ -0,0 +1,115 @@
package skills_test
import (
"context"
"encoding/json"
"os"
"testing"
"time"
"github.com/modelcontextprotocol/go-sdk/mcp"
agiSkills "github.com/mudler/LocalAGI/services/skills"
localskills "github.com/mudler/LocalAI/core/services/skills"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// TestSkillsMCP is the `go test` entry point for this package's Ginkgo suite:
// it routes Gomega assertion failures to Ginkgo and then runs every
// registered spec.
func TestSkillsMCP(t *testing.T) {
	const suiteDescription = "Skills MCP test"

	RegisterFailHandler(Fail)
	RunSpecs(t, suiteDescription)
}
// listSkillsResult mirrors the output struct of skillserver's list_skills tool.
type listSkillsResult struct {
Skills []struct {
ID string `json:"id"`
Description string `json:"description,omitempty"`
} `json:"skills"`
}
// Exercises the same wire the agent uses at runtime: open an in-process
// MCP session via LocalAGI's skills.Service, create a skill through the
// LocalAI FilesystemManager, then list_skills on the still-open session.
// Guards against regressions in the manager <-> MCP session lifecycle
// (e.g. cached manager not picking up newly-created skills).
var _ = Describe("Skills exposed to agent via MCP", func() {
	var (
		stateDir string
		svc      *agiSkills.Service
		ctx      context.Context
		cancel   context.CancelFunc
	)

	BeforeEach(func() {
		var err error
		stateDir, err = os.MkdirTemp("", "skills-mcp-test")
		Expect(err).NotTo(HaveOccurred())
		// Register each cleanup right after its resource exists. Cleanup nodes
		// still run when a later step of this setup fails, so teardown must
		// never touch something that was not created yet (previously an
		// unconditional cancel() in AfterEach would panic on a nil func if
		// setup aborted before the context was built, hiding the real error).
		dir := stateDir
		DeferCleanup(func() {
			// Best-effort removal, as before: a leftover temp dir must not
			// fail the spec.
			_ = os.RemoveAll(dir)
		})

		// Create the LocalAGI skills service (this is what AgentPoolService wires
		// into LocalAGI's state.NewAgentPool for MCP session exposure).
		svc, err = agiSkills.NewService(stateDir)
		Expect(err).NotTo(HaveOccurred())

		ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second)
		// Cleanups run in reverse registration order, so the context is
		// cancelled before the state directory is removed (same order as the
		// original teardown).
		DeferCleanup(cancel)
	})

	It("returns a skill created after the MCP session was established", func() {
		// Open the MCP session first — this is what the agent does at startup
		// with EnableSkills=true, before any skill might exist.
		session, err := svc.GetMCPSession(ctx)
		Expect(err).NotTo(HaveOccurred())
		Expect(session).NotTo(BeNil())

		res, err := session.CallTool(ctx, &mcp.CallToolParams{Name: "list_skills"})
		Expect(err).NotTo(HaveOccurred())
		Expect(res.IsError).To(BeFalse())

		var initial listSkillsResult
		Expect(decodeMCPText(res, &initial)).To(Succeed())
		Expect(initial.Skills).To(BeEmpty(), "no skills should exist initially")

		// Create a skill via the LocalAI FilesystemManager — same code path the
		// /api/agents/skills POST endpoint takes.
		mgr := localskills.NewFilesystemManager(svc)
		_, err = mgr.Create("talk-like-pirate", "Talk like a pirate", "Speak in pirate-style.", "", "", "", nil)
		Expect(err).NotTo(HaveOccurred())

		// Re-list via the SAME already-open session: the manager is shared,
		// so a freshly-created skill must be visible without re-attaching.
		res, err = session.CallTool(ctx, &mcp.CallToolParams{Name: "list_skills"})
		Expect(err).NotTo(HaveOccurred())
		Expect(res.IsError).To(BeFalse())

		var got listSkillsResult
		Expect(decodeMCPText(res, &got)).To(Succeed())

		ids := make([]string, 0, len(got.Skills))
		for _, s := range got.Skills {
			ids = append(ids, s.ID)
		}
		Expect(ids).To(ContainElement("talk-like-pirate"))
	})
})
// mcpText concatenates, in order, the Text of every *mcp.TextContent item in
// res.Content. Items of any other content type are skipped; the result is ""
// when there are no text items.
func mcpText(res *mcp.CallToolResult) string {
	var joined string
	for i := range res.Content {
		switch item := res.Content[i].(type) {
		case *mcp.TextContent:
			joined += item.Text
		}
	}
	return joined
}
// decodeMCPText JSON-decodes the concatenated text content of res into out.
// When the result carries no text at all, out is left untouched and nil is
// returned; otherwise the json.Unmarshal error (if any) is returned.
func decodeMCPText(res *mcp.CallToolResult, out any) error {
	if payload := mcpText(res); payload != "" {
		return json.Unmarshal([]byte(payload), out)
	}
	return nil
}

View File

@@ -1170,8 +1170,8 @@
use_tokenizer_template: true
files:
- filename: llama-cpp/models/Qwen_Qwen3.5-35B-A3B-GGUF/Qwen_Qwen3.5-35B-A3B-Q4_K_M.gguf
sha256: ac15eef4c742ff7700cba697238b25832087b67578f213d5ea24c9e3a6dc4457
uri: https://huggingface.co/bartowski/Qwen_Qwen3.5-35B-A3B-GGUF/resolve/main/Qwen_Qwen3.5-35B-A3B-Q4_K_M.gguf
sha256: 2f2df1e8b2e92b642c1850ea1734b341cc8ca5098c42cc0a8b8c436a8d4751ab
- filename: llama-cpp/mmproj/Qwen_Qwen3.5-35B-A3B-GGUF/mmproj-Qwen_Qwen3.5-35B-A3B-f16.gguf
sha256: 10cf13cb1f8434f30df8fa7e5bde98d542fbf397550cb489dfa9eb8ac7069035
uri: https://huggingface.co/bartowski/Qwen_Qwen3.5-35B-A3B-GGUF/resolve/main/mmproj-Qwen_Qwen3.5-35B-A3B-f16.gguf

2
go.mod
View File

@@ -220,7 +220,7 @@ require (
github.com/mschoch/smat v0.2.0 // indirect
github.com/mudler/LocalAGI v0.0.0-20260508125235-37810d918a87
github.com/mudler/localrecall v0.6.1-0.20260507074622-a7724fef6f81 // indirect
github.com/mudler/skillserver v0.0.6
github.com/mudler/skillserver v0.0.7-0.20260520220837-a7317cbf9145
github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/oxffaa/gopher-parse-sitemap v0.0.0-20191021113419-005d2eb1def4 // indirect
github.com/philippgille/chromem-go v0.7.0 // indirect

4
go.sum
View File

@@ -984,6 +984,10 @@ github.com/mudler/memory v0.0.0-20260406210934-424c1ecf2cf8 h1:Ry8RiWy8fZ6Ff4E7d
github.com/mudler/memory v0.0.0-20260406210934-424c1ecf2cf8/go.mod h1:EA8Ashhd56o32qN7ouPKFSRUs/Z+LrRCF4v6R2Oarm8=
github.com/mudler/skillserver v0.0.6 h1:ixz6wUekLdTmbnpAavCkTydDF6UdXAG3ncYufSPK9G0=
github.com/mudler/skillserver v0.0.6/go.mod h1:z3yFhcL9bSykmmh6xgGu0hyoItd4CnxgtWMEWw8uFJU=
github.com/mudler/skillserver v0.0.7-0.20260520212528-3dae7f041b1e h1:ryXE1UEzGhLkDFYuaxJ0fZ6fg4l++TWfMCTJ1E7bYS8=
github.com/mudler/skillserver v0.0.7-0.20260520212528-3dae7f041b1e/go.mod h1:z3yFhcL9bSykmmh6xgGu0hyoItd4CnxgtWMEWw8uFJU=
github.com/mudler/skillserver v0.0.7-0.20260520220837-a7317cbf9145 h1:z59tA3IDYPt71nzH1jpxeaA1LuDw8aZfpTQFNU43Zb8=
github.com/mudler/skillserver v0.0.7-0.20260520220837-a7317cbf9145/go.mod h1:z3yFhcL9bSykmmh6xgGu0hyoItd4CnxgtWMEWw8uFJU=
github.com/mudler/water v0.0.0-20250808092830-dd90dcf09025 h1:WFLP5FHInarYGXi6B/Ze204x7Xy6q/I4nCZnWEyPHK0=
github.com/mudler/water v0.0.0-20250808092830-dd90dcf09025/go.mod h1:QuIFdRstyGJt+MTTkWY+mtD7U6xwjOR6SwKUjmLZtR4=
github.com/mudler/xlog v0.0.6 h1:3nBV4THK8kY0Y8FDXXvWAnuAJoOyO7EAXteJeAoHUC0=