From ef6ca345132ff584c2b2e8add1694aad03bac2ff Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 20 May 2026 22:53:19 +0200 Subject: [PATCH] chore: :arrow_up: Update leejet/stable-diffusion.cpp to `5b0267e941cade15bd80089d89838795d9f4baa6` (#9907) Adapt the C++ wrapper to the new `generate_video()` signature: upstream now returns `bool` and writes frames/audio via out-parameters (`sd_image_t**`, `sd_audio_t**`). Also set `p->fps` on the params struct (new upstream field) and free the returned audio handle on both the success and error paths. Assisted-by: claude-code:claude-opus-4-7 Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- backend/go/stablediffusion-ggml/Makefile | 2 +- backend/go/stablediffusion-ggml/cpp/gosd.cpp | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/backend/go/stablediffusion-ggml/Makefile b/backend/go/stablediffusion-ggml/Makefile index 060a08e95..b646246e3 100644 --- a/backend/go/stablediffusion-ggml/Makefile +++ b/backend/go/stablediffusion-ggml/Makefile @@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1) # stablediffusion.cpp (ggml) STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp -STABLEDIFFUSION_GGML_VERSION?=bd17f53b7386fb5f60e8587b75e73c4b2fed3426 +STABLEDIFFUSION_GGML_VERSION?=5b0267e941cade15bd80089d89838795d9f4baa6 CMAKE_ARGS+=-DGGML_MAX_NAME=128 diff --git a/backend/go/stablediffusion-ggml/cpp/gosd.cpp b/backend/go/stablediffusion-ggml/cpp/gosd.cpp index 17781fb66..e52b6ef4d 100644 --- a/backend/go/stablediffusion-ggml/cpp/gosd.cpp +++ b/backend/go/stablediffusion-ggml/cpp/gosd.cpp @@ -1188,6 +1188,9 @@ int gen_video(sd_vid_gen_params_t *p, int steps, char *dst, float cfg_scale, int p->high_noise_sample_params.scheduler = scheduler; p->high_noise_sample_params.flow_shift = flow_shift; + // Pin output fps in params; upstream uses it for audio sync (and we also mux at this rate). + p->fps = fps; + // Load init/end reference images if provided (resized to output dims). uint8_t* init_buf = nullptr; uint8_t* end_buf = nullptr; @@ -1206,11 +1209,14 @@ int gen_video(sd_vid_gen_params_t *p, int steps, char *dst, float cfg_scale, int // Generate int num_frames_out = 0; - sd_image_t* frames = generate_video(sd_c, p, &num_frames_out); + sd_image_t* frames = nullptr; + sd_audio_t* audio = nullptr; + bool ok = generate_video(sd_c, p, &frames, &num_frames_out, &audio); std::free(p); - if (!frames || num_frames_out == 0) { + if (!ok || !frames || num_frames_out == 0) { fprintf(stderr, "generate_video produced no frames\n"); + if (audio) free_sd_audio(audio); if (init_buf) free(init_buf); if (end_buf) free(end_buf); return 1; @@ -1224,6 +1230,7 @@ int gen_video(sd_vid_gen_params_t *p, int steps, char *dst, float cfg_scale, int if (frames[i].data) free(frames[i].data); } free(frames); + if (audio) free_sd_audio(audio); if (init_buf) free(init_buf); if (end_buf) free(end_buf);