From 5a67b5d73c2930bb8aa6e24ec4493e32cbf13197 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Attila=20Gy=C3=B6rffy?= <9091+attilagyorffy@users.noreply.github.com> Date: Wed, 11 Mar 2026 08:05:50 +0100 Subject: [PATCH] Fix image upload processing and img2img pipeline in diffusers backend (#8879) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: add missing bufio.Flush in processImageFile The processImageFile function writes decoded image data (from base64 or URL download) through a bufio.NewWriter but never calls Flush() before closing the underlying file. Since bufio's default buffer is 4096 bytes, small images produce 0-byte files and large images are truncated — causing PIL to fail with "cannot identify image file". This breaks all image input paths: file, files, and ref_images parameters in /v1/images/generations, making img2img, inpainting, and reference image features non-functional. Signed-off-by: Attila Györffy * fix: merge options into kwargs in diffusers GenerateImage The GenerateImage method builds a local `options` dict containing the source image (PIL), negative_prompt, and num_inference_steps, but never merges it into `kwargs` before calling self.pipe(**kwargs). This causes img2img to fail with "Input is in incorrect format" because the pipeline never receives the image parameter. Signed-off-by: Attila Györffy * test: add unit test for processImageFile base64 decoding Verifies that a base64-encoded PNG survives the write path (encode → decode → bufio.Write → Flush → file on disk) with byte-for-byte fidelity. The test image is small enough to fit entirely in bufio's 4096-byte buffer, which is the exact scenario where the missing Flush() produced a 0-byte file. Also tests that invalid base64 input is handled gracefully. Signed-off-by: Attila Györffy * test: verify GenerateImage merges options into pipeline kwargs Mocks the diffusers pipeline and calls GenerateImage with a source image and negative prompt. Asserts that the pipeline receives the image, negative_prompt, and num_inference_steps via kwargs — the exact parameters that were silently dropped before the fix. Signed-off-by: Attila Györffy * fix: move kwargs.update(options) earlier in GenerateImage Move the options merge right after self.options merge (L742) so that image, negative_prompt, and num_inference_steps are available to all downstream code paths including img2vid and txt2vid. Signed-off-by: Attila Györffy * test: convert processImageFile tests to ginkgo Replace standard testing with ginkgo/gomega to be consistent with the rest of the test suites in the project. Signed-off-by: Attila Györffy --------- Signed-off-by: Attila Györffy Co-authored-by: Ettore Di Giacinto --- backend/python/diffusers/backend.py | 2 + backend/python/diffusers/test.py | 61 +++++++++++++++++++ core/http/endpoints/openai/image.go | 7 ++- core/http/endpoints/openai/image_test.go | 55 +++++++++++++++++ .../endpoints/openai/openai_suite_test.go | 13 ++++ 5 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 core/http/endpoints/openai/image_test.go create mode 100644 core/http/endpoints/openai/openai_suite_test.go diff --git a/backend/python/diffusers/backend.py b/backend/python/diffusers/backend.py index 979731e55..19daeff20 100755 --- a/backend/python/diffusers/backend.py +++ b/backend/python/diffusers/backend.py @@ -741,6 +741,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): # populate kwargs from self.options. kwargs.update(self.options) + kwargs.update(options) + # Set seed if request.seed > 0: kwargs["generator"] = torch.Generator(device=self.device).manual_seed( diff --git a/backend/python/diffusers/test.py b/backend/python/diffusers/test.py index 5befeca0a..eff293ee6 100644 --- a/backend/python/diffusers/test.py +++ b/backend/python/diffusers/test.py @@ -312,3 +312,64 @@ class TestDiffusersDynamicLoaderWithMocks(unittest.TestCase): # or fail depending on network, but the fallback path should work. cls = loader.resolve_pipeline_class(model_id="some/nonexistent/model") self.assertEqual(cls, DiffusionPipeline) + + +@unittest.skipUnless(GRPC_AVAILABLE, "gRPC modules not available") +class TestGenerateImageOptionsKwargsMerge(unittest.TestCase): + """Test that GenerateImage merges the options dict into pipeline kwargs. + + The options dict holds image (PIL), negative_prompt, and + num_inference_steps. Without the merge, img2img pipelines never + receive the source image and fail with 'Input is in incorrect format'. + """ + + def test_options_merged_into_pipeline_kwargs(self): + from backend import BackendServicer + from PIL import Image + import tempfile, os + + svc = BackendServicer.__new__(BackendServicer) + # Minimal attributes the method reads + svc.pipe = MagicMock() + svc.pipe.return_value.images = [Image.new("RGB", (4, 4))] + svc.cfg_scale = 7.5 + svc.controlnet = None + svc.img2vid = False + svc.txt2vid = False + svc.clip_skip = 0 + svc.PipelineType = "StableDiffusionImg2ImgPipeline" + svc.options = {} + + # Create a tiny source image for the request's src field + src_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False) + Image.new("RGB", (4, 4), color="red").save(src_file, format="PNG") + src_file.close() + + dst_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False) + dst_file.close() + + try: + request = MagicMock() + request.positive_prompt = "a test prompt" + request.negative_prompt = "bad quality" + request.step = 10 + request.seed = 0 + request.width = 0 + request.height = 0 + request.src = src_file.name + request.ref_images = [] + request.dst = dst_file.name + + svc.GenerateImage(request, context=None) + + # The pipeline must have been called with the image kwarg + svc.pipe.assert_called_once() + _, call_kwargs = svc.pipe.call_args + self.assertIn("image", call_kwargs, + "source image must be passed to pipeline via kwargs") + self.assertIn("negative_prompt", call_kwargs, + "negative_prompt must be passed to pipeline via kwargs") + self.assertEqual(call_kwargs["num_inference_steps"], 10) + finally: + os.unlink(src_file.name) + os.unlink(dst_file.name) diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go index 86c7bc2dc..32834a923 100644 --- a/core/http/endpoints/openai/image.go +++ b/core/http/endpoints/openai/image.go @@ -288,7 +288,7 @@ func processImageFile(file string, generatedContentDir string) string { return "" } - // write the base64 result + // write the decoded result writer := bufio.NewWriter(outputFile) _, err = writer.Write(fileData) if err != nil { @@ -296,6 +296,11 @@ func processImageFile(file string, generatedContentDir string) string { xlog.Error("Failed writing to temporary file", "error", err) return "" } + if err := writer.Flush(); err != nil { + outputFile.Close() + xlog.Error("Failed flushing to temporary file", "error", err) + return "" + } outputFile.Close() return outputFile.Name() diff --git a/core/http/endpoints/openai/image_test.go b/core/http/endpoints/openai/image_test.go new file mode 100644 index 000000000..c3385cb4c --- /dev/null +++ b/core/http/endpoints/openai/image_test.go @@ -0,0 +1,55 @@ +package openai + +import ( + "encoding/base64" + "os" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("processImageFile", func() { + var tmpDir string + + BeforeEach(func() { + var err error + tmpDir, err = os.MkdirTemp("", "processimage") + Expect(err).ToNot(HaveOccurred()) + }) + + AfterEach(func() { + os.RemoveAll(tmpDir) + }) + + It("should decode base64 and write all bytes to disk", func() { + // 4x4 red pixel PNG (68 bytes raw) — small enough to fit in bufio's + // default 4096-byte buffer, which is exactly the scenario where a + // missing Flush() produces a 0-byte file. + pngBytes := []byte{ + 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, // PNG signature + 0x00, 0x00, 0x00, 0x0d, 0x49, 0x48, 0x44, 0x52, // IHDR chunk + 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, + 0x08, 0x02, 0x00, 0x00, 0x00, 0x26, 0x93, 0x09, + 0x29, 0x00, 0x00, 0x00, 0x1c, 0x49, 0x44, 0x41, // IDAT chunk + 0x54, 0x78, 0x9c, 0x62, 0xf8, 0xcf, 0xc0, 0xc0, + 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, + 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, + 0x00, 0x31, 0x00, 0x01, 0x2e, 0xa8, 0xd1, 0xe5, + 0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4e, 0x44, // IEND chunk + 0xae, 0x42, 0x60, 0x82, + } + b64 := base64.StdEncoding.EncodeToString(pngBytes) + + outPath := processImageFile(b64, tmpDir) + Expect(outPath).ToNot(BeEmpty(), "processImageFile should return a file path") + + written, err := os.ReadFile(outPath) + Expect(err).ToNot(HaveOccurred()) + Expect(written).To(Equal(pngBytes), "file on disk must match the original bytes") + }) + + It("should return empty string for invalid base64", func() { + outPath := processImageFile("not-valid-base64!!!", tmpDir) + Expect(outPath).To(BeEmpty(), "should return empty string for invalid base64") + }) +}) diff --git a/core/http/endpoints/openai/openai_suite_test.go b/core/http/endpoints/openai/openai_suite_test.go new file mode 100644 index 000000000..fbabe6b2f --- /dev/null +++ b/core/http/endpoints/openai/openai_suite_test.go @@ -0,0 +1,13 @@ +package openai + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestOpenAI(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "OpenAI Endpoints Suite") +}