From 5a67b5d73c2930bb8aa6e24ec4493e32cbf13197 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Attila=20Gy=C3=B6rffy?=
 <9091+attilagyorffy@users.noreply.github.com>
Date: Wed, 11 Mar 2026 08:05:50 +0100
Subject: [PATCH] Fix image upload processing and img2img pipeline in diffusers
 backend (#8879)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: add missing bufio.Flush in processImageFile

The processImageFile function writes decoded image data (from base64
or URL download) through a bufio.NewWriter but never calls Flush()
before closing the underlying file. Since bufio's default buffer is
4096 bytes, small images produce 0-byte files and large images are
truncated — causing PIL to fail with "cannot identify image file".

This breaks all image input paths: file, files, and ref_images
parameters in /v1/images/generations, making img2img, inpainting,
and reference image features non-functional.

Signed-off-by: Attila Györffy <attila+git@attilagyorffy.com>

* fix: merge options into kwargs in diffusers GenerateImage

The GenerateImage method builds a local `options` dict containing the
source image (PIL), negative_prompt, and num_inference_steps, but
never merges it into `kwargs` before calling self.pipe(**kwargs).
This causes img2img to fail with "Input is in incorrect format"
because the pipeline never receives the image parameter.

Signed-off-by: Attila Györffy <attila+git@attilagyorffy.com>

* test: add unit test for processImageFile base64 decoding

Verifies that a base64-encoded PNG survives the write path
(encode → decode → bufio.Write → Flush → file on disk) with
byte-for-byte fidelity. The test image is small enough to fit
entirely in bufio's 4096-byte buffer, which is the exact scenario
where the missing Flush() produced a 0-byte file.

Also tests that invalid base64 input is handled gracefully.

Signed-off-by: Attila Györffy <attila+git@attilagyorffy.com>

* test: verify GenerateImage merges options into pipeline kwargs

Mocks the diffusers pipeline and calls GenerateImage with a source
image and negative prompt. Asserts that the pipeline receives the
image, negative_prompt, and num_inference_steps via kwargs — the
exact parameters that were silently dropped before the fix.

Signed-off-by: Attila Györffy <attila+git@attilagyorffy.com>

* fix: move kwargs.update(options) earlier in GenerateImage

Move the options merge right after self.options merge (L742) so that
image, negative_prompt, and num_inference_steps are available to all
downstream code paths including img2vid and txt2vid.

Signed-off-by: Attila Györffy <attila+git@attilagyorffy.com>

* test: convert processImageFile tests to ginkgo

Replace standard testing with ginkgo/gomega to be consistent with
the rest of the test suites in the project.

Signed-off-by: Attila Györffy <attila+git@attilagyorffy.com>

---------

Signed-off-by: Attila Györffy <attila+git@attilagyorffy.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
---
 backend/python/diffusers/backend.py           |  2 +
 backend/python/diffusers/test.py              | 61 +++++++++++++++++++
 core/http/endpoints/openai/image.go           |  7 ++-
 core/http/endpoints/openai/image_test.go      | 55 +++++++++++++++++
 .../endpoints/openai/openai_suite_test.go     | 13 ++++
 5 files changed, 137 insertions(+), 1 deletion(-)
 create mode 100644 core/http/endpoints/openai/image_test.go
 create mode 100644 core/http/endpoints/openai/openai_suite_test.go

diff --git a/backend/python/diffusers/backend.py b/backend/python/diffusers/backend.py
index 979731e55..19daeff20 100755
--- a/backend/python/diffusers/backend.py
+++ b/backend/python/diffusers/backend.py
@@ -741,6 +741,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         # populate kwargs from self.options.
         kwargs.update(self.options)
 
+        kwargs.update(options)
+
         # Set seed
         if request.seed > 0:
             kwargs["generator"] = torch.Generator(device=self.device).manual_seed(
diff --git a/backend/python/diffusers/test.py b/backend/python/diffusers/test.py
index 5befeca0a..eff293ee6 100644
--- a/backend/python/diffusers/test.py
+++ b/backend/python/diffusers/test.py
@@ -312,3 +312,64 @@ class TestDiffusersDynamicLoaderWithMocks(unittest.TestCase):
         # or fail depending on network, but the fallback path should work.
         cls = loader.resolve_pipeline_class(model_id="some/nonexistent/model")
         self.assertEqual(cls, DiffusionPipeline)
+
+
+@unittest.skipUnless(GRPC_AVAILABLE, "gRPC modules not available")
+class TestGenerateImageOptionsKwargsMerge(unittest.TestCase):
+    """Test that GenerateImage merges the options dict into pipeline kwargs.
+
+    The options dict holds image (PIL), negative_prompt, and
+    num_inference_steps. Without the merge, img2img pipelines never
+    receive the source image and fail with 'Input is in incorrect format'.
+    """
+
+    def test_options_merged_into_pipeline_kwargs(self):
+        from backend import BackendServicer
+        from PIL import Image
+        import tempfile, os
+
+        svc = BackendServicer.__new__(BackendServicer)
+        # Minimal attributes the method reads
+        svc.pipe = MagicMock()
+        svc.pipe.return_value.images = [Image.new("RGB", (4, 4))]
+        svc.cfg_scale = 7.5
+        svc.controlnet = None
+        svc.img2vid = False
+        svc.txt2vid = False
+        svc.clip_skip = 0
+        svc.PipelineType = "StableDiffusionImg2ImgPipeline"
+        svc.options = {}
+
+        # Create a tiny source image for the request's src field
+        src_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
+        Image.new("RGB", (4, 4), color="red").save(src_file, format="PNG")
+        src_file.close()
+
+        dst_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
+        dst_file.close()
+
+        try:
+            request = MagicMock()
+            request.positive_prompt = "a test prompt"
+            request.negative_prompt = "bad quality"
+            request.step = 10
+            request.seed = 0
+            request.width = 0
+            request.height = 0
+            request.src = src_file.name
+            request.ref_images = []
+            request.dst = dst_file.name
+
+            svc.GenerateImage(request, context=None)
+
+            # The pipeline must have been called with the image kwarg
+            svc.pipe.assert_called_once()
+            _, call_kwargs = svc.pipe.call_args
+            self.assertIn("image", call_kwargs,
+                          "source image must be passed to pipeline via kwargs")
+            self.assertIn("negative_prompt", call_kwargs,
+                          "negative_prompt must be passed to pipeline via kwargs")
+            self.assertEqual(call_kwargs["num_inference_steps"], 10)
+        finally:
+            os.unlink(src_file.name)
+            os.unlink(dst_file.name)
diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go
index 86c7bc2dc..32834a923 100644
--- a/core/http/endpoints/openai/image.go
+++ b/core/http/endpoints/openai/image.go
@@ -288,7 +288,7 @@ func processImageFile(file string, generatedContentDir string) string {
 		return ""
 	}
 
-	// write the base64 result
+	// write the decoded result
 	writer := bufio.NewWriter(outputFile)
 	_, err = writer.Write(fileData)
 	if err != nil {
@@ -296,6 +296,11 @@ func processImageFile(file string, generatedContentDir string) string {
 		xlog.Error("Failed writing to temporary file", "error", err)
 		return ""
 	}
+	if err := writer.Flush(); err != nil {
+		outputFile.Close()
+		xlog.Error("Failed flushing to temporary file", "error", err)
+		return ""
+	}
 	outputFile.Close()
 
 	return outputFile.Name()
diff --git a/core/http/endpoints/openai/image_test.go b/core/http/endpoints/openai/image_test.go
new file mode 100644
index 000000000..c3385cb4c
--- /dev/null
+++ b/core/http/endpoints/openai/image_test.go
@@ -0,0 +1,55 @@
+package openai
+
+import (
+	"encoding/base64"
+	"os"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("processImageFile", func() {
+	var tmpDir string
+
+	BeforeEach(func() {
+		var err error
+		tmpDir, err = os.MkdirTemp("", "processimage")
+		Expect(err).ToNot(HaveOccurred())
+	})
+
+	AfterEach(func() {
+		os.RemoveAll(tmpDir)
+	})
+
+	It("should decode base64 and write all bytes to disk", func() {
+		// 4x4 red pixel PNG (68 bytes raw) — small enough to fit in bufio's
+		// default 4096-byte buffer, which is exactly the scenario where a
+		// missing Flush() produces a 0-byte file.
+		pngBytes := []byte{
+			0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, // PNG signature
+			0x00, 0x00, 0x00, 0x0d, 0x49, 0x48, 0x44, 0x52, // IHDR chunk
+			0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04,
+			0x08, 0x02, 0x00, 0x00, 0x00, 0x26, 0x93, 0x09,
+			0x29, 0x00, 0x00, 0x00, 0x1c, 0x49, 0x44, 0x41, // IDAT chunk
+			0x54, 0x78, 0x9c, 0x62, 0xf8, 0xcf, 0xc0, 0xc0,
+			0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
+			0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00,
+			0x00, 0x31, 0x00, 0x01, 0x2e, 0xa8, 0xd1, 0xe5,
+			0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4e, 0x44, // IEND chunk
+			0xae, 0x42, 0x60, 0x82,
+		}
+		b64 := base64.StdEncoding.EncodeToString(pngBytes)
+
+		outPath := processImageFile(b64, tmpDir)
+		Expect(outPath).ToNot(BeEmpty(), "processImageFile should return a file path")
+
+		written, err := os.ReadFile(outPath)
+		Expect(err).ToNot(HaveOccurred())
+		Expect(written).To(Equal(pngBytes), "file on disk must match the original bytes")
+	})
+
+	It("should return empty string for invalid base64", func() {
+		outPath := processImageFile("not-valid-base64!!!", tmpDir)
+		Expect(outPath).To(BeEmpty(), "should return empty string for invalid base64")
+	})
+})
diff --git a/core/http/endpoints/openai/openai_suite_test.go b/core/http/endpoints/openai/openai_suite_test.go
new file mode 100644
index 000000000..fbabe6b2f
--- /dev/null
+++ b/core/http/endpoints/openai/openai_suite_test.go
@@ -0,0 +1,13 @@
+package openai
+
+import (
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+func TestOpenAI(t *testing.T) {
+	RegisterFailHandler(Fail)
+	RunSpecs(t, "OpenAI Endpoints Suite")
+}