Fix image upload processing and img2img pipeline in diffusers backend (#8879)

* fix: add missing bufio.Flush in processImageFile

The processImageFile function writes decoded image data (from base64
or URL download) through a bufio.NewWriter but never calls Flush()
before closing the underlying file. Since bufio's default buffer is
4096 bytes, small images produce 0-byte files and large images are
truncated — causing PIL to fail with "cannot identify image file".

This breaks all image input paths: file, files, and ref_images
parameters in /v1/images/generations, making img2img, inpainting,
and reference image features non-functional.

Signed-off-by: Attila Györffy <attila+git@attilagyorffy.com>

* fix: merge options into kwargs in diffusers GenerateImage

The GenerateImage method builds a local `options` dict containing the
source image (PIL), negative_prompt, and num_inference_steps, but
never merges it into `kwargs` before calling self.pipe(**kwargs).
This causes img2img to fail with "Input is in incorrect format"
because the pipeline never receives the image parameter.

Signed-off-by: Attila Györffy <attila+git@attilagyorffy.com>

* test: add unit test for processImageFile base64 decoding

Verifies that a base64-encoded PNG survives the write path
(encode → decode → bufio.Write → Flush → file on disk) with
byte-for-byte fidelity. The test image is small enough to fit
entirely in bufio's 4096-byte buffer, which is the exact scenario
where the missing Flush() produced a 0-byte file.

Also tests that invalid base64 input is handled gracefully.

Signed-off-by: Attila Györffy <attila+git@attilagyorffy.com>

* test: verify GenerateImage merges options into pipeline kwargs

Mocks the diffusers pipeline and calls GenerateImage with a source
image and negative prompt. Asserts that the pipeline receives the
image, negative_prompt, and num_inference_steps via kwargs — the
exact parameters that were silently dropped before the fix.

Signed-off-by: Attila Györffy <attila+git@attilagyorffy.com>

* fix: move kwargs.update(options) earlier in GenerateImage

Move the options merge right after self.options merge (L742) so that
image, negative_prompt, and num_inference_steps are available to all
downstream code paths including img2vid and txt2vid.

Signed-off-by: Attila Györffy <attila+git@attilagyorffy.com>

* test: convert processImageFile tests to ginkgo

Replace standard testing with ginkgo/gomega to be consistent with
the rest of the test suites in the project.

Signed-off-by: Attila Györffy <attila+git@attilagyorffy.com>

---------

Signed-off-by: Attila Györffy <attila+git@attilagyorffy.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
This commit is contained in:
Attila Györffy
2026-03-11 08:05:50 +01:00
committed by GitHub
parent 270eb956c7
commit 5a67b5d73c
5 changed files with 137 additions and 1 deletions

View File

@@ -741,6 +741,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
# populate kwargs from self.options.
kwargs.update(self.options)
kwargs.update(options)
# Set seed
if request.seed > 0:
kwargs["generator"] = torch.Generator(device=self.device).manual_seed(

View File

@@ -312,3 +312,64 @@ class TestDiffusersDynamicLoaderWithMocks(unittest.TestCase):
# or fail depending on network, but the fallback path should work.
cls = loader.resolve_pipeline_class(model_id="some/nonexistent/model")
self.assertEqual(cls, DiffusionPipeline)
@unittest.skipUnless(GRPC_AVAILABLE, "gRPC modules not available")
class TestGenerateImageOptionsKwargsMerge(unittest.TestCase):
"""Test that GenerateImage merges the options dict into pipeline kwargs.
The options dict holds image (PIL), negative_prompt, and
num_inference_steps. Without the merge, img2img pipelines never
receive the source image and fail with 'Input is in incorrect format'.
"""
def test_options_merged_into_pipeline_kwargs(self):
from backend import BackendServicer
from PIL import Image
import tempfile, os
svc = BackendServicer.__new__(BackendServicer)
# Minimal attributes the method reads
svc.pipe = MagicMock()
svc.pipe.return_value.images = [Image.new("RGB", (4, 4))]
svc.cfg_scale = 7.5
svc.controlnet = None
svc.img2vid = False
svc.txt2vid = False
svc.clip_skip = 0
svc.PipelineType = "StableDiffusionImg2ImgPipeline"
svc.options = {}
# Create a tiny source image for the request's src field
src_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
Image.new("RGB", (4, 4), color="red").save(src_file, format="PNG")
src_file.close()
dst_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
dst_file.close()
try:
request = MagicMock()
request.positive_prompt = "a test prompt"
request.negative_prompt = "bad quality"
request.step = 10
request.seed = 0
request.width = 0
request.height = 0
request.src = src_file.name
request.ref_images = []
request.dst = dst_file.name
svc.GenerateImage(request, context=None)
# The pipeline must have been called with the image kwarg
svc.pipe.assert_called_once()
_, call_kwargs = svc.pipe.call_args
self.assertIn("image", call_kwargs,
"source image must be passed to pipeline via kwargs")
self.assertIn("negative_prompt", call_kwargs,
"negative_prompt must be passed to pipeline via kwargs")
self.assertEqual(call_kwargs["num_inference_steps"], 10)
finally:
os.unlink(src_file.name)
os.unlink(dst_file.name)

View File

@@ -288,7 +288,7 @@ func processImageFile(file string, generatedContentDir string) string {
return ""
}
// write the base64 result
// write the decoded result
writer := bufio.NewWriter(outputFile)
_, err = writer.Write(fileData)
if err != nil {
@@ -296,6 +296,11 @@ func processImageFile(file string, generatedContentDir string) string {
xlog.Error("Failed writing to temporary file", "error", err)
return ""
}
if err := writer.Flush(); err != nil {
outputFile.Close()
xlog.Error("Failed flushing to temporary file", "error", err)
return ""
}
outputFile.Close()
return outputFile.Name()

View File

@@ -0,0 +1,55 @@
package openai
import (
"encoding/base64"
"os"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("processImageFile", func() {
var tmpDir string
BeforeEach(func() {
var err error
tmpDir, err = os.MkdirTemp("", "processimage")
Expect(err).ToNot(HaveOccurred())
})
AfterEach(func() {
os.RemoveAll(tmpDir)
})
It("should decode base64 and write all bytes to disk", func() {
// 4x4 red pixel PNG (68 bytes raw) — small enough to fit in bufio's
// default 4096-byte buffer, which is exactly the scenario where a
// missing Flush() produces a 0-byte file.
pngBytes := []byte{
0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, // PNG signature
0x00, 0x00, 0x00, 0x0d, 0x49, 0x48, 0x44, 0x52, // IHDR chunk
0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04,
0x08, 0x02, 0x00, 0x00, 0x00, 0x26, 0x93, 0x09,
0x29, 0x00, 0x00, 0x00, 0x1c, 0x49, 0x44, 0x41, // IDAT chunk
0x54, 0x78, 0x9c, 0x62, 0xf8, 0xcf, 0xc0, 0xc0,
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00,
0x00, 0x31, 0x00, 0x01, 0x2e, 0xa8, 0xd1, 0xe5,
0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4e, 0x44, // IEND chunk
0xae, 0x42, 0x60, 0x82,
}
b64 := base64.StdEncoding.EncodeToString(pngBytes)
outPath := processImageFile(b64, tmpDir)
Expect(outPath).ToNot(BeEmpty(), "processImageFile should return a file path")
written, err := os.ReadFile(outPath)
Expect(err).ToNot(HaveOccurred())
Expect(written).To(Equal(pngBytes), "file on disk must match the original bytes")
})
It("should return empty string for invalid base64", func() {
outPath := processImageFile("not-valid-base64!!!", tmpDir)
Expect(outPath).To(BeEmpty(), "should return empty string for invalid base64")
})
})

View File

@@ -0,0 +1,13 @@
package openai
import (
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
func TestOpenAI(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "OpenAI Endpoints Suite")
}