feat(diffusers): implement dynamic pipeline loader to remove per-pipeline conditionals (#7365)

* Initial plan Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add dynamic loader for diffusers pipelines and refactor backend.py Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fix pipeline discovery error handling and test mock issue Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Address code review feedback: direct imports, better error handling, improved tests Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Address remaining code review feedback: specific exceptions, registry access, test imports Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add defensive fallback for DiffusionPipeline registry access Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Actually use dynamic pipeline loading for all pipelines in backend Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Use dynamic loader consistently for all pipelines including AutoPipelineForText2Image Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Move dynamic loader tests into test.py for CI compatibility Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Extend dynamic loader to discover any diffusers class type, not just DiffusionPipeline Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add AutoPipeline classes to pipeline registry for default model loading Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(python): set pyvenv python home Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do pyenv update during start Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Minor changes Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> Co-authored-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2026-06-03 13:56:46 -04:00 · 2025-12-04 19:02:06 +01:00
parent 92ee8c2256
commit 1abbedd732
5 changed files with 1141 additions and 155 deletions
--- a/backend/python/diffusers/backend.py
+++ b/backend/python/diffusers/backend.py
@@ -1,4 +1,10 @@
 #!/usr/bin/env python3
+"""
+LocalAI Diffusers Backend
+
+This backend provides gRPC access to diffusers pipelines with dynamic pipeline loading.
+New pipelines added to diffusers become available automatically without code changes.
+"""
 from concurrent import futures
 import traceback
 import argparse
@@ -17,14 +23,22 @@ import backend_pb2_grpc

 import grpc

-from diffusers import SanaPipeline, StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
-    EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel, QwenImageEditPipeline, AutoencoderKLWan, WanPipeline, WanImageToVideoPipeline
-from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline, Lumina2Text2ImgPipeline
+# Import dynamic loader for pipeline discovery
+from diffusers_dynamic_loader import (
+    get_pipeline_registry,
+    resolve_pipeline_class,
+    get_available_pipelines,
+    load_diffusers_pipeline,
+)
+
+# Import specific items still needed for special cases and safety checker
+from diffusers import DiffusionPipeline, ControlNetModel
+from diffusers import FluxPipeline, FluxTransformer2DModel, AutoencoderKLWan
 from diffusers.pipelines.stable_diffusion import safety_checker
 from diffusers.utils import load_image, export_to_video
 from compel import Compel, ReturnedEmbeddingsType
 from optimum.quanto import freeze, qfloat8, quantize
-from transformers import CLIPTextModel, T5EncoderModel
+from transformers import T5EncoderModel
 from safetensors.torch import load_file

 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
@@ -158,6 +172,165 @@ def get_scheduler(name: str, config: dict = {}):

 # Implement the BackendServicer class with the service methods
 class BackendServicer(backend_pb2_grpc.BackendServicer):
+
+    def _load_pipeline(self, request, modelFile, fromSingleFile, torchType, variant):
+        """
+        Load a diffusers pipeline dynamically using the dynamic loader.
+
+        This method uses load_diffusers_pipeline() for most pipelines, falling back
+        to explicit handling only for pipelines requiring custom initialization
+        (e.g., quantization, special VAE handling).
+
+        Args:
+            request: The gRPC request containing pipeline configuration
+            modelFile: Path to the model file (for single file loading)
+            fromSingleFile: Whether to use from_single_file() vs from_pretrained()
+            torchType: The torch dtype to use
+            variant: Model variant (e.g., "fp16")
+
+        Returns:
+            The loaded pipeline instance
+        """
+        pipeline_type = request.PipelineType
+
+        # Handle IMG2IMG request flag with default pipeline
+        if request.IMG2IMG and pipeline_type == "":
+            pipeline_type = "StableDiffusionImg2ImgPipeline"
+
+        # ================================================================
+        # Special cases requiring custom initialization logic
+        # Only handle pipelines that truly need custom code (quantization,
+        # special VAE handling, etc.). All other pipelines use dynamic loading.
+        # ================================================================
+
+        # FluxTransformer2DModel - requires quantization and custom transformer loading
+        if pipeline_type == "FluxTransformer2DModel":
+            dtype = torch.bfloat16
+            bfl_repo = os.environ.get("BFL_REPO", "ChuckMcSneed/FLUX.1-dev")
+
+            transformer = FluxTransformer2DModel.from_single_file(modelFile, torch_dtype=dtype)
+            quantize(transformer, weights=qfloat8)
+            freeze(transformer)
+            text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype)
+            quantize(text_encoder_2, weights=qfloat8)
+            freeze(text_encoder_2)
+
+            pipe = FluxPipeline.from_pretrained(bfl_repo, transformer=None, text_encoder_2=None, torch_dtype=dtype)
+            pipe.transformer = transformer
+            pipe.text_encoder_2 = text_encoder_2
+
+            if request.LowVRAM:
+                pipe.enable_model_cpu_offload()
+            return pipe
+
+        # WanPipeline - requires special VAE with float32 dtype
+        if pipeline_type == "WanPipeline":
+            vae = AutoencoderKLWan.from_pretrained(
+                request.Model,
+                subfolder="vae",
+                torch_dtype=torch.float32
+            )
+            pipe = load_diffusers_pipeline(
+                class_name="WanPipeline",
+                model_id=request.Model,
+                vae=vae,
+                torch_dtype=torchType
+            )
+            self.txt2vid = True
+            return pipe
+
+        # WanImageToVideoPipeline - requires special VAE with float32 dtype
+        if pipeline_type == "WanImageToVideoPipeline":
+            vae = AutoencoderKLWan.from_pretrained(
+                request.Model,
+                subfolder="vae",
+                torch_dtype=torch.float32
+            )
+            pipe = load_diffusers_pipeline(
+                class_name="WanImageToVideoPipeline",
+                model_id=request.Model,
+                vae=vae,
+                torch_dtype=torchType
+            )
+            self.img2vid = True
+            return pipe
+
+        # SanaPipeline - requires special VAE and text encoder dtype conversion
+        if pipeline_type == "SanaPipeline":
+            pipe = load_diffusers_pipeline(
+                class_name="SanaPipeline",
+                model_id=request.Model,
+                variant="bf16",
+                torch_dtype=torch.bfloat16
+            )
+            pipe.vae.to(torch.bfloat16)
+            pipe.text_encoder.to(torch.bfloat16)
+            return pipe
+
+        # VideoDiffusionPipeline - alias for DiffusionPipeline with txt2vid flag
+        if pipeline_type == "VideoDiffusionPipeline":
+            self.txt2vid = True
+            pipe = load_diffusers_pipeline(
+                class_name="DiffusionPipeline",
+                model_id=request.Model,
+                torch_dtype=torchType
+            )
+            return pipe
+
+        # StableVideoDiffusionPipeline - needs img2vid flag and CPU offload
+        if pipeline_type == "StableVideoDiffusionPipeline":
+            self.img2vid = True
+            pipe = load_diffusers_pipeline(
+                class_name="StableVideoDiffusionPipeline",
+                model_id=request.Model,
+                torch_dtype=torchType,
+                variant=variant
+            )
+            if not DISABLE_CPU_OFFLOAD:
+                pipe.enable_model_cpu_offload()
+            return pipe
+
+        # ================================================================
+        # Dynamic pipeline loading - the default path for most pipelines
+        # Uses the dynamic loader to instantiate any pipeline by class name
+        # ================================================================
+
+        # Build kwargs for dynamic loading
+        load_kwargs = {"torch_dtype": torchType}
+
+        # Add variant if not loading from single file
+        if not fromSingleFile and variant:
+            load_kwargs["variant"] = variant
+
+        # Add use_safetensors for from_pretrained
+        if not fromSingleFile:
+            load_kwargs["use_safetensors"] = SAFETENSORS
+
+        # Determine pipeline class name - default to AutoPipelineForText2Image
+        effective_pipeline_type = pipeline_type if pipeline_type else "AutoPipelineForText2Image"
+
+        # Use dynamic loader for all pipelines
+        try:
+            pipe = load_diffusers_pipeline(
+                class_name=effective_pipeline_type,
+                model_id=modelFile if fromSingleFile else request.Model,
+                from_single_file=fromSingleFile,
+                **load_kwargs
+            )
+        except Exception as e:
+            # Provide helpful error with available pipelines
+            available = get_available_pipelines()
+            raise ValueError(
+                f"Failed to load pipeline '{effective_pipeline_type}': {e}\n"
+                f"Available pipelines: {', '.join(available[:30])}..."
+            ) from e
+
+        # Apply LowVRAM optimization if supported and requested
+        if request.LowVRAM and hasattr(pipe, 'enable_model_cpu_offload'):
+            pipe.enable_model_cpu_offload()
+
+        return pipe
+
    def Health(self, request, context):
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

@@ -231,139 +404,16 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
            fromSingleFile = request.Model.startswith("http") or request.Model.startswith("/") or local
            self.img2vid = False
            self.txt2vid = False
-            ## img2img
-            if (request.PipelineType == "StableDiffusionImg2ImgPipeline") or (request.IMG2IMG and request.PipelineType == ""):
-                if fromSingleFile:
-                    self.pipe = StableDiffusionImg2ImgPipeline.from_single_file(modelFile,
-                                                                                torch_dtype=torchType)
-                else:
-                    self.pipe = StableDiffusionImg2ImgPipeline.from_pretrained(request.Model,
-                                                                               torch_dtype=torchType)

-            elif request.PipelineType == "StableDiffusionDepth2ImgPipeline":
-                self.pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(request.Model,
-                                                                             torch_dtype=torchType)
-            ## img2vid
-            elif request.PipelineType == "StableVideoDiffusionPipeline":
-                self.img2vid = True
-                self.pipe = StableVideoDiffusionPipeline.from_pretrained(
-                    request.Model, torch_dtype=torchType, variant=variant
-                )
-                if not DISABLE_CPU_OFFLOAD:
-                    self.pipe.enable_model_cpu_offload()
-            ## text2img
-            elif request.PipelineType == "AutoPipelineForText2Image" or request.PipelineType == "":
-                self.pipe = AutoPipelineForText2Image.from_pretrained(request.Model,
-                                                                      torch_dtype=torchType,
-                                                                      use_safetensors=SAFETENSORS,
-                                                                      variant=variant)
-            elif request.PipelineType == "StableDiffusionPipeline":
-                if fromSingleFile:
-                    self.pipe = StableDiffusionPipeline.from_single_file(modelFile,
-                                                                         torch_dtype=torchType)
-                else:
-                    self.pipe = StableDiffusionPipeline.from_pretrained(request.Model,
-                                                                        torch_dtype=torchType)
-            elif request.PipelineType == "DiffusionPipeline":
-                self.pipe = DiffusionPipeline.from_pretrained(request.Model,
-                                                              torch_dtype=torchType)
-            elif request.PipelineType == "QwenImageEditPipeline":
-                self.pipe = QwenImageEditPipeline.from_pretrained(request.Model,
-                                                                 torch_dtype=torchType)
-            elif request.PipelineType == "VideoDiffusionPipeline":
-                self.txt2vid = True
-                self.pipe = DiffusionPipeline.from_pretrained(request.Model,
-                                                              torch_dtype=torchType)
-            elif request.PipelineType == "StableDiffusionXLPipeline":
-                if fromSingleFile:
-                    self.pipe = StableDiffusionXLPipeline.from_single_file(modelFile,
-                                                                           torch_dtype=torchType,
-                                                                           use_safetensors=True)
-                else:
-                    self.pipe = StableDiffusionXLPipeline.from_pretrained(
-                        request.Model,
-                        torch_dtype=torchType,
-                        use_safetensors=True,
-                        variant=variant)
-            elif request.PipelineType == "StableDiffusion3Pipeline":
-                if fromSingleFile:
-                    self.pipe = StableDiffusion3Pipeline.from_single_file(modelFile,
-                                                                          torch_dtype=torchType,
-                                                                          use_safetensors=True)
-                else:
-                    self.pipe = StableDiffusion3Pipeline.from_pretrained(
-                        request.Model,
-                        torch_dtype=torchType,
-                        use_safetensors=True,
-                        variant=variant)
-            elif request.PipelineType == "FluxPipeline":
-                if fromSingleFile:
-                    self.pipe = FluxPipeline.from_single_file(modelFile,
-                                                              torch_dtype=torchType,
-                                                              use_safetensors=True)
-                else:
-                    self.pipe = FluxPipeline.from_pretrained(
-                        request.Model,
-                        torch_dtype=torch.bfloat16)
-                if request.LowVRAM:
-                    self.pipe.enable_model_cpu_offload()
-            elif request.PipelineType == "FluxTransformer2DModel":
-                    dtype = torch.bfloat16
-                    # specify from environment or default to "ChuckMcSneed/FLUX.1-dev"
-                    bfl_repo = os.environ.get("BFL_REPO", "ChuckMcSneed/FLUX.1-dev")
-
-                    transformer = FluxTransformer2DModel.from_single_file(modelFile, torch_dtype=dtype)
-                    quantize(transformer, weights=qfloat8)
-                    freeze(transformer)
-                    text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype)
-                    quantize(text_encoder_2, weights=qfloat8)
-                    freeze(text_encoder_2)
-
-                    self.pipe = FluxPipeline.from_pretrained(bfl_repo, transformer=None, text_encoder_2=None, torch_dtype=dtype)
-                    self.pipe.transformer = transformer
-                    self.pipe.text_encoder_2 = text_encoder_2
-
-                    if request.LowVRAM:
-                        self.pipe.enable_model_cpu_offload()
-            elif request.PipelineType == "Lumina2Text2ImgPipeline":
-                self.pipe = Lumina2Text2ImgPipeline.from_pretrained(
-                    request.Model,
-                    torch_dtype=torch.bfloat16)
-                if request.LowVRAM:
-                    self.pipe.enable_model_cpu_offload()
-            elif request.PipelineType == "SanaPipeline":
-                self.pipe = SanaPipeline.from_pretrained(
-                    request.Model,
-                    variant="bf16",
-                    torch_dtype=torch.bfloat16)
-                self.pipe.vae.to(torch.bfloat16)
-                self.pipe.text_encoder.to(torch.bfloat16)
-            elif request.PipelineType == "WanPipeline":
-                # WAN2.2 pipeline requires special VAE handling
-                vae = AutoencoderKLWan.from_pretrained(
-                    request.Model, 
-                    subfolder="vae", 
-                    torch_dtype=torch.float32
-                )
-                self.pipe = WanPipeline.from_pretrained(
-                    request.Model,
-                    vae=vae,
-                    torch_dtype=torchType
-                )
-                self.txt2vid = True  # WAN2.2 is a text-to-video pipeline
-            elif request.PipelineType == "WanImageToVideoPipeline":
-                # WAN2.2 image-to-video pipeline
-                vae = AutoencoderKLWan.from_pretrained(
-                    request.Model, 
-                    subfolder="vae", 
-                    torch_dtype=torch.float32
-                )
-                self.pipe = WanImageToVideoPipeline.from_pretrained(
-                    request.Model,
-                    vae=vae,
-                    torch_dtype=torchType
-                )
-                self.img2vid = True  # WAN2.2 image-to-video pipeline
+            # Load pipeline using dynamic loader
+            # Special cases that require custom initialization are handled first
+            self.pipe = self._load_pipeline(
+                request=request,
+                modelFile=modelFile,
+                fromSingleFile=fromSingleFile,
+                torchType=torchType,
+                variant=variant
+            )

            if CLIPSKIP and request.CLIPSkip != 0:
                self.clip_skip = request.CLIPSkip
@@ -501,10 +551,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

        # create a dictionary of values for the parameters
        options = {
-            "negative_prompt": request.negative_prompt,
            "num_inference_steps": steps,
        }

+        if hasattr(request, 'negative_prompt') and request.negative_prompt != "":
+            options["negative_prompt"] = request.negative_prompt
+
        # Handle image source: prioritize RefImages over request.src
        image_src = None
        if hasattr(request, 'ref_images') and request.ref_images and len(request.ref_images) > 0:
@@ -528,17 +580,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
        if CLIPSKIP and self.clip_skip != 0:
            options["clip_skip"] = self.clip_skip

-        # Get the keys that we will build the args for our pipe for
-        keys = options.keys()
-
-        if request.EnableParameters != "":
-            keys = [key.strip() for key in request.EnableParameters.split(",")]
-
-        if request.EnableParameters == "none":
-            keys = []
-
-        # create a dictionary of parameters by using the keys from EnableParameters and the values from defaults
-        kwargs = {key: options.get(key) for key in keys if key in options}
+        kwargs = {}

        # populate kwargs from self.options.
        kwargs.update(self.options)