feat(diffusers): add experimental support for sd_embed-style prompt embedding (#8504)

* add experimental support for sd_embed-style prompt embedding

Signed-off-by: Austen Dicken <cvpcsm@gmail.com>

* add doc equivalent to compel

Signed-off-by: Austen Dicken <cvpcsm@gmail.com>

* need to use flux1 embedding function for flux model

Signed-off-by: Austen Dicken <cvpcsm@gmail.com>

---------

Signed-off-by: Austen Dicken <cvpcsm@gmail.com>
This commit is contained in:
Austen
2026-02-11 15:58:19 -06:00
committed by GitHub
parent 79a25f7ae9
commit cff972094c
10 changed files with 58 additions and 0 deletions

3
.env
View File

@@ -26,6 +26,9 @@
## Disables COMPEL (Diffusers)
# COMPEL=0
## Disables SD_EMBED (Diffusers)
# SD_EMBED=0
## Enable/Disable single backend (useful if only one GPU is available)
# LOCALAI_SINGLE_ACTIVE_BACKEND=true

View File

@@ -115,6 +115,7 @@ Available pipelines: AnimateDiffPipeline, AnimateDiffVideoToVideoPipeline, ...
| Variable | Default | Description |
|----------|---------|-------------|
| `COMPEL` | `0` | Set to `1` to enable Compel prompt weighting (disabled by default) |
| `SD_EMBED` | `0` | Set to `1` to enable sd_embed prompt weighting (disabled by default) |
| `XPU` | `0` | Enable Intel XPU support |
| `CLIPSKIP` | `1` | Enable CLIP skip support |
| `SAFETENSORS` | `1` | Use safetensors format |

View File

@@ -40,6 +40,7 @@ from compel import Compel, ReturnedEmbeddingsType
from optimum.quanto import freeze, qfloat8, quantize
from transformers import T5EncoderModel
from safetensors.torch import load_file
from sd_embed.embedding_funcs import get_weighted_text_embeddings_sd15, get_weighted_text_embeddings_sdxl, get_weighted_text_embeddings_sd3, get_weighted_text_embeddings_flux1
# Import LTX-2 specific utilities
from diffusers.pipelines.ltx2.export_utils import encode_video as ltx2_encode_video
@@ -47,6 +48,7 @@ from diffusers import LTX2VideoTransformer3DModel, GGUFQuantizationConfig
# One day expressed in seconds (usage not visible in this hunk — presumably
# the gRPC serve-loop sleep interval; TODO confirm).
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
# Feature flags read from the environment; each is True only when the
# variable is exactly the string "1".
COMPEL = os.environ.get("COMPEL", "0") == "1"  # Compel prompt weighting (off by default)
SD_EMBED = os.environ.get("SD_EMBED", "0") == "1"  # sd_embed prompt weighting (off by default)
XPU = os.environ.get("XPU", "0") == "1"  # Intel XPU support (off by default)
CLIPSKIP = os.environ.get("CLIPSKIP", "1") == "1"  # CLIP skip support (on by default)
SAFETENSORS = os.environ.get("SAFETENSORS", "1") == "1"  # use safetensors format (on by default)
@@ -737,6 +739,51 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
kwargs["prompt_embeds"] = conditioning
kwargs["pooled_prompt_embeds"] = pooled
# pass the kwargs dictionary to the self.pipe method
image = self.pipe(
guidance_scale=self.cfg_scale,
**kwargs
).images[0]
# sd_embed prompt weighting: select the embedding helper matching the active
# pipeline class, precompute the weighted text embeddings, and hand them to
# the pipeline via kwargs instead of a raw prompt string.
# NOTE(review): the PipelineType checks below are independent `if`s, not
# `elif`s — the compared strings are mutually exclusive so behavior is the
# same, but `elif` would make the single-dispatch intent explicit.
elif SD_EMBED:
# SD 1.5 helper returns a 2-tuple: (prompt_embeds, negative_prompt_embeds).
if self.PipelineType == "StableDiffusionPipeline":
(
kwargs["prompt_embeds"],
kwargs["negative_prompt_embeds"],
) = get_weighted_text_embeddings_sd15(
pipe = self.pipe,
prompt = prompt,
# hasattr guard: requests without a negative prompt pass None through
neg_prompt = request.negative_prompt if hasattr(request, 'negative_prompt') else None,
)
# SDXL helper returns a 4-tuple including the pooled embeddings.
if self.PipelineType == "StableDiffusionXLPipeline":
(
kwargs["prompt_embeds"],
kwargs["negative_prompt_embeds"],
kwargs["pooled_prompt_embeds"],
kwargs["negative_pooled_prompt_embeds"],
) = get_weighted_text_embeddings_sdxl(
pipe = self.pipe,
prompt = prompt,
neg_prompt = request.negative_prompt if hasattr(request, 'negative_prompt') else None
)
# SD3 helper has the same 4-tuple shape as SDXL.
if self.PipelineType == "StableDiffusion3Pipeline":
(
kwargs["prompt_embeds"],
kwargs["negative_prompt_embeds"],
kwargs["pooled_prompt_embeds"],
kwargs["negative_pooled_prompt_embeds"],
) = get_weighted_text_embeddings_sd3(
pipe = self.pipe,
prompt = prompt,
neg_prompt = request.negative_prompt if hasattr(request, 'negative_prompt') else None
)
# Flux helper returns only (prompt_embeds, pooled_prompt_embeds); Flux takes
# no negative prompt here.
# NOTE(review): "FluxTransformer2DModel" is a transformer class name, while
# the sibling branches compare against *Pipeline class names — confirm
# self.PipelineType is actually set to this value for Flux models.
if self.PipelineType == "FluxTransformer2DModel":
(
kwargs["prompt_embeds"],
kwargs["pooled_prompt_embeds"],
) = get_weighted_text_embeddings_flux1(
pipe = self.pipe,
prompt = prompt,
)
image = self.pipe(
guidance_scale=self.cfg_scale,
**kwargs

View File

@@ -5,6 +5,7 @@ transformers
torchvision==0.22.1
accelerate
compel
git+https://github.com/xhinker/sd_embed
peft
sentencepiece
torch==2.7.1

View File

@@ -5,6 +5,7 @@ transformers
torchvision
accelerate
compel
git+https://github.com/xhinker/sd_embed
peft
sentencepiece
torch

View File

@@ -5,6 +5,7 @@ transformers
torchvision
accelerate
compel
git+https://github.com/xhinker/sd_embed
peft
sentencepiece
torch

View File

@@ -8,6 +8,7 @@ opencv-python
transformers
accelerate
compel
git+https://github.com/xhinker/sd_embed
peft
sentencepiece
optimum-quanto

View File

@@ -4,6 +4,7 @@ git+https://github.com/huggingface/diffusers
transformers
accelerate
compel
git+https://github.com/xhinker/sd_embed
peft
optimum-quanto
numpy<2

View File

@@ -4,6 +4,7 @@ git+https://github.com/huggingface/diffusers
transformers
accelerate
compel
git+https://github.com/xhinker/sd_embed
peft
optimum-quanto
numpy<2

View File

@@ -5,6 +5,7 @@ opencv-python
transformers
accelerate
compel
git+https://github.com/xhinker/sd_embed
peft
sentencepiece
optimum-quanto