mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-12 23:55:04 -05:00
feat(diffusers): add experimental support for sd_embed-style prompt embedding (#8504)
* add experimental support for sd_embed-style prompt embedding
  Signed-off-by: Austen Dicken <cvpcsm@gmail.com>
* add doc equivalent to compel
  Signed-off-by: Austen Dicken <cvpcsm@gmail.com>
* need to use flux1 embedding function for flux model
  Signed-off-by: Austen Dicken <cvpcsm@gmail.com>
---------
Signed-off-by: Austen Dicken <cvpcsm@gmail.com>
This commit is contained in:
3
.env
3
.env
@@ -26,6 +26,9 @@
|
||||
## Enables (1) or disables (0, the default) COMPEL prompt weighting (Diffusers)
|
||||
# COMPEL=0
|
||||
|
||||
## Enables (1) or disables (0, the default) SD_EMBED prompt weighting (Diffusers)
|
||||
# SD_EMBED=0
|
||||
|
||||
## Enable/Disable single backend (useful if only one GPU is available)
|
||||
# LOCALAI_SINGLE_ACTIVE_BACKEND=true
|
||||
|
||||
|
||||
@@ -115,6 +115,7 @@ Available pipelines: AnimateDiffPipeline, AnimateDiffVideoToVideoPipeline, ...
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `COMPEL` | `0` | Enable Compel for prompt weighting |
|
||||
| `SD_EMBED` | `0` | Enable sd_embed for prompt weighting (experimental) |
|
||||
| `XPU` | `0` | Enable Intel XPU support |
|
||||
| `CLIPSKIP` | `1` | Enable CLIP skip support |
|
||||
| `SAFETENSORS` | `1` | Use safetensors format |
|
||||
|
||||
@@ -40,6 +40,7 @@ from compel import Compel, ReturnedEmbeddingsType
|
||||
from optimum.quanto import freeze, qfloat8, quantize
|
||||
from transformers import T5EncoderModel
|
||||
from safetensors.torch import load_file
|
||||
from sd_embed.embedding_funcs import get_weighted_text_embeddings_sd15, get_weighted_text_embeddings_sdxl, get_weighted_text_embeddings_sd3, get_weighted_text_embeddings_flux1
|
||||
|
||||
# Import LTX-2 specific utilities
|
||||
from diffusers.pipelines.ltx2.export_utils import encode_video as ltx2_encode_video
|
||||
@@ -47,6 +48,7 @@ from diffusers import LTX2VideoTransformer3DModel, GGUFQuantizationConfig
|
||||
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
COMPEL = os.environ.get("COMPEL", "0") == "1"
|
||||
SD_EMBED = os.environ.get("SD_EMBED", "0") == "1"
|
||||
XPU = os.environ.get("XPU", "0") == "1"
|
||||
CLIPSKIP = os.environ.get("CLIPSKIP", "1") == "1"
|
||||
SAFETENSORS = os.environ.get("SAFETENSORS", "1") == "1"
|
||||
@@ -737,6 +739,51 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
kwargs["prompt_embeds"] = conditioning
|
||||
kwargs["pooled_prompt_embeds"] = pooled
|
||||
# pass the kwargs dictionary to the self.pipe method
|
||||
image = self.pipe(
|
||||
guidance_scale=self.cfg_scale,
|
||||
**kwargs
|
||||
).images[0]
|
||||
elif SD_EMBED:
|
||||
if self.PipelineType == "StableDiffusionPipeline":
|
||||
(
|
||||
kwargs["prompt_embeds"],
|
||||
kwargs["negative_prompt_embeds"],
|
||||
) = get_weighted_text_embeddings_sd15(
|
||||
pipe = self.pipe,
|
||||
prompt = prompt,
|
||||
neg_prompt = request.negative_prompt if hasattr(request, 'negative_prompt') else None,
|
||||
)
|
||||
if self.PipelineType == "StableDiffusionXLPipeline":
|
||||
(
|
||||
kwargs["prompt_embeds"],
|
||||
kwargs["negative_prompt_embeds"],
|
||||
kwargs["pooled_prompt_embeds"],
|
||||
kwargs["negative_pooled_prompt_embeds"],
|
||||
) = get_weighted_text_embeddings_sdxl(
|
||||
pipe = self.pipe,
|
||||
prompt = prompt,
|
||||
neg_prompt = request.negative_prompt if hasattr(request, 'negative_prompt') else None
|
||||
)
|
||||
if self.PipelineType == "StableDiffusion3Pipeline":
|
||||
(
|
||||
kwargs["prompt_embeds"],
|
||||
kwargs["negative_prompt_embeds"],
|
||||
kwargs["pooled_prompt_embeds"],
|
||||
kwargs["negative_pooled_prompt_embeds"],
|
||||
) = get_weighted_text_embeddings_sd3(
|
||||
pipe = self.pipe,
|
||||
prompt = prompt,
|
||||
neg_prompt = request.negative_prompt if hasattr(request, 'negative_prompt') else None
|
||||
)
|
||||
if self.PipelineType == "FluxTransformer2DModel":
|
||||
(
|
||||
kwargs["prompt_embeds"],
|
||||
kwargs["pooled_prompt_embeds"],
|
||||
) = get_weighted_text_embeddings_flux1(
|
||||
pipe = self.pipe,
|
||||
prompt = prompt,
|
||||
)
|
||||
|
||||
image = self.pipe(
|
||||
guidance_scale=self.cfg_scale,
|
||||
**kwargs
|
||||
|
||||
@@ -5,6 +5,7 @@ transformers
|
||||
torchvision==0.22.1
|
||||
accelerate
|
||||
compel
|
||||
git+https://github.com/xhinker/sd_embed
|
||||
peft
|
||||
sentencepiece
|
||||
torch==2.7.1
|
||||
|
||||
@@ -5,6 +5,7 @@ transformers
|
||||
torchvision
|
||||
accelerate
|
||||
compel
|
||||
git+https://github.com/xhinker/sd_embed
|
||||
peft
|
||||
sentencepiece
|
||||
torch
|
||||
|
||||
@@ -5,6 +5,7 @@ transformers
|
||||
torchvision
|
||||
accelerate
|
||||
compel
|
||||
git+https://github.com/xhinker/sd_embed
|
||||
peft
|
||||
sentencepiece
|
||||
torch
|
||||
|
||||
@@ -8,6 +8,7 @@ opencv-python
|
||||
transformers
|
||||
accelerate
|
||||
compel
|
||||
git+https://github.com/xhinker/sd_embed
|
||||
peft
|
||||
sentencepiece
|
||||
optimum-quanto
|
||||
|
||||
@@ -4,6 +4,7 @@ git+https://github.com/huggingface/diffusers
|
||||
transformers
|
||||
accelerate
|
||||
compel
|
||||
git+https://github.com/xhinker/sd_embed
|
||||
peft
|
||||
optimum-quanto
|
||||
numpy<2
|
||||
|
||||
@@ -4,6 +4,7 @@ git+https://github.com/huggingface/diffusers
|
||||
transformers
|
||||
accelerate
|
||||
compel
|
||||
git+https://github.com/xhinker/sd_embed
|
||||
peft
|
||||
optimum-quanto
|
||||
numpy<2
|
||||
|
||||
@@ -5,6 +5,7 @@ opencv-python
|
||||
transformers
|
||||
accelerate
|
||||
compel
|
||||
git+https://github.com/xhinker/sd_embed
|
||||
peft
|
||||
sentencepiece
|
||||
optimum-quanto
|
||||
|
||||
Reference in New Issue
Block a user