Compare commits

..

1 Commits

Author SHA1 Message Date
Ettore Di Giacinto
9c40d9bbed feat(diffusers): add builds for nvidia-l4t
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-08-08 22:50:40 +02:00
10 changed files with 89 additions and 143 deletions

View File

@@ -90,7 +90,7 @@ jobs:
- build-type: 'l4t'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-l4t-diffusers'
runs-on: 'ubuntu-24.04-arm'

View File

@@ -23,20 +23,6 @@ jobs:
matrix:
go-version: ['1.21.x']
steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
# this might remove tools that are actually needed,
# if set to "true" but frees about 6 GB
tool-cache: true
# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true
- name: Release space from worker
run: |
echo "Listing top largest packages"

View File

@@ -1,5 +1,5 @@
LLAMA_VERSION?=be48528b068111304e4a0bb82c028558b5705f05
LLAMA_VERSION?=a0552c8beef74e843bb085c8ef0c63f9ed7a2b27
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=

View File

@@ -6,7 +6,7 @@ CMAKE_ARGS?=
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
WHISPER_CPP_VERSION?=b02242d0adb5c6c4896d59ac86d9ec9fe0d0fe33
WHISPER_CPP_VERSION?=4245c77b654cd384ad9f53a4a302be716b3e5861
export WHISPER_CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
export WHISPER_DIR=$(abspath ./sources/whisper.cpp)

View File

@@ -6,5 +6,4 @@ accelerate
compel
peft
optimum-quanto
numpy<2
sentencepiece
numpy<2

View File

@@ -59,10 +59,8 @@ func New(opts ...config.AppOption) (*Application, error) {
log.Error().Err(err).Msg("error installing models")
}
for _, backend := range options.ExternalBackends {
if err := coreStartup.InstallExternalBackends(options.BackendGalleries, options.BackendsPath, nil, backend, "", ""); err != nil {
log.Error().Err(err).Msg("error installing external backend")
}
if err := coreStartup.InstallExternalBackends(options.BackendGalleries, options.BackendsPath, nil, options.ExternalBackends...); err != nil {
log.Error().Err(err).Msg("error installing external backends")
}
configLoaderOpts := options.ToConfigLoaderOptions()

View File

@@ -23,9 +23,7 @@ type BackendsList struct {
}
type BackendsInstall struct {
BackendArgs string `arg:"" optional:"" name:"backend" help:"Backend configuration URL to load"`
Name string `arg:"" optional:"" name:"name" help:"Name of the backend"`
Alias string `arg:"" optional:"" name:"alias" help:"Alias of the backend"`
BackendArgs []string `arg:"" optional:"" name:"backends" help:"Backend configuration URLs to load"`
BackendsCMDFlags `embed:""`
}
@@ -68,25 +66,27 @@ func (bi *BackendsInstall) Run(ctx *cliContext.Context) error {
log.Error().Err(err).Msg("unable to load galleries")
}
progressBar := progressbar.NewOptions(
1000,
progressbar.OptionSetDescription(fmt.Sprintf("downloading backend %s", bi.BackendArgs)),
progressbar.OptionShowBytes(false),
progressbar.OptionClearOnFinish(),
)
progressCallback := func(fileName string, current string, total string, percentage float64) {
v := int(percentage * 10)
err := progressBar.Set(v)
for _, backendName := range bi.BackendArgs {
progressBar := progressbar.NewOptions(
1000,
progressbar.OptionSetDescription(fmt.Sprintf("downloading backend %s", backendName)),
progressbar.OptionShowBytes(false),
progressbar.OptionClearOnFinish(),
)
progressCallback := func(fileName string, current string, total string, percentage float64) {
v := int(percentage * 10)
err := progressBar.Set(v)
if err != nil {
log.Error().Err(err).Str("filename", fileName).Int("value", v).Msg("error while updating progress bar")
}
}
err := startup.InstallExternalBackends(galleries, bi.BackendsPath, progressCallback, backendName)
if err != nil {
log.Error().Err(err).Str("filename", fileName).Int("value", v).Msg("error while updating progress bar")
return err
}
}
err := startup.InstallExternalBackends(galleries, bi.BackendsPath, progressCallback, bi.BackendArgs, bi.Name, bi.Alias)
if err != nil {
return err
}
return nil
}

View File

@@ -1,6 +1,7 @@
package startup
import (
"errors"
"fmt"
"path/filepath"
"strings"
@@ -12,68 +13,49 @@ import (
"github.com/rs/zerolog/log"
)
func InstallExternalBackends(galleries []config.Gallery, backendPath string, downloadStatus func(string, string, string, float64), backend, name, alias string) error {
func InstallExternalBackends(galleries []config.Gallery, backendPath string, downloadStatus func(string, string, string, float64), backends ...string) error {
var errs error
systemState, err := system.GetSystemState()
if err != nil {
return fmt.Errorf("failed to get system state: %w", err)
}
uri := downloader.URI(backend)
switch {
case uri.LooksLikeDir():
if name == "" { // infer it from the path
name = filepath.Base(backend)
}
log.Info().Str("backend", backend).Str("name", name).Msg("Installing backend from path")
if err := gallery.InstallBackend(backendPath, &gallery.GalleryBackend{
Metadata: gallery.Metadata{
Name: name,
},
Alias: alias,
URI: backend,
}, downloadStatus); err != nil {
return fmt.Errorf("error installing backend %s: %w", backend, err)
}
case uri.LooksLikeOCI() && !uri.LooksLikeOCIFile():
if name == "" {
return fmt.Errorf("specifying a name is required for OCI images")
}
log.Info().Str("backend", backend).Str("name", name).Msg("Installing backend from OCI image")
if err := gallery.InstallBackend(backendPath, &gallery.GalleryBackend{
Metadata: gallery.Metadata{
Name: name,
},
Alias: alias,
URI: backend,
}, downloadStatus); err != nil {
return fmt.Errorf("error installing backend %s: %w", backend, err)
}
case uri.LooksLikeOCIFile():
name, err := uri.FilenameFromUrl()
if err != nil {
return fmt.Errorf("failed to get filename from URL: %w", err)
}
// strip extension if any
name = strings.TrimSuffix(name, filepath.Ext(name))
for _, backend := range backends {
uri := downloader.URI(backend)
switch {
case uri.LooksLikeDir():
name := filepath.Base(backend)
log.Info().Str("backend", backend).Str("name", name).Msg("Installing backend from path")
if err := gallery.InstallBackend(backendPath, &gallery.GalleryBackend{
Metadata: gallery.Metadata{
Name: name,
},
URI: backend,
}, downloadStatus); err != nil {
errs = errors.Join(err, fmt.Errorf("error installing backend %s", backend))
}
case uri.LooksLikeOCI():
name, err := uri.FilenameFromUrl()
if err != nil {
return fmt.Errorf("failed to get filename from URL: %w", err)
}
// strip extension if any
name = strings.TrimSuffix(name, filepath.Ext(name))
log.Info().Str("backend", backend).Str("name", name).Msg("Installing backend from OCI image")
if err := gallery.InstallBackend(backendPath, &gallery.GalleryBackend{
Metadata: gallery.Metadata{
Name: name,
},
Alias: alias,
URI: backend,
}, downloadStatus); err != nil {
return fmt.Errorf("error installing backend %s: %w", backend, err)
}
default:
if name != "" || alias != "" {
return fmt.Errorf("specifying a name or alias is not supported for this backend")
}
err := gallery.InstallBackendFromGallery(galleries, systemState, backend, backendPath, downloadStatus, true)
if err != nil {
return fmt.Errorf("error installing backend %s: %w", backend, err)
log.Info().Str("backend", backend).Str("name", name).Msg("Installing backend from OCI image")
if err := gallery.InstallBackend(backendPath, &gallery.GalleryBackend{
Metadata: gallery.Metadata{
Name: name,
},
URI: backend,
}, downloadStatus); err != nil {
errs = errors.Join(err, fmt.Errorf("error installing backend %s", backend))
}
default:
err := gallery.InstallBackendFromGallery(galleries, systemState, backend, backendPath, downloadStatus, true)
if err != nil {
errs = errors.Join(err, fmt.Errorf("error installing backend %s", backend))
}
}
}
return nil
return errs
}

View File

@@ -105,8 +105,8 @@
model: gpt-oss-20b-mxfp4.gguf
files:
- filename: gpt-oss-20b-mxfp4.gguf
sha256: 52f57ab7d3df3ba9173827c1c6832e73375553a846f3e32b49f1ae2daad688d4
uri: huggingface://ggml-org/gpt-oss-20b-GGUF/gpt-oss-20b-mxfp4.gguf
sha256: be37a636aca0fc1aae0d32325f82f6b4d21495f06823b5fbc1898ae0303e9935
- !!merge <<: *gptoss
name: "gpt-oss-120b"
url: "github:mudler/LocalAI/gallery/harmony.yaml@master"
@@ -119,14 +119,14 @@
model: gpt-oss-120b-mxfp4-00001-of-00003.gguf
files:
- filename: gpt-oss-120b-mxfp4-00001-of-00003.gguf
sha256: 40b630223b9fc43820fa0aae5d0ab61020f5858d1719642357753dca9e7df29f
uri: huggingface://ggml-org/gpt-oss-120b-GGUF/gpt-oss-120b-mxfp4-00001-of-00003.gguf
sha256: e2865eb6c1df7b2ffbebf305cd5d9074d5ccc0fe3b862f98d343a46dad1606f9
- filename: gpt-oss-120b-mxfp4-00002-of-00003.gguf
sha256: fbdb8cdec70edb82c53bfc69cc0f54a34759a23d317fa0771a63be6571907b38
uri: huggingface://ggml-org/gpt-oss-120b-GGUF/gpt-oss-120b-mxfp4-00002-of-00003.gguf
sha256: 346492f65891fb27cac5c74a8c07626cbfeb4211cd391ec4de37dbbe3109a93b
- filename: gpt-oss-120b-mxfp4-00003-of-00003.gguf
sha256: b326bfd8ac696c4b9a14e9e84d5529b2bb86847aea0e65443cbf075accba8b71
uri: huggingface://ggml-org/gpt-oss-120b-GGUF/gpt-oss-120b-mxfp4-00003-of-00003.gguf
sha256: 66dca81040933f5a49177e82c479c51319cefb83bd22dad9f06dad45e25f1463
- !!merge <<: *gptoss
name: "openai_gpt-oss-20b-neo"
icon: https://huggingface.co/DavidAU/Openai_gpt-oss-20b-NEO-GGUF/resolve/main/matrix1.gif
@@ -312,7 +312,7 @@
- https://huggingface.co/Dream-org/Dream-v0-Instruct-7B
- https://huggingface.co/bartowski/Dream-org_Dream-v0-Instruct-7B-GGUF
description: |
This is the instruct model of Dream 7B, which is an open diffusion large language model with top-tier performance.
This is the instruct model of Dream 7B, which is an open diffusion large language model with top-tier performance.
overrides:
parameters:
model: Dream-org_Dream-v0-Instruct-7B-Q4_K_M.gguf
@@ -14446,21 +14446,6 @@
- filename: entfane_math-genius-7B-Q4_K_M.gguf
sha256: cd3a3c898a2dfb03d17a66db81b743f2d66981e0ceb92e8669a4af61217feed7
uri: huggingface://bartowski/entfane_math-genius-7B-GGUF/entfane_math-genius-7B-Q4_K_M.gguf
- !!merge <<: *mistral03
name: "impish_nemo_12b"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
icon: https://huggingface.co/SicariusSicariiStuff/Impish_Nemo_12B/resolve/main/Images/Impish_Nemo_12B.png
urls:
- https://huggingface.co/SicariusSicariiStuff/Impish_Nemo_12B
- https://huggingface.co/SicariusSicariiStuff/Impish_Nemo_12B_GGUF
description: "August 2025, Impish_Nemo_12B — my best model yet. And unlike a typical Nemo, this one can take in much higher temperatures (works well with 1+). Oh, and regarding following the character card: It somehow gotten even better, to the point of it being straight up uncanny \U0001F643 (I had to check twice that this model was loaded, and not some 70B!)\n\nI feel like this model could easily replace models much larger than itself for adventure or roleplay, for assistant tasks, obviously not, but the creativity here? Off the charts. Characters have never felt so alive and in the moment before — theyll use insinuation, manipulation, and, if needed (or provoked) — force. They feel so very present.\n\nThat look on Neos face when he opened his eyes and said, “I know Kung Fu”? Well, Impish_Nemo_12B had pretty much the same moment — and it now knows more than just Kung Fu, much, much more. It wasnt easy, and its a niche within a niche, but as promised almost half a year ago — it is now done.\n\nImpish_Nemo_12B is smart, sassy, creative, and got a lot of unhingedness too — these are baked-in deep into every interaction. It took the innate Mistral's relative freedom, and turned it up to 11. It very well maybe too much for many, but after testing and interacting with so many models, I find this 'edge' of sorts, rather fun and refreshing.\n\nAnyway, the dataset used is absolutely massive, tons of new types of data and new domains of knowledge (Morrowind fandom, fighting, etc...). The whole dataset is a very well-balanced mix, and resulted in a model with extremely strong common sense for a 12B. Regarding response length — there's almost no response-length bias here, this one is very much dynamic and will easily adjust reply length based on 13 examples of provided dialogue.\n\nOh, and the model comes with 3 new Character Cards, 2 Roleplay and 1 Adventure!\n"
overrides:
parameters:
model: Impish_Nemo_12B-Q6_K.gguf
files:
- filename: Impish_Nemo_12B-Q6_K.gguf
sha256: e0ce3adbed2718e144f477721c2ad68b6e3cccd95fc27dbe8f0135be76c99c72
uri: huggingface://SicariusSicariiStuff/Impish_Nemo_12B_GGUF/Impish_Nemo_12B-Q6_K.gguf
- &mudler
url: "github:mudler/LocalAI/gallery/mudler.yaml@master" ### START mudler's LocalAI specific-models
name: "LocalAI-llama3-8b-function-call-v0.2"
@@ -19475,14 +19460,14 @@
url: "github:mudler/LocalAI/gallery/flux-ggml.yaml@master"
icon: https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev/media/main/teaser.png
description: |
FLUX.1 Kontext [dev] is a 12 billion parameter rectified flow transformer capable of editing images based on text instructions. For more information, please read our blog post and our technical report. You can find information about the [pro] version in here.
Key Features
Change existing images based on an edit instruction.
Have character, style and object reference without any finetuning.
Robust consistency allows users to refine an image through multiple successive edits with minimal visual drift.
Trained using guidance distillation, making FLUX.1 Kontext [dev] more efficient.
Open weights to drive new scientific research, and empower artists to develop innovative workflows.
Generated outputs can be used for personal, scientific, and commercial purposes, as described in the FLUX.1 [dev] Non-Commercial License.
FLUX.1 Kontext [dev] is a 12 billion parameter rectified flow transformer capable of editing images based on text instructions. For more information, please read our blog post and our technical report. You can find information about the [pro] version in here.
Key Features
Change existing images based on an edit instruction.
Have character, style and object reference without any finetuning.
Robust consistency allows users to refine an image through multiple successive edits with minimal visual drift.
Trained using guidance distillation, making FLUX.1 Kontext [dev] more efficient.
Open weights to drive new scientific research, and empower artists to develop innovative workflows.
Generated outputs can be used for personal, scientific, and commercial purposes, as described in the FLUX.1 [dev] Non-Commercial License.
urls:
- https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev
- https://huggingface.co/QuantStack/FLUX.1-Kontext-dev-GGUF

View File

@@ -98,19 +98,19 @@ func (uri URI) DownloadWithAuthorizationAndCallback(basePath string, authorizati
}
func (u URI) FilenameFromUrl() (string, error) {
if f := filenameFromUrl(string(u)); f != "" {
return f, nil
f, err := filenameFromUrl(string(u))
if err != nil || f == "" {
f = utils.MD5(string(u))
if strings.HasSuffix(string(u), ".yaml") || strings.HasSuffix(string(u), ".yml") {
f = f + ".yaml"
}
err = nil
}
f := utils.MD5(string(u))
if strings.HasSuffix(string(u), ".yaml") || strings.HasSuffix(string(u), ".yml") {
f = f + ".yaml"
}
return f, nil
return f, err
}
func filenameFromUrl(urlstr string) string {
func filenameFromUrl(urlstr string) (string, error) {
// strip anything after @
if strings.Contains(urlstr, "@") {
urlstr = strings.Split(urlstr, "@")[0]
@@ -118,13 +118,13 @@ func filenameFromUrl(urlstr string) string {
u, err := url.Parse(urlstr)
if err != nil {
return ""
return "", fmt.Errorf("error due to parsing url: %w", err)
}
x, err := url.QueryUnescape(u.EscapedPath())
if err != nil {
return ""
return "", fmt.Errorf("error due to escaping: %w", err)
}
return filepath.Base(x)
return filepath.Base(x), nil
}
func (u URI) LooksLikeURL() bool {
@@ -158,10 +158,6 @@ func (s URI) LooksLikeOCI() bool {
strings.HasPrefix(string(s), "docker.io")
}
func (s URI) LooksLikeOCIFile() bool {
return strings.HasPrefix(string(s), OCIFilePrefix)
}
func (s URI) ResolveURL() string {
switch {
case strings.HasPrefix(string(s), GithubURI2):