Compare commits

..

11 Commits

Author SHA1 Message Date
LocalAI [bot]
b2e8b6d1aa chore: ⬆️ Update ggml-org/llama.cpp to be48528b068111304e4a0bb82c028558b5705f05 (#6012)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-08-11 21:06:10 +00:00
LocalAI [bot]
fba5b557a1 chore: ⬆️ Update ggml-org/whisper.cpp to b02242d0adb5c6c4896d59ac86d9ec9fe0d0fe33 (#6009)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-08-11 12:54:41 +02:00
LocalAI [bot]
6db19c5cb9 chore: ⬆️ Update ggml-org/llama.cpp to 79c1160b073b8148a404f3dd2584be1606dccc66 (#6006)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-08-11 12:54:21 +02:00
Ettore Di Giacinto
5428678209 chore(ci): more cleanup
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-08-11 10:10:38 +02:00
LocalAI [bot]
06129139eb chore(model-gallery): ⬆️ update checksum (#6010)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-08-11 07:54:01 +02:00
Ettore Di Giacinto
05757e2738 feat(backends install): allow to specify name and alias during manual installation (#5971)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-08-10 10:05:53 +02:00
Ettore Di Giacinto
240b790f29 chore(model gallery): add impish_nemo_12b (#6007)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-08-10 10:05:20 +02:00
Ettore Di Giacinto
5f221f5946 fix(l4t-diffusers): add sentencepiece (#6005)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-08-09 09:08:35 +02:00
LocalAI [bot]
def7cdc0bf chore: ⬆️ Update ggml-org/llama.cpp to cd6983d56d2cce94ecb86bb114ae8379a609073c (#6003)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-08-09 08:41:58 +02:00
Ettore Di Giacinto
ea9bf3dba2 Update backend.yml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-08-08 23:00:47 +02:00
Ettore Di Giacinto
b8eca530b6 feat(diffusers): add builds for nvidia-l4t (#6004)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-08-08 22:48:38 +02:00
10 changed files with 143 additions and 89 deletions

View File

@@ -90,7 +90,7 @@ jobs:
- build-type: 'l4t'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-l4t-diffusers'
runs-on: 'ubuntu-24.04-arm'

View File

@@ -23,6 +23,20 @@ jobs:
matrix:
go-version: ['1.21.x']
steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
# this might remove tools that are actually needed,
# if set to "true" but frees about 6 GB
tool-cache: true
# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true
- name: Release space from worker
run: |
echo "Listing top largest packages"

View File

@@ -1,5 +1,5 @@
LLAMA_VERSION?=a0552c8beef74e843bb085c8ef0c63f9ed7a2b27
LLAMA_VERSION?=be48528b068111304e4a0bb82c028558b5705f05
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=

View File

@@ -6,7 +6,7 @@ CMAKE_ARGS?=
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
WHISPER_CPP_VERSION?=4245c77b654cd384ad9f53a4a302be716b3e5861
WHISPER_CPP_VERSION?=b02242d0adb5c6c4896d59ac86d9ec9fe0d0fe33
export WHISPER_CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
export WHISPER_DIR=$(abspath ./sources/whisper.cpp)

View File

@@ -6,4 +6,5 @@ accelerate
compel
peft
optimum-quanto
numpy<2
numpy<2
sentencepiece

View File

@@ -59,8 +59,10 @@ func New(opts ...config.AppOption) (*Application, error) {
log.Error().Err(err).Msg("error installing models")
}
if err := coreStartup.InstallExternalBackends(options.BackendGalleries, options.BackendsPath, nil, options.ExternalBackends...); err != nil {
log.Error().Err(err).Msg("error installing external backends")
for _, backend := range options.ExternalBackends {
if err := coreStartup.InstallExternalBackends(options.BackendGalleries, options.BackendsPath, nil, backend, "", ""); err != nil {
log.Error().Err(err).Msg("error installing external backend")
}
}
configLoaderOpts := options.ToConfigLoaderOptions()

View File

@@ -23,7 +23,9 @@ type BackendsList struct {
}
type BackendsInstall struct {
BackendArgs []string `arg:"" optional:"" name:"backends" help:"Backend configuration URLs to load"`
BackendArgs string `arg:"" optional:"" name:"backend" help:"Backend configuration URL to load"`
Name string `arg:"" optional:"" name:"name" help:"Name of the backend"`
Alias string `arg:"" optional:"" name:"alias" help:"Alias of the backend"`
BackendsCMDFlags `embed:""`
}
@@ -66,27 +68,25 @@ func (bi *BackendsInstall) Run(ctx *cliContext.Context) error {
log.Error().Err(err).Msg("unable to load galleries")
}
for _, backendName := range bi.BackendArgs {
progressBar := progressbar.NewOptions(
1000,
progressbar.OptionSetDescription(fmt.Sprintf("downloading backend %s", backendName)),
progressbar.OptionShowBytes(false),
progressbar.OptionClearOnFinish(),
)
progressCallback := func(fileName string, current string, total string, percentage float64) {
v := int(percentage * 10)
err := progressBar.Set(v)
if err != nil {
log.Error().Err(err).Str("filename", fileName).Int("value", v).Msg("error while updating progress bar")
}
}
err := startup.InstallExternalBackends(galleries, bi.BackendsPath, progressCallback, backendName)
progressBar := progressbar.NewOptions(
1000,
progressbar.OptionSetDescription(fmt.Sprintf("downloading backend %s", bi.BackendArgs)),
progressbar.OptionShowBytes(false),
progressbar.OptionClearOnFinish(),
)
progressCallback := func(fileName string, current string, total string, percentage float64) {
v := int(percentage * 10)
err := progressBar.Set(v)
if err != nil {
return err
log.Error().Err(err).Str("filename", fileName).Int("value", v).Msg("error while updating progress bar")
}
}
err := startup.InstallExternalBackends(galleries, bi.BackendsPath, progressCallback, bi.BackendArgs, bi.Name, bi.Alias)
if err != nil {
return err
}
return nil
}

View File

@@ -1,7 +1,6 @@
package startup
import (
"errors"
"fmt"
"path/filepath"
"strings"
@@ -13,49 +12,68 @@ import (
"github.com/rs/zerolog/log"
)
func InstallExternalBackends(galleries []config.Gallery, backendPath string, downloadStatus func(string, string, string, float64), backends ...string) error {
var errs error
func InstallExternalBackends(galleries []config.Gallery, backendPath string, downloadStatus func(string, string, string, float64), backend, name, alias string) error {
systemState, err := system.GetSystemState()
if err != nil {
return fmt.Errorf("failed to get system state: %w", err)
}
for _, backend := range backends {
uri := downloader.URI(backend)
switch {
case uri.LooksLikeDir():
name := filepath.Base(backend)
log.Info().Str("backend", backend).Str("name", name).Msg("Installing backend from path")
if err := gallery.InstallBackend(backendPath, &gallery.GalleryBackend{
Metadata: gallery.Metadata{
Name: name,
},
URI: backend,
}, downloadStatus); err != nil {
errs = errors.Join(err, fmt.Errorf("error installing backend %s", backend))
}
case uri.LooksLikeOCI():
name, err := uri.FilenameFromUrl()
if err != nil {
return fmt.Errorf("failed to get filename from URL: %w", err)
}
// strip extension if any
name = strings.TrimSuffix(name, filepath.Ext(name))
uri := downloader.URI(backend)
switch {
case uri.LooksLikeDir():
if name == "" { // infer it from the path
name = filepath.Base(backend)
}
log.Info().Str("backend", backend).Str("name", name).Msg("Installing backend from path")
if err := gallery.InstallBackend(backendPath, &gallery.GalleryBackend{
Metadata: gallery.Metadata{
Name: name,
},
Alias: alias,
URI: backend,
}, downloadStatus); err != nil {
return fmt.Errorf("error installing backend %s: %w", backend, err)
}
case uri.LooksLikeOCI() && !uri.LooksLikeOCIFile():
if name == "" {
return fmt.Errorf("specifying a name is required for OCI images")
}
log.Info().Str("backend", backend).Str("name", name).Msg("Installing backend from OCI image")
if err := gallery.InstallBackend(backendPath, &gallery.GalleryBackend{
Metadata: gallery.Metadata{
Name: name,
},
Alias: alias,
URI: backend,
}, downloadStatus); err != nil {
return fmt.Errorf("error installing backend %s: %w", backend, err)
}
case uri.LooksLikeOCIFile():
name, err := uri.FilenameFromUrl()
if err != nil {
return fmt.Errorf("failed to get filename from URL: %w", err)
}
// strip extension if any
name = strings.TrimSuffix(name, filepath.Ext(name))
log.Info().Str("backend", backend).Str("name", name).Msg("Installing backend from OCI image")
if err := gallery.InstallBackend(backendPath, &gallery.GalleryBackend{
Metadata: gallery.Metadata{
Name: name,
},
URI: backend,
}, downloadStatus); err != nil {
errs = errors.Join(err, fmt.Errorf("error installing backend %s", backend))
}
default:
err := gallery.InstallBackendFromGallery(galleries, systemState, backend, backendPath, downloadStatus, true)
if err != nil {
errs = errors.Join(err, fmt.Errorf("error installing backend %s", backend))
}
log.Info().Str("backend", backend).Str("name", name).Msg("Installing backend from OCI image")
if err := gallery.InstallBackend(backendPath, &gallery.GalleryBackend{
Metadata: gallery.Metadata{
Name: name,
},
Alias: alias,
URI: backend,
}, downloadStatus); err != nil {
return fmt.Errorf("error installing backend %s: %w", backend, err)
}
default:
if name != "" || alias != "" {
return fmt.Errorf("specifying a name or alias is not supported for this backend")
}
err := gallery.InstallBackendFromGallery(galleries, systemState, backend, backendPath, downloadStatus, true)
if err != nil {
return fmt.Errorf("error installing backend %s: %w", backend, err)
}
}
return errs
return nil
}

View File

@@ -105,8 +105,8 @@
model: gpt-oss-20b-mxfp4.gguf
files:
- filename: gpt-oss-20b-mxfp4.gguf
sha256: 52f57ab7d3df3ba9173827c1c6832e73375553a846f3e32b49f1ae2daad688d4
uri: huggingface://ggml-org/gpt-oss-20b-GGUF/gpt-oss-20b-mxfp4.gguf
sha256: be37a636aca0fc1aae0d32325f82f6b4d21495f06823b5fbc1898ae0303e9935
- !!merge <<: *gptoss
name: "gpt-oss-120b"
url: "github:mudler/LocalAI/gallery/harmony.yaml@master"
@@ -119,14 +119,14 @@
model: gpt-oss-120b-mxfp4-00001-of-00003.gguf
files:
- filename: gpt-oss-120b-mxfp4-00001-of-00003.gguf
sha256: 40b630223b9fc43820fa0aae5d0ab61020f5858d1719642357753dca9e7df29f
uri: huggingface://ggml-org/gpt-oss-120b-GGUF/gpt-oss-120b-mxfp4-00001-of-00003.gguf
sha256: e2865eb6c1df7b2ffbebf305cd5d9074d5ccc0fe3b862f98d343a46dad1606f9
- filename: gpt-oss-120b-mxfp4-00002-of-00003.gguf
sha256: fbdb8cdec70edb82c53bfc69cc0f54a34759a23d317fa0771a63be6571907b38
uri: huggingface://ggml-org/gpt-oss-120b-GGUF/gpt-oss-120b-mxfp4-00002-of-00003.gguf
sha256: 346492f65891fb27cac5c74a8c07626cbfeb4211cd391ec4de37dbbe3109a93b
- filename: gpt-oss-120b-mxfp4-00003-of-00003.gguf
sha256: b326bfd8ac696c4b9a14e9e84d5529b2bb86847aea0e65443cbf075accba8b71
uri: huggingface://ggml-org/gpt-oss-120b-GGUF/gpt-oss-120b-mxfp4-00003-of-00003.gguf
sha256: 66dca81040933f5a49177e82c479c51319cefb83bd22dad9f06dad45e25f1463
- !!merge <<: *gptoss
name: "openai_gpt-oss-20b-neo"
icon: https://huggingface.co/DavidAU/Openai_gpt-oss-20b-NEO-GGUF/resolve/main/matrix1.gif
@@ -312,7 +312,7 @@
- https://huggingface.co/Dream-org/Dream-v0-Instruct-7B
- https://huggingface.co/bartowski/Dream-org_Dream-v0-Instruct-7B-GGUF
description: |
This is the instruct model of Dream 7B, which is an open diffusion large language model with top-tier performance.
This is the instruct model of Dream 7B, which is an open diffusion large language model with top-tier performance.
overrides:
parameters:
model: Dream-org_Dream-v0-Instruct-7B-Q4_K_M.gguf
@@ -14446,6 +14446,21 @@
- filename: entfane_math-genius-7B-Q4_K_M.gguf
sha256: cd3a3c898a2dfb03d17a66db81b743f2d66981e0ceb92e8669a4af61217feed7
uri: huggingface://bartowski/entfane_math-genius-7B-GGUF/entfane_math-genius-7B-Q4_K_M.gguf
- !!merge <<: *mistral03
name: "impish_nemo_12b"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
icon: https://huggingface.co/SicariusSicariiStuff/Impish_Nemo_12B/resolve/main/Images/Impish_Nemo_12B.png
urls:
- https://huggingface.co/SicariusSicariiStuff/Impish_Nemo_12B
- https://huggingface.co/SicariusSicariiStuff/Impish_Nemo_12B_GGUF
description: "August 2025, Impish_Nemo_12B — my best model yet. And unlike a typical Nemo, this one can take in much higher temperatures (works well with 1+). Oh, and regarding following the character card: It somehow gotten even better, to the point of it being straight up uncanny \U0001F643 (I had to check twice that this model was loaded, and not some 70B!)\n\nI feel like this model could easily replace models much larger than itself for adventure or roleplay, for assistant tasks, obviously not, but the creativity here? Off the charts. Characters have never felt so alive and in the moment before — theyll use insinuation, manipulation, and, if needed (or provoked) — force. They feel so very present.\n\nThat look on Neos face when he opened his eyes and said, “I know Kung Fu”? Well, Impish_Nemo_12B had pretty much the same moment — and it now knows more than just Kung Fu, much, much more. It wasnt easy, and its a niche within a niche, but as promised almost half a year ago — it is now done.\n\nImpish_Nemo_12B is smart, sassy, creative, and got a lot of unhingedness too — these are baked-in deep into every interaction. It took the innate Mistral's relative freedom, and turned it up to 11. It very well maybe too much for many, but after testing and interacting with so many models, I find this 'edge' of sorts, rather fun and refreshing.\n\nAnyway, the dataset used is absolutely massive, tons of new types of data and new domains of knowledge (Morrowind fandom, fighting, etc...). The whole dataset is a very well-balanced mix, and resulted in a model with extremely strong common sense for a 12B. Regarding response length — there's almost no response-length bias here, this one is very much dynamic and will easily adjust reply length based on 13 examples of provided dialogue.\n\nOh, and the model comes with 3 new Character Cards, 2 Roleplay and 1 Adventure!\n"
overrides:
parameters:
model: Impish_Nemo_12B-Q6_K.gguf
files:
- filename: Impish_Nemo_12B-Q6_K.gguf
sha256: e0ce3adbed2718e144f477721c2ad68b6e3cccd95fc27dbe8f0135be76c99c72
uri: huggingface://SicariusSicariiStuff/Impish_Nemo_12B_GGUF/Impish_Nemo_12B-Q6_K.gguf
- &mudler
url: "github:mudler/LocalAI/gallery/mudler.yaml@master" ### START mudler's LocalAI specific-models
name: "LocalAI-llama3-8b-function-call-v0.2"
@@ -19460,14 +19475,14 @@
url: "github:mudler/LocalAI/gallery/flux-ggml.yaml@master"
icon: https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev/media/main/teaser.png
description: |
FLUX.1 Kontext [dev] is a 12 billion parameter rectified flow transformer capable of editing images based on text instructions. For more information, please read our blog post and our technical report. You can find information about the [pro] version in here.
Key Features
Change existing images based on an edit instruction.
Have character, style and object reference without any finetuning.
Robust consistency allows users to refine an image through multiple successive edits with minimal visual drift.
Trained using guidance distillation, making FLUX.1 Kontext [dev] more efficient.
Open weights to drive new scientific research, and empower artists to develop innovative workflows.
Generated outputs can be used for personal, scientific, and commercial purposes, as described in the FLUX.1 [dev] Non-Commercial License.
FLUX.1 Kontext [dev] is a 12 billion parameter rectified flow transformer capable of editing images based on text instructions. For more information, please read our blog post and our technical report. You can find information about the [pro] version in here.
Key Features
Change existing images based on an edit instruction.
Have character, style and object reference without any finetuning.
Robust consistency allows users to refine an image through multiple successive edits with minimal visual drift.
Trained using guidance distillation, making FLUX.1 Kontext [dev] more efficient.
Open weights to drive new scientific research, and empower artists to develop innovative workflows.
Generated outputs can be used for personal, scientific, and commercial purposes, as described in the FLUX.1 [dev] Non-Commercial License.
urls:
- https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev
- https://huggingface.co/QuantStack/FLUX.1-Kontext-dev-GGUF

View File

@@ -98,19 +98,19 @@ func (uri URI) DownloadWithAuthorizationAndCallback(basePath string, authorizati
}
func (u URI) FilenameFromUrl() (string, error) {
f, err := filenameFromUrl(string(u))
if err != nil || f == "" {
f = utils.MD5(string(u))
if strings.HasSuffix(string(u), ".yaml") || strings.HasSuffix(string(u), ".yml") {
f = f + ".yaml"
}
err = nil
if f := filenameFromUrl(string(u)); f != "" {
return f, nil
}
return f, err
f := utils.MD5(string(u))
if strings.HasSuffix(string(u), ".yaml") || strings.HasSuffix(string(u), ".yml") {
f = f + ".yaml"
}
return f, nil
}
func filenameFromUrl(urlstr string) (string, error) {
func filenameFromUrl(urlstr string) string {
// strip anything after @
if strings.Contains(urlstr, "@") {
urlstr = strings.Split(urlstr, "@")[0]
@@ -118,13 +118,13 @@ func filenameFromUrl(urlstr string) (string, error) {
u, err := url.Parse(urlstr)
if err != nil {
return "", fmt.Errorf("error due to parsing url: %w", err)
return ""
}
x, err := url.QueryUnescape(u.EscapedPath())
if err != nil {
return "", fmt.Errorf("error due to escaping: %w", err)
return ""
}
return filepath.Base(x), nil
return filepath.Base(x)
}
func (u URI) LooksLikeURL() bool {
@@ -158,6 +158,10 @@ func (s URI) LooksLikeOCI() bool {
strings.HasPrefix(string(s), "docker.io")
}
func (s URI) LooksLikeOCIFile() bool {
return strings.HasPrefix(string(s), OCIFilePrefix)
}
func (s URI) ResolveURL() string {
switch {
case strings.HasPrefix(string(s), GithubURI2):