debug

2026-02-03 11:13:31 -05:00 · 2024-06-14 08:42:15 +02:00
19 changed files with 52 additions and 320 deletions
--- a/.github/ci/modelslist.go
+++ b/.github/ci/modelslist.go
@@ -114,12 +114,12 @@ var modelPageTemplate string = `

 	<h2 class="text-center text-3xl font-semibold text-gray-100">

-	 🖼️ Available {{.AvailableModels}} models</i> <a href="https://localai.io/models/" target="_blank" >
+	 🖼️ Available {{.AvailableModels}} models</i> repositories     <a href="https://localai.io/models/" target="_blank" >
 			<i class="fas fa-circle-info pr-2"></i>
 		</a></h2> 

 	<h3>	  
-	Refer to the Model gallery <a href="https://localai.io/models/" target="_blank" ><i class="fas fa-circle-info pr-2"></i></a> for more information on how to use the models with LocalAI.<br>
+	Refer to <a href="https://localai.io/models" target=_blank> Model gallery</a> for more information on how to use the models with LocalAI.

 	You can install models with the CLI command <code>local-ai models install <model-name></code>. or by using the WebUI.
 	</h3>
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -100,14 +100,7 @@ jobs:
          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
          export PATH=$PATH:$GOPATH/bin
          export PATH=/usr/local/cuda/bin:$PATH
-          sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
-          sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
-          sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
-          GO_TAGS=p2p \
-          BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
-          GOOS=linux \
-          GOARCH=arm64 \
-          CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
+          GO_TAGS=p2p GOOS=linux GOARCH=arm64 CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
      - uses: actions/upload-artifact@v4
        with:
          name: LocalAI-linux-arm64
@@ -118,13 +111,7 @@ jobs:
        with:
          files: |
            release/*
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
+
  build-linux:
    runs-on: arc-runner-set
    steps:
@@ -203,7 +190,6 @@ jobs:
      - name: Install gRPC
        run: |
          cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
-      # BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
      - name: Build
        id: build
        run: |
@@ -213,10 +199,7 @@ jobs:
          export PATH=/usr/local/cuda/bin:$PATH
          export PATH=/opt/rocm/bin:$PATH
          source /opt/intel/oneapi/setvars.sh
-          sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
-          GO_TAGS=p2p \
-          BACKEND_LIBS="./ld.so /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/libgomp.so.1" \
-          make -j4 dist
+          GO_TAGS=p2p make -j4 dist
      - uses: actions/upload-artifact@v4
        with:
          name: LocalAI-linux
@@ -227,13 +210,7 @@ jobs:
        with:
          files: |
            release/*
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
+
  build-stablediffusion:
    runs-on: ubuntu-latest
    steps:
@@ -272,6 +249,11 @@ jobs:
  build-macOS-arm64:
    runs-on: macos-14
    steps:
+      - name: Setup tmate session if tests fail
+        uses: mxschmitt/action-tmate@v3.18
+        with:
+          connect-timeout-seconds: 180
+          limit-access-to-actor: true
      - name: Clone
        uses: actions/checkout@v4
        with:
@@ -291,8 +273,7 @@ jobs:
          export C_INCLUDE_PATH=/usr/local/include
          export CPLUS_INCLUDE_PATH=/usr/local/include
          export PATH=$PATH:$GOPATH/bin
-          
-          BACKEND_LIBS="$(ls /opt/homebrew/opt/grpc/lib/*.dylib /opt/homebrew/opt/re2/lib/*.dylib /opt/homebrew/opt/openssl@3/lib/*.dylib /opt/homebrew/opt/protobuf/lib/*.dylib /opt/homebrew/opt/abseil/lib/*.dylib | xargs)" GO_TAGS=p2p make dist
+          GO_TAGS=p2p make dist
      - uses: actions/upload-artifact@v4
        with:
          name: LocalAI-MacOS-arm64
@@ -303,10 +284,3 @@ jobs:
        with:
          files: |
            release/*
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
--- a/15
+++ b/15
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai

 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=37bef8943312d91183ff06d8f1214082a17344a5
+CPPLLAMA_VERSION?=172c8256840ffd882ab9992ecedbb587d9b21f15

 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -313,10 +313,6 @@ build: prepare backend-assets grpcs ## Build the project
 	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
 	$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
 	$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
-ifneq ($(BACKEND_LIBS),)
-	$(MAKE) backend-assets/lib
-	cp $(BACKEND_LIBS) backend-assets/lib/
-endif
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./

 build-minimal:
@@ -325,11 +321,8 @@ build-minimal:
 build-api:
 	BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build

-backend-assets/lib:
-	mkdir -p backend-assets/lib
-
 dist:
-	$(MAKE) backend-assets/grpc/llama-cpp-avx2
+	STATIC=true $(MAKE) backend-assets/grpc/llama-cpp-avx2
 ifeq ($(OS),Darwin)
 	$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
 else
@@ -338,7 +331,7 @@ else
 	$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
 	$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
 endif
-	STATIC=true $(MAKE) build
+	$(MAKE) build
 	mkdir -p release
 # if BUILD_ID is empty, then we don't append it to the binary name
 ifeq ($(BUILD_ID),)
@@ -351,7 +344,7 @@ endif

 dist-cross-linux-arm64: 
 	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
-	STATIC=true $(MAKE) build
+	$(MAKE) build
 	mkdir -p release
 # if BUILD_ID is empty, then we don't append it to the binary name
 ifeq ($(BUILD_ID),)
--- a/README.md
+++ b/README.md
@@ -65,7 +65,6 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu

 [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)

- 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
 - 🔥🔥 Decentralized llama.cpp:  https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs  https://localai.io/features/distribute/
 - 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
 - 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
--- a/backend/python/diffusers/backend.py
+++ b/backend/python/diffusers/backend.py
@@ -17,7 +17,7 @@ import backend_pb2_grpc

 import grpc

-from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
+from diffusers import StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
 from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
 from diffusers.pipelines.stable_diffusion import safety_checker
 from diffusers.utils import load_image,export_to_video
@@ -225,17 +225,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                        torch_dtype=torchType, 
                        use_safetensors=True, 
                        variant=variant)
-            elif request.PipelineType == "StableDiffusion3Pipeline":
-                if fromSingleFile:
-                    self.pipe = StableDiffusion3Pipeline.from_single_file(modelFile,
-                                                               torch_dtype=torchType,
-                                                               use_safetensors=True)
-                else:
-                    self.pipe = StableDiffusion3Pipeline.from_pretrained(
-                        request.Model, 
-                        torch_dtype=torchType, 
-                        use_safetensors=True, 
-                        variant=variant)

            if CLIPSKIP and request.CLIPSkip != 0:
                self.clip_skip = request.CLIPSkip
--- a/backend/python/diffusers/requirements.txt
+++ b/backend/python/diffusers/requirements.txt
@@ -5,7 +5,6 @@ grpcio==1.64.0
 opencv-python
 pillow
 protobuf
-sentencepiece
 torch
 transformers
-certifi
+certifi
--- a/core/cli/models.go
+++ b/core/cli/models.go
@@ -57,7 +57,6 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
 	if err := json.Unmarshal([]byte(mi.Galleries), &galleries); err != nil {
 		log.Error().Err(err).Msg("unable to load galleries")
 	}
-
 	for _, modelName := range mi.ModelArgs {

 		progressBar := progressbar.NewOptions(
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -43,7 +43,6 @@ type RunCMD struct {
 	Address              string   `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
 	CORS                 bool     `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
 	CORSAllowOrigins     string   `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
-	LibraryPath          string   `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"`
 	CSRF                 bool     `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
 	UploadLimit          int      `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
 	APIKeys              []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
@@ -81,7 +80,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 		config.WithCors(r.CORS),
 		config.WithCorsAllowOrigins(r.CORSAllowOrigins),
 		config.WithCsrf(r.CSRF),
-		config.WithLibPath(r.LibraryPath),
 		config.WithThreads(r.Threads),
 		config.WithBackendAssets(ctx.BackendAssets),
 		config.WithBackendAssetsOutput(r.BackendAssetsPath),
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@@ -15,7 +15,6 @@ type ApplicationConfig struct {
 	Context                             context.Context
 	ConfigFile                          string
 	ModelPath                           string
-	LibPath                             string
 	UploadLimitMB, Threads, ContextSize int
 	DisableWebUI                        bool
 	F16                                 bool
@@ -102,12 +101,6 @@ func WithModelLibraryURL(url string) AppOption {
 	}
 }

-func WithLibPath(path string) AppOption {
-	return func(o *ApplicationConfig) {
-		o.LibPath = path
-	}
-}
-
 var EnableWatchDog = func(o *ApplicationConfig) {
 	o.WatchDog = true
 }
--- a/core/http/views/talk.html
+++ b/core/http/views/talk.html
@@ -50,10 +50,6 @@
      </div>
      <div id="loader" class="my-2 loader" style="display: none;"></div>
      <div id="statustext" class="my-2 p-2 block text-white-700 text-sm font-bold mb-2" ></div>
-      <!-- Note for recording box -->
-      <div class="text-sm mb-4 text-white-500">
-        <strong>Note:</strong> You need an LLM a audio-transcription(whisper) and a tts model installed in order for this to work. Select the appropariate model from the toolbox and then click the 'Talk' button to start recording. The recording will continue until you click 'Stop recording'. Make sure your microphone is set up and enabled.
-      </div>
      <div class="mb-4" >
        <label for="modelSelect" class="block text-white-700 text-sm font-bold mb-2">LLM Model:</label>
        <select id="modelSelect"
@@ -99,7 +95,7 @@
        class="bg-red-500 hover:bg-red-700 text-white font-bold py-2 px-4 rounded focus:outline-none focus:shadow-outline"
      ><i class="fa-solid fa-microphone pr-2"></i>Talk</button>
      <a id="resetButton"
-      class="inline-block align-baseline font-bold text-sm text-blue-500 hover:text-gray-200"
+      class="inline-block align-baseline font-bold text-sm text-blue-500 hover:text-blue-800"
      href="#"
      >Reset conversation</a>
      <audio id="audioPlayback" controls hidden></audio>
--- a/core/startup/startup.go
+++ b/core/startup/startup.go
@@ -9,7 +9,6 @@ import (
 	"github.com/go-skynet/LocalAI/core/services"
 	"github.com/go-skynet/LocalAI/internal"
 	"github.com/go-skynet/LocalAI/pkg/assets"
-	"github.com/go-skynet/LocalAI/pkg/library"
 	"github.com/go-skynet/LocalAI/pkg/model"
 	pkgStartup "github.com/go-skynet/LocalAI/pkg/startup"
 	"github.com/go-skynet/LocalAI/pkg/xsysinfo"
@@ -110,11 +109,6 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
 		}
 	}

-	if options.LibPath != "" {
-		// If there is a lib directory, set LD_LIBRARY_PATH to include it
-		library.LoadExternal(options.LibPath)
-	}
-
 	// turn off any process that was started by GRPC if the context is canceled
 	go func() {
 		<-options.Context.Done()
--- a/docs/content/docs/getting-started/container-images.md
+++ b/docs/content/docs/getting-started/container-images.md
@@ -85,7 +85,7 @@ Images with `core` in the tag are smaller and do not contain any python dependen
 | Description | Quay | Docker Hub                                   |
 | --- | --- |-----------------------------------------------|
 | Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master` | `localai/localai:master`                      |
-| Latest tag | `quay.io/go-skynet/local-ai:latest-cpu`                  | `localai/localai:latest-cpu`                  |
+| Latest tag | `quay.io/go-skynet/local-ai:latest` | `localai/localai:latest`                      |
 | Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}` | `localai/localai:{{< version >}}`             |
 | Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg` | `localai/localai:{{< version >}}-ffmpeg`      |
 | Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg-core` | `localai/localai:{{< version >}}-ffmpeg-core` |
@@ -97,7 +97,7 @@ Images with `core` in the tag are smaller and do not contain any python dependen
 | Description | Quay | Docker Hub                                                  |
 | --- | --- |-------------------------------------------------------------|
 | Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda11` | `localai/localai:master-cublas-cuda11`                      |
-| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-11`                 | `localai/localai:latest-gpu-nvidia-cuda-11`                      |
+| Latest tag | `quay.io/go-skynet/local-ai:latest-cublas-cuda11` | `localai/localai:latest-cublas-cuda11`                      |
 | Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11` | `localai/localai:{{< version >}}-cublas-cuda11`             |
 | Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-ffmpeg` | `localai/localai:{{< version >}}-cublas-cuda11-ffmpeg`      |
 | Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-ffmpeg-core` | `localai/localai:{{< version >}}-cublas-cuda11-ffmpeg-core` |
@@ -109,7 +109,7 @@ Images with `core` in the tag are smaller and do not contain any python dependen
 | Description | Quay | Docker Hub                                                  |
 | --- | --- |-------------------------------------------------------------|
 | Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda12` | `localai/localai:master-cublas-cuda12`                      |
-| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-12` | `localai/localai:latest-gpu-nvidia-cuda-12`                 |
+| Latest tag | `quay.io/go-skynet/local-ai:latest-cublas-cuda12` | `localai/localai:latest-cublas-cuda12`                      |
 | Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12` | `localai/localai:{{< version >}}-cublas-cuda12`             |
 | Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-ffmpeg` | `localai/localai:{{< version >}}-cublas-cuda12-ffmpeg`      |
 | Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-ffmpeg-core` | `localai/localai:{{< version >}}-cublas-cuda12-ffmpeg-core` |
@@ -121,7 +121,7 @@ Images with `core` in the tag are smaller and do not contain any python dependen
 | Description | Quay | Docker Hub                                                  |
 | --- | --- |-------------------------------------------------------------|
 | Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f16` | `localai/localai:master-sycl-f16`                      |
-| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f16` | `localai/localai:latest-gpu-intel-f16`                      |
+| Latest tag | `quay.io/go-skynet/local-ai:latest-sycl-f16` | `localai/localai:latest-sycl-f16`                      |
 | Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16` | `localai/localai:{{< version >}}-sycl-f16`             |
 | Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg` | `localai/localai:{{< version >}}-sycl-f16-ffmpeg`      |
 | Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg-core` | `localai/localai:{{< version >}}-sycl-f16-ffmpeg-core` |
@@ -133,7 +133,7 @@ Images with `core` in the tag are smaller and do not contain any python dependen
 | Description | Quay | Docker Hub                                                  |
 | --- | --- |-------------------------------------------------------------|
 | Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f32` | `localai/localai:master-sycl-f32`                      |
-| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f32` | `localai/localai:latest-gpu-intel-f32`                      |
+| Latest tag | `quay.io/go-skynet/local-ai:latest-sycl-f32` | `localai/localai:latest-sycl-f32`                      |
 | Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32` | `localai/localai:{{< version >}}-sycl-f32`             |
 | Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32-ffmpeg` | `localai/localai:{{< version >}}-sycl-f32-ffmpeg`      |
 | Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32-ffmpeg-core` | `localai/localai:{{< version >}}-sycl-f32-ffmpeg-core` |
@@ -145,7 +145,7 @@ Images with `core` in the tag are smaller and do not contain any python dependen
 | Description | Quay | Docker Hub                                                  |
 | --- | --- |-------------------------------------------------------------|
 | Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-hipblas` | `localai/localai:master-hipblas`                      |
-| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-hipblas`                  | `localai/localai:latest-gpu-hipblas`                  |
+| Latest tag | `quay.io/go-skynet/local-ai:latest-hipblas` | `localai/localai:latest-hipblas`                      |
 | Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-hipblas` | `localai/localai:{{< version >}}-hipblas`             |
 | Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-hipblas-ffmpeg` | `localai/localai:{{< version >}}-hipblas-ffmpeg`      |
 | Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-hipblas-ffmpeg-core` | `localai/localai:{{< version >}}-hipblas-ffmpeg-core` |
--- a/docs/data/version.json
+++ b/docs/data/version.json
@@ -1,3 +1,3 @@
 {
-  "version": "v2.17.0"
+  "version": "v2.16.0"
 }
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -22,53 +22,6 @@
    - filename: Qwen2-7B-Instruct-Q4_K_M.gguf
      sha256: 8d0d33f0d9110a04aad1711b1ca02dafc0fa658cd83028bdfa5eff89c294fe76
      uri: huggingface://bartowski/Qwen2-7B-Instruct-GGUF/Qwen2-7B-Instruct-Q4_K_M.gguf
- !!merge <<: *qwen2
-  name: "dolphin-2.9.2-qwen2-72b"
-  icon: https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/ldkN1J0WIDQwU4vutGYiD.png
-  urls:
-    - https://huggingface.co/cognitivecomputations/dolphin-2.9.2-qwen2-72b-gguf
-  description: |
-    Dolphin 2.9.2 Qwen2 72B 🐬
-
-    Curated and trained by Eric Hartford, Lucas Atkins, and Fernando Fernandes, and Cognitive Computations
-  overrides:
-    parameters:
-      model: dolphin-2.9.2-qwen2-Q4_K_M.gguf
-  files:
-    - filename: dolphin-2.9.2-qwen2-Q4_K_M.gguf
-      sha256: 44a0e82cbc2a201b2f4b9e16099a0a4d97b6f0099d45bcc5b354601f38dbb709
-      uri: huggingface://cognitivecomputations/dolphin-2.9.2-qwen2-72b-gguf/qwen2-Q4_K_M.gguf
- !!merge <<: *qwen2
-  name: "dolphin-2.9.2-qwen2-7b"
-  description: |
-    Dolphin 2.9.2 Qwen2 7B 🐬
-
-    Curated and trained by Eric Hartford, Lucas Atkins, and Fernando Fernandes, and Cognitive Computations
-  urls:
-    - https://huggingface.co/cognitivecomputations/dolphin-2.9.2-qwen2-7b
-    - https://huggingface.co/cognitivecomputations/dolphin-2.9.2-qwen2-7b-gguf
-  icon: https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/ldkN1J0WIDQwU4vutGYiD.png
-  overrides:
-    parameters:
-      model: dolphin-2.9.2-qwen2-7b-Q4_K_M.gguf
-  files:
-    - filename: dolphin-2.9.2-qwen2-7b-Q4_K_M.gguf
-      sha256: a15b5db4df6be4f4bfb3632b2009147332ef4c57875527f246b4718cb0d3af1f
-      uri: huggingface://cognitivecomputations/dolphin-2.9.2-qwen2-7b-gguf/dolphin-2.9.2-qwen2-7b-Q4_K_M.gguf
- !!merge <<: *qwen2
-  name: "samantha-qwen-2-7B"
-  description: |
-    Samantha based on qwen2
-  urls:
-    - https://huggingface.co/bartowski/Samantha-Qwen-2-7B-GGUF
-    - https://huggingface.co/macadeliccc/Samantha-Qwen2-7B
-  overrides:
-    parameters:
-      model: Samantha-Qwen-2-7B-Q4_K_M.gguf
-  files:
-    - filename: Samantha-Qwen-2-7B-Q4_K_M.gguf
-      sha256: 5d1cf1c35a7a46c536a96ba0417d08b9f9e09c24a4e25976f72ad55d4904f6fe
-      uri: huggingface://bartowski/Samantha-Qwen-2-7B-GGUF/Samantha-Qwen-2-7B-Q4_K_M.gguf
 ## START Mistral
 - &mistral03
  url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
@@ -223,37 +176,6 @@
    - filename: gemma-2b.Q4_K_M.gguf
      sha256: 37d50c21ef7847926204ad9b3007127d9a2722188cfd240ce7f9f7f041aa71a5
      uri: huggingface://mlabonne/gemma-2b-GGUF/gemma-2b.Q4_K_M.gguf
- !!merge <<: *gemma
-  name: "firefly-gemma-7b-iq-imatrix"
-  icon: "https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/SrOekTxdpnxHyWWmMiAvc.jpeg"
-  urls:
-    - https://huggingface.co/Lewdiculous/firefly-gemma-7b-GGUF-IQ-Imatrix
-    - https://huggingface.co/YeungNLP/firefly-gemma-7b
-  description: |
-    firefly-gemma-7b is trained based on gemma-7b to act as a helpful and harmless AI assistant. We use Firefly to train the model on a single V100 GPU with QLoRA.
-  overrides:
-    parameters:
-      model: firefly-gemma-7b-Q4_K_S-imatrix.gguf
-  files:
-    - filename: firefly-gemma-7b-Q4_K_S-imatrix.gguf
-      sha256: 622e0b8e4f12203cc40c7f87915abf99498c2e0582203415ca236ea37643e428
-      uri: huggingface://Lewdiculous/firefly-gemma-7b-GGUF-IQ-Imatrix/firefly-gemma-7b-Q4_K_S-imatrix.gguf
- !!merge <<: *gemma
-  name: "gemma-1.1-7b-it"
-  urls:
-    - https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF
-    - https://huggingface.co/google/gemma-1.1-7b-it
-  description: |
-      This is Gemma 1.1 7B (IT), an update over the original instruction-tuned Gemma release.
-
-      Gemma 1.1 was trained using a novel RLHF method, leading to substantial gains on quality, coding capabilities, factuality, instruction following and multi-turn conversation quality. We also fixed a bug in multi-turn conversations, and made sure that model responses don't always start with "Sure,".
-  overrides:
-    parameters:
-      model: gemma-1.1-7b-it-Q4_K_M.gguf
-  files:
-    - filename: gemma-1.1-7b-it-Q4_K_M.gguf
-      sha256: 47821da72ee9e80b6fd43c6190ad751b485fb61fa5664590f7a73246bcd8332e
-      uri: huggingface://bartowski/gemma-1.1-7b-it-GGUF/gemma-1.1-7b-it-Q4_K_M.gguf
 - &llama3
  url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
  icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
@@ -1133,21 +1055,6 @@
    - filename: Tess-2.0-Llama-3-8B-Q4_K_M.gguf
      sha256: 3b5fbd6c59d7d38205ab81970c0227c74693eb480acf20d8c2f211f62e3ca5f6
      uri: huggingface://bartowski/Tess-2.0-Llama-3-8B-GGUF/Tess-2.0-Llama-3-8B-Q4_K_M.gguf
- !!merge <<: *llama3
-  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
-  name: "tess-v2.5-phi-3-medium-128k-14b"
-  urls:
-    - https://huggingface.co/bartowski/Tess-v2.5-Phi-3-medium-128k-14B-GGUF
-  icon: https://huggingface.co/migtissera/Tess-2.0-Mixtral-8x22B/resolve/main/Tess-2.png
-  description: |
-    Tess, short for Tesoro (Treasure in Italian), is a general purpose Large Language Model series.
-  overrides:
-    parameters:
-      model: Tess-v2.5-Phi-3-medium-128k-14B-Q4_K_M.gguf
-  files:
-    - filename: Tess-v2.5-Phi-3-medium-128k-14B-Q4_K_M.gguf
-      sha256: 9efb6ebc00de74012d0fb36134cce07d624a870fc12f38b16b57ce447b86e27e
-      uri: huggingface://bartowski/Tess-v2.5-Phi-3-medium-128k-14B-GGUF/Tess-v2.5-Phi-3-medium-128k-14B-Q4_K_M.gguf
 - !!merge <<: *llama3
  name: "llama3-iterative-dpo-final"
  urls:
@@ -1795,20 +1702,6 @@
    - filename: Llama-3-Update-3.0-mmproj-model-f16.gguf
      sha256: 3d2f36dff61d6157cadf102df86a808eb9f8a230be1bc0bc99039d81a895468a
      uri: huggingface://Nitral-AI/Llama-3-Update-3.0-mmproj-model-f16/Llama-3-Update-3.0-mmproj-model-f16.gguf
- !!merge <<: *llama3
-  name: "hathor_stable-v0.2-l3-8b"
-  urls:
-    - https://huggingface.co/bartowski/Hathor_Stable-v0.2-L3-8B-GGUF
-  description: |
-    Hathor-v0.2 is a model based on the LLaMA 3 architecture: Designed to seamlessly integrate the qualities of creativity, intelligence, and robust performance. Making it an ideal tool for a wide range of applications; such as creative writing, educational support and human/computer interaction.
-  icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/FLvA7-CWp3UhBuR2eGSh7.webp
-  overrides:
-    parameters:
-      model: Hathor_Stable-v0.2-L3-8B-Q4_K_M.gguf
-  files:
-    - filename: Hathor_Stable-v0.2-L3-8B-Q4_K_M.gguf
-      sha256: 291cd30421f519ec00e04ae946a4f639d8d1b7c294cb2b2897b35da6d498fdc4
-      uri: huggingface://bartowski/Hathor_Stable-v0.2-L3-8B-GGUF/Hathor_Stable-v0.2-L3-8B-Q4_K_M.gguf
 - !!merge <<: *llama3
  name: "bunny-llama-3-8b-v"
  urls:
@@ -2580,21 +2473,6 @@
    - filename: DreamShaper_8_pruned.safetensors
      uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
      sha256: 879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd
- name: stable-diffusion-3-medium
-  icon: https://huggingface.co/leo009/stable-diffusion-3-medium/resolve/main/sd3demo.jpg
-  license: other
-  description: |
-    Stable Diffusion 3 Medium is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features greatly improved performance in image quality, typography, complex prompt understanding, and resource-efficiency.
-  urls:
-    - https://huggingface.co/stabilityai/stable-diffusion-3-medium
-    - https://huggingface.co/leo009/stable-diffusion-3-medium
-  tags:
-    - text-to-image
-    - stablediffusion
-    - python
-    - sd-3
-    - gpu
-  url: "github:mudler/LocalAI/gallery/stablediffusion3.yaml@master"
 - &whisper
  ## Whisper
  url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master"
--- a/gallery/stablediffusion3.yaml
+++ b/gallery/stablediffusion3.yaml
@@ -1,14 +0,0 @@
---
-name: "stable-diffusion-3-medium"
-
-config_file: |
-  backend: diffusers
-  diffusers:
-    cuda: true
-    enable_parameters: negative_prompt,num_inference_steps
-    pipeline_type: StableDiffusion3Pipeline
-  f16: false
-  name: sd3
-  parameters:
-    model: v2ray/stable-diffusion-3-medium-diffusers
-  step: 25
--- a/pkg/assets/extract.go
+++ b/pkg/assets/extract.go
@@ -6,8 +6,7 @@ import (
 	"io/fs"
 	"os"
 	"path/filepath"
-
-	"github.com/go-skynet/LocalAI/pkg/library"
+	"runtime"
 )

 func ResolvePath(dir string, paths ...string) string {
@@ -56,7 +55,27 @@ func ExtractFiles(content embed.FS, extractDir string) error {
 	// If there is a lib directory, set LD_LIBRARY_PATH to include it
 	// we might use this mechanism to carry over e.g. Nvidia CUDA libraries
 	// from the embedded FS to the target directory
-	library.LoadExtractedLibs(extractDir)

+	// Skip this if LOCALAI_SKIP_LIBRARY_PATH is set
+	if os.Getenv("LOCALAI_SKIP_LIBRARY_PATH") != "" {
+		return err
+	}
+
+	lpathVar := "LD_LIBRARY_PATH"
+	if runtime.GOOS == "darwin" {
+		lpathVar = "DYLD_FALLBACK_LIBRARY_PATH" // should it be DYLD_LIBRARY_PATH ?
+	}
+
+	for _, libDir := range []string{filepath.Join(extractDir, "backend_assets", "lib"), filepath.Join(extractDir, "lib")} {
+		if _, err := os.Stat(libDir); err == nil {
+			ldLibraryPath := os.Getenv(lpathVar)
+			if ldLibraryPath == "" {
+				ldLibraryPath = libDir
+			} else {
+				ldLibraryPath = fmt.Sprintf("%s:%s", ldLibraryPath, libDir)
+			}
+			os.Setenv(lpathVar, ldLibraryPath)
+		}
+	}
 	return err
 }
--- a/pkg/library/dynaload.go
+++ b/pkg/library/dynaload.go
@@ -1,79 +0,0 @@
-package library
-
-import (
-	"fmt"
-	"os"
-	"path/filepath"
-	"runtime"
-
-	"github.com/rs/zerolog/log"
-)
-
-/*
-	This file contains functions to load libraries from the asset directory to keep the business logic clean.
-*/
-
-// skipLibraryPath checks if LOCALAI_SKIP_LIBRARY_PATH is set
-var skipLibraryPath = os.Getenv("LOCALAI_SKIP_LIBRARY_PATH") != ""
-
-// LoadExtractedLibs loads the extracted libraries from the asset dir
-func LoadExtractedLibs(dir string) {
-	if skipLibraryPath {
-		return
-	}
-
-	for _, libDir := range []string{filepath.Join(dir, "backend-assets", "lib"), filepath.Join(dir, "lib")} {
-		LoadExternal(libDir)
-	}
-}
-
-// LoadLDSO checks if there is a ld.so in the asset dir and if so, prefixes the grpc process with it.
-// In linux, if we find a ld.so in the asset dir we prefix it to run with the libs exposed in
-// LD_LIBRARY_PATH for more compatibility
-// If we don't do this, we might run into stack smash
-// See also: https://stackoverflow.com/questions/847179/multiple-glibc-libraries-on-a-single-host/851229#851229
-// In this case, we expect a ld.so in the lib asset dir.
-// If that's present, we use it to run the grpc backends as supposedly built against
-// that specific version of ld.so
-func LoadLDSO(assetDir string, args []string, grpcProcess string) ([]string, string) {
-	if skipLibraryPath {
-		return args, grpcProcess
-	}
-
-	if runtime.GOOS != "linux" {
-		return args, grpcProcess
-	}
-
-	// Check if there is a ld.so file in the assetDir, if it does, we need to run the grpc process with it
-	ldPath := filepath.Join(assetDir, "backend-assets", "lib", "ld.so")
-	if _, err := os.Stat(ldPath); err == nil {
-		log.Debug().Msgf("ld.so found")
-		// We need to run the grpc process with the ld.so
-		args = append(args, grpcProcess)
-		grpcProcess = ldPath
-	}
-
-	return args, grpcProcess
-}
-
-// LoadExternal sets the LD_LIBRARY_PATH to include the given directory
-func LoadExternal(dir string) {
-	if skipLibraryPath {
-		return
-	}
-
-	lpathVar := "LD_LIBRARY_PATH"
-	if runtime.GOOS == "darwin" {
-		lpathVar = "DYLD_FALLBACK_LIBRARY_PATH" // should it be DYLD_LIBRARY_PATH ?
-	}
-
-	if _, err := os.Stat(dir); err == nil {
-		ldLibraryPath := os.Getenv(lpathVar)
-		if ldLibraryPath == "" {
-			ldLibraryPath = dir
-		} else {
-			ldLibraryPath = fmt.Sprintf("%s:%s", ldLibraryPath, dir)
-		}
-		os.Setenv(lpathVar, ldLibraryPath)
-	}
-}
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -11,7 +11,6 @@ import (
 	"time"

 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
-	"github.com/go-skynet/LocalAI/pkg/library"
 	"github.com/go-skynet/LocalAI/pkg/xsysinfo"
 	"github.com/klauspost/cpuid/v2"
 	"github.com/phayes/freeport"
@@ -327,13 +326,8 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
 				return "", fmt.Errorf("failed allocating free ports: %s", err.Error())
 			}

-			args := []string{}
-
-			// Load the ld.so if it exists
-			args, grpcProcess = library.LoadLDSO(o.assetDir, args, grpcProcess)
-
-			// Make sure the process is executable in any circumstance
-			if err := ml.startProcess(grpcProcess, o.model, serverAddress, args...); err != nil {
+			// Make sure the process is executable
+			if err := ml.startProcess(grpcProcess, o.model, serverAddress); err != nil {
 				return "", err
 			}

--- a/pkg/model/process.go
+++ b/pkg/model/process.go
@@ -69,7 +69,7 @@ func (ml *ModelLoader) GetGRPCPID(id string) (int, error) {
 	return strconv.Atoi(p.PID)
 }

-func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string, args ...string) error {
+func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string) error {
 	// Make sure the process is executable
 	if err := os.Chmod(grpcProcess, 0700); err != nil {
 		return err
@@ -82,7 +82,7 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string
 	grpcControlProcess := process.New(
 		process.WithTemporaryStateDir(),
 		process.WithName(grpcProcess),
-		process.WithArgs(append(args, []string{"--addr", serverAddress}...)...),
+		process.WithArgs("--addr", serverAddress),
 		process.WithEnvironment(os.Environ()...),
 	)