experiment: build with a single image with all the deps

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
models(gallery): ⬆️ update checksum (#2690)
2026-02-03 03:02:38 -05:00 · 2024-07-01 19:43:18 +02:00 · 2024-07-01 00:23:58 +00:00 · 2024-07-01 00:20:11 +00:00 · 2024-06-30 14:40:01 +02:00 · 2024-06-30 01:51:51 +00:00
184 changed files with 2663 additions and 1293 deletions
--- a/.github/ci/modelslist.go
+++ b/.github/ci/modelslist.go
@@ -75,7 +75,7 @@ var modelPageTemplate string = `
    <div class="container mx-auto px-4 py-4">
        <div class="flex items-center justify-between">
            <div class="flex items-center">
-                <a href="/" class="text-white text-xl font-bold"><img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
+                <a href="/" class="text-white text-xl font-bold"><img src="https://github.com/mudler/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
                <a href="/" class="text-white text-xl font-bold">LocalAI</a>
            </div>
            <!-- Menu button for small screens -->
@@ -92,9 +92,9 @@ var modelPageTemplate string = `
        <!-- Collapsible menu for small screens -->
        <div class="hidden lg:hidden" id="mobile-menu">
            <div class="pt-4 pb-3 border-t border-gray-700">
-                
+
                <a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
-               
+
            </div>
        </div>
    </div>
@@ -114,17 +114,17 @@ var modelPageTemplate string = `

 	<h2 class="text-center text-3xl font-semibold text-gray-100">

-	 🖼️ Available {{.AvailableModels}} models</i> repositories     <a href="https://localai.io/models/" target="_blank" >
+	 🖼️ Available {{.AvailableModels}} models</i> <a href="https://localai.io/models/" target="_blank" >
 			<i class="fas fa-circle-info pr-2"></i>
-		</a></h2> 
+		</a></h2>

-	<h3>	  
-	Refer to <a href="https://localai.io/models" target=_blank> Model gallery</a> for more information on how to use the models with LocalAI.
+	<h3>
+	Refer to the Model gallery <a href="https://localai.io/models/" target="_blank" ><i class="fas fa-circle-info pr-2"></i></a> for more information on how to use the models with LocalAI.<br>

 	You can install models with the CLI command <code>local-ai models install <model-name></code>. or by using the WebUI.
 	</h3>
-  
-	<input class="form-control appearance-none block w-full mt-5 px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search" 
+
+	<input class="form-control appearance-none block w-full mt-5 px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search"
 	id="searchbox" placeholder="Live search keyword..">
 	  <div class="dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark">
 		{{ range $_, $model := .Models }}
@@ -139,10 +139,10 @@ var modelPageTemplate string = `
 			</div>
 	  		<div class="p-6 text-surface dark:text-white">
 				<h5 class="mb-2 text-xl font-medium leading-tight">{{$model.Name}}</h5>
-				
-				   
+
+
 				<p class="mb-4 text-base truncate">{{ $model.Description }}</p>
-		
+
 			</div>
 			<div class="px-6 pt-4 pb-2">

@@ -178,7 +178,7 @@ var modelPageTemplate string = `
                    {{ $model.Description }}

                    </p>
-                    
+
                    <p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
                    To install the model with the CLI, run: <br>
                    <code> local-ai models install {{$model.Name}} </code> <br>
@@ -193,7 +193,7 @@ var modelPageTemplate string = `
                    <ul>
                    {{ range $_, $u := $model.URLs }}
                    <li><a href="{{ $u }}" target=_blank><i class="fa-solid fa-link"></i> {{ $u }}</a></li>
-                    {{ end }}  
+                    {{ end }}
                    </ul>
                    </p>
                </div>
@@ -209,7 +209,7 @@ var modelPageTemplate string = `
 			</div>
 		</div>
 		</div>
-		{{ end }}      
+		{{ end }}

 		</div>
  </div>
@@ -221,10 +221,10 @@ var lazyLoadInstance = new LazyLoad({
 });

 let cards = document.querySelectorAll('.box')
-    
+
 function liveSearch() {
    let search_query = document.getElementById("searchbox").value;
-    
+
    //Use innerText if all contents are visible
    //Use textContent for including hidden elements
    for (var i = 0; i < cards.length; i++) {
@@ -238,8 +238,8 @@ function liveSearch() {
 }

 //A little delay
-let typingTimer;               
-let typeInterval = 500;  
+let typingTimer;
+let typeInterval = 500;
 let searchInput = document.getElementById('searchbox');

 searchInput.addEventListener('keyup', () => {
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -32,7 +32,7 @@ jobs:
    strategy:
      # Pushing with all jobs in parallel
      # eats the bandwidth of all the nodes
-      max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
+      max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
      matrix:
        include:
          - build-type: ''
@@ -46,7 +46,7 @@ jobs:
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "1"
+            cuda-minor-version: "5"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -119,7 +119,7 @@ jobs:
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "1"
+            cuda-minor-version: "5"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12-ffmpeg-core'
@@ -127,4 +127,13 @@ jobs:
            image-type: 'core'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=4 --output-sync=target"
+            makeflags: "--jobs=4 --output-sync=target"
+          - build-type: 'vulkan'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-vulkan-ffmpeg-core'
+            ffmpeg: 'true'
+            image-type: 'core'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            makeflags: "--jobs=4 --output-sync=target"
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -39,7 +39,7 @@ jobs:
    strategy:
      # Pushing with all jobs in parallel
      # eats the bandwidth of all the nodes
-      max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
+      max-parallel: ${{ github.event_name != 'pull_request' && 6 || 10 }}
      matrix:
        include:
          # Extra images
@@ -64,7 +64,7 @@ jobs:
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "11"
-            cuda-minor-version: "7"
+            cuda-minor-version: "8"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda11'
@@ -75,7 +75,7 @@ jobs:
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "1"
+            cuda-minor-version: "5"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12'
@@ -86,7 +86,7 @@ jobs:
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "11"
-            cuda-minor-version: "7"
+            cuda-minor-version: "8"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-cublas-cuda11-ffmpeg'
@@ -100,7 +100,7 @@ jobs:
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "1"
+            cuda-minor-version: "5"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -232,7 +232,7 @@ jobs:
            grpc-base-image: "ubuntu:22.04"
            runs-on: 'arc-runner-set'
            makeflags: "--jobs=3 --output-sync=target"
-  
+
  core-image-build:
    uses: ./.github/workflows/image_build.yml
    with:
@@ -257,6 +257,7 @@ jobs:
      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
    strategy:
+      max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
      matrix:
        include:
          - build-type: ''
@@ -266,52 +267,62 @@ jobs:
            ffmpeg: 'true'
            image-type: 'core'
            base-image: "ubuntu:22.04"
-            runs-on: 'ubuntu-latest'
+            runs-on: 'arc-runner-set'
            aio: "-aio-cpu"
            latest-image: 'latest-cpu'
            latest-image-aio: 'latest-aio-cpu'
            makeflags: "--jobs=4 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "11"
-            cuda-minor-version: "7"
+            cuda-minor-version: "8"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda11-core'
            ffmpeg: ''
            image-type: 'core'
            base-image: "ubuntu:22.04"
-            runs-on: 'ubuntu-latest'
+            runs-on: 'arc-runner-set'
            makeflags: "--jobs=4 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "1"
+            cuda-minor-version: "5"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12-core'
            ffmpeg: ''
            image-type: 'core'
            base-image: "ubuntu:22.04"
-            runs-on: 'ubuntu-latest'
+            runs-on: 'arc-runner-set'
            makeflags: "--jobs=4 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "11"
-            cuda-minor-version: "7"
+            cuda-minor-version: "8"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda11-ffmpeg-core'
            ffmpeg: 'true'
            image-type: 'core'
-            runs-on: 'ubuntu-latest'
+            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=4 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "1"
+            cuda-minor-version: "5"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12-ffmpeg-core'
            ffmpeg: 'true'
            image-type: 'core'
-            runs-on: 'ubuntu-latest'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            makeflags: "--jobs=4 --output-sync=target"
+          - build-type: 'vulkan'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-vulkan-ffmpeg-core'
+            latest-image: 'latest-vulkan-ffmpeg-core'
+            ffmpeg: 'true'
+            image-type: 'core'
+            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=4 --output-sync=target"
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -19,11 +19,11 @@ on:
        type: string
      cuda-major-version:
        description: 'CUDA major version'
-        default: "11"
+        default: "12"
        type: string
      cuda-minor-version:
        description: 'CUDA minor version'
-        default: "7"
+        default: "5"
        type: string
      platforms:
        description: 'Platforms'
@@ -324,7 +324,7 @@ jobs:
          docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
          docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
          docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
-  
+
      - name: job summary
        run: |
          echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -40,7 +40,7 @@ jobs:
          sudo apt-get update
          sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
        env:
-          CUDA_VERSION: 12-4
+          CUDA_VERSION: 12-5
      - name: Cache grpc
        id: cache-grpc
        uses: actions/cache@v4
@@ -102,8 +102,9 @@ jobs:
          export PATH=/usr/local/cuda/bin:$PATH
          sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
          sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
+          sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
          GO_TAGS=p2p \
-          BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0" \
+          BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
          GOOS=linux \
          GOARCH=arm64 \
          CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
@@ -166,15 +167,15 @@ jobs:
          ROCM_VERSION: "6.1"
          AMDGPU_VERSION: "6.1"
        run: |
-            set -ex 
+            set -ex

            sudo apt-get update
-            sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg 
-            
-            curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add - 
-              
+            sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
+
+            curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
+
            printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list
-            
+
            printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
            printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
            sudo apt-get update
@@ -182,10 +183,10 @@ jobs:
            sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
                hipblas-dev rocm-dev \
                rocblas-dev
-          
+
            sudo apt-get clean
            sudo rm -rf /var/lib/apt/lists/*
-            sudo ldconfig 
+            sudo ldconfig
      - name: Cache grpc
        id: cache-grpc
        uses: actions/cache@v4
@@ -212,8 +213,9 @@ jobs:
          export PATH=/usr/local/cuda/bin:$PATH
          export PATH=/opt/rocm/bin:$PATH
          source /opt/intel/oneapi/setvars.sh
+          sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
          GO_TAGS=p2p \
-          BACKEND_LIBS="/usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/libgomp.so.1" \
+          BACKEND_LIBS="./ld.so /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/libgomp.so.1" \
          make -j4 dist
      - uses: actions/upload-artifact@v4
        with:
@@ -289,7 +291,7 @@ jobs:
          export C_INCLUDE_PATH=/usr/local/include
          export CPLUS_INCLUDE_PATH=/usr/local/include
          export PATH=$PATH:$GOPATH/bin
-          
+
          BACKEND_LIBS="$(ls /opt/homebrew/opt/grpc/lib/*.dylib /opt/homebrew/opt/re2/lib/*.dylib /opt/homebrew/opt/openssl@3/lib/*.dylib /opt/homebrew/opt/protobuf/lib/*.dylib /opt/homebrew/opt/abseil/lib/*.dylib | xargs)" GO_TAGS=p2p make dist
      - uses: actions/upload-artifact@v4
        with:
@@ -307,4 +309,4 @@ jobs:
        with:
          detached: true
          connect-timeout-seconds: 180
-          limit-access-to-actor: true
+          limit-access-to-actor: true
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -220,7 +220,7 @@ jobs:
          export CPLUS_INCLUDE_PATH=/usr/local/include
          # Used to run the newer GNUMake version from brew that supports --output-sync
          export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
-          BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
+          BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3.18
--- a/125
+++ b/125
@@ -5,6 +5,12 @@ ARG INTEL_BASE_IMAGE=${BASE_IMAGE}

 # The requirements-core target is common to all images.  It should not be placed in requirements-core unless every single build will use it.
 FROM ${BASE_IMAGE} AS requirements-core
+# TODO(mudler): install all accelerators here
+# and use make dist instead of build.
+# TODO(mudler): modify make dist to also build go-piper and stablediffusion
+# This way the same binary can work for everything(!)
+# TODO(mudler): also make sure that we bundle all the required libs in the backend-assets/lib
+# For the GPU-accelerated builds we are going to generate a tar file instead that will be extracted by the bash installer, and the libs will also be installed in the final docker image, so no need to pull ALL the dependencies

 USER root

@@ -33,7 +39,7 @@ RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | ta
 ENV PATH $PATH:/root/go/bin:/usr/local/go/bin

 # Install grpc compilers
-RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.1 && \
+RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
    go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af

 COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
@@ -49,10 +55,12 @@ ENV PATH /usr/local/cuda/bin:${PATH}
 # HipBLAS requirements
 ENV PATH /opt/rocm/bin:${PATH}

-# OpenBLAS requirements and stable diffusion
+# OpenBLAS requirements and stable diffusion, tts (espeak)
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        libopenblas-dev \
+        espeak-ng \
+        espeak \
        libopencv-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
@@ -77,8 +85,6 @@ ENV PATH="/root/.cargo/bin:${PATH}"
 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-        espeak-ng \
-        espeak \
        python3-pip \
        python-is-python3 \
        python3-dev \
@@ -93,16 +99,31 @@ RUN pip install --user grpcio-tools
 ###################################
 ###################################

-# The requirements-drivers target is for BUILD_TYPE specific items.  If you need to install something specific to CUDA, or specific to ROCM, it goes here.
-# This target will be built on top of requirements-core or requirements-extras as retermined by the IMAGE_TYPE build-arg
-FROM requirements-${IMAGE_TYPE} AS requirements-drivers
+# Base image for the build-type. 
+FROM requirements-${IMAGE_TYPE} AS run-requirements-drivers

 ARG BUILD_TYPE
-ARG CUDA_MAJOR_VERSION=11
-ARG CUDA_MINOR_VERSION=8
+ARG CUDA_MAJOR_VERSION=12
+ARG CUDA_MINOR_VERSION=5

 ENV BUILD_TYPE=${BUILD_TYPE}

+# Vulkan requirements
+RUN <<EOT bash
+    if [ "${BUILD_TYPE}" = "vulkan" ]; then
+        apt-get update && \
+        apt-get install -y  --no-install-recommends \
+                        software-properties-common pciutils wget gpg-agent && \
+        wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
+        wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
+        apt-get update && \
+            apt-get install -y \
+            vulkan-sdk && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/*
+    fi
+EOT
+
 # CuBLAS requirements
 RUN <<EOT bash
    if [ "${BUILD_TYPE}" = "cublas" ]; then
@@ -170,6 +191,82 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
        ldconfig \
    ; fi

+# The build-requirements-drivers target is for BUILD_TYPE specific items.  If you need to install something specific to CUDA, or specific to ROCM, it goes here.
+# This target will be built on top of requirements-core or requirements-extras as determined by the IMAGE_TYPE build-arg
+FROM requirements-${IMAGE_TYPE} AS build-requirements-drivers
+
+ARG BUILD_TYPE
+ARG CUDA_MAJOR_VERSION=12
+ARG CUDA_MINOR_VERSION=5
+
+ENV BUILD_TYPE=${BUILD_TYPE}
+
+# Vulkan requirements
+RUN <<EOT bash
+        apt-get update && \
+        apt-get install -y  --no-install-recommends \
+                        software-properties-common pciutils wget gpg-agent && \
+        wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
+        wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
+        apt-get update && \
+            apt-get install -y \
+            vulkan-sdk && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/*
+EOT
+
+# CuBLAS requirements
+RUN <<EOT bash
+    apt-get update && \
+    apt-get install -y  --no-install-recommends \
+                    software-properties-common pciutils
+    if [ "amd64" = "$TARGETARCH" ]; then
+        curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
+        fi
+    if [ "arm64" = "$TARGETARCH" ]; then
+        curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
+    fi
+    dpkg -i cuda-keyring_1.1-1_all.deb && \
+        rm -f cuda-keyring_1.1-1_all.deb && \
+        apt-get update && \
+        apt-get install -y --no-install-recommends \
+            cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
+        apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+EOT
+
+# CLBlast
+RUN apt-get update && \
+        apt-get install -y --no-install-recommends \
+            libclblast-dev && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/*
+
+# intel
+RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && apt update && apt install -y intel-basekit && apt-get clean && \
+rm -rf /var/lib/apt/lists/*
+
+# hipblas
+RUN wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
+        gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && apt-get update && \
+        echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/6.1.2/ubuntu jammy main" \
+        | tee /etc/apt/sources.list.d/amdgpu.list && \
+        echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.1.2 jammy main" |  tee --append /etc/apt/sources.list.d/rocm.list && printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | tee /etc/apt/preferences.d/rocm-pin-600 && \
+        apt update && \
+        apt-get install -y --no-install-recommends \
+            hipblas-dev rocm-dev \
+            rocblas-dev && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/* && \
+        # I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
+        # to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
+        ldconfig
+
 ###################################
 ###################################

@@ -221,7 +318,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall

 # The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
 # Adjustments to the build process should likely be made here.
-FROM requirements-drivers AS builder
+FROM build-requirements-drivers AS builder

 ARG GO_TAGS="stablediffusion tts p2p"
 ARG GRPC_BACKENDS
@@ -266,7 +363,8 @@ COPY --from=grpc /opt/grpc /usr/local

 # Rebuild with defaults backends
 WORKDIR /build
-RUN make build
+# Need to build tts and stablediffusion separately first (?)
+RUN make dist && rm release/*.sha256 && mv release/* local-ai

 RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
        mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
@@ -278,7 +376,7 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \

 # This is the final target. The result of this target will be the image uploaded to the registry.
 # If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
-FROM requirements-drivers
+FROM run-requirements-drivers

 ARG FFMPEG
 ARG BUILD_TYPE
@@ -292,7 +390,7 @@ ENV REBUILD=false
 ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
 ENV MAKEFLAGS=${MAKEFLAGS}

-ARG CUDA_MAJOR_VERSION=11
+ARG CUDA_MAJOR_VERSION=12
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
 ENV NVIDIA_VISIBLE_DEVICES=all
@@ -323,6 +421,7 @@ RUN make prepare-sources
 COPY --from=builder /build/local-ai ./

 # Copy shared libraries for piper
+# TODO(mudler): bundle these libs in backend-assets/lib/ (like we do for llama.cpp deps)
 COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/

 # do not let stablediffusion rebuild (requires an older version of absl)
--- a/47
+++ b/47
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai

 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=21be9cab94e0b5b53cb6edeeebf8c8c799baad03
+CPPLLAMA_VERSION?=9ef07800622e4c371605f9419864d15667c3558f

 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -54,7 +54,7 @@ override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell gi

 OPTIONAL_TARGETS?=

-OS := $(shell uname -s)
+export OS := $(shell uname -s)
 ARCH := $(shell uname -m)
 GREEN  := $(shell tput -Txterm setaf 2)
 YELLOW := $(shell tput -Txterm setaf 3)
@@ -80,8 +80,8 @@ ifeq ($(OS),Darwin)
 		BUILD_TYPE=metal
 	# disable metal if on Darwin and any other value is explicitly passed.
 	else ifneq ($(BUILD_TYPE),metal)
-		CMAKE_ARGS+=-DLLAMA_METAL=OFF
-		export LLAMA_NO_ACCELERATE=1
+		CMAKE_ARGS+=-DGGML_METAL=OFF
+		export GGML_NO_ACCELERATE=1
 	endif

 	ifeq ($(BUILD_TYPE),metal)
@@ -98,11 +98,15 @@ endif

 ifeq ($(BUILD_TYPE),cublas)
 	CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
-	export LLAMA_CUBLAS=1
+	export GGML_CUDA=1
 	export WHISPER_CUDA=1
 	CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
 endif

+ifeq ($(BUILD_TYPE),vulkan)
+	CMAKE_ARGS+=-DGGML_VULKAN=1
+endif
+
 ifeq ($(BUILD_TYPE),hipblas)
 	ROCM_HOME ?= /opt/rocm
 	ROCM_PATH ?= /opt/rocm
@@ -114,13 +118,13 @@ ifeq ($(BUILD_TYPE),hipblas)
 	export WHISPER_HIPBLAS=1
 	GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
 	AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
-	CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
+	CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
 	CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
 endif

 ifeq ($(BUILD_TYPE),metal)
 	CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
-	export LLAMA_METAL=1
+	export GGML_METAL=1
 	export WHISPER_METAL=1
 endif

@@ -315,7 +319,7 @@ build: prepare backend-assets grpcs ## Build the project
 	$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
 ifneq ($(BACKEND_LIBS),)
 	$(MAKE) backend-assets/lib
-	cp -r $(BACKEND_LIBS) backend-assets/lib/
+	cp $(BACKEND_LIBS) backend-assets/lib/
 endif
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./

@@ -333,10 +337,12 @@ dist:
 ifeq ($(OS),Darwin)
 	$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
 else
+ifneq ($(ARCH),arm64)
 	$(MAKE) backend-assets/grpc/llama-cpp-cuda
 	$(MAKE) backend-assets/grpc/llama-cpp-hipblas
 	$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
 	$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
+endif
 endif
 	STATIC=true $(MAKE) build
 	mkdir -p release
@@ -350,7 +356,7 @@ else
 endif

 dist-cross-linux-arm64: 
-	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
 	STATIC=true $(MAKE) build
 	mkdir -p release
 # if BUILD_ID is empty, then we don't append it to the binary name
@@ -400,7 +406,7 @@ prepare-e2e:
 	mkdir -p $(TEST_DIR)
 	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
 	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
+	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=5 --build-arg FFMPEG=true -t localai-tests .

 run-e2e-image:
 	ls -liah $(abspath ./tests/e2e-fixtures)
@@ -707,21 +713,21 @@ backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc
 	cp -rf backend/cpp/llama backend/cpp/llama-avx2
 	$(MAKE) -C backend/cpp/llama-avx2 purge
 	$(info ${GREEN}I llama-cpp build info:avx2${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2

 backend-assets/grpc/llama-cpp-avx: backend-assets/grpc
 	cp -rf backend/cpp/llama backend/cpp/llama-avx
 	$(MAKE) -C backend/cpp/llama-avx purge
 	$(info ${GREEN}I llama-cpp build info:avx${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx

 backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
 	cp -rf backend/cpp/llama backend/cpp/llama-fallback
 	$(MAKE) -C backend/cpp/llama-fallback purge
 	$(info ${GREEN}I llama-cpp build info:fallback${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
 # TODO: every binary should have its own folder instead, so can have different metal implementations
 ifeq ($(BUILD_TYPE),metal)
@@ -732,7 +738,7 @@ backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
 	cp -rf backend/cpp/llama backend/cpp/llama-cuda
 	$(MAKE) -C backend/cpp/llama-cuda purge
 	$(info ${GREEN}I llama-cpp build info:cuda${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda

 backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
@@ -760,7 +766,7 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
 	cp -rf backend/cpp/llama backend/cpp/llama-grpc
 	$(MAKE) -C backend/cpp/llama-grpc purge
 	$(info ${GREEN}I llama-cpp build info:grpc${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_RPC=ON -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc

 backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
@@ -810,6 +816,17 @@ docker:
 		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
 		-t $(DOCKER_IMAGE) .

+docker-cuda11:
+	docker build \
+		--build-arg CUDA_MAJOR_VERSION=11 \
+		--build-arg CUDA_MINOR_VERSION=8 \
+		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
+		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
+		--build-arg GO_TAGS="$(GO_TAGS)" \
+		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
+		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
+		-t $(DOCKER_IMAGE)-cuda11 .
+
 docker-aio:
 	@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
 	docker build \
--- a/README.md
+++ b/README.md
@@ -48,6 +48,13 @@

 ![screen](https://github.com/mudler/LocalAI/assets/2420543/20b5ccd2-8393-44f0-aaf6-87a23806381e)

+Run the installer script:
+
+```bash
+curl https://localai.io/install.sh | sh
+```
+
+Or run with docker:
 ```bash
 docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
 # Alternative images:
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -230,6 +230,7 @@ message TranscriptRequest {
  string dst = 2;
  string language = 3;
  uint32 threads = 4;
+  bool translate = 5;
 }

 message TranscriptResult {
--- a/backend/cpp/llama/Makefile
+++ b/backend/cpp/llama/Makefile
@@ -4,34 +4,44 @@ LLAMA_VERSION?=
 CMAKE_ARGS?=
 BUILD_TYPE?=
 ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
+TARGET?=--target grpc-server

-# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
+# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
+CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
+
+# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
 ifeq ($(BUILD_TYPE),cublas)
-	CMAKE_ARGS+=-DLLAMA_CUBLAS=ON
-# If build type is openblas then we set -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
+	CMAKE_ARGS+=-DGGML_CUDA=ON
+# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
 # to CMAKE_ARGS automatically
 else ifeq ($(BUILD_TYPE),openblas)
-	CMAKE_ARGS+=-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
-# If build type is clblas (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
+	CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
+# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
 else ifeq ($(BUILD_TYPE),clblas)
-	CMAKE_ARGS+=-DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
+	CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
 # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ 
 else ifeq ($(BUILD_TYPE),hipblas)
-	CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
-# If it's OSX, DO NOT embed the metal library - -DLLAMA_METAL_EMBED_LIBRARY=ON requires further investigation
+	CMAKE_ARGS+=-DGGML_HIPBLAS=ON
+# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
 # But if it's OSX without metal, disable it here
-else ifeq ($(OS),darwin)
+else ifeq ($(OS),Darwin)
 	ifneq ($(BUILD_TYPE),metal)
-		CMAKE_ARGS+=-DLLAMA_METAL=OFF
+		CMAKE_ARGS+=-DGGML_METAL=OFF
+	else
+		CMAKE_ARGS+=-DGGML_METAL=ON
+# Until this is tested properly, we disable embedded metal file
+# as we already embed it as part of the LocalAI assets
+		CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=OFF
+		TARGET+=--target ggml-metal
 	endif
 endif

 ifeq ($(BUILD_TYPE),sycl_f16)
-	CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
+	CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
 endif

 ifeq ($(BUILD_TYPE),sycl_f32)
-	CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+	CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
 endif

 llama.cpp:
@@ -62,8 +72,8 @@ grpc-server: llama.cpp llama.cpp/examples/grpc-server
 	@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
 ifneq (,$(findstring sycl,$(BUILD_TYPE)))
 	bash -c "source $(ONEAPI_VARS); \
-	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)"
+	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
 else
-	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)
+	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
 endif
 	cp llama.cpp/build/bin/grpc-server .
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -886,6 +886,8 @@ struct llama_server_context
            {"task_id", slot->task_id},
        });

+        LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
+
        return true;
    }

--- a/backend/go/image/stablediffusion/main.go
+++ b/backend/go/image/stablediffusion/main.go
@@ -5,7 +5,7 @@ package main
 import (
 	"flag"

-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )

 var (
--- a/backend/go/image/stablediffusion/stablediffusion.go
+++ b/backend/go/image/stablediffusion/stablediffusion.go
@@ -3,9 +3,9 @@ package main
 // This is a wrapper to statisfy the GRPC service interface
 // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
 import (
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	"github.com/go-skynet/LocalAI/pkg/stablediffusion"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/pkg/stablediffusion"
 )

 type Image struct {
--- a/backend/go/image/tinydream/main.go
+++ b/backend/go/image/tinydream/main.go
@@ -5,7 +5,7 @@ package main
 import (
 	"flag"

-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )

 var (
--- a/backend/go/image/tinydream/tinydream.go
+++ b/backend/go/image/tinydream/tinydream.go
@@ -3,9 +3,9 @@ package main
 // This is a wrapper to statisfy the GRPC service interface
 // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
 import (
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	"github.com/go-skynet/LocalAI/pkg/tinydream"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/pkg/tinydream"
 )

 type Image struct {
--- a/backend/go/llm/bert/bert.go
+++ b/backend/go/llm/bert/bert.go
@@ -5,8 +5,8 @@ package main
 import (
 	bert "github.com/go-skynet/go-bert.cpp"

-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 )

 type Embeddings struct {
--- a/backend/go/llm/bert/main.go
+++ b/backend/go/llm/bert/main.go
@@ -5,7 +5,7 @@ package main
 import (
 	"flag"

-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )

 var (
--- a/backend/go/llm/gpt4all/gpt4all.go
+++ b/backend/go/llm/gpt4all/gpt4all.go
@@ -5,8 +5,8 @@ package main
 import (
 	"fmt"

-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 	gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
 )

--- a/backend/go/llm/gpt4all/main.go
+++ b/backend/go/llm/gpt4all/main.go
@@ -5,7 +5,7 @@ package main
 import (
 	"flag"

-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )

 var (
--- a/backend/go/llm/langchain/langchain.go
+++ b/backend/go/llm/langchain/langchain.go
@@ -6,9 +6,9 @@ import (
 	"fmt"
 	"os"

-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	"github.com/go-skynet/LocalAI/pkg/langchain"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/pkg/langchain"
 )

 type LLM struct {
--- a/backend/go/llm/langchain/main.go
+++ b/backend/go/llm/langchain/main.go
@@ -5,7 +5,7 @@ package main
 import (
 	"flag"

-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )

 var (
--- a/backend/go/llm/llama-ggml/llama.go
+++ b/backend/go/llm/llama-ggml/llama.go
@@ -5,9 +5,9 @@ package main
 import (
 	"fmt"

-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	"github.com/go-skynet/go-llama.cpp"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 )

 type LLM struct {
--- a/backend/go/llm/llama-ggml/main.go
+++ b/backend/go/llm/llama-ggml/main.go
@@ -3,7 +3,7 @@ package main
 import (
 	"flag"

-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )

 var (
--- a/backend/go/llm/llama/llama.go
+++ b/backend/go/llm/llama/llama.go
@@ -6,9 +6,9 @@ import (
 	"fmt"
 	"path/filepath"

-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
 	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	"github.com/go-skynet/go-llama.cpp"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
 )

 type LLM struct {
--- a/backend/go/llm/llama/main.go
+++ b/backend/go/llm/llama/main.go
@@ -7,7 +7,7 @@ package main
 import (
 	"flag"

-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )

 var (
--- a/backend/go/llm/rwkv/main.go
+++ b/backend/go/llm/rwkv/main.go
@@ -5,7 +5,7 @@ package main
 import (
 	"flag"

-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )

 var (
--- a/backend/go/llm/rwkv/rwkv.go
+++ b/backend/go/llm/rwkv/rwkv.go
@@ -7,8 +7,8 @@ import (
 	"path/filepath"

 	"github.com/donomii/go-rwkv.cpp"
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 )

 const tokenizerSuffix = ".tokenizer.json"
@@ -31,7 +31,7 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
 	model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads()))

 	if model == nil {
-		return fmt.Errorf("could not load model")
+		return fmt.Errorf("rwkv could not load model")
 	}
 	llm.rwkv = model
 	return nil
--- a/backend/go/stores/main.go
+++ b/backend/go/stores/main.go
@@ -6,7 +6,7 @@ import (
 	"flag"
 	"os"

-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 	"github.com/rs/zerolog"
 	"github.com/rs/zerolog/log"
 )
--- a/backend/go/stores/store.go
+++ b/backend/go/stores/store.go
@@ -8,8 +8,8 @@ import (
 	"math"
 	"slices"

-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"

 	"github.com/rs/zerolog/log"
 )
--- a/backend/go/transcribe/main.go
+++ b/backend/go/transcribe/main.go
@@ -5,7 +5,7 @@ package main
 import (
 	"flag"

-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )

 var (
--- a/backend/go/transcribe/transcript.go
+++ b/backend/go/transcribe/transcript.go
@@ -8,7 +8,7 @@ import (

 	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
 	"github.com/go-audio/wav"
-	"github.com/go-skynet/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/schema"
 )

 func ffmpegCommand(args []string) (string, error) {
@@ -29,7 +29,7 @@ func audioToWav(src, dst string) error {
 	return nil
 }

-func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.TranscriptionResult, error) {
+func Transcript(model whisper.Model, audiopath, language string, translate bool, threads uint) (schema.TranscriptionResult, error) {
 	res := schema.TranscriptionResult{}

 	dir, err := os.MkdirTemp("", "whisper")
@@ -75,6 +75,10 @@ func Transcript(model whisper.Model, audiopath, language string, threads uint) (
 		context.SetLanguage("auto")
 	}

+	if translate {
+		context.SetTranslate(true)
+	}
+
 	if err := context.Process(data, nil, nil); err != nil {
 		return res, err
 	}
--- a/backend/go/transcribe/whisper.go
+++ b/backend/go/transcribe/whisper.go
@@ -4,9 +4,9 @@ package main
 // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
 import (
 	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
-	"github.com/go-skynet/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 )

 type Whisper struct {
@@ -22,5 +22,5 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error {
 }

 func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
-	return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads))
+	return Transcript(sd.whisper, opts.Dst, opts.Language, opts.Translate, uint(opts.Threads))
 }
--- a/backend/go/tts/main.go
+++ b/backend/go/tts/main.go
@@ -5,7 +5,7 @@ package main
 import (
 	"flag"

-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )

 var (
--- a/backend/go/tts/piper.go
+++ b/backend/go/tts/piper.go
@@ -7,8 +7,8 @@ import (
 	"os"
 	"path/filepath"

-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 	piper "github.com/mudler/go-piper"
 )

--- a/backend/python/diffusers/backend.py
+++ b/backend/python/diffusers/backend.py
@@ -17,7 +17,7 @@ import backend_pb2_grpc

 import grpc

-from diffusers import StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
+from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
 from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
 from diffusers.pipelines.stable_diffusion import safety_checker
 from diffusers.utils import load_image,export_to_video
@@ -225,6 +225,17 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                        torch_dtype=torchType, 
                        use_safetensors=True, 
                        variant=variant)
+            elif request.PipelineType == "StableDiffusion3Pipeline":
+                if fromSingleFile:
+                    self.pipe = StableDiffusion3Pipeline.from_single_file(modelFile,
+                                                               torch_dtype=torchType,
+                                                               use_safetensors=True)
+                else:
+                    self.pipe = StableDiffusion3Pipeline.from_pretrained(
+                        request.Model, 
+                        torch_dtype=torchType, 
+                        use_safetensors=True, 
+                        variant=variant)

            if CLIPSKIP and request.CLIPSkip != 0:
                self.clip_skip = request.CLIPSkip
--- a/backend/python/diffusers/requirements.txt
+++ b/backend/python/diffusers/requirements.txt
@@ -5,6 +5,7 @@ grpcio==1.64.0
 opencv-python
 pillow
 protobuf
+sentencepiece
 torch
 transformers
-certifi
+certifi
--- a/core/application.go
+++ b/core/application.go
@@ -1,9 +1,9 @@
 package core

 import (
-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/services"
+	"github.com/mudler/LocalAI/pkg/model"
 )

 // The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy
--- a/core/backend/embeddings.go
+++ b/core/backend/embeddings.go
@@ -3,10 +3,10 @@ package backend
 import (
 	"fmt"

-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"

-	"github.com/go-skynet/LocalAI/pkg/grpc"
-	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/grpc"
+	model "github.com/mudler/LocalAI/pkg/model"
 )

 func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
--- a/core/backend/image.go
+++ b/core/backend/image.go
@@ -1,10 +1,10 @@
 package backend

 import (
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"

-	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/grpc/proto"
+	model "github.com/mudler/LocalAI/pkg/model"
 )

 func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@@ -9,14 +9,14 @@ import (
 	"sync"
 	"unicode/utf8"

-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/schema"

-	"github.com/go-skynet/LocalAI/pkg/gallery"
-	"github.com/go-skynet/LocalAI/pkg/grpc"
-	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	model "github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/pkg/grpc"
+	"github.com/mudler/LocalAI/pkg/grpc/proto"
+	model "github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/utils"
 )

 type LLMResponse struct {
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -5,9 +5,9 @@ import (
 	"os"
 	"path/filepath"

-	"github.com/go-skynet/LocalAI/core/config"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/core/config"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 )

@@ -142,12 +142,14 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption
 		MirostatTAU:         float32(*c.LLMConfig.MirostatTAU),
 		Debug:               *c.Debug,
 		StopPrompts:         c.StopWords,
-		Repeat:              int32(c.RepeatPenalty),
+		Repeat:              int32(c.RepeatLastN),
+		FrequencyPenalty:    float32(c.FrequencyPenalty),
+		PresencePenalty:     float32(c.PresencePenalty),
+		Penalty:             float32(c.RepeatPenalty),
 		NKeep:               int32(c.Keep),
 		Batch:               int32(c.Batch),
 		IgnoreEOS:           c.IgnoreEOS,
 		Seed:                getSeed(c),
-		FrequencyPenalty:    float32(c.FrequencyPenalty),
 		MLock:               *c.MMlock,
 		MMap:                *c.MMap,
 		MainGPU:             c.MainGPU,
--- a/core/backend/rerank.go
+++ b/core/backend/rerank.go
@@ -4,9 +4,9 @@ import (
 	"context"
 	"fmt"

-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/pkg/grpc/proto"
+	model "github.com/mudler/LocalAI/pkg/model"
 )

 func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
--- a/core/backend/stores.go
+++ b/core/backend/stores.go
@@ -1,10 +1,10 @@
 package backend

 import (
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"

-	"github.com/go-skynet/LocalAI/pkg/grpc"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/grpc"
+	"github.com/mudler/LocalAI/pkg/model"
 )

 func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) (grpc.Backend, error) {
--- a/core/backend/transcript.go
+++ b/core/backend/transcript.go
@@ -4,14 +4,14 @@ import (
 	"context"
 	"fmt"

-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/schema"

-	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/grpc/proto"
+	model "github.com/mudler/LocalAI/pkg/model"
 )

-func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
+func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {

 	opts := modelOpts(backendConfig, appConfig, []model.Option{
 		model.WithBackendString(model.WhisperBackend),
@@ -31,8 +31,9 @@ func ModelTranscription(audio, language string, ml *model.ModelLoader, backendCo
 	}

 	return whisperModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
-		Dst:      audio,
-		Language: language,
-		Threads:  uint32(*backendConfig.Threads),
+		Dst:       audio,
+		Language:  language,
+		Translate: translate,
+		Threads:   uint32(*backendConfig.Threads),
 	})
 }
--- a/core/backend/tts.go
+++ b/core/backend/tts.go
@@ -6,11 +6,11 @@ import (
 	"os"
 	"path/filepath"

-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"

-	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	model "github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/mudler/LocalAI/pkg/grpc/proto"
+	model "github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/utils"
 )

 func generateUniqueFileName(dir, baseName, ext string) string {
--- a/core/cli/cli.go
+++ b/core/cli/cli.go
@@ -1,8 +1,8 @@
 package cli

 import (
-	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
-	"github.com/go-skynet/LocalAI/core/cli/worker"
+	cliContext "github.com/mudler/LocalAI/core/cli/context"
+	"github.com/mudler/LocalAI/core/cli/worker"
 )

 var CLI struct {
--- a/core/cli/models.go
+++ b/core/cli/models.go
@@ -4,10 +4,12 @@ import (
 	"encoding/json"
 	"fmt"

-	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
+	cliContext "github.com/mudler/LocalAI/core/cli/context"
+	"github.com/mudler/LocalAI/core/config"

-	"github.com/go-skynet/LocalAI/pkg/gallery"
-	"github.com/go-skynet/LocalAI/pkg/startup"
+	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/pkg/downloader"
+	"github.com/mudler/LocalAI/pkg/startup"
 	"github.com/rs/zerolog/log"
 	"github.com/schollz/progressbar/v3"
 )
@@ -33,7 +35,7 @@ type ModelsCMD struct {
 }

 func (ml *ModelsList) Run(ctx *cliContext.Context) error {
-	var galleries []gallery.Gallery
+	var galleries []config.Gallery
 	if err := json.Unmarshal([]byte(ml.Galleries), &galleries); err != nil {
 		log.Error().Err(err).Msg("unable to load galleries")
 	}
@@ -53,10 +55,11 @@ func (ml *ModelsList) Run(ctx *cliContext.Context) error {
 }

 func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
-	var galleries []gallery.Gallery
+	var galleries []config.Gallery
 	if err := json.Unmarshal([]byte(mi.Galleries), &galleries); err != nil {
 		log.Error().Err(err).Msg("unable to load galleries")
 	}
+
 	for _, modelName := range mi.ModelArgs {

 		progressBar := progressbar.NewOptions(
@@ -78,13 +81,15 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
 			return err
 		}

-		model := gallery.FindModel(models, modelName, mi.ModelsPath)
-		if model == nil {
-			log.Error().Str("model", modelName).Msg("model not found")
-			return err
-		}
+		if !downloader.LooksLikeOCI(modelName) {
+			model := gallery.FindModel(models, modelName, mi.ModelsPath)
+			if model == nil {
+				log.Error().Str("model", modelName).Msg("model not found")
+				return fmt.Errorf("model %q not found", modelName) // err is nil here (checked above); returning it would report success
+			}

-		log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
+			log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
+		}
 		err = startup.InstallModels(galleries, "", mi.ModelsPath, progressCallback, modelName)
 		if err != nil {
 			return err
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -6,11 +6,11 @@ import (
 	"strings"
 	"time"

-	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/http"
-	"github.com/go-skynet/LocalAI/core/p2p"
-	"github.com/go-skynet/LocalAI/core/startup"
+	cliContext "github.com/mudler/LocalAI/core/cli/context"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/http"
+	"github.com/mudler/LocalAI/core/p2p"
+	"github.com/mudler/LocalAI/core/startup"
 	"github.com/rs/zerolog"
 	"github.com/rs/zerolog/log"
 )
--- a/core/cli/transcript.go
+++ b/core/cli/transcript.go
@@ -5,10 +5,10 @@ import (
 	"errors"
 	"fmt"

-	"github.com/go-skynet/LocalAI/core/backend"
-	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/core/backend"
+	cliContext "github.com/mudler/LocalAI/core/cli/context"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 )

@@ -18,6 +18,7 @@ type TranscriptCMD struct {
 	Backend           string `short:"b" default:"whisper" help:"Backend to run the transcription model"`
 	Model             string `short:"m" required:"" help:"Model name to run the TTS"`
 	Language          string `short:"l" help:"Language of the audio file"`
+	Translate         bool   `short:"c" help:"Translate the transcription to english"`
 	Threads           int    `short:"t" default:"1" help:"Number of threads used for parallel computation"`
 	ModelsPath        string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
 	BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
@@ -50,7 +51,7 @@ func (t *TranscriptCMD) Run(ctx *cliContext.Context) error {
 		}
 	}()

-	tr, err := backend.ModelTranscription(t.Filename, t.Language, ml, c, opts)
+	tr, err := backend.ModelTranscription(t.Filename, t.Language, t.Translate, ml, c, opts)
 	if err != nil {
 		return err
 	}
--- a/core/cli/tts.go
+++ b/core/cli/tts.go
@@ -7,10 +7,10 @@ import (
 	"path/filepath"
 	"strings"

-	"github.com/go-skynet/LocalAI/core/backend"
-	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/core/backend"
+	cliContext "github.com/mudler/LocalAI/core/cli/context"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 )

--- a/core/cli/util.go
+++ b/core/cli/util.go
@@ -5,7 +5,7 @@ import (

 	"github.com/rs/zerolog/log"

-	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
+	cliContext "github.com/mudler/LocalAI/core/cli/context"
 	gguf "github.com/thxcode/gguf-parser-go"
 )

--- a/core/cli/worker/worker_llamacpp.go
+++ b/core/cli/worker/worker_llamacpp.go
@@ -5,8 +5,9 @@ import (
 	"os"
 	"syscall"

-	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
-	"github.com/go-skynet/LocalAI/pkg/assets"
+	cliContext "github.com/mudler/LocalAI/core/cli/context"
+	"github.com/mudler/LocalAI/pkg/assets"
+	"github.com/mudler/LocalAI/pkg/library"
 	"github.com/rs/zerolog/log"
 )

@@ -27,17 +28,18 @@ func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
 		return fmt.Errorf("usage: local-ai worker llama-cpp-rpc -- <llama-rpc-server-args>")
 	}

+	grpcProcess := assets.ResolvePath(
+		r.BackendAssetsPath,
+		"util",
+		"llama-cpp-rpc-server",
+	)
+
+	args := os.Args[4:]
+	args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
+
+	args = append([]string{grpcProcess}, args...)
 	return syscall.Exec(
-		assets.ResolvePath(
-			r.BackendAssetsPath,
-			"util",
-			"llama-cpp-rpc-server",
-		),
-		append([]string{
-			assets.ResolvePath(
-				r.BackendAssetsPath,
-				"util",
-				"llama-cpp-rpc-server",
-			)}, os.Args[4:]...),
+		grpcProcess,
+		args,
 		os.Environ())
 }
--- a/core/cli/worker/worker_nop2p.go
+++ b/core/cli/worker/worker_nop2p.go
@@ -6,7 +6,7 @@ package worker
 import (
 	"fmt"

-	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
+	cliContext "github.com/mudler/LocalAI/core/cli/context"
 )

 type P2P struct{}
--- a/core/cli/worker/worker_p2p.go
+++ b/core/cli/worker/worker_p2p.go
@@ -10,9 +10,10 @@ import (
 	"os/exec"
 	"time"

-	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
-	"github.com/go-skynet/LocalAI/core/p2p"
-	"github.com/go-skynet/LocalAI/pkg/assets"
+	cliContext "github.com/mudler/LocalAI/core/cli/context"
+	"github.com/mudler/LocalAI/core/p2p"
+	"github.com/mudler/LocalAI/pkg/assets"
+	"github.com/mudler/LocalAI/pkg/library"
 	"github.com/phayes/freeport"
 	"github.com/rs/zerolog/log"
 )
@@ -71,13 +72,18 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
 	go func() {
 		for {
 			log.Info().Msgf("Starting llama-cpp-rpc-server on '%s:%d'", address, port)
+
+			grpcProcess := assets.ResolvePath(
+				r.BackendAssetsPath,
+				"util",
+				"llama-cpp-rpc-server",
+			)
+
+			args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, r.ExtraLLamaCPPArgs...)
+			args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
+
 			cmd := exec.Command(
-				assets.ResolvePath(
-					r.BackendAssetsPath,
-					"util",
-					"llama-cpp-rpc-server",
-				),
-				append([]string{"--host", address, "--port", fmt.Sprint(port)}, r.ExtraLLamaCPPArgs...)...,
+				grpcProcess, args...,
 			)

 			cmd.Env = os.Environ()
@@ -86,7 +92,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
 			cmd.Stdout = os.Stdout

 			if err := cmd.Start(); err != nil {
-				log.Error().Err(err).Msg("Failed to start llama-cpp-rpc-server")
+				log.Error().Any("grpcProcess", grpcProcess).Any("args", args).Err(err).Msg("Failed to start llama-cpp-rpc-server")
 			}

 			cmd.Wait()
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@@ -6,8 +6,7 @@ import (
 	"encoding/json"
 	"time"

-	"github.com/go-skynet/LocalAI/pkg/gallery"
-	"github.com/go-skynet/LocalAI/pkg/xsysinfo"
+	"github.com/mudler/LocalAI/pkg/xsysinfo"
 	"github.com/rs/zerolog/log"
 )

@@ -36,7 +35,7 @@ type ApplicationConfig struct {

 	ModelLibraryURL string

-	Galleries []gallery.Gallery
+	Galleries []Gallery

 	BackendAssets     embed.FS
 	AssetsDestination string
@@ -180,10 +179,10 @@ func WithBackendAssets(f embed.FS) AppOption {
 func WithStringGalleries(galls string) AppOption {
 	return func(o *ApplicationConfig) {
 		if galls == "" {
-			o.Galleries = []gallery.Gallery{}
+			o.Galleries = []Gallery{}
 			return
 		}
-		var galleries []gallery.Gallery
+		var galleries []Gallery
 		if err := json.Unmarshal([]byte(galls), &galleries); err != nil {
 			log.Error().Err(err).Msg("failed loading galleries")
 		}
@@ -191,7 +190,7 @@ func WithStringGalleries(galls string) AppOption {
 	}
 }

-func WithGalleries(galleries []gallery.Gallery) AppOption {
+func WithGalleries(galleries []Gallery) AppOption {
 	return func(o *ApplicationConfig) {
 		o.Galleries = append(o.Galleries, galleries...)
 	}
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -5,10 +5,10 @@ import (
 	"regexp"
 	"strings"

-	"github.com/go-skynet/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/pkg/downloader"
-	"github.com/go-skynet/LocalAI/pkg/functions"
-	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/downloader"
+	"github.com/mudler/LocalAI/pkg/functions"
+	"github.com/mudler/LocalAI/pkg/utils"
 )

 const (
@@ -390,10 +390,6 @@ func (c *BackendConfig) Validate() bool {
 		}
 	}

-	if c.Name == "" {
-		return false
-	}
-
 	if c.Backend != "" {
 		// a regex that checks that is a string name with no special characters, except '-' and '_'
 		re := regexp.MustCompile(`^[a-zA-Z0-9-_]+$`)
--- a/core/config/backend_config_loader.go
+++ b/core/config/backend_config_loader.go
@@ -11,9 +11,9 @@ import (
 	"sync"

 	"github.com/charmbracelet/glamour"
-	"github.com/go-skynet/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/pkg/downloader"
-	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/downloader"
+	"github.com/mudler/LocalAI/pkg/utils"
 	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v3"
 )
--- a/core/config/backend_config_test.go
+++ b/core/config/backend_config_test.go
@@ -16,7 +16,8 @@ var _ = Describe("Test cases for config related functions", func() {
 			Expect(err).To(BeNil())
 			defer os.Remove(tmp.Name())
 			_, err = tmp.WriteString(
-				`backend: "foo-bar"
+				`backend: "../foo-bar"
+name: "foo"
 parameters:
  model: "foo-bar"`)
 			Expect(err).ToNot(HaveOccurred())
--- a/core/config/gallery.go
+++ b/core/config/gallery.go
@@ -0,0 +1,6 @@
+package config
+
+type Gallery struct {
+	URL  string `json:"url" yaml:"url"`
+	Name string `json:"name" yaml:"name"`
+}
--- a/core/dependencies_manager/manager.go
+++ b/core/dependencies_manager/manager.go
@@ -5,8 +5,8 @@ import (
 	"os"
 	"path/filepath"

-	"github.com/go-skynet/LocalAI/pkg/downloader"
-	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/mudler/LocalAI/pkg/downloader"
+	"github.com/mudler/LocalAI/pkg/utils"
 	"gopkg.in/yaml.v3"
 )

--- a/core/gallery/gallery.go
+++ b/core/gallery/gallery.go
@@ -7,19 +7,15 @@ import (
 	"path/filepath"
 	"strings"

-	"github.com/go-skynet/LocalAI/pkg/downloader"
 	"github.com/imdario/mergo"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/pkg/downloader"
 	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v2"
 )

-type Gallery struct {
-	URL  string `json:"url" yaml:"url"`
-	Name string `json:"name" yaml:"name"`
-}
-
 // Installs a model from the gallery
-func InstallModelFromGallery(galleries []Gallery, name string, basePath string, req GalleryModel, downloadStatus func(string, string, string, float64)) error {
+func InstallModelFromGallery(galleries []config.Gallery, name string, basePath string, req GalleryModel, downloadStatus func(string, string, string, float64)) error {

 	applyModel := func(model *GalleryModel) error {
 		name = strings.ReplaceAll(name, string(os.PathSeparator), "__")
@@ -117,7 +113,7 @@ func FindModel(models []*GalleryModel, name string, basePath string) *GalleryMod
 // List available models
 // Models galleries are a list of yaml files that are hosted on a remote server (for example github).
 // Each yaml file contains a list of models that can be downloaded and optionally overrides to define a new model setting.
-func AvailableGalleryModels(galleries []Gallery, basePath string) ([]*GalleryModel, error) {
+func AvailableGalleryModels(galleries []config.Gallery, basePath string) ([]*GalleryModel, error) {
 	var models []*GalleryModel

 	// Get models from galleries
@@ -134,7 +130,7 @@ func AvailableGalleryModels(galleries []Gallery, basePath string) ([]*GalleryMod

 func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) {
 	var refFile string
-	err := downloader.GetURI(url, basePath, func(url string, d []byte) error {
+	err := downloader.DownloadAndUnmarshal(url, basePath, func(url string, d []byte) error {
 		refFile = string(d)
 		if len(refFile) == 0 {
 			return fmt.Errorf("invalid reference file at url %s: %s", url, d)
@@ -146,7 +142,7 @@ func findGalleryURLFromReferenceURL(url string, basePath string) (string, error)
 	return refFile, err
 }

-func getGalleryModels(gallery Gallery, basePath string) ([]*GalleryModel, error) {
+func getGalleryModels(gallery config.Gallery, basePath string) ([]*GalleryModel, error) {
 	var models []*GalleryModel = []*GalleryModel{}

 	if strings.HasSuffix(gallery.URL, ".ref") {
@@ -157,7 +153,7 @@ func getGalleryModels(gallery Gallery, basePath string) ([]*GalleryModel, error)
 		}
 	}

-	err := downloader.GetURI(gallery.URL, basePath, func(url string, d []byte) error {
+	err := downloader.DownloadAndUnmarshal(gallery.URL, basePath, func(url string, d []byte) error {
 		return yaml.Unmarshal(d, &models)
 	})
 	if err != nil {
--- a/core/gallery/gallery_suite_test.go
+++ b/core/gallery/gallery_suite_test.go
--- a/core/gallery/models.go
+++ b/core/gallery/models.go
@@ -5,9 +5,11 @@ import (
 	"os"
 	"path/filepath"

-	"github.com/go-skynet/LocalAI/pkg/downloader"
-	"github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/imdario/mergo"
+	lconfig "github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/pkg/downloader"
+	"github.com/mudler/LocalAI/pkg/utils"
+
 	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v2"
 )
@@ -65,7 +67,7 @@ type PromptTemplate struct {

 func GetGalleryConfigFromURL(url string, basePath string) (Config, error) {
 	var config Config
-	err := downloader.GetURI(url, basePath, func(url string, d []byte) error {
+	err := downloader.DownloadAndUnmarshal(url, basePath, func(url string, d []byte) error {
 		return yaml.Unmarshal(d, &config)
 	})
 	if err != nil {
@@ -172,6 +174,15 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides
 			return fmt.Errorf("failed to marshal updated config YAML: %v", err)
 		}

+		backendConfig := lconfig.BackendConfig{}
+		err = yaml.Unmarshal(updatedConfigYAML, &backendConfig)
+		if err != nil {
+			return fmt.Errorf("failed to unmarshal updated config YAML: %v", err)
+		}
+		if !backendConfig.Validate() {
+			return fmt.Errorf("failed to validate updated config YAML")
+		}
+
 		err = os.WriteFile(configFilePath, updatedConfigYAML, 0600)
 		if err != nil {
 			return fmt.Errorf("failed to write updated config file: %v", err)
--- a/core/gallery/models_test.go
+++ b/core/gallery/models_test.go
@@ -5,7 +5,8 @@ import (
 	"os"
 	"path/filepath"

-	. "github.com/go-skynet/LocalAI/pkg/gallery"
+	"github.com/mudler/LocalAI/core/config"
+	. "github.com/mudler/LocalAI/core/gallery"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	"gopkg.in/yaml.v3"
@@ -54,7 +55,7 @@ var _ = Describe("Model test", func() {
 			err = os.WriteFile(galleryFilePath, out, 0600)
 			Expect(err).ToNot(HaveOccurred())
 			Expect(filepath.IsAbs(galleryFilePath)).To(BeTrue(), galleryFilePath)
-			galleries := []Gallery{
+			galleries := []config.Gallery{
 				{
 					Name: "test",
 					URL:  "file://" + galleryFilePath,
--- a/core/gallery/op.go
+++ b/core/gallery/op.go
@@ -1,5 +1,7 @@
 package gallery

+import "github.com/mudler/LocalAI/core/config"
+
 type GalleryOp struct {
 	Id               string
 	GalleryModelName string
@@ -7,7 +9,7 @@ type GalleryOp struct {
 	Delete           bool

 	Req       GalleryModel
-	Galleries []Gallery
+	Galleries []config.Gallery
 }

 type GalleryOpStatus struct {
--- a/core/gallery/request.go
+++ b/core/gallery/request.go
@@ -3,6 +3,8 @@ package gallery
 import (
 	"fmt"
 	"strings"
+
+	"github.com/mudler/LocalAI/core/config"
 )

 // GalleryModel is the struct used to represent a model in the gallery returned by the endpoint.
@@ -23,7 +25,7 @@ type GalleryModel struct {
 	// AdditionalFiles are used to add additional files to the model
 	AdditionalFiles []File `json:"files,omitempty" yaml:"files,omitempty"`
 	// Gallery is a reference to the gallery which contains the model
-	Gallery Gallery `json:"gallery,omitempty" yaml:"gallery,omitempty"`
+	Gallery config.Gallery `json:"gallery,omitempty" yaml:"gallery,omitempty"`
 	// Installed is used to indicate if the model is installed or not
 	Installed bool `json:"installed,omitempty" yaml:"installed,omitempty"`
 }
--- a/core/gallery/request_test.go
+++ b/core/gallery/request_test.go
@@ -1,7 +1,7 @@
 package gallery_test

 import (
-	. "github.com/go-skynet/LocalAI/pkg/gallery"
+	. "github.com/mudler/LocalAI/core/gallery"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 )
--- a/core/http/app.go
+++ b/core/http/app.go
@@ -6,16 +6,16 @@ import (
 	"net/http"
 	"strings"

-	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/mudler/LocalAI/pkg/utils"

-	"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
-	"github.com/go-skynet/LocalAI/core/http/endpoints/openai"
-	"github.com/go-skynet/LocalAI/core/http/routes"
+	"github.com/mudler/LocalAI/core/http/endpoints/localai"
+	"github.com/mudler/LocalAI/core/http/endpoints/openai"
+	"github.com/mudler/LocalAI/core/http/routes"

-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/services"
+	"github.com/mudler/LocalAI/pkg/model"

 	"github.com/gofiber/contrib/fiberzerolog"
 	"github.com/gofiber/fiber/v2"
--- a/core/http/app_test.go
+++ b/core/http/app_test.go
@@ -13,15 +13,15 @@ import (
 	"path/filepath"
 	"runtime"

-	"github.com/go-skynet/LocalAI/core/config"
-	. "github.com/go-skynet/LocalAI/core/http"
-	"github.com/go-skynet/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/core/startup"
+	"github.com/mudler/LocalAI/core/config"
+	. "github.com/mudler/LocalAI/core/http"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/startup"

-	"github.com/go-skynet/LocalAI/pkg/downloader"
-	"github.com/go-skynet/LocalAI/pkg/gallery"
-	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/pkg/downloader"
+	"github.com/mudler/LocalAI/pkg/model"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	"gopkg.in/yaml.v3"
@@ -74,7 +74,7 @@ func getModelStatus(url string) (response map[string]interface{}) {

 func getModels(url string) (response []gallery.GalleryModel) {
 	// TODO: No tests currently seem to exercise file:// urls. Fix?
-	downloader.GetURI(url, "", func(url string, i []byte) error {
+	downloader.DownloadAndUnmarshal(url, "", func(url string, i []byte) error {
 		// Unmarshal YAML data into a struct
 		return json.Unmarshal(i, &response)
 	})
@@ -247,7 +247,7 @@ var _ = Describe("API test", func() {
 			err = os.WriteFile(filepath.Join(modelDir, "gallery_simple.yaml"), out, 0600)
 			Expect(err).ToNot(HaveOccurred())

-			galleries := []gallery.Gallery{
+			galleries := []config.Gallery{
 				{
 					Name: "test",
 					URL:  "file://" + filepath.Join(modelDir, "gallery_simple.yaml"),
@@ -603,7 +603,7 @@ var _ = Describe("API test", func() {

 			c, cancel = context.WithCancel(context.Background())

-			galleries := []gallery.Gallery{
+			galleries := []config.Gallery{
 				{
 					Name: "model-gallery",
 					URL:  "https://raw.githubusercontent.com/go-skynet/model-gallery/main/index.yaml",
--- a/core/http/ctx/fiber.go
+++ b/core/http/ctx/fiber.go
@@ -4,8 +4,8 @@ import (
 	"fmt"
 	"strings"

-	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 )

--- a/core/http/elements/gallery.go
+++ b/core/http/elements/gallery.go
@@ -6,9 +6,9 @@ import (

 	"github.com/chasefleming/elem-go"
 	"github.com/chasefleming/elem-go/attrs"
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/gallery"
-	"github.com/go-skynet/LocalAI/pkg/xsync"
+	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/services"
+	"github.com/mudler/LocalAI/pkg/xsync"
 )

 const (
--- a/core/http/endpoints/elevenlabs/tts.go
+++ b/core/http/endpoints/elevenlabs/tts.go
@@ -1,13 +1,13 @@
 package elevenlabs

 import (
-	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
-	fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/config"
+	fiberContext "github.com/mudler/LocalAI/core/http/ctx"
+	"github.com/mudler/LocalAI/pkg/model"

-	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/schema"
 	"github.com/rs/zerolog/log"
 )

--- a/core/http/endpoints/jina/rerank.go
+++ b/core/http/endpoints/jina/rerank.go
@@ -1,14 +1,14 @@
 package jina

 import (
-	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/config"

-	fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
-	"github.com/go-skynet/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
+	fiberContext "github.com/mudler/LocalAI/core/http/ctx"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 )

--- a/core/http/endpoints/localai/backend_monitor.go
+++ b/core/http/endpoints/localai/backend_monitor.go
@@ -1,9 +1,9 @@
 package localai

 import (
-	"github.com/go-skynet/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/core/services"
 	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/services"
 )

 func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
--- a/core/http/endpoints/localai/gallery.go
+++ b/core/http/endpoints/localai/gallery.go
@@ -5,15 +5,16 @@ import (
 	"fmt"
 	"slices"

-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/gallery"
 	"github.com/gofiber/fiber/v2"
 	"github.com/google/uuid"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/services"
 	"github.com/rs/zerolog/log"
 )

 type ModelGalleryEndpointService struct {
-	galleries      []gallery.Gallery
+	galleries      []config.Gallery
 	modelPath      string
 	galleryApplier *services.GalleryService
 }
@@ -24,7 +25,7 @@ type GalleryModel struct {
 	gallery.GalleryModel
 }

-func CreateModelGalleryEndpointService(galleries []gallery.Gallery, modelPath string, galleryApplier *services.GalleryService) ModelGalleryEndpointService {
+func CreateModelGalleryEndpointService(galleries []config.Gallery, modelPath string, galleryApplier *services.GalleryService) ModelGalleryEndpointService {
 	return ModelGalleryEndpointService{
 		galleries:      galleries,
 		modelPath:      modelPath,
@@ -129,12 +130,12 @@ func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fib

 func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		input := new(gallery.Gallery)
+		input := new(config.Gallery)
 		// Get input data from the request body
 		if err := c.BodyParser(input); err != nil {
 			return err
 		}
-		if slices.ContainsFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
+		if slices.ContainsFunc(mgs.galleries, func(gallery config.Gallery) bool {
 			return gallery.Name == input.Name
 		}) {
 			return fmt.Errorf("%s already exists", input.Name)
@@ -151,17 +152,17 @@ func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.

 func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		input := new(gallery.Gallery)
+		input := new(config.Gallery)
 		// Get input data from the request body
 		if err := c.BodyParser(input); err != nil {
 			return err
 		}
-		if !slices.ContainsFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
+		if !slices.ContainsFunc(mgs.galleries, func(gallery config.Gallery) bool {
 			return gallery.Name == input.Name
 		}) {
 			return fmt.Errorf("%s is not currently registered", input.Name)
 		}
-		mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
+		mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery config.Gallery) bool {
 			return gallery.Name == input.Name
 		})
 		return c.Send(nil)
--- a/core/http/endpoints/localai/metrics.go
+++ b/core/http/endpoints/localai/metrics.go
@@ -3,9 +3,9 @@ package localai
 import (
 	"time"

-	"github.com/go-skynet/LocalAI/core/services"
 	"github.com/gofiber/fiber/v2"
 	"github.com/gofiber/fiber/v2/middleware/adaptor"
+	"github.com/mudler/LocalAI/core/services"
 	"github.com/prometheus/client_golang/prometheus/promhttp"
 )

--- a/core/http/endpoints/localai/stores.go
+++ b/core/http/endpoints/localai/stores.go
@@ -1,12 +1,12 @@
 package localai

 import (
-	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/store"
 	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/store"
 )

 func StoresSetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
--- a/core/http/endpoints/localai/tts.go
+++ b/core/http/endpoints/localai/tts.go
@@ -1,13 +1,13 @@
 package localai

 import (
-	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
-	fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/config"
+	fiberContext "github.com/mudler/LocalAI/core/http/ctx"
+	"github.com/mudler/LocalAI/pkg/model"

-	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/schema"
 	"github.com/rs/zerolog/log"
 )

--- a/core/http/endpoints/localai/welcome.go
+++ b/core/http/endpoints/localai/welcome.go
@@ -1,11 +1,11 @@
 package localai

 import (
-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/internal"
-	"github.com/go-skynet/LocalAI/pkg/gallery"
-	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/internal"
+	"github.com/mudler/LocalAI/pkg/model"
 )

 func WelcomeEndpoint(appConfig *config.ApplicationConfig,
--- a/core/http/endpoints/openai/assistant.go
+++ b/core/http/endpoints/openai/assistant.go
@@ -9,10 +9,10 @@ import (
 	"sync/atomic"
 	"time"

-	"github.com/go-skynet/LocalAI/core/config"
-	model "github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/config"
+	model "github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/utils"
 	"github.com/rs/zerolog/log"
 )

@@ -339,7 +339,7 @@ func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
 			}
 		}

-		return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find "))
+		return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find %q", assistantID))
 	}
 }

--- a/core/http/endpoints/openai/assistant_test.go
+++ b/core/http/endpoints/openai/assistant_test.go
@@ -4,7 +4,6 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
-	"io/ioutil"
 	"net/http"
 	"net/http/httptest"
 	"os"
@@ -13,9 +12,9 @@ import (
 	"testing"
 	"time"

-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/stretchr/testify/assert"
 )

@@ -183,7 +182,7 @@ func TestAssistantEndpoints(t *testing.T) {
 				assert.NoError(t, err)
 				assert.Equal(t, tt.expectedStatus, response.StatusCode)
 				if tt.expectedStatus != fiber.StatusOK {
-					all, _ := ioutil.ReadAll(response.Body)
+					all, _ := io.ReadAll(response.Body)
 					assert.Equal(t, tt.expectedStringResult, string(all))
 				} else {
 					var result []Assistant
@@ -279,6 +278,7 @@ func TestAssistantEndpoints(t *testing.T) {
 		assert.NoError(t, err)
 		var getAssistant Assistant
 		err = json.NewDecoder(modifyResponse.Body).Decode(&getAssistant)
+		assert.NoError(t, err)

 		t.Cleanup(cleanupAllAssistants(t, app, []string{getAssistant.ID}))

@@ -391,7 +391,10 @@ func createAssistantFile(app *fiber.App, afr AssistantFileRequest, assistantId s
 	}

 	var assistantFile AssistantFile
-	all, err := ioutil.ReadAll(resp.Body)
+	all, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return AssistantFile{}, resp, err
+	}
 	err = json.NewDecoder(strings.NewReader(string(all))).Decode(&assistantFile)
 	if err != nil {
 		return AssistantFile{}, resp, err
@@ -422,8 +425,7 @@ func createAssistant(app *fiber.App, ar AssistantRequest) (Assistant, *http.Resp

 	var resultAssistant Assistant
 	err = json.NewDecoder(strings.NewReader(string(bodyString))).Decode(&resultAssistant)
-
-	return resultAssistant, resp, nil
+	return resultAssistant, resp, err
 }

 func cleanupAllAssistants(t *testing.T, app *fiber.App, ids []string) func() {
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -8,13 +8,13 @@ import (
 	"strings"
 	"time"

-	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/pkg/functions"
-	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/google/uuid"
+	"github.com/mudler/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/functions"
+	model "github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 	"github.com/valyala/fasthttp"
 )
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@@ -8,14 +8,14 @@ import (
 	"fmt"
 	"time"

-	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/config"

-	"github.com/go-skynet/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/pkg/functions"
-	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/google/uuid"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/functions"
+	model "github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 	"github.com/valyala/fasthttp"
 )
--- a/core/http/endpoints/openai/edit.go
+++ b/core/http/endpoints/openai/edit.go
@@ -5,13 +5,13 @@ import (
 	"fmt"
 	"time"

-	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/config"

-	"github.com/go-skynet/LocalAI/core/schema"
-	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/google/uuid"
+	"github.com/mudler/LocalAI/core/schema"
+	model "github.com/mudler/LocalAI/pkg/model"

 	"github.com/rs/zerolog/log"
 )
--- a/core/http/endpoints/openai/embeddings.go
+++ b/core/http/endpoints/openai/embeddings.go
@@ -5,12 +5,12 @@ import (
 	"fmt"
 	"time"

-	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/pkg/model"

-	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/google/uuid"
+	"github.com/mudler/LocalAI/core/schema"

 	"github.com/gofiber/fiber/v2"
 	"github.com/rs/zerolog/log"
--- a/core/http/endpoints/openai/files.go
+++ b/core/http/endpoints/openai/files.go
@@ -8,10 +8,10 @@ import (
 	"sync/atomic"
 	"time"

-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"

-	"github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/pkg/utils"
 )

 var UploadedFiles []File
--- a/core/http/endpoints/openai/files_test.go
+++ b/core/http/endpoints/openai/files_test.go
@@ -13,10 +13,10 @@ import (

 	"github.com/rs/zerolog/log"

-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"

-	utils2 "github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/gofiber/fiber/v2"
+	utils2 "github.com/mudler/LocalAI/pkg/utils"
 	"github.com/stretchr/testify/assert"

 	"testing"
--- a/core/http/endpoints/openai/image.go
+++ b/core/http/endpoints/openai/image.go
@@ -13,14 +13,14 @@ import (
 	"strings"
 	"time"

-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/google/uuid"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/schema"

-	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/backend"

-	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
+	model "github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 )

--- a/core/http/endpoints/openai/inference.go
+++ b/core/http/endpoints/openai/inference.go
@@ -1,11 +1,11 @@
 package openai

 import (
-	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/config"

-	"github.com/go-skynet/LocalAI/core/schema"
-	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/core/schema"
+	model "github.com/mudler/LocalAI/pkg/model"
 )

 func ComputeChoices(
--- a/core/http/endpoints/openai/list.go
+++ b/core/http/endpoints/openai/list.go
@@ -1,9 +1,9 @@
 package openai

 import (
-	"github.com/go-skynet/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/core/services"
 	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/services"
 )

 func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error {
--- a/core/http/endpoints/openai/request.go
+++ b/core/http/endpoints/openai/request.go
@@ -2,19 +2,16 @@ package openai

 import (
 	"context"
-	"encoding/base64"
 	"encoding/json"
 	"fmt"
-	"io"
-	"net/http"
-	"strings"

-	"github.com/go-skynet/LocalAI/core/config"
-	fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
-	"github.com/go-skynet/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/pkg/functions"
-	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/config"
+	fiberContext "github.com/mudler/LocalAI/core/http/ctx"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/functions"
+	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/utils"
 	"github.com/rs/zerolog/log"
 )

@@ -39,41 +36,6 @@ func readRequest(c *fiber.Ctx, ml *model.ModelLoader, o *config.ApplicationConfi
 	return modelFile, input, err
 }

-// this function check if the string is an URL, if it's an URL downloads the image in memory
-// encodes it in base64 and returns the base64 string
-func getBase64Image(s string) (string, error) {
-	if strings.HasPrefix(s, "http") {
-		// download the image
-		resp, err := http.Get(s)
-		if err != nil {
-			return "", err
-		}
-		defer resp.Body.Close()
-
-		// read the image data into memory
-		data, err := io.ReadAll(resp.Body)
-		if err != nil {
-			return "", err
-		}
-
-		// encode the image data in base64
-		encoded := base64.StdEncoding.EncodeToString(data)
-
-		// return the base64 string
-		return encoded, nil
-	}
-
-	// if the string instead is prefixed with "data:image/...;base64,", drop it
-	dropPrefix := []string{"data:image/jpeg;base64,", "data:image/png;base64,"}
-	for _, prefix := range dropPrefix {
-		if strings.HasPrefix(s, prefix) {
-			return strings.ReplaceAll(s, prefix, ""), nil
-		}
-	}
-
-	return "", fmt.Errorf("not valid string")
-}
-
 func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest) {
 	if input.Echo {
 		config.Echo = input.Echo
@@ -187,7 +149,7 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
 					input.Messages[i].StringContent = pp.Text
 				} else if pp.Type == "image_url" {
 					// Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64:
-					base64, err := getBase64Image(pp.ImageURL.URL)
+					base64, err := utils.GetImageURLAsBase64(pp.ImageURL.URL)
 					if err == nil {
 						input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
 						// set a placeholder for each image
@@ -295,5 +257,9 @@ func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *c
 	// Set the parameters for the language model prediction
 	updateRequestConfig(cfg, input)

+	if !cfg.Validate() {
+		return nil, nil, fmt.Errorf("failed to validate config")
+	}
+
 	return cfg, input, err
 }
--- a/core/http/endpoints/openai/transcription.go
+++ b/core/http/endpoints/openai/transcription.go
@@ -8,9 +8,9 @@ import (
 	"path"
 	"path/filepath"

-	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
-	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/config"
+	model "github.com/mudler/LocalAI/pkg/model"

 	"github.com/gofiber/fiber/v2"
 	"github.com/rs/zerolog/log"
@@ -32,7 +32,7 @@ func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a

 		config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
 		if err != nil {
-			return fmt.Errorf("failed reading parameters from request:%w", err)
+			return fmt.Errorf("failed reading parameters from request: %w", err)
 		}
 		// retrieve the file data from the request
 		file, err := c.FormFile("file")
@@ -65,7 +65,7 @@ func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a

 		log.Debug().Msgf("Audio file copied to: %+v", dst)

-		tr, err := backend.ModelTranscription(dst, input.Language, ml, *config, appConfig)
+		tr, err := backend.ModelTranscription(dst, input.Language, input.Translate, ml, *config, appConfig)
 		if err != nil {
 			return err
 		}
--- a/core/http/render.go
+++ b/core/http/render.go
@@ -7,10 +7,10 @@ import (
 	"net/http"

 	"github.com/Masterminds/sprig/v3"
-	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/gofiber/fiber/v2"
 	fiberhtml "github.com/gofiber/template/html/v2"
 	"github.com/microcosm-cc/bluemonday"
+	"github.com/mudler/LocalAI/core/schema"
 	"github.com/russross/blackfriday"
 )

@@ -21,14 +21,13 @@ func notFoundHandler(c *fiber.Ctx) error {
 	// Check if the request accepts JSON
 	if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 {
 		// The client expects a JSON response
-		c.Status(fiber.StatusNotFound).JSON(schema.ErrorResponse{
+		return c.Status(fiber.StatusNotFound).JSON(schema.ErrorResponse{
 			Error: &schema.APIError{Message: "Resource not found", Code: fiber.StatusNotFound},
 		})
 	} else {
 		// The client expects an HTML response
-		c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{})
+		return c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{})
 	}
-	return nil
 }

 func renderEngine() *fiberhtml.Engine {
--- a/core/http/routes/elevenlabs.go
+++ b/core/http/routes/elevenlabs.go
@@ -1,10 +1,10 @@
 package routes

 import (
-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs"
-	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/http/endpoints/elevenlabs"
+	"github.com/mudler/LocalAI/pkg/model"
 )

 func RegisterElevenLabsRoutes(app *fiber.App,
--- a/core/http/routes/jina.go
+++ b/core/http/routes/jina.go
@@ -1,11 +1,11 @@
 package routes

 import (
-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/http/endpoints/jina"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/http/endpoints/jina"

-	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/pkg/model"
 )

 func RegisterJINARoutes(app *fiber.App,
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -1,13 +1,13 @@
 package routes

 import (
-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/internal"
-	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/gofiber/swagger"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/http/endpoints/localai"
+	"github.com/mudler/LocalAI/core/services"
+	"github.com/mudler/LocalAI/internal"
+	"github.com/mudler/LocalAI/pkg/model"
 )

 func RegisterLocalAIRoutes(app *fiber.App,
--- a/core/http/routes/openai.go
+++ b/core/http/routes/openai.go
@@ -1,12 +1,12 @@
 package routes

 import (
-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
-	"github.com/go-skynet/LocalAI/core/http/endpoints/openai"
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/http/endpoints/localai"
+	"github.com/mudler/LocalAI/core/http/endpoints/openai"
+	"github.com/mudler/LocalAI/core/services"
+	"github.com/mudler/LocalAI/pkg/model"
 )

 func RegisterOpenAIRoutes(app *fiber.App,
--- a/core/http/routes/ui.go
+++ b/core/http/routes/ui.go
@@ -6,14 +6,14 @@ import (
 	"sort"
 	"strings"

-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/http/elements"
-	"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
-	"github.com/go-skynet/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/internal"
-	"github.com/go-skynet/LocalAI/pkg/gallery"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/xsync"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/http/elements"
+	"github.com/mudler/LocalAI/core/http/endpoints/localai"
+	"github.com/mudler/LocalAI/core/services"
+	"github.com/mudler/LocalAI/internal"
+	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/xsync"
 	"github.com/rs/zerolog/log"

 	"github.com/gofiber/fiber/v2"
@@ -26,6 +26,7 @@ func RegisterUIRoutes(app *fiber.App,
 	appConfig *config.ApplicationConfig,
 	galleryService *services.GalleryService,
 	auth func(*fiber.Ctx) error) {
+	tmpLMS := services.NewListModelsService(ml, cl, appConfig) // TODO: once createApplication() is fully in use, reference the central instance.

 	// keeps the state of models that are being installed from the UI
 	var processingModels = xsync.NewSyncedMap[string, string]()
@@ -235,7 +236,7 @@ func RegisterUIRoutes(app *fiber.App,

 	// Show the Chat page
 	app.Get("/chat/:model", auth, func(c *fiber.Ctx) error {
-		backendConfigs := cl.GetAllBackendConfigs()
+		backendConfigs, _ := tmpLMS.ListModels("", true)

 		summary := fiber.Map{
 			"Title":        "LocalAI - Chat with " + c.Params("model"),
@@ -249,7 +250,7 @@ func RegisterUIRoutes(app *fiber.App,
 	})

 	app.Get("/talk/", auth, func(c *fiber.Ctx) error {
-		backendConfigs := cl.GetAllBackendConfigs()
+		backendConfigs, _ := tmpLMS.ListModels("", true)

 		if len(backendConfigs) == 0 {
 			// If no model is available redirect to the index which suggests how to install models
@@ -259,7 +260,7 @@ func RegisterUIRoutes(app *fiber.App,
 		summary := fiber.Map{
 			"Title":        "LocalAI - Talk",
 			"ModelsConfig": backendConfigs,
-			"Model":        backendConfigs[0].Name,
+			"Model":        backendConfigs[0].ID,
 			"Version":      internal.PrintableVersion(),
 		}

@@ -269,7 +270,7 @@ func RegisterUIRoutes(app *fiber.App,

 	app.Get("/chat/", auth, func(c *fiber.Ctx) error {

-		backendConfigs := cl.GetAllBackendConfigs()
+		backendConfigs, _ := tmpLMS.ListModels("", true)

 		if len(backendConfigs) == 0 {
 			// If no model is available redirect to the index which suggests how to install models
@@ -277,9 +278,9 @@ func RegisterUIRoutes(app *fiber.App,
 		}

 		summary := fiber.Map{
-			"Title":        "LocalAI - Chat with " + backendConfigs[0].Name,
+			"Title":        "LocalAI - Chat with " + backendConfigs[0].ID,
 			"ModelsConfig": backendConfigs,
-			"Model":        backendConfigs[0].Name,
+			"Model":        backendConfigs[0].ID,
 			"Version":      internal.PrintableVersion(),
 		}

--- a/core/http/views/chat.html
+++ b/core/http/views/chat.html
@@ -100,10 +100,10 @@ SOFTWARE.
        <option value="" disabled class="text-gray-400" >Select a model</option>
        {{ $model:=.Model}}
        {{ range .ModelsConfig }}
-        {{ if eq .Name $model }}
-        <option value="/chat/{{.Name}}" selected  class="bg-gray-700 text-white">{{.Name}}</option>
+        {{ if eq .ID $model }}
+        <option value="/chat/{{.ID}}" selected  class="bg-gray-700 text-white">{{.ID}}</option>
        {{ else }}
-        <option value="/chat/{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
+        <option value="/chat/{{.ID}}" class="bg-gray-700 text-white">{{.ID}}</option>
        {{ end }}
        {{ end }}
      </select>
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Ettore Di Giacinto	95f773ee4b	experiment: build with a single image with all the deps Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-07-01 19:43:18 +02:00
LocalAI [bot]	ad85c5a1e7	models(gallery): ⬆️ update checksum (#2690 ) ⬆️ Checksum updates in gallery/index.yaml Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-07-01 00:23:58 +00:00
LocalAI [bot]	421eb8a727	⬆️ Update ggerganov/llama.cpp (#2689 ) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-07-01 00:20:11 +00:00
Ettore Di Giacinto	b7ff441cc0	models(gallery): add llama3-turbcat-instruct-8b (#2687 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-30 14:40:01 +02:00
LocalAI [bot]	83d867ad46	⬆️ Update ggerganov/llama.cpp (#2683 ) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-06-30 01:51:51 +00:00
Ettore Di Giacinto	6acba2bcbe	models(gallery): add llm-compiler (#2684 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-29 23:32:01 +02:00
Ettore Di Giacinto	6a2a10603c	fix(talk): identify the model by ID instead of name (#2685 ) This fixes a breakage in rendering the template. Now the models passed to the renderer have the ID field rather than Name Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-29 23:31:50 +02:00
Ettore Di Giacinto	356907a5cf	models(gallery): add llama3-8b-darkidol-2.1-uncensored-1048k-iq-imatrix (#2686 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-29 23:31:37 +02:00
Ettore Di Giacinto	7ab7a188d0	models(gallery): add bungo-l3-8b-iq-imatrix (#2682 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-29 22:05:27 +02:00
Ettore Di Giacinto	ff1a5bfc62	models(gallery): add l3-aethora-15b-v2 (#2679 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-29 11:33:00 +02:00
LocalAI [bot]	522f185baf	⬆️ Update docs version mudler/LocalAI (#2676 ) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-06-29 01:20:50 +00:00
LocalAI [bot]	f7b5a4ca7d	models(gallery): ⬆️ update checksum (#2678 ) ⬆️ Checksum updates in gallery/index.yaml Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-06-29 01:06:08 +00:00
LocalAI [bot]	1d30955677	⬆️ Update ggerganov/llama.cpp (#2677 ) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-06-29 00:43:02 +00:00
Ettore Di Giacinto	d3307e93d3	models(gallery): add new-dawn-llama (#2672 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-28 23:33:27 +02:00
LocalAI [bot]	8d9a452e4b	⬆️ Update ggerganov/llama.cpp (#2671 ) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-06-28 10:09:01 +02:00
Ettore Di Giacinto	466eb82845	ci: add latest tag for vulkan images Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2024-06-28 09:04:33 +02:00
LocalAI [bot]	7e562d10a3	⬆️ Update ggerganov/llama.cpp (#2652 ) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-06-28 01:30:37 +00:00
Ettore Di Giacinto	7b1e792732	deps(llama.cpp): bump to latest, update build variables (#2669 ) * arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * deps(llama.cpp): update build variables to follow upstream Update build recipes with https://github.com/ggerganov/llama.cpp/pull/8006 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Disable shared libs by default in llama.cpp Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Disable shared libs in llama.cpp Makefile Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Disable metal embedding for now, until it is tested Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(mac): explicitly enable metal Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * debug Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix typo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-06-27 23:10:04 +02:00
Ettore Di Giacinto	30b883affe	models(gallery): add gemma2-9b-it and gemma2-27b-it (#2670 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-27 23:08:22 +02:00
Tachi Koma	20ec4d0342	telegram-bot example: Update LocalAI version (fixes #2638 ) (#2640 ) * Update LocalAI version for telegram-bot example (fixes #2638) * Update examples/telegram-bot/docker-compose.yml Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com> --------- Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com> Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2024-06-27 14:35:50 +02:00
Ettore Di Giacinto	a9f8460086	fix: typo in gallery model Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2024-06-27 08:54:13 +02:00
LocalAI [bot]	98b3b2b1ab	feat(swagger): update swagger (#2666 ) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-06-26 23:12:53 +00:00
Ettore Di Giacinto	e8bc0a789b	models(gallery): add arcee-spark (#2665 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-26 20:06:40 +02:00
Ettore Di Giacinto	2b6a2c7dde	models(gallery): add einstein v7 qwen2 (#2664 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-26 17:52:52 +02:00
Ettore Di Giacinto	c8c8238f9d	models(gallery): add dark-idol-1.2 (#2663 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-26 17:52:17 +02:00
Ettore Di Giacinto	3eaf59021c	feat(grammar): expose properties_order (#2662 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-26 14:59:02 +02:00
Ettore Di Giacinto	a8bfb6f9c2	feat(options): add `repeat_last_n` (#2660 ) feat(options): add repeat_last_n Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-26 14:58:50 +02:00
Ettore Di Giacinto	b783c811db	feat(build): only build llama.cpp relevant targets (#2659 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-26 14:58:38 +02:00
Ettore Di Giacinto	59af0e77af	feat(ui): allow to select between all the available models in the chat (#2657 ) feat(ui): let the chat to select from all the detected models Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-25 20:22:17 +02:00
Ettore Di Giacinto	5d83c8d3a2	Update quickstart.md Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2024-06-25 19:23:58 +02:00
Ettore Di Giacinto	8f968d0341	Update quickstart.md Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2024-06-25 19:18:43 +02:00
Ettore Di Giacinto	f93fe30350	ci: vulkan not ready for arm64 yet Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2024-06-25 18:11:57 +02:00
Ettore Di Giacinto	784ccf97ba	ci: adjust max-parallel Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2024-06-25 15:14:43 +02:00
LocalAI [bot]	a0163dafce	feat(swagger): update swagger (#2651 ) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-06-25 10:50:52 +02:00
Ettore Di Giacinto	f072cb3cd0	fix(cli): remove duplicate alias (#2654 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-25 10:08:13 +02:00
Ettore Di Giacinto	e84b31935c	feat(vulkan): add vulkan support to the llama.cpp backend (#2648 ) feat(vulkan): add vulkan support to llama.cpp Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-24 20:04:58 +02:00
Ettore Di Giacinto	03b1cf51fd	feat(whisper): add translate option (#2649 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-24 19:21:22 +02:00
Ettore Di Giacinto	9e6dec0bc4	fix(install.sh): not all systems have nproc Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-24 18:21:20 +02:00
Ettore Di Giacinto	04b01cd62c	ci: put a cap on parallel runs Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-24 18:08:09 +02:00
Ettore Di Giacinto	a181dd0ebc	refactor: gallery inconsistencies (#2647 ) * refactor(gallery): move under core/ Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(unarchive): do not allow symlinks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-24 17:32:12 +02:00
Ettore Di Giacinto	69206fcd4b	fix(install.sh): move ARCH detection so it works also for mac (#2646 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-24 10:34:35 +02:00
Ettore Di Giacinto	2c94e15746	fix(install.sh): fix version typo (#2645 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-24 10:30:17 +02:00
Dave	12513ebae0	rf: centralize base64 image handling (#2595 ) contains simple fixes to warnings and errors, removes a broken / outdated test, runs go mod tidy, and as the actual change, centralizes base64 image handling Signed-off-by: Dave Lee <dave@gray101.com>	2024-06-24 08:34:36 +02:00
LocalAI [bot]	4156a4f15f	⬆️ Update ggerganov/llama.cpp (#2632 ) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-06-23 22:21:38 +00:00
Ettore Di Giacinto	491bb4f174	Update hermes-2-pro-mistral.yaml Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2024-06-23 15:17:41 +02:00
Sertaç Özercan	5866fc8ded	chore: fix go.mod module (#2635 ) Signed-off-by: Sertac Ozercan <sozercan@gmail.com>	2024-06-23 08:24:36 +00:00
Ettore Di Giacinto	eb4cd78ca6	ci: run master jobs on self-hosted Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-23 10:15:53 +02:00
Ettore Di Giacinto	40ce71855a	ci: disable max-parallelism on master	2024-06-22 23:28:09 +02:00
Ettore Di Giacinto	9c0d0afd09	ci: bump parallel jobs (#2633 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-22 23:24:46 +02:00
Ettore Di Giacinto	0f9aa1ef91	fix(install.sh): install CUDA toolkit only if CUDA is detected Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-22 12:21:59 +02:00
Ettore Di Giacinto	3ee5ceb9fa	Update kubernetes.md Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2024-06-22 12:16:55 +02:00
Ettore Di Giacinto	1bd72a3be5	Update kubernetes.md Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2024-06-22 12:16:27 +02:00
Ettore Di Giacinto	fbd14118bf	Update kubernetes.md Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2024-06-22 12:14:53 +02:00
Ettore Di Giacinto	515d98b978	Update model-gallery.md Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2024-06-22 12:10:49 +02:00
Ettore Di Giacinto	789cf6c599	Update model-gallery.md Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2024-06-22 12:10:27 +02:00
Ettore Di Giacinto	0bc82d7270	fix(install.sh): properly detect suse distros Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-22 12:08:48 +02:00
Ettore Di Giacinto	9a7ad75bff	docs: update to include installer and update advanced YAML options (#2631 ) * docs: update quickstart and advanced sections Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * docs: improvements Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * examples(kubernete): add nvidia example Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-22 12:00:38 +02:00
Ettore Di Giacinto	9fb3e4040b	Update README.md Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2024-06-22 10:29:46 +02:00
Ettore Di Giacinto	070fd1b9da	Update distributed_inferencing.md Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2024-06-22 10:06:09 +02:00
Ettore Di Giacinto	dda5b9f260	Update distributed_inferencing.md Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2024-06-22 10:05:48 +02:00
Ettore Di Giacinto	8d84dd4f88	fix(worker): use dynaload for single binaries (#2620 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-22 09:33:18 +02:00
Ettore Di Giacinto	f569237a50	feat(oci): support OCI images and Ollama models (#2628 ) * Support specifying oci:// and ollama:// for model URLs Fixes: https://github.com/mudler/LocalAI/issues/2527 Fixes: https://github.com/mudler/LocalAI/issues/1028 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Lower watcher warnings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Allow to install ollama models from CLI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixup tests Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Do not keep file ownership Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Skip test on darwin Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-22 08:17:41 +02:00
LocalAI [bot]	e265a618d9	models(gallery): ⬆️ update checksum (#2630 ) ⬆️ Checksum updates in gallery/index.yaml Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-06-22 04:45:41 +00:00
LocalAI [bot]	533343c84f	⬆️ Update ggerganov/llama.cpp (#2629 ) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-06-22 02:28:06 +00:00
Ettore Di Giacinto	260f2e1d94	fix(install.sh): correctly handle systemd service installation (#2627 ) Fixup install.sh systemd service installation Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-21 23:56:06 +02:00
Ettore Di Giacinto	964732590d	models(gallery): add hermes-2-theta-llama-3-70b (#2626 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-21 19:41:49 +02:00
LocalAI [bot]	70a2bfe82e	⬆️ Update ggerganov/llama.cpp (#2617 ) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-06-21 06:41:34 +00:00
Ettore Di Giacinto	ba2d969c44	models(gallery): add qwen2-1.5b-ita (#2615 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-20 20:35:53 +02:00
Ettore Di Giacinto	d3c78cf4d7	models(gallery): add magnum-72b-v1 (#2614 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-20 20:31:23 +02:00
Ettore Di Giacinto	34afd891a6	models(gallery): add llama3-8b-darkidol-1.1-iq-imatrix (#2613 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-20 20:30:47 +02:00
Ettore Di Giacinto	d3137775a1	models(gallery): add llama-3-cursedstock-v1.8-8b-iq-imatrix (#2612 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-20 20:14:48 +02:00
Ettore Di Giacinto	e1772026a1	models(gallery): add llama-3-sec-chat (#2611 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-20 20:14:03 +02:00
LocalAI [bot]	d0423254dd	⬆️ Update ggerganov/llama.cpp (#2606 ) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-06-20 00:58:40 +00:00
LocalAI [bot]	db0e52ae9d	⬆️ Update docs version mudler/LocalAI (#2605 ) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-06-20 00:05:19 +00:00
LocalAI [bot]	4f030f9cd3	models(gallery): ⬆️ update checksum (#2607 ) ⬆️ Checksum updates in gallery/index.yaml Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-06-19 22:20:17 +02:00
Ettore Di Giacinto	60fb45eb97	models(gallery): add l3-umbral-mind-rp-v1.0-8b-iq-imatrix (#2608 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-19 22:19:40 +02:00
Rene Leonhardt	43f0688a95	feat: Upgrade to CUDA 12.5 (#2601 ) Signed-off-by: Rene Leonhardt <65483435+reneleonhardt@users.noreply.github.com>	2024-06-19 17:50:49 +02:00
LocalAI [bot]	8142bdc48f	⬆️ Update ggerganov/llama.cpp (#2603 ) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-06-19 00:28:50 +00:00
Ettore Di Giacinto	89a11e15e7	fix(single-binary): bundle ld.so (#2602 ) * debug * fix copy command/silly muscle memory Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * remove tmate * Debugging * Start binary with ld.so if present in libdir Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * small refactor Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-18 22:43:43 +02:00
Ettore Di Giacinto	06de542032	feat(talk): display an informative box, better colors (#2600 ) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-18 15:10:01 +02:00
Ettore Di Giacinto	ecbb61cbf4	feat(sd-3): add stablediffusion 3 support (#2591 ) * feat(sd-3): add stablediffusion 3 support Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * deps(diffusers): add sentencepiece Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * models(gallery): add stablediffusion-3 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2024-06-18 15:09:39 +02:00
Ettore Di Giacinto	7f13e3a783	docs(models): fixup top message Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2024-06-18 08:42:30 +02:00
LocalAI [bot]	c926469b9c	⬆️ Update ggerganov/llama.cpp (#2594 ) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-06-18 03:06:31 +00:00
LocalAI [bot]	c30b57a629	⬆️ Update docs version mudler/LocalAI (#2593 ) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2024-06-18 01:47:04 +00:00