Compare commits

..

2 Commits

Author              SHA1        Message                                                         Date
Ettore Di Giacinto  f3e170b79f  debu2                                                           2024-07-22 12:21:55 +02:00
Ettore Di Giacinto  84ab2f3d11  debug (Signed-off-by: Ettore Di Giacinto <mudler@localai.io>)  2024-07-22 10:17:41 +02:00
125 changed files with 732 additions and 2411 deletions

.github/bump_deps.sh (vendored), 13 changed lines
View File

@@ -6,17 +6,4 @@ VAR=$3
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
# Read $VAR from Makefile (only first match)
set +e
CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)"
set -e
sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
if [ -z "$CURRENT_COMMIT" ]; then
echo "Could not find $VAR in Makefile."
exit 0
fi
echo "Updated $VAR from $CURRENT_COMMIT to $LAST_COMMIT." > "$REPO_message.txt"
echo "https://github.com/$REPO/compare/$CURRENT_COMMIT..$LAST_COMMIT" >> "$REPO_message.txt"

View File

@@ -40,14 +40,8 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Bump dependencies 🔧
id: bump
run: |
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
{
echo 'message<<EOF'
cat "${{ matrix.repository }}_message.txt"
echo EOF
} >> "$GITHUB_OUTPUT"
- name: Create Pull Request
uses: peter-evans/create-pull-request@v6
with:
@@ -56,7 +50,7 @@ jobs:
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
title: 'chore: :arrow_up: Update ${{ matrix.repository }}'
branch: "update/${{ matrix.variable }}"
body: ${{ steps.bump.outputs.message }}
body: Bump of ${{ matrix.repository }} version
signoff: true

View File

@@ -41,7 +41,7 @@ jobs:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
title: 'chore(model-gallery): :arrow_up: update checksum'
title: 'models(gallery): :arrow_up: update checksum'
branch: "update/checksum"
body: Updating checksums in gallery/index.yaml
signoff: true

View File

@@ -47,7 +47,7 @@ jobs:
# makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -120,7 +120,7 @@ jobs:
# makeflags: "--jobs=3 --output-sync=target"
# - build-type: 'cublas'
# cuda-major-version: "12"
# cuda-minor-version: "0"
# cuda-minor-version: "4"
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-cublas-cuda12-ffmpeg-core'

View File

@@ -75,7 +75,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12'
@@ -100,7 +100,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -285,7 +285,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-core'
@@ -307,7 +307,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'

View File

@@ -31,10 +31,11 @@ jobs:
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
- name: Install CUDA Dependencies
run: |
@@ -150,7 +151,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake libgmock-dev
- name: Intel Dependencies
run: |
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -251,7 +252,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- name: Build stablediffusion

View File

@@ -70,7 +70,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
sudo apt-get install build-essential curl ffmpeg
sudo apt-get install -y libgmock-dev
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \

View File

@@ -24,7 +24,7 @@ RUN apt-get update && \
cmake \
curl \
git \
unzip upx-ucl && \
unzip && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
@@ -99,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=0
ARG CUDA_MINOR_VERSION=4
ENV BUILD_TYPE=${BUILD_TYPE}

View File

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=0d6fb52be0c1b7e77eb855f3adc4952771c8ce4c
CPPLLAMA_VERSION?=07283b1a90e1320aae4762c7e03c879043910252
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -20,7 +20,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=fe36c909715e6751277ddb020e7892c7670b61d4
WHISPER_CPP_VERSION?=f68298ce06ca3edd6e6f3f21c3d0bb5f073942c3
# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -58,7 +58,7 @@ RANDOM := $(shell bash -c 'echo $$RANDOM')
VERSION?=$(shell git describe --always --tags || echo "dev" )
# go tool nm ./local-ai | grep Commit
LD_FLAGS?=-s -w
LD_FLAGS?=
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
@@ -72,14 +72,6 @@ WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)
UPX?=
# check if upx exists
ifeq (, $(shell which upx))
UPX=
else
UPX=$(shell which upx)
endif
# Default Docker bridge IP
E2E_BRIDGE_IP?=172.17.0.1
@@ -385,7 +377,7 @@ build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
ls -liah backend-assets/grpc
ifneq ($(BACKEND_LIBS),)
$(MAKE) backend-assets/lib
cp -f $(BACKEND_LIBS) backend-assets/lib/
@@ -480,7 +472,7 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .
run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
@@ -742,22 +734,13 @@ backend-assets/grpc: protogen-go replace
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/bert-embeddings
endif
backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/gpt4all
endif
backend-assets/grpc/huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/huggingface
endif
backend/cpp/llama/llama.cpp:
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
@@ -859,50 +842,29 @@ backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/llama-ggml
endif
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/piper
endif
backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/rwkv
endif
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/stablediffusion
endif
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/tinydream
endif
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/whisper
endif
backend-assets/grpc/local-store: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/local-store
endif
grpcs: prepare $(GRPC_BACKENDS)

View File

@@ -84,7 +84,6 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
Hot topics (looking for contributors):
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
@@ -151,7 +150,6 @@ Other:
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
- [Run Visual studio code with LocalAI (SUSE)](https://www.suse.com/c/running-ai-locally/)
- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/)
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)

View File

@@ -2259,6 +2259,7 @@ static void params_parse(const backend::ModelOptions* request,
// get the directory of modelfile
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
params.lora_base = model_dir + "/"+request->lorabase();
}
params.use_mlock = request->mlock();
params.use_mmap = request->mmap();

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,7 @@
accelerate
auto-gptq==0.7.1
grpcio==1.65.4
grpcio==1.65.0
protobuf
torch
certifi
transformers

View File

@@ -1,3 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio

View File

@@ -1,2 +0,0 @@
torch
torchaudio

View File

@@ -1,6 +1,6 @@
accelerate
bark==0.1.5
grpcio==1.65.4
grpcio==1.65.0
protobuf
certifi
transformers

View File

@@ -122,13 +122,6 @@ function installRequirements() {
requirementFiles+=("${MY_DIR}/requirements-${BUILD_PROFILE}.txt")
fi
# if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
if [ "x${BUILD_TYPE}" == "x" ]; then
requirementFiles+=("${MY_DIR}/requirements-cpu.txt")
fi
requirementFiles+=("${MY_DIR}/requirements-after.txt")
for reqFile in ${requirementFiles[@]}; do
if [ -f ${reqFile} ]; then
echo "starting requirements install for ${reqFile}"

View File

@@ -1,2 +1,2 @@
grpcio==1.65.4
grpcio==1.65.0
protobuf

View File

@@ -1,3 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio

View File

@@ -1,2 +0,0 @@
torch
torchaudio

View File

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate
TTS==0.22.0
grpcio==1.65.4
grpcio==1.65.0
protobuf
certifi
transformers

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -3,10 +3,11 @@ accelerate
compel
peft
diffusers
grpcio==1.65.4
grpcio==1.65.0
opencv-python
pillow
protobuf
sentencepiece
torch
transformers
certifi

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -1,5 +1,6 @@
grpcio==1.65.0
protobuf
torch
transformers
certifi
setuptools

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -1,6 +1,7 @@
accelerate
grpcio==1.65.4
grpcio==1.65.0
protobuf
certifi
torch
wheel
setuptools

View File

@@ -1,2 +0,0 @@
causal-conv1d==1.4.0
mamba-ssm==2.2.2

View File

@@ -1 +0,0 @@
torch

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -3,4 +3,5 @@
# https://github.com/Dao-AILab/causal-conv1d/issues/24
packaging
setuptools
wheel
wheel
torch==2.3.1

View File

@@ -1,4 +1,6 @@
grpcio==1.65.1
causal-conv1d==1.4.0
mamba-ssm==2.2.2
grpcio==1.65.0
protobuf
certifi
transformers

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -2,7 +2,7 @@
intel-extension-for-pytorch
torch
optimum[openvino]
grpcio==1.65.4
grpcio==1.64.1
protobuf
librosa==0.9.1
faster-whisper==1.0.3

View File

@@ -1,4 +1,4 @@
grpcio==1.65.4
grpcio==1.65.0
protobuf
librosa
faster-whisper

View File

@@ -5,7 +5,7 @@ source $(dirname $0)/../common/libbackend.sh
# Download checkpoints if not present
if [ ! -d "checkpoints_v2" ]; then
wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
wget https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
unzip checkpoints_v2.zip
fi

View File

@@ -1,3 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio

View File

@@ -1,2 +0,0 @@
torch
torchaudio

View File

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,7 @@
accelerate
grpcio==1.65.1
grpcio==1.65.0
protobuf
torch
git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
certifi
transformers

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate
rerankers[transformers]
grpcio==1.65.4
grpcio==1.65.0
protobuf
certifi
transformers

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate
sentence-transformers==3.0.1
transformers
grpcio==1.65.1
grpcio==1.65.0
protobuf
certifi

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,7 @@
accelerate
transformers
grpcio==1.65.4
grpcio==1.65.0
protobuf
torch
scipy==1.14.0
certifi

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -2,3 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,8 +1,9 @@
accelerate
transformers
grpcio==1.65.4
grpcio==1.65.0
protobuf
torch
certifi
intel-extension-for-transformers
bitsandbytes
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,3 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio

View File

@@ -1,2 +0,0 @@
torch
torchaudio

View File

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,4 +1,4 @@
accelerate
grpcio==1.65.4
grpcio==1.65.0
protobuf
certifi

View File

@@ -0,0 +1 @@
flash-attn

View File

@@ -1,3 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
flash-attn

View File

@@ -1,2 +0,0 @@
torch
flash-attn

View File

@@ -1,6 +1,6 @@
accelerate
vllm
grpcio==1.65.4
grpcio==1.65.0
protobuf
certifi
transformers

View File

@@ -83,9 +83,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
return err
}
modelURI := downloader.URI(modelName)
if !modelURI.LooksLikeOCI() {
if !downloader.LooksLikeOCI(modelName) {
model := gallery.FindModel(models, modelName, mi.ModelsPath)
if model == nil {
log.Error().Str("model", modelName).Msg("model not found")

View File

@@ -86,8 +86,8 @@ func (hfscmd *HFScanCMD) Run(ctx *cliContext.Context) error {
var errs error = nil
for _, uri := range hfscmd.ToScan {
log.Info().Str("uri", uri).Msg("scanning specific uri")
scanResults, err := downloader.HuggingFaceScan(downloader.URI(uri))
if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) {
scanResults, err := downloader.HuggingFaceScan(uri)
if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
log.Error().Err(err).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("! WARNING ! A known-vulnerable model is included in this repo!")
errs = errors.Join(errs, err)
}
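The two error checks swapped in this hunk have different semantics: one reports a failure only when the error is the "unsafe files found" sentinel, the other reports every error except the "not a Hugging Face file" sentinel. A minimal, self-contained Go sketch (the sentinel errors below are hypothetical, not the repository's downloader package) contrasts the two filters:

```go
// Contrast between a narrow sentinel filter and a broad "everything except" filter,
// mirroring the two conditions shown in the hunk above. Sentinels are illustrative only.
package main

import (
	"errors"
	"fmt"
)

var (
	errUnsafeFilesFound  = errors.New("unsafe files found")
	errNonHuggingFaceURI = errors.New("not a Hugging Face file")
)

func main() {
	for _, err := range []error{errUnsafeFilesFound, errNonHuggingFaceURI, errors.New("network timeout")} {
		// Narrow filter: only the "unsafe files" sentinel counts as a failure.
		narrow := err != nil && errors.Is(err, errUnsafeFilesFound)
		// Broad filter: every error counts as a failure except the "not Hugging Face" sentinel.
		broad := err != nil && !errors.Is(err, errNonHuggingFaceURI)
		fmt.Printf("%-25v narrow=%v broad=%v\n", err, narrow, broad)
	}
}
```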

View File

@@ -8,6 +8,7 @@ import (
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/utils"
)
const (
@@ -71,9 +72,9 @@ type BackendConfig struct {
}
type File struct {
Filename string `yaml:"filename" json:"filename"`
SHA256 string `yaml:"sha256" json:"sha256"`
URI downloader.URI `yaml:"uri" json:"uri"`
Filename string `yaml:"filename" json:"filename"`
SHA256 string `yaml:"sha256" json:"sha256"`
URI string `yaml:"uri" json:"uri"`
}
type VallE struct {
@@ -212,32 +213,28 @@ func (c *BackendConfig) ShouldCallSpecificFunction() bool {
// MMProjFileName returns the filename of the MMProj file
// If the MMProj is a URL, it will return the MD5 of the URL which is the filename
func (c *BackendConfig) MMProjFileName() string {
uri := downloader.URI(c.MMProj)
if uri.LooksLikeURL() {
f, _ := uri.FilenameFromUrl()
return f
modelURL := downloader.ConvertURL(c.MMProj)
if downloader.LooksLikeURL(modelURL) {
return utils.MD5(modelURL)
}
return c.MMProj
}
func (c *BackendConfig) IsMMProjURL() bool {
uri := downloader.URI(c.MMProj)
return uri.LooksLikeURL()
return downloader.LooksLikeURL(downloader.ConvertURL(c.MMProj))
}
func (c *BackendConfig) IsModelURL() bool {
uri := downloader.URI(c.Model)
return uri.LooksLikeURL()
return downloader.LooksLikeURL(downloader.ConvertURL(c.Model))
}
// ModelFileName returns the filename of the model
// If the model is a URL, it will return the MD5 of the URL which is the filename
func (c *BackendConfig) ModelFileName() string {
uri := downloader.URI(c.Model)
if uri.LooksLikeURL() {
f, _ := uri.FilenameFromUrl()
return f
modelURL := downloader.ConvertURL(c.Model)
if downloader.LooksLikeURL(modelURL) {
return utils.MD5(modelURL)
}
return c.Model
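One side of this hunk derives the cached filename from the MD5 hex digest of the model URL (the utils.MD5 variant of ModelFileName). A minimal sketch of that mapping, using only the standard library (looksLikeURL is a simplified stand-in for the repository's URL detection):

```go
// Map a model URL to a cache filename via its MD5 hex digest; pass local filenames through.
package main

import (
	"crypto/md5"
	"encoding/hex"
	"fmt"
	"strings"
)

func looksLikeURL(s string) bool {
	return strings.HasPrefix(s, "http://") || strings.HasPrefix(s, "https://")
}

// modelFileName returns the MD5 hex digest of a model URL, or the value unchanged
// when it is already a local filename.
func modelFileName(model string) string {
	if looksLikeURL(model) {
		sum := md5.Sum([]byte(model))
		return hex.EncodeToString(sum[:])
	}
	return model
}

func main() {
	fmt.Println(modelFileName("https://example.com/models/codellama-7b-instruct.Q2_K.gguf"))
	fmt.Println(modelFileName("codellama-7b-instruct.Q2_K.gguf"))
}
```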

View File

@@ -244,7 +244,7 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error {
// Create file path
filePath := filepath.Join(modelPath, file.Filename)
if err := file.URI.DownloadFile(filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil {
if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil {
return err
}
}
@@ -252,10 +252,10 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error {
// If the model is an URL, expand it, and download the file
if config.IsModelURL() {
modelFileName := config.ModelFileName()
uri := downloader.URI(config.Model)
modelURL := downloader.ConvertURL(config.Model)
// check if file exists
if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) {
err := uri.DownloadFile(filepath.Join(modelPath, modelFileName), "", 0, 0, status)
err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status)
if err != nil {
return err
}
@@ -269,10 +269,10 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error {
if config.IsMMProjURL() {
modelFileName := config.MMProjFileName()
uri := downloader.URI(config.MMProj)
modelURL := downloader.ConvertURL(config.MMProj)
// check if file exists
if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) {
err := uri.DownloadFile(filepath.Join(modelPath, modelFileName), "", 0, 0, status)
err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status)
if err != nil {
return err
}

View File

@@ -26,17 +26,15 @@ const (
type settingsConfig struct {
StopWords []string
TemplateConfig TemplateConfig
RepeatPenalty float64
}
// default settings to adopt with a given model family
var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{
Gemma: {
RepeatPenalty: 1.0,
StopWords: []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
TemplateConfig: TemplateConfig{
Chat: "{{.Input }}\n<start_of_turn>model\n",
ChatMessage: "<start_of_turn>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<end_of_turn>",
Chat: "{{.Input }}\n<|start_of_turn|>model\n",
ChatMessage: "<|start_of_turn|>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<|end_of_turn|>",
Completion: "{{.Input}}",
},
},
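These entries are Go text/template strings. A minimal sketch (the message struct is hypothetical) renders the `<start_of_turn>` variant of the ChatMessage template from this hunk, to show what the turn markers expand to at runtime:

```go
// Render the Gemma ChatMessage template shown above with text/template.
package main

import (
	"os"
	"text/template"
)

type message struct {
	RoleName string
	Content  string
}

func main() {
	const tmpl = `<start_of_turn>{{if eq .RoleName "assistant" }}model{{else}}{{ .RoleName }}{{end}}
{{ if .Content -}}
{{.Content -}}
{{ end -}}<end_of_turn>`
	t := template.Must(template.New("chatMessage").Parse(tmpl))
	if err := t.Execute(os.Stdout, message{RoleName: "assistant", Content: "Hello!"}); err != nil {
		panic(err)
	}
}
```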
@@ -194,9 +192,6 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {
if len(cfg.StopWords) == 0 {
cfg.StopWords = settings.StopWords
}
if cfg.RepeatPenalty == 0.0 {
cfg.RepeatPenalty = settings.RepeatPenalty
}
} else {
log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family")
}
@@ -224,7 +219,7 @@ func identifyFamily(f *gguf.GGUFFile) familyType {
commandR := arch == "command-r" && eosTokenID == 255001
qwen2 := arch == "qwen2"
phi3 := arch == "phi-3"
gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Model().Name), "gemma")
gemma := strings.HasPrefix(f.Model().Name, "gemma")
deepseek2 := arch == "deepseek2"
switch {
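The two gemma checks toggled in this hunk differ in scope: one matches only model names that literally start with "gemma", the other also matches the architecture prefix and any case-insensitive occurrence of "gemma" in the name. A minimal sketch (sample model names are hypothetical) shows the difference:

```go
// Compare narrow prefix matching against the broader arch/contains check from the hunk above.
package main

import (
	"fmt"
	"strings"
)

func main() {
	arch := "gemma2"
	for _, name := range []string{"gemma-2-9b-it", "Gemma 2 9B Instruct", "my-finetune-of-gemma"} {
		narrow := strings.HasPrefix(name, "gemma")
		broad := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(name), "gemma")
		fmt.Printf("%-22s narrow=%v broad=%v\n", name, narrow, broad)
	}
}
```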

View File

@@ -37,8 +37,7 @@ func main() {
// download the assets
for _, asset := range assets {
uri := downloader.URI(asset.URL)
if err := uri.DownloadFile(filepath.Join(destPath, asset.FileName), asset.SHA, 1, 1, utils.DisplayDownloadFunction); err != nil {
if err := downloader.DownloadFile(asset.URL, filepath.Join(destPath, asset.FileName), asset.SHA, 1, 1, utils.DisplayDownloadFunction); err != nil {
panic(err)
}
}

View File

@@ -131,8 +131,7 @@ func AvailableGalleryModels(galleries []config.Gallery, basePath string) ([]*Gal
func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) {
var refFile string
uri := downloader.URI(url)
err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error {
err := downloader.DownloadAndUnmarshal(url, basePath, func(url string, d []byte) error {
refFile = string(d)
if len(refFile) == 0 {
return fmt.Errorf("invalid reference file at url %s: %s", url, d)
@@ -154,9 +153,8 @@ func getGalleryModels(gallery config.Gallery, basePath string) ([]*GalleryModel,
return models, err
}
}
uri := downloader.URI(gallery.URL)
err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error {
err := downloader.DownloadAndUnmarshal(gallery.URL, basePath, func(url string, d []byte) error {
return yaml.Unmarshal(d, &models)
})
if err != nil {
@@ -206,34 +204,35 @@ func DeleteModelFromSystem(basePath string, name string, additionalFiles []strin
log.Error().Err(err).Msgf("failed to read gallery file %s", configFile)
}
var filesToRemove []string
// Remove additional files
if galleryconfig != nil {
for _, f := range galleryconfig.Files {
fullPath := filepath.Join(basePath, f.Filename)
filesToRemove = append(filesToRemove, fullPath)
log.Debug().Msgf("Removing file %s", fullPath)
if e := os.Remove(fullPath); e != nil {
err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f.Filename, e))
}
}
}
for _, f := range additionalFiles {
fullPath := filepath.Join(filepath.Join(basePath, f))
filesToRemove = append(filesToRemove, fullPath)
}
filesToRemove = append(filesToRemove, configFile)
filesToRemove = append(filesToRemove, galleryFile)
// skip duplicates
filesToRemove = utils.Unique(filesToRemove)
// Removing files
for _, f := range filesToRemove {
if e := os.Remove(f); e != nil {
log.Debug().Msgf("Removing additional file %s", fullPath)
if e := os.Remove(fullPath); e != nil {
err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f, e))
}
}
log.Debug().Msgf("Removing model config file %s", configFile)
// Delete the model config file
if e := os.Remove(configFile); e != nil {
err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", configFile, e))
}
// Delete gallery config file
os.Remove(galleryFile)
return err
}
@@ -254,8 +253,8 @@ func SafetyScanGalleryModels(galleries []config.Gallery, basePath string) error
func SafetyScanGalleryModel(galleryModel *GalleryModel) error {
for _, file := range galleryModel.AdditionalFiles {
scanResults, err := downloader.HuggingFaceScan(downloader.URI(file.URI))
if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) {
scanResults, err := downloader.HuggingFaceScan(file.URI)
if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
log.Error().Str("model", galleryModel.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!")
return err
}

View File

@@ -68,8 +68,7 @@ type PromptTemplate struct {
func GetGalleryConfigFromURL(url string, basePath string) (Config, error) {
var config Config
uri := downloader.URI(url)
err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error {
err := downloader.DownloadAndUnmarshal(url, basePath, func(url string, d []byte) error {
return yaml.Unmarshal(d, &config)
})
if err != nil {
@@ -119,14 +118,14 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides
filePath := filepath.Join(basePath, file.Filename)
if enforceScan {
scanResults, err := downloader.HuggingFaceScan(downloader.URI(file.URI))
if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) {
scanResults, err := downloader.HuggingFaceScan(file.URI)
if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
log.Error().Str("model", config.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!")
return err
}
}
uri := downloader.URI(file.URI)
if err := uri.DownloadFile(filePath, file.SHA256, i, len(config.Files), downloadStatus); err != nil {
if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.Files), downloadStatus); err != nil {
return err
}
}

View File

@@ -73,9 +73,8 @@ func getModelStatus(url string) (response map[string]interface{}) {
}
func getModels(url string) (response []gallery.GalleryModel) {
uri := downloader.URI(url)
// TODO: No tests currently seem to exercise file:// urls. Fix?
uri.DownloadAndUnmarshal("", func(url string, i []byte) error {
downloader.DownloadAndUnmarshal(url, "", func(url string, i []byte) error {
// Unmarshal YAML data into a struct
return json.Unmarshal(i, &response)
})

View File

@@ -9,6 +9,7 @@ import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/xsync"
)
const (
@@ -371,12 +372,7 @@ func dropBadChars(s string) string {
return strings.ReplaceAll(s, "@", "__")
}
type ProcessTracker interface {
Exists(string) bool
Get(string) string
}
func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) string {
func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[string, string], galleryService *services.GalleryService) string {
modelsElements := []elem.Node{}
descriptionDiv := func(m *gallery.GalleryModel) elem.Node {
return elem.Div(
@@ -400,7 +396,7 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g
actionDiv := func(m *gallery.GalleryModel) elem.Node {
galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name)
currentlyProcessing := processTracker.Exists(galleryID)
currentlyProcessing := processing.Exists(galleryID)
jobID := ""
isDeletionOp := false
if currentlyProcessing {
@@ -408,7 +404,7 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g
if status != nil && status.Deletion {
isDeletionOp = true
}
jobID = processTracker.Get(galleryID)
jobID = processing.Get(galleryID)
// TODO:
// case not handled, if status == nil : "Waiting"
}
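One side of this hunk has ListModels accept a small ProcessTracker interface instead of a concrete `*xsync.SyncedMap[string, string]`, so any store exposing Exists/Get can be passed in. A minimal sketch of that decoupling (the types below are illustrative, not the repository's):

```go
// Accept a narrow interface rather than a concrete map type; a toy map-backed
// implementation (no locking, test use only) satisfies it.
package main

import "fmt"

type ProcessTracker interface {
	Exists(string) bool
	Get(string) string
}

type mapTracker map[string]string

func (m mapTracker) Exists(k string) bool { _, ok := m[k]; return ok }
func (m mapTracker) Get(k string) string  { return m[k] }

func listStatus(galleryID string, pt ProcessTracker) string {
	if pt.Exists(galleryID) {
		return "processing job " + pt.Get(galleryID)
	}
	return "idle"
}

func main() {
	pt := mapTracker{"localai@gemma": "job-123"}
	fmt.Println(listStatus("localai@gemma", pt))
	fmt.Println(listStatus("localai@phi-3", pt))
}
```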

View File

@@ -17,10 +17,7 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
backendConfigs := cl.GetAllBackendConfigs()
galleryConfigs := map[string]*gallery.Config{}
modelsWithBackendConfig := map[string]interface{}{}
for _, m := range backendConfigs {
modelsWithBackendConfig[m.Name] = nil
cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name)
if err != nil {
@@ -35,7 +32,7 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
modelsWithoutConfig := []string{}
for _, m := range models {
if _, ok := modelsWithBackendConfig[m]; !ok {
if _, ok := galleryConfigs[m]; !ok {
modelsWithoutConfig = append(modelsWithoutConfig, m)
}
}

View File

@@ -226,15 +226,9 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
// Update input grammar
jsStruct := funcs.ToJSONStructure(config.FunctionsConfig.FunctionNameKey, config.FunctionsConfig.FunctionNameKey)
g, err := jsStruct.Grammar(config.FunctionsConfig.GrammarOptions()...)
if err == nil {
config.Grammar = g
}
config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
case input.JSONFunctionGrammarObject != nil:
g, err := input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarOptions()...)
if err == nil {
config.Grammar = g
}
config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
default:
// Force picking one of the functions by the request
if config.FunctionToCall() != "" {

View File

@@ -21,40 +21,6 @@ import (
"github.com/google/uuid"
)
type modelOpCache struct {
status *xsync.SyncedMap[string, string]
}
func NewModelOpCache() *modelOpCache {
return &modelOpCache{
status: xsync.NewSyncedMap[string, string](),
}
}
func (m *modelOpCache) Set(key string, value string) {
m.status.Set(key, value)
}
func (m *modelOpCache) Get(key string) string {
return m.status.Get(key)
}
func (m *modelOpCache) DeleteUUID(uuid string) {
for _, k := range m.status.Keys() {
if m.status.Get(k) == uuid {
m.status.Delete(k)
}
}
}
func (m *modelOpCache) Map() map[string]string {
return m.status.Map()
}
func (m *modelOpCache) Exists(key string) bool {
return m.status.Exists(key)
}
func RegisterUIRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
@@ -63,7 +29,7 @@ func RegisterUIRoutes(app *fiber.App,
auth func(*fiber.Ctx) error) {
// keeps the state of models that are being installed from the UI
var processingModels = NewModelOpCache()
var processingModels = xsync.NewSyncedMap[string, string]()
// modelStatus returns the current status of the models being processed (installation or deletion)
// it is called asynchonously from the UI
@@ -266,8 +232,6 @@ func RegisterUIRoutes(app *fiber.App,
return c.SendString(elements.ProgressBar("100"))
}
if status.Error != nil {
// TODO: instead of deleting the job, we should keep it in the cache and make it dismissable
processingModels.DeleteUUID(jobUID)
return c.SendString(elements.ErrorProgress(status.Error.Error(), status.GalleryModelName))
}
@@ -282,7 +246,12 @@ func RegisterUIRoutes(app *fiber.App,
status := galleryService.GetStatus(jobUID)
galleryID := ""
processingModels.DeleteUUID(jobUID)
for _, k := range processingModels.Keys() {
if processingModels.Get(k) == jobUID {
galleryID = k
processingModels.Delete(k)
}
}
if galleryID == "" {
log.Debug().Msgf("no processing model found for job : %+v\n", jobUID)
}
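The modelOpCache wrapper removed or restored by this hunk mainly adds a delete-by-value helper: given a job UUID, find and clear every galleryID key currently mapped to it, which the other side of the diff inlines as a loop. A minimal sketch of that pattern (plain map plus mutex as a stand-in for the repository's xsync.SyncedMap):

```go
// Keyed job cache with a DeleteUUID helper that removes all entries pointing at a job.
package main

import (
	"fmt"
	"sync"
)

type opCache struct {
	mu     sync.Mutex
	status map[string]string // galleryID -> job UUID
}

func (c *opCache) Set(galleryID, uuid string) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.status[galleryID] = uuid
}

// DeleteUUID removes every entry whose value matches the given job UUID and
// returns the gallery IDs that were cleared.
func (c *opCache) DeleteUUID(uuid string) []string {
	c.mu.Lock()
	defer c.mu.Unlock()
	var cleared []string
	for k, v := range c.status {
		if v == uuid {
			cleared = append(cleared, k)
			delete(c.status, k)
		}
	}
	return cleared
}

func main() {
	c := &opCache{status: map[string]string{}}
	c.Set("localai@gemma", "job-123")
	fmt.Println(c.DeleteUUID("job-123")) // [localai@gemma]
}
```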

View File

@@ -16,16 +16,7 @@
</a>
</h2>
<h5 class="mb-4 text-justify">LocalAI uses P2P technologies to enable distribution of work between peers. It is possible to share an instance with Federation and/or split the weights of a model across peers (only available with llama.cpp models). You can now share computational resources between your devices or your friends!</h5>
<!-- Warning box if p2p token is empty and p2p is enabled -->
{{ if and .IsP2PEnabled (eq .P2PToken "") }}
<div class="bg-red-500 p-4 rounded-lg shadow-lg mb-12 text-left">
<p class="text-xl font-semibold text-white"> <i class="fa-solid fa-exclamation-triangle"></i> Warning: P2P mode is disabled or no token was specified</p>
<p class="mb-4">You have to enable P2P mode by starting LocalAI with <code>--p2p</code>. Please restart the server with <code>--p2p</code> to generate a new token automatically that can be used to automatically discover other nodes. If you already have a token specify it with <code>export TOKEN=".."</code> <a href="https://localai.io/features/distribute/" target="_blank">
Check out the documentation for more information.
</a> </p>
</div>
{{ else }}
<!-- Federation Box -->
<div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">
@@ -137,8 +128,7 @@
</div>
</div>
</div>
<!-- Llama.cpp Box END -->
{{ end }}
<!-- Llama.cpp Box END -->
</div>
</div>

View File

@@ -11,7 +11,7 @@ This functionality enables LocalAI to distribute inference requests across multi
LocalAI supports two modes of distributed inferencing via p2p:
- **Federated Mode**: Requests are shared between the cluster and routed to a single worker node in the network based on the load balancer's decision.
- **Worker Mode** (aka "model sharding" or "splitting weights"): Requests are processed by all the workers which contributes to the final inference result (by sharing the model weights).
- **Worker Mode**: Requests are processed by all the workers which contributes to the final inference result (by sharing the model weights).
## Usage
@@ -122,6 +122,12 @@ The server logs should indicate that new workers are being discovered.
![output](https://github.com/mudler/LocalAI/assets/2420543/8ca277cf-c208-4562-8929-808b2324b584)
## Notes
- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
- Only a single model is supported currently.
- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343)
## Environment Variables
@@ -132,20 +138,3 @@ There are options that can be tweaked or parameters that can be set using enviro
| **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and enable p2p layer to be local only (mDNS) |
| **LOCALAI_P2P_DISABLE_LIMITS** | Set to "true" to disable connection limits and resources management |
| **LOCALAI_P2P_TOKEN** | Set the token for the p2p network |
## Architecture
LocalAI uses https://github.com/libp2p/go-libp2p under the hood, the same project powering IPFS. Differently from other frameworks, LocalAI uses peer2peer without a single master server, but rather it uses sub/gossip and ledger functionalities to achieve consensus across different peers.
[EdgeVPN](https://github.com/mudler/edgevpn) is used as a library to establish the network and expose the ledger functionality under a shared token to ease out automatic discovery and have separated, private peer2peer networks.
The weights are split proportional to the memory when running into worker mode, when in federation mode each request is split to every node which have to load the model fully.
## Notes
- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
- Only a single model is supported currently.
- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343)

View File

@@ -1,3 +1,3 @@
{
"version": "v2.19.4"
"version": "v2.19.1"
}

View File

@@ -194,7 +194,7 @@ install_container_toolkit_yum() {
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | \
$SUDO tee /etc/yum.repos.d/nvidia-container-toolkit.repo
if [ "$PACKAGE_MANAGER" = "dnf" ]; then
if [ "$PACKAGE_MANAGER" == "dnf" ]; then
$SUDO $PACKAGE_MANAGER config-manager --enable nvidia-container-toolkit-experimental
else
$SUDO $PACKAGE_MANAGER -y install yum-utils
@@ -629,7 +629,7 @@ case "$ARCH" in
*) fatal "Unsupported architecture: $ARCH" ;;
esac
if [ "$OS" = "Darwin" ]; then
if [ "$OS" == "Darwin" ]; then
install_binary_darwin
exit 0
fi

View File

@@ -38,8 +38,8 @@ func init() {
func GetRemoteLibraryShorteners(url string, basePath string) (map[string]string, error) {
remoteLibrary := map[string]string{}
uri := downloader.URI(url)
err := uri.DownloadAndUnmarshal(basePath, func(_ string, i []byte) error {
err := downloader.DownloadAndUnmarshal(url, basePath, func(_ string, i []byte) error {
return yaml.Unmarshal(i, &remoteLibrary)
})
if err != nil {

View File

@@ -1,6 +1,6 @@
llama_index==0.10.59
llama_index==0.10.55
requests==2.32.3
weaviate_client==4.6.7
weaviate_client==4.6.5
transformers
torch
chainlit

View File

@@ -1,2 +1,2 @@
langchain==0.2.12
openai==1.37.0
langchain==0.2.8
openai==1.35.13

View File

@@ -1,4 +1,4 @@
langchain==0.2.12
openai==1.37.0
chromadb==0.5.5
llama-index==0.10.56
langchain==0.2.8
openai==1.35.13
chromadb==0.5.4
llama-index==0.10.55

View File

@@ -10,21 +10,21 @@ debugpy==1.8.2
frozenlist==1.4.1
greenlet==3.0.3
idna==3.7
langchain==0.2.12
langchain-community==0.2.9
langchain==0.2.8
langchain-community==0.2.7
marshmallow==3.21.3
marshmallow-enum==1.5.1
multidict==6.0.5
mypy-extensions==1.0.0
numexpr==2.10.1
numpy==2.0.1
openai==1.37.1
numpy==1.26.4
openai==1.35.13
openapi-schema-pydantic==1.2.4
packaging>=23.2
pydantic==2.8.2
PyYAML==6.0.1
requests==2.32.3
SQLAlchemy==2.0.32
SQLAlchemy==2.0.30
tenacity==8.5.0
tqdm==4.66.4
typing-inspect==0.9.0

View File

@@ -1,2 +1,2 @@
streamlit==1.37.1
streamlit==1.36.0
requests

View File

@@ -1,17 +0,0 @@
---
name: "alpaca"
config_file: |
context_size: 4096
f16: true
mmap: true
template:
chat: |
Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{{.Input}}
### Response:
completion: |
{{.Input}}

Some files were not shown because too many files have changed in this diff Show More