Compare commits

...

137 Commits

Author SHA1 Message Date
Ettore Di Giacinto
455aee4eaf chore(model gallery): add qihoo360_tinyr1-32b-preview
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-02 10:23:17 +01:00
Ettore Di Giacinto
326be287da chore(model gallery): add ibm-granite_granite-3.2-2b-instruct (#4928)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-02 10:22:35 +01:00
Ettore Di Giacinto
0404d98190 chore(model gallery): add ibm-granite_granite-3.2-8b-instruct (#4927)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-02 10:19:27 +01:00
LocalAI [bot]
0a8ec1eb22 chore: ⬆️ Update ggml-org/llama.cpp to 1782cdfed60952f9ff333fc2ab5245f2be702453 (#4926)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-02 10:02:49 +01:00
Ettore Di Giacinto
d860932dcd fix(chatml): add endoftext stopword
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-01 21:16:10 +01:00
Ettore Di Giacinto
1cb137bd2d fix(deephermes): correct typo
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-03-01 17:07:12 +01:00
Ettore Di Giacinto
3c279e5568 chore(model gallery): add allenai_olmocr-7b-0225-preview (#4924)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-01 09:10:04 +01:00
Ettore Di Giacinto
fb55e3df57 chore(model gallery): add ozone-research_0x-lite (#4923)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-01 09:07:01 +01:00
Ettore Di Giacinto
de46fb6e2e chore(model gallery): add ozone-research_chirp-01 (#4922)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-01 09:05:03 +01:00
Ettore Di Giacinto
d7a0e3c5ea chore(model gallery): add microsoft_phi-4-mini-instruct (#4921)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-01 08:58:01 +01:00
LocalAI [bot]
0533ea817d chore: ⬆️ Update ggml-org/llama.cpp to 06c2b1561d8b882bc018554591f8c35eb04ad30e (#4920)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-28 22:18:14 +00:00
Ettore Di Giacinto
755e4fb5f4 feat(ui): improvements to index and models page (#4918)
- mobile-friendly index
- adjust color palette
- improve search experience

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-28 19:23:32 +01:00
LocalAI [bot]
e4fdde158f chore: ⬆️ Update ggml-org/llama.cpp to b95c8af37ccf169b0a3216b7ed691af0534e5091 (#4916)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-28 00:00:39 +00:00
Ettore Di Giacinto
6d0712fa6d fix(ui): not all models come from gallery (#4915)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-27 19:12:41 +01:00
Ettore Di Giacinto
bbbb28e3ca fix(models): unify usecases identifications (#4914)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-27 15:51:12 +01:00
Ettore Di Giacinto
3bf2e9d065 fix(ui): not all models have an Icon (#4913)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-27 10:52:19 +01:00
Ettore Di Giacinto
1461fd8777 chore(model gallery): add locutusque_thespis-llama-3.1-8b (#4912)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-27 10:02:44 +01:00
LocalAI [bot]
054860539a chore: ⬆️ Update ggml-org/llama.cpp to a800ae46da2ed7dac236aa6bf2b595da6b6294b5 (#4911)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-26 22:43:49 +00:00
Ettore Di Giacinto
c87870b18e feat(ui): improve chat interface (#4910)
* feat(ui): show more information in the chat view, minor adjustments to model gallery

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(ui): UI improvements

Visual improvements and bugfixes including:
- disable pagination during search
- fix scrolling on new message

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-26 18:27:18 +01:00
Ettore Di Giacinto
5ad2be9c45 feat(ui): small improvements to chat interface (#4907)
- Change chat colors
- Improve layout on small windows

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-26 11:10:40 +01:00
LocalAI [bot]
61a24746a1 chore: ⬆️ Update ggml-org/llama.cpp to d7cfe1ffe0f435d0048a6058d529daf76e072d9c (#4908)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-25 21:58:37 +00:00
Ettore Di Giacinto
d557eb9361 chore(model gallery): add latitudegames_wayfarer-large-70b-llama-3.3 (#4903)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-25 10:21:54 +01:00
Ettore Di Giacinto
a9a1a361a9 chore(model gallery): add perplexity-ai_r1-1776-distill-llama-70b (#4902)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-25 09:59:21 +01:00
Ettore Di Giacinto
12d070af80 chore(model gallery): add sicariussicariistuff_phi-line_14b (#4901)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-25 09:56:44 +01:00
LocalAI [bot]
8d40557bc8 chore: ⬆️ Update ggml-org/llama.cpp to 7a2c913e66353362d7f28d612fd3c9d51a831eda (#4899)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-25 09:51:02 +01:00
dependabot[bot]
5a5f3a899a chore(deps): Bump docs/themes/hugo-theme-relearn from 66bc366 to 02bba0f (#4898)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `66bc366` to `02bba0f`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](66bc366c47...02bba0f199)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-02-25 09:50:46 +01:00
dependabot[bot]
a2d1f133c8 chore(deps): Bump appleboy/ssh-action from 1.2.0 to 1.2.1 (#4896)
Bumps [appleboy/ssh-action](https://github.com/appleboy/ssh-action) from 1.2.0 to 1.2.1.
- [Release notes](https://github.com/appleboy/ssh-action/releases)
- [Changelog](https://github.com/appleboy/ssh-action/blob/master/.goreleaser.yaml)
- [Commits](https://github.com/appleboy/ssh-action/compare/v1.2.0...v1.2.1)

---
updated-dependencies:
- dependency-name: appleboy/ssh-action
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-02-24 21:01:39 +00:00
LocalAI [bot]
0ae6420c31 chore: ⬆️ Update ggml-org/llama.cpp to 7ad0779f5de84a68143b2c00ab5dc94a948925d3 (#4890)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-24 11:26:09 +01:00
Ettore Di Giacinto
3a3e05cf18 chore(model gallery): add flux.1dev-abliteratedv2 (#4895)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-24 10:11:32 +01:00
Ettore Di Giacinto
6a20388e25 chore(model gallery): add nohobby_l3.3-prikol-70b-extra (#4894)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-24 09:43:50 +01:00
Ettore Di Giacinto
06c836a937 chore(model gallery): add steelskull_l3.3-san-mai-r1-70b (#4893)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-24 09:41:06 +01:00
Ettore Di Giacinto
049a13fe78 chore(model gallery): add steelskull_l3.3-cu-mai-r1-70b (#4892)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-24 09:39:12 +01:00
Ettore Di Giacinto
30bf6c962f chore(stable-diffusion-ggml): update, adapt upstream changes (#4889)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-23 08:36:41 +01:00
LocalAI [bot]
a72b3a23c3 chore: ⬆️ Update ggml-org/llama.cpp to a28e0d5eb18c18e6a4598286158f427269b1444e (#4887)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-23 08:36:25 +01:00
Ettore Di Giacinto
e9971b168a feat(ui): paginate model gallery (#4886)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-22 21:38:00 +01:00
Ettore Di Giacinto
5b59b5e0c1 chore(model gallery): add steelskull_l3.3-mokume-gane-r1-70b (#4885)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-22 18:58:06 +01:00
Ettore Di Giacinto
8cfd712428 chore(model gallery): add arcee-ai_arcee-maestro-7b-preview (#4884)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-22 11:32:25 +01:00
Ettore Di Giacinto
21f7faa80d chore(model gallery): add ozone-ai_reverb-7b (#4883)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-22 11:28:27 +01:00
Ettore Di Giacinto
a6a0121118 chore(model gallery): add rombo-org_rombo-llm-v3.0-qwen-72b (#4882)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-22 11:19:04 +01:00
LocalAI [bot]
ba66aa33c5 chore: ⬆️ Update ggml-org/llama.cpp to 51f311e057723b7454d0ebe20f545a1a2c4db6b2 (#4881)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-21 21:51:02 +00:00
Ettore Di Giacinto
8fc024a770 chore(model gallery): add pocketdoc_dans-personalityengine-v1.2.0-24b (#4880)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-21 10:00:23 +01:00
Ettore Di Giacinto
52aa9d08aa chore(model gallery): add l3.1-8b-rp-ink (#4879)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-21 09:56:57 +01:00
Ettore Di Giacinto
4c9379c39e chore(model gallery): add smirki_uigen-t1.1-qwen-7b (#4878)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-21 09:54:42 +01:00
Ettore Di Giacinto
0ff2c39364 chore(model gallery): add smirki_uigen-t1.1-qwen-14b (#4877)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-21 09:52:20 +01:00
LocalAI [bot]
1af7e5dc49 chore: ⬆️ Update ggml-org/llama.cpp to c392e5094deaf2d1a7c18683214f007fad3fe42b (#4876)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-20 22:03:52 +00:00
Ettore Di Giacinto
af3bb64e42 fix(coqui): pin transformers (#4875)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-20 16:16:54 +01:00
Ettore Di Giacinto
77281f836e chore(model gallery): add internlm_oreal-7b (#4874)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-20 15:57:21 +01:00
Ettore Di Giacinto
550275811d chore(model gallery): add internlm_oreal-deepseek-r1-distill-qwen-7b (#4873)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-20 15:55:13 +01:00
Ettore Di Giacinto
c27ce6c54d chore(model gallery): add internlm_oreal-32b (#4872)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-20 15:52:28 +01:00
Ettore Di Giacinto
ac4991b069 chore(docs): update sponsor logo
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-20 15:31:41 +01:00
Ettore Di Giacinto
25bee71bb8 feat(ui): do also filter tts and image models (#4871)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-20 15:02:18 +01:00
LocalAI [bot]
b993780a3b chore: ⬆️ Update ggml-org/llama.cpp to d04e7163c85a847bc61d58c22f2c503596db7aa8 (#4870)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-20 09:42:57 +01:00
Ettore Di Giacinto
ea0c9f1168 feat(ui): show only text models in the chat interface (#4869)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-19 17:34:30 +01:00
Ettore Di Giacinto
08311f275a chore(model gallery): add sentientagi_dobby-unhinged-llama-3.3-70b (#4868)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-19 10:36:36 +01:00
Ettore Di Giacinto
4de0f2f737 chore(model gallery): add open-r1_openr1-qwen-7b (#4867)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-19 10:04:01 +01:00
Ettore Di Giacinto
42ae807c41 chore(model gallery): add pygmalionai_pygmalion-3-12b (#4866)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-19 10:02:35 +01:00
LocalAI [bot]
94593ba4c3 chore: ⬆️ Update ggml-org/llama.cpp to 63e489c025d61c7ca5ec06c5d10f36e2b76aaa1d (#4865)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-19 09:19:28 +01:00
Brandon Beiler
6a6e1a0ea9 feat(vllm): Additional vLLM config options (Disable logging, dtype, and Per-Prompt media limits) (#4855)
* Adding the following vLLM config options: disable_log_status, dtype, limit_mm_per_prompt

Signed-off-by: TheDropZone <brandonbeiler@gmail.com>

* using " marks in the config.yaml file

Signed-off-by: TheDropZone <brandonbeiler@gmail.com>

* adding in missing colon

Signed-off-by: TheDropZone <brandonbeiler@gmail.com>

---------

Signed-off-by: TheDropZone <brandonbeiler@gmail.com>
2025-02-18 19:27:58 +01:00
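Based on the option names in this commit and the new ModelOptions fields that appear later in this diff (DisableLogStatus, DType, LimitImagePerPrompt, LimitVideoPerPrompt, LimitAudioPerPrompt), a model config using them might look like the sketch below. Field placement and value types are assumptions; consult PR #4855 for the exact schema.

```yaml
# Hypothetical vLLM model config exercising the new options.
name: my-vllm-model
backend: vllm
parameters:
  model: Qwen/Qwen2-VL-7B-Instruct   # placeholder model id
dtype: "float16"                     # maps to the new DType field
disable_log_status: true             # maps to DisableLogStatus
limit_mm_per_prompt:                 # per-prompt media limits
  image: 2
  video: 1
  audio: 1
```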
Ettore Di Giacinto
5b19af99ff feat(ui): detect model usage and display link (#4864)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 19:27:07 +01:00
Ettore Di Giacinto
28fb8e607a chore(model gallery): add nbeerbower_dumpling-qwen2.5-72b (#4862)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 12:44:59 +01:00
Ettore Di Giacinto
bb85b6ef00 feat: improve ui models list in the index (#4863)
* feat(ui): improve index

- Redirect to the chat view when clicking on a model

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Display chat icon nearby the model

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 12:44:44 +01:00
Ettore Di Giacinto
b9b5a635ca chore(model gallery): add nbeerbower_dumpling-qwen2.5-32b-v2 (#4861)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 11:53:23 +01:00
Ettore Di Giacinto
131ea5b627 chore(model gallery): add nbeerbower_dumpling-qwen2.5-14b (#4860)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 11:51:29 +01:00
Ettore Di Giacinto
fac70e9642 chore(model gallery): add allenai_llama-3.1-tulu-3.1-8b (#4859)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 11:49:26 +01:00
Ettore Di Giacinto
7e76ea40fb chore(model gallery): add kubeguru-llama3.2-3b-v0.1 (#4858)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 11:47:00 +01:00
LocalAI [bot]
de09ae42ef chore: ⬆️ Update ggml-org/llama.cpp to 73e2ed3ce3492d3ed70193dd09ae8aa44779651d (#4854)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-18 09:11:07 +01:00
Ettore Di Giacinto
6424f0666d chore(deps): Bump edgevpn to v0.30.1 (#4840)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-17 16:51:22 +01:00
Ettore Di Giacinto
f3ae94ca70 chore: update Image generation docs and examples (#4841)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-17 16:51:06 +01:00
LocalAI [bot]
09c9f67a02 chore: ⬆️ Update ggml-org/llama.cpp to 2eea03d86a2d132c8245468c26290ce07a27a8e8 (#4839)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-17 10:55:30 +01:00
Ettore Di Giacinto
c264ca542d fix(ci): update repository for llama.cpp
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-02-17 09:33:34 +01:00
Bas Hulsken
bbf30d416d fix: change initialization order of llama-cpp-avx512 to go before avx2 variant (#4837)
changed the initialization order of the avx512 version of llama.cpp, which is now tried before the avx2 variant

Signed-off-by: Bas Hulsken <bhulsken@hotmail.com>
2025-02-17 09:32:21 +01:00
Ettore Di Giacinto
27617a1b06 chore(model gallery): add ozone-ai_0x-lite (#4835)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-16 09:23:26 +01:00
Ettore Di Giacinto
e84081769e chore(ci): cleanup before pulling images again
2025-02-16 09:20:22 +01:00
LocalAI [bot]
20119fc580 docs: ⬆️ update docs version mudler/LocalAI (#4834)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-15 22:45:11 +00:00
Ettore Di Giacinto
09941c0bfb chore(docs): update license year
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-15 18:17:15 +01:00
Ettore Di Giacinto
cabe0f4993 chore(model gallery): add davidbrowne17_llamathink-8b-instruct (#4833)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-15 17:31:46 +01:00
Ettore Di Giacinto
1977c7f190 chore(model gallery): add pygmalionai_eleusis-12b (#4832)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-15 17:21:30 +01:00
Ettore Di Giacinto
061e7c4eae chore(model gallery): add rombo-org_rombo-llm-v3.0-qwen-32b (#4830)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-15 10:58:27 +01:00
LocalAI [bot]
5313e660f6 chore: ⬆️ Update ggerganov/llama.cpp to 300907b2110cc17b4337334dc397e05de2d8f5e0 (#4829)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-14 21:51:49 +00:00
Ettore Di Giacinto
9e32fda304 fix(llama.cpp): improve context shift handling (#4820)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-14 14:55:03 +01:00
Ettore Di Giacinto
83202cae54 chore(model gallery): add nousresearch_deephermes-3-llama-3-8b-preview (#4828)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-14 12:25:00 +01:00
Ettore Di Giacinto
d96addfa9d chore(model gallery): add open-thoughts_openthinker-32b (#4827)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-14 12:03:46 +01:00
Ettore Di Giacinto
a715fe588d chore(model gallery): add sicariussicariistuff_phi-lthy4 (#4826)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-14 11:57:14 +01:00
LocalAI [bot]
2ac4a86bb4 chore: ⬆️ Update ggerganov/llama.cpp to 8a8c4ceb6050bd9392609114ca56ae6d26f5b8f5 (#4825)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-13 21:49:57 +00:00
Ettore Di Giacinto
8670d480a6 chore(model gallery): add nvidia_aceinstruct-72b (#4822)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-13 09:58:34 +01:00
Ettore Di Giacinto
af0b4ff237 chore(ci): update labels
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-02-13 09:58:19 +01:00
Ettore Di Giacinto
e694764065 chore(model gallery): add nvidia_aceinstruct-7b (#4821)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-13 09:44:53 +01:00
Ettore Di Giacinto
f3c27e0381 chore(model gallery): add nvidia_aceinstruct-1.5b (#4819)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-13 09:33:40 +01:00
LocalAI [bot]
bf44319d0d chore: ⬆️ Update ggerganov/llama.cpp to 0fb77f821f6e70ad8b8247a97d1022f0fef78991 (#4814)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-12 22:41:53 +00:00
Ettore Di Giacinto
5b133a640b chore(model gallery): add theskullery_l3.3-exp-unnamed-model-70b-v0.5 (#4813)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-12 11:05:51 +01:00
Ettore Di Giacinto
0030a3fe75 chore(model gallery): add simplescaling_s1.1-32b (#4812)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-12 11:03:05 +01:00
Ettore Di Giacinto
0a748b009e chore(ci): avoid cache hits until the ci gRPC job is fixed
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-12 09:11:40 +01:00
LocalAI [bot]
257e951def chore: ⬆️ Update ggerganov/llama.cpp to 90e4dba461b07e635fd1daf3b491c978c7dd0013 (#4810)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-12 00:13:28 +01:00
LocalAI [bot]
fbd82a2dd0 feat(swagger): update swagger (#4809)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-11 21:54:40 +00:00
Ettore Di Giacinto
5db321dad2 chore(ci): do not always regenerate the cache
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-11 16:36:00 +01:00
Ettore Di Giacinto
f5638a6354 feat(diffusers): allow to override image gen options (#4807)
Use the options field in the model to override kwargs if needed.

This allows specifying options directly in the model YAML config:

```yaml

options:
- foo:bar

```

Each option will then be used directly when calling the diffusers pipeline, e.g.:

```python
pipe(
  foo="bar",
)
```

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-11 10:16:32 +01:00
Ettore Di Giacinto
5f64cc6328 Revert "chore(deps): Bump docs/themes/lotusdocs from f5785a2 to 975da91" (#4808)
Revert "chore(deps): Bump docs/themes/lotusdocs from `f5785a2` to `975da91` (…"

This reverts commit e57b750ca3.
2025-02-11 10:05:57 +01:00
Ettore Di Giacinto
28b10e8804 chore(swagger): update (#4805)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-11 09:51:01 +01:00
Ettore Di Giacinto
3277f5095d chore(model gallery): add agentica-org_deepscaler-1.5b-preview (#4804)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-11 09:47:19 +01:00
Ettore Di Giacinto
fe3ced2919 chore(ci): try again to bump parallelism in grpc jobs
As we moved these out to self-hosted

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-11 09:31:00 +01:00
LocalAI [bot]
45e37a07bb chore: ⬆️ Update ggerganov/llama.cpp to 19b392d58dc08c366d0b29bd3b9c6991fa4e1662 (#4803)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-11 09:17:02 +01:00
dependabot[bot]
e57b750ca3 chore(deps): Bump docs/themes/lotusdocs from f5785a2 to 975da91 (#4801)
Bumps [docs/themes/lotusdocs](https://github.com/colinwilson/lotusdocs) from `f5785a2` to `975da91`.
- [Release notes](https://github.com/colinwilson/lotusdocs/releases)
- [Commits](f5785a2399...975da91e83)

---
updated-dependencies:
- dependency-name: docs/themes/lotusdocs
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-02-10 22:27:14 +00:00
Ettore Di Giacinto
49df492268 chore(ci): run grpc build on self-hosted
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-10 19:44:50 +01:00
Ettore Di Giacinto
516cd660f1 chore(grpcio): reduce parallelism (#4799)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-10 18:56:13 +01:00
Ettore Di Giacinto
8fd3ace9a1 chore(grpcio): bump to 1.70 (#4798)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-10 18:38:53 +01:00
Ettore Di Giacinto
099469cb05 chore(tests): decrease parallelism for gRPC builds (#4797)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-10 12:59:59 +01:00
Ettore Di Giacinto
6be8c0c618 chore(model gallery): add localai-functioncall-qwen2.5-7b-v0.5 (#4796)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-10 12:07:35 +01:00
Dave
3cddf24747 feat: Centralized Request Processing middleware (#3847)
* squash past, centralize request middleware PR

Signed-off-by: Dave Lee <dave@gray101.com>

* migrate bruno request files to examples repo

Signed-off-by: Dave Lee <dave@gray101.com>

* fix

Signed-off-by: Dave Lee <dave@gray101.com>

* Update tests/e2e-aio/e2e_test.go

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

---------

Signed-off-by: Dave Lee <dave@gray101.com>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-02-10 12:06:16 +01:00
Ettore Di Giacinto
c330360785 chore(model gallery): add ilsp_llama-krikri-8b-instruct (#4795)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-10 09:54:54 +01:00
LocalAI [bot]
8cd51570e5 chore: ⬆️ Update ggerganov/llama.cpp to 19d3c8293b1f61acbe2dab1d49a17950fd788a4a (#4793)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-09 22:12:01 +00:00
Ettore Di Giacinto
0e7aa5cd15 chore(model gallery): add subtleone_qwen2.5-32b-erudite-writer (#4792)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-09 10:59:46 +01:00
Ettore Di Giacinto
e06a5f49de chore(model gallery): add huihui-ai_deepseek-r1-distill-llama-70b-abliterated (#4790)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-09 10:53:48 +01:00
Dave
fb2f847507 chore: migrate bruno request files to examples repo (#4788)
migrate bruno request files to examples repo

Signed-off-by: Dave Lee <dave@gray101.com>
2025-02-09 10:52:28 +01:00
LocalAI [bot]
e01acc88c9 chore: ⬆️ Update ggerganov/llama.cpp to e6e658319952f7ad269dc11275b9edddc721fc6d (#4787)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-08 21:57:40 +00:00
LocalAI [bot]
7a5912908a chore: ⬆️ Update ggerganov/llama.cpp to d2fe216fb2fb7ca8627618c9ea3a2e7886325780 (#4780)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-08 09:44:34 +01:00
Ettore Di Giacinto
4b1b942a7f chore(model gallery): add sicariussicariistuff_redemption_wind_24b (#4781)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-08 09:04:18 +01:00
Ettore Di Giacinto
230fe0098f chore(model gallery): add cognitivecomputations_dolphin3.0-mistral-24b (#4779)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-07 13:33:24 +01:00
Ettore Di Giacinto
cc163429dc chore(model gallery): add cognitivecomputations_dolphin3.0-r1-mistral-24b (#4778)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-07 13:31:49 +01:00
Ettore Di Giacinto
f670e0a91c chore(model gallery): add nohobby_l3.3-prikol-70b-v0.5 (#4777)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-07 13:29:53 +01:00
LocalAI [bot]
731674eee7 chore: ⬆️ Update ggerganov/llama.cpp to 8a59053f63fffc24e730cd3ea067760abfe4a919 (#4776)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-06 22:02:00 +00:00
Ettore Di Giacinto
cc1f6f913f fix(llama.cpp): disable mirostat as default (#2911)
Even though it can increase output quality, Mirostat has shown performance drawbacks noticeable enough to confuse users about the speed of LocalAI (see also https://github.com/mudler/LocalAI/issues/2780).

This changeset disables Mirostat by default (it can still be enabled manually).

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Dave <dave@gray101.com>
2025-02-06 19:39:59 +01:00
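For users who still want Mirostat after this change, it can presumably be re-enabled per model in its YAML config. The key names below (mirostat, mirostat_eta, mirostat_tau) are an assumption based on LocalAI's usual option naming; check the documentation for the exact fields.

```yaml
# Sketch: re-enabling Mirostat v2 for a single model after it became opt-in.
name: my-llama-model
backend: llama-cpp
parameters:
  model: my-model.gguf   # placeholder file name
mirostat: 2              # 0 = disabled (new default), 1 = v1, 2 = v2
mirostat_eta: 0.1
mirostat_tau: 5.0
```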
Ettore Di Giacinto
7f90ff7aec chore(llama-ggml): drop deprecated backend (#4775)
The GGML (pre-GGUF) format is now dead: since the next version of LocalAI already introduces many breaking compatibility changes, we take the occasion to also drop ggml support.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-06 18:36:23 +01:00
Ettore Di Giacinto
8d45670e41 fix(openai): consistently return stop reason (#4771)
We were not returning a stop reason when no tool was actually called
(even if specified).

Fixes: https://github.com/mudler/LocalAI/issues/4716

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-06 12:41:08 +01:00
Ettore Di Giacinto
e4b8ddb6a1 chore(model gallery): add black-ink-guild_pernicious_prophecy_70b (#4774)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-06 12:03:59 +01:00
Ettore Di Giacinto
a801561f81 chore(model gallery): add tiger-lab_qwen2.5-32b-instruct-cft (#4773)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-06 12:01:56 +01:00
Ettore Di Giacinto
16ced07102 chore(model gallery): add arliai_llama-3.3-70b-arliai-rpmax-v1.4 (#4772)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-06 11:59:14 +01:00
LocalAI [bot]
d35595372d chore: ⬆️ Update ggerganov/llama.cpp to d774ab3acc4fee41fbed6dbfc192b57d5f79f34b (#4770)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-06 09:02:51 +01:00
LocalAI [bot]
81be192279 chore: ⬆️ Update leejet/stable-diffusion.cpp to d46ed5e184b97c2018dc2e8105925bdb8775e02c (#4769)
⬆️ Update leejet/stable-diffusion.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-05 23:49:15 +00:00
Ettore Di Giacinto
28a1310890 chore(docs): enhance visibility
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 19:50:32 +01:00
Ettore Di Giacinto
2a702e9ca4 chore(docs): small updates
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 19:49:11 +01:00
Ettore Di Giacinto
3ecaea1b6e chore(docs): update sponsors in the website
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 19:41:55 +01:00
Ettore Di Giacinto
7daf5ac3e3 fix(gallery): do not return overrides and additional config (#4768)
When hitting /models/available we are interested in the model description, name, and a small amount of metadata. Configuration and overrides are internals that are required only for installation.

This also fixes a current bug where hitting /models/available fails if one of the gallery items has overrides with parameters defined.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 18:37:09 +01:00
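For context, a gallery entry of the kind that previously broke /models/available might look like the sketch below; the field names are illustrative and follow the usual gallery YAML layout, not this PR's exact fixtures.

```yaml
# Hypothetical gallery entry: description/name are what /models/available
# needs, while overrides are installation-only internals now omitted from
# the response.
- name: example-model
  description: A short description shown by /models/available
  urls:
  - https://huggingface.co/example/example-model
  overrides:
    parameters:
      model: example-model.Q4_K_M.gguf
```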
Ettore Di Giacinto
7bc80c17f8 chore(model gallery): add LocalAI-functioncall-llama3.2-3b-v0.5 (#4766)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 10:19:31 +01:00
Ettore Di Giacinto
1996ceb293 chore(model gallery): add krutrim-ai-labs_krutrim-2-instruct (#4765)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 10:17:05 +01:00
Ettore Di Giacinto
0bc3dc43da chore(model gallery): add rubenroy_gilgamesh-72b (#4764)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 10:13:21 +01:00
Ettore Di Giacinto
3324c4e6cb chore(model gallery): add agi-0_art-skynet-3b (#4763)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 10:09:33 +01:00
LocalAI [bot]
7329db4e78 chore: ⬆️ Update ggerganov/llama.cpp to 3ec9fd4b77b6aca03a3c2bf678eae3f9517d6904 (#4762)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-04 21:48:49 +00:00
133 changed files with 243795 additions and 2719 deletions

View File

@@ -1,23 +0,0 @@
meta {
name: musicgen
type: http
seq: 1
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/sound-generation
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model_id": "facebook/musicgen-small",
"text": "Exciting 80s Newscast Interstitial",
"duration_seconds": 8
}
}

View File

@@ -1,17 +0,0 @@
meta {
name: backend monitor
type: http
seq: 4
}
get {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/monitor
body: json
auth: none
}
body:json {
{
"model": "{{DEFAULT_MODEL}}"
}
}

View File

@@ -1,21 +0,0 @@
meta {
name: backend-shutdown
type: http
seq: 3
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/shutdown
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}"
}
}

View File

@@ -1,5 +0,0 @@
{
"version": "1",
"name": "LocalAI Test Requests",
"type": "collection"
}

View File

@@ -1,6 +0,0 @@
vars {
HOST: localhost
PORT: 8080
DEFAULT_MODEL: gpt-3.5-turbo
PROTOCOL: http://
}

View File

@@ -1,11 +0,0 @@
meta {
name: get models list
type: http
seq: 2
}
get {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models
body: none
auth: none
}

View File

@@ -1,25 +0,0 @@
meta {
name: Generate image
type: http
seq: 1
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/images/generations
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"prompt": "<positive prompt>|<negative prompt>",
"model": "model-name",
"step": 51,
"size": "1024x1024",
"image": ""
}
}

View File

@@ -1,24 +0,0 @@
meta {
name: -completions
type: http
seq: 4
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/completions
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"prompt": "function downloadFile(string url, string outputPath) {",
"max_tokens": 256,
"temperature": 0.5
}
}

View File

@@ -1,23 +0,0 @@
meta {
name: -edits
type: http
seq: 5
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/edits
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"input": "What day of the wek is it?",
"instruction": "Fix the spelling mistakes"
}
}

View File

@@ -1,22 +0,0 @@
meta {
name: -embeddings
type: http
seq: 6
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/embeddings
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
}
}

View File

@@ -1,30 +0,0 @@
meta {
name: chat completion -simple- 1 message-
type: http
seq: 4
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"messages": [
{
"role": "user",
"content": "How could one use friction to cook an egg?"
}
],
"max_tokens": 256,
"temperature": 0.2,
"grammar": ""
}
}

View File

@@ -1,29 +0,0 @@
meta {
name: chat-completions -long-
type: http
seq: 5
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"messages": [{"role": "system", "content": "You are a helpful, intelligent pirate assistant. Always answer as helpfully as possible, while being a pirate and using pirate language. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."},
{"role": "user", "content": "How could one use electricity to cook an egg?"},
{"role": "assistant",
"content": "Shiver me timbers! Using electricity to cook an egg? Well, matey, I reckon that be a right curious idea! *adjusts spectacles* Now, I ain't sure if this be possible, but I'll do me best to help ye out.\n\nFirst things first, ye gotta understand that electricity be a powerful force, and it can't just cook an egg on its own. Ye see, electricity be like a mighty wind that can make things happen, but it needs somethin' to work with. So, if ye want to use electricity to cook an egg, ye gotta find a way to harness that power and make it do the cookin'.\n\nNow, I know what ye might be thinkin': \"How do I harness the power of electricity to cook an egg?\" Well, matey, there be a few ways to do it. One way be to use a special device called an \"electric frying pan.\" This be a pan that has a built-in heating element that gets hot when ye plug it into a wall socket. When the element gets hot, ye can crack an egg into the pan and watch as it cook"
},
{"role": "user", "content": "I don't have one of those, just a raw wire and plenty of power! How do we get it done?"}],
"max_tokens": 1024,
"temperature": 0.5
}
}

View File

@@ -1,25 +0,0 @@
meta {
name: chat-completions -stream-
type: http
seq: 6
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"messages": [{"role": "user", "content": "Explain how I can set sail on the ocean using only power generated by seagulls?"}],
"max_tokens": 256,
"temperature": 0.9,
"stream": true
}
}

View File

@@ -1,22 +0,0 @@
meta {
name: add model gallery
type: http
seq: 10
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"url": "file:///home/dave/projects/model-gallery/huggingface/TheBloke__CodeLlama-7B-Instruct-GGML.yaml",
"name": "test"
}
}

View File

@@ -1,21 +0,0 @@
meta {
name: delete model gallery
type: http
seq: 11
}
delete {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"name": "test"
}
}

View File

@@ -1,11 +0,0 @@
meta {
name: list MODELS in galleries
type: http
seq: 7
}
get {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/available
body: none
auth: none
}

View File

@@ -1,11 +0,0 @@
meta {
name: list model GALLERIES
type: http
seq: 8
}
get {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
body: none
auth: none
}

View File

@@ -1,11 +0,0 @@
meta {
name: model delete
type: http
seq: 7
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
body: none
auth: none
}

View File

@@ -1,21 +0,0 @@
meta {
name: model gallery apply -gist-
type: http
seq: 12
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"id": "TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q2_K.bin"
}
}

View File

@@ -1,22 +0,0 @@
meta {
name: model gallery apply
type: http
seq: 9
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"id": "dave@TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q3_K_S.bin",
"name": "codellama7b"
}
}

View File

Binary file not shown.

View File

@@ -1,16 +0,0 @@
meta {
name: transcribe
type: http
seq: 1
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/audio/transcriptions
body: multipartForm
auth: none
}
body:multipart-form {
file: @file(transcription/gb1.ogg)
model: whisper-1
}

View File

@@ -1,22 +0,0 @@
meta {
name: -tts
type: http
seq: 2
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
}
}

View File

@@ -1,23 +0,0 @@
meta {
name: musicgen
type: http
seq: 2
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"backend": "transformers",
"model": "facebook/musicgen-small",
"input": "80s Synths playing Jazz"
}
}

.github/labeler.yml (vendored): 2 changes
View File

@@ -1,4 +1,4 @@
enhancements:
enhancement:
- head-branch: ['^feature', 'feature']
dependencies:

View File

@@ -9,7 +9,7 @@ jobs:
fail-fast: false
matrix:
include:
- repository: "ggerganov/llama.cpp"
- repository: "ggml-org/llama.cpp"
variable: "CPPLLAMA_VERSION"
branch: "master"
- repository: "ggerganov/whisper.cpp"

View File

@@ -33,7 +33,7 @@ jobs:
run: |
CGO_ENABLED=0 make build-api
- name: rm
uses: appleboy/ssh-action@v1.2.0
uses: appleboy/ssh-action@v1.2.1
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
@@ -53,7 +53,7 @@ jobs:
rm: true
target: ./local-ai
- name: restarting
uses: appleboy/ssh-action@v1.2.0
uses: appleboy/ssh-action@v1.2.1
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}

View File

@@ -2,9 +2,10 @@ name: 'generate and publish GRPC docker caches'
on:
workflow_dispatch:
push:
branches:
- master
schedule:
# daily at midnight
- cron: '0 0 * * *'
concurrency:
group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
@@ -16,7 +17,7 @@ jobs:
matrix:
include:
- grpc-base-image: ubuntu:22.04
runs-on: 'ubuntu-latest'
runs-on: 'arc-runner-set'
platforms: 'linux/amd64,linux/arm64'
runs-on: ${{matrix.runs-on}}
steps:

View File

@@ -310,6 +310,11 @@ jobs:
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
- name: Cleanup
run: |
docker builder prune -f
docker system prune --force --volumes --all
- name: Latest tag
# run this on branches, when it is a tag and there is a latest-image defined
if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'

View File

@@ -1,6 +1,6 @@
MIT License
Copyright (c) 2023-2024 Ettore Di Giacinto (mudler@localai.io)
Copyright (c) 2023-2025 Ettore Di Giacinto (mudler@localai.io)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@@ -6,9 +6,7 @@ BINARY_NAME=local-ai
DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=5598f475be3e31430fbe17ebb85654ec90dc201e
CPPLLAMA_VERSION?=1782cdfed60952f9ff333fc2ab5245f2be702453
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
@@ -24,7 +22,7 @@ BARKCPP_VERSION?=v1.0.0
# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=5eb15ef4d022bef4a391de4f5f6556e81fbb5024
STABLEDIFFUSION_GGML_VERSION?=19d876ee300a055629926ff836489901f734f2b7
ONNX_VERSION?=1.20.0
ONNX_ARCH?=x64
@@ -151,7 +149,6 @@ ifeq ($(BUILD_TYPE),hipblas)
LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
export CXX=$(ROCM_HOME)/llvm/bin/clang++
export CC=$(ROCM_HOME)/llvm/bin/clang
# llama-ggml has no hipblas support, so override it here.
export STABLE_BUILD_TYPE=
export GGML_HIP=1
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
@@ -188,7 +185,6 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
@@ -222,19 +218,6 @@ endif
all: help
## go-llama.cpp
sources/go-llama.cpp:
mkdir -p sources/go-llama.cpp
cd sources/go-llama.cpp && \
git init && \
git remote add origin $(GOLLAMA_REPO) && \
git fetch origin && \
git checkout $(GOLLAMA_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
## bark.cpp
sources/bark.cpp:
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
@@ -310,19 +293,17 @@ sources/whisper.cpp:
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
replace:
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
dropreplace:
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
prepare-sources: get-sources replace
$(GOCMD) mod download
@@ -330,7 +311,6 @@ prepare-sources: get-sources replace
## GENERIC
rebuild: ## Rebuilds the project
$(GOCMD) clean -cache
$(MAKE) -C sources/go-llama.cpp clean
$(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-piper clean
$(MAKE) build
@@ -434,7 +414,7 @@ run: prepare ## run local-ai
test-models/testmodel.ggml:
mkdir test-models
mkdir test-dir
wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel.ggml
wget -q https://huggingface.co/RichardErkhov/Qwen_-_Qwen2-1.5B-Instruct-gguf/resolve/main/Qwen2-1.5B-Instruct.Q2_K.gguf -O test-models/testmodel.ggml
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
@@ -449,8 +429,7 @@ test: prepare test-models/testmodel.ggml grpcs
export GO_TAGS="tts debug"
$(MAKE) prepare-test
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
$(MAKE) test-llama
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
$(MAKE) test-llama-gguf
$(MAKE) test-tts
$(MAKE) test-stablediffusion
@@ -479,10 +458,6 @@ teardown-e2e:
rm -rf $(TEST_DIR) || true
docker stop $$(docker ps -q --filter ancestor=localai-tests)
test-llama: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
test-llama-gguf: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
@@ -760,13 +735,6 @@ backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
mkdir -p backend-assets/util/
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/llama-ggml
endif
backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/

View File

@@ -212,7 +212,7 @@ A huge thank you to our generous sponsors who support this project covering CI e
<p align="center">
<a href="https://www.spectrocloud.com/" target="blank">
<img height="200" src="https://github.com/go-skynet/LocalAI/assets/2420543/68a6f3cb-8a65-4a4d-99b5-6417a8905512">
<img height="200" src="https://github.com/user-attachments/assets/72eab1dd-8b93-4fc0-9ade-84db49f24962">
</a>
<a href="https://www.premai.io/" target="blank">
<img height="200" src="https://github.com/mudler/LocalAI/assets/2420543/42e4ca83-661e-4f79-8e46-ae43689683d6"> <br>

aio/cpu/vad.yaml (new file): 8 additions
View File

@@ -0,0 +1,8 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808

View File

@@ -129,7 +129,7 @@ detect_gpu
detect_gpu_size
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vad.yaml,/aio/${PROFILE}/vision.yaml}"
check_vars
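The diff above adds vad.yaml to the default MODELS list of the AIO entrypoint. As a usage sketch (assuming the standard AIO images and the paths shown above), the list can also be overridden explicitly:

```yaml
# docker-compose sketch; image tag and file paths are assumptions based on
# the AIO layout referenced in this diff.
services:
  localai:
    image: localai/localai:latest-aio-cpu
    environment:
      - MODELS=/aio/cpu/text-to-text.yaml,/aio/cpu/vad.yaml
```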

aio/gpu-8g/vad.yaml (new file): 8 additions
View File

@@ -0,0 +1,8 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808

aio/intel/vad.yaml (new file): 8 additions
View File

@@ -0,0 +1,8 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808

View File

@@ -165,7 +165,7 @@ message Reply {
message GrammarTrigger {
string word = 1;
bool at_start = 2;
bool at_start = 2;
}
message ModelOptions {
@@ -229,6 +229,11 @@ message ModelOptions {
int32 MaxModelLen = 54;
int32 TensorParallelSize = 55;
string LoadFormat = 58;
bool DisableLogStatus = 66;
string DType = 67;
int32 LimitImagePerPrompt = 68;
int32 LimitVideoPerPrompt = 69;
int32 LimitAudioPerPrompt = 70;
string MMProj = 41;

View File

@@ -1155,6 +1155,14 @@ struct llama_server_context
slot.has_next_token = false;
}
if (slot.n_past >= slot.n_ctx) {
slot.truncated = true;
slot.stopped_limit = true;
slot.has_next_token = false;
LOG_VERBOSE("stopped due to running out of context capacity", {});
}
if (result.tok == llama_vocab_eos(vocab) || llama_vocab_is_eog(vocab, result.tok))
{
slot.stopped_eos = true;
@@ -1627,17 +1635,17 @@ struct llama_server_context
{
if (slot.is_processing() && system_tokens.size() + slot.cache_tokens.size() >= (size_t) slot.n_ctx)
{
// this check is redundant (for good)
// we should never get here, because generation should already stopped in process_token()
// START LOCALAI changes
// Temporary disable context-shifting as it can lead to infinite loops (issue: https://github.com/ggerganov/llama.cpp/issues/3969)
// See: https://github.com/mudler/LocalAI/issues/1333
// Context is exhausted, release the slot
slot.release();
send_final_response(slot);
slot.cache_tokens.clear();
slot.n_past = 0;
slot.truncated = false;
slot.has_next_token = true;
LOG("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
slot.has_next_token = false;
LOG_ERROR("context is exhausted, release the slot", {});
continue;
// END LOCALAI changes

View File

@@ -35,6 +35,8 @@ const char* sample_method_str[] = {
"ipndm",
"ipndm_v",
"lcm",
"ddim_trailing",
"tcd",
};
// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
@@ -173,6 +175,7 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
-1, //clip_skip
cfg_scale, // sfg_scale
3.5f,
0, // eta
width,
height,
sample_method,

View File

@@ -1,204 +0,0 @@
package main
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/go-skynet/go-llama.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
type LLM struct {
base.SingleThread
llama *llama.LLama
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
ropeFreqBase := float32(10000)
ropeFreqScale := float32(1)
if opts.RopeFreqBase != 0 {
ropeFreqBase = opts.RopeFreqBase
}
if opts.RopeFreqScale != 0 {
ropeFreqScale = opts.RopeFreqScale
}
llamaOpts := []llama.ModelOption{
llama.WithRopeFreqBase(ropeFreqBase),
llama.WithRopeFreqScale(ropeFreqScale),
}
if opts.NGQA != 0 {
llamaOpts = append(llamaOpts, llama.WithGQA(int(opts.NGQA)))
}
if opts.RMSNormEps != 0 {
llamaOpts = append(llamaOpts, llama.WithRMSNormEPS(opts.RMSNormEps))
}
if opts.ContextSize != 0 {
llamaOpts = append(llamaOpts, llama.SetContext(int(opts.ContextSize)))
}
if opts.F16Memory {
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
}
if opts.Embeddings {
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
}
if opts.NGPULayers != 0 {
llamaOpts = append(llamaOpts, llama.SetGPULayers(int(opts.NGPULayers)))
}
llamaOpts = append(llamaOpts, llama.SetMMap(opts.MMap))
llamaOpts = append(llamaOpts, llama.SetMainGPU(opts.MainGPU))
llamaOpts = append(llamaOpts, llama.SetTensorSplit(opts.TensorSplit))
if opts.NBatch != 0 {
llamaOpts = append(llamaOpts, llama.SetNBatch(int(opts.NBatch)))
} else {
llamaOpts = append(llamaOpts, llama.SetNBatch(512))
}
if opts.NUMA {
llamaOpts = append(llamaOpts, llama.EnableNUMA)
}
if opts.LowVRAM {
llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
}
model, err := llama.New(opts.ModelFile, llamaOpts...)
llm.llama = model
return err
}
func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
ropeFreqBase := float32(10000)
ropeFreqScale := float32(1)
if opts.RopeFreqBase != 0 {
ropeFreqBase = opts.RopeFreqBase
}
if opts.RopeFreqScale != 0 {
ropeFreqScale = opts.RopeFreqScale
}
predictOptions := []llama.PredictOption{
llama.SetTemperature(opts.Temperature),
llama.SetTopP(opts.TopP),
llama.SetTopK(int(opts.TopK)),
llama.SetTokens(int(opts.Tokens)),
llama.SetThreads(int(opts.Threads)),
llama.WithGrammar(opts.Grammar),
llama.SetRopeFreqBase(ropeFreqBase),
llama.SetRopeFreqScale(ropeFreqScale),
llama.SetNegativePromptScale(opts.NegativePromptScale),
llama.SetNegativePrompt(opts.NegativePrompt),
}
if opts.PromptCacheAll {
predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
}
if opts.PromptCacheRO {
predictOptions = append(predictOptions, llama.EnablePromptCacheRO)
}
// Expected absolute path
if opts.PromptCachePath != "" {
predictOptions = append(predictOptions, llama.SetPathPromptCache(opts.PromptCachePath))
}
if opts.Mirostat != 0 {
predictOptions = append(predictOptions, llama.SetMirostat(int(opts.Mirostat)))
}
if opts.MirostatETA != 0 {
predictOptions = append(predictOptions, llama.SetMirostatETA(opts.MirostatETA))
}
if opts.MirostatTAU != 0 {
predictOptions = append(predictOptions, llama.SetMirostatTAU(opts.MirostatTAU))
}
if opts.Debug {
predictOptions = append(predictOptions, llama.Debug)
}
predictOptions = append(predictOptions, llama.SetStopWords(opts.StopPrompts...))
if opts.PresencePenalty != 0 {
predictOptions = append(predictOptions, llama.SetPenalty(opts.PresencePenalty))
}
if opts.NKeep != 0 {
predictOptions = append(predictOptions, llama.SetNKeep(int(opts.NKeep)))
}
if opts.Batch != 0 {
predictOptions = append(predictOptions, llama.SetBatch(int(opts.Batch)))
}
if opts.F16KV {
predictOptions = append(predictOptions, llama.EnableF16KV)
}
if opts.IgnoreEOS {
predictOptions = append(predictOptions, llama.IgnoreEOS)
}
if opts.Seed != 0 {
predictOptions = append(predictOptions, llama.SetSeed(int(opts.Seed)))
}
//predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))
predictOptions = append(predictOptions, llama.SetFrequencyPenalty(opts.FrequencyPenalty))
predictOptions = append(predictOptions, llama.SetMlock(opts.MLock))
predictOptions = append(predictOptions, llama.SetMemoryMap(opts.MMap))
predictOptions = append(predictOptions, llama.SetPredictionMainGPU(opts.MainGPU))
predictOptions = append(predictOptions, llama.SetPredictionTensorSplit(opts.TensorSplit))
predictOptions = append(predictOptions, llama.SetTailFreeSamplingZ(opts.TailFreeSamplingZ))
predictOptions = append(predictOptions, llama.SetTypicalP(opts.TypicalP))
return predictOptions
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
return llm.llama.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
predictOptions := buildPredictOptions(opts)
predictOptions = append(predictOptions, llama.SetTokenCallback(func(token string) bool {
results <- token
return true
}))
go func() {
_, err := llm.llama.Predict(opts.Prompt, predictOptions...)
if err != nil {
fmt.Println("err: ", err)
}
close(results)
}()
return nil
}
func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
predictOptions := buildPredictOptions(opts)
if len(opts.EmbeddingTokens) > 0 {
tokens := []int{}
for _, t := range opts.EmbeddingTokens {
tokens = append(tokens, int(t))
}
return llm.llama.TokenEmbeddings(tokens, predictOptions...)
}
return llm.llama.Embeddings(opts.Embeddings, predictOptions...)
}
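
The streaming path above pushes each generated token into the results channel from the token callback and closes the channel once prediction ends. Below is a minimal sketch of a caller consuming that contract; consumeStream is a hypothetical helper (not part of the PR), and it assumes the same pb import as the file above plus the standard "strings" package.

// consumeStream is a hypothetical helper illustrating the PredictStream
// contract: the backend sends one token per channel message and closes the
// channel when generation finishes (or fails).
func consumeStream(llm *LLM, prompt string) (string, error) {
	results := make(chan string)
	if err := llm.PredictStream(&pb.PredictOptions{Prompt: prompt}, results); err != nil {
		return "", err
	}
	var sb strings.Builder
	for token := range results {
		sb.WriteString(token) // accumulate streamed tokens in arrival order
	}
	return sb.String(), nil
}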

View File

@@ -1,19 +0,0 @@
package main
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
panic(err)
}
}

View File

@@ -1,4 +1,4 @@
transformers
transformers==4.48.3
accelerate
torch==2.4.1
coqui-tts

View File

@@ -1,6 +1,6 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
torchaudio==2.4.1+cu118
transformers
transformers==4.48.3
accelerate
coqui-tts

View File

@@ -1,5 +1,5 @@
torch==2.4.1
torchaudio==2.4.1
transformers
transformers==4.48.3
accelerate
coqui-tts

View File

@@ -1,6 +1,6 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0
torchaudio==2.4.1+rocm6.0
transformers
transformers==4.48.3
accelerate
coqui-tts

View File

@@ -5,6 +5,6 @@ torchaudio==2.3.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu
optimum[openvino]
setuptools
transformers
transformers==4.48.3
accelerate
coqui-tts

View File

@@ -159,6 +159,18 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
torchType = torch.float16
variant = "fp16"
options = request.Options
# empty dict
self.options = {}
# The options are a list of strings in this form optname:optvalue
# We are storing all the options in a dict so we can use it later when
# generating the images
for opt in options:
key, value = opt.split(":")
self.options[key] = value
local = False
modelFile = request.Model
@@ -441,6 +453,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
# create a dictionary of parameters by using the keys from EnableParameters and the values from defaults
kwargs = {key: options.get(key) for key in keys if key in options}
# populate kwargs from self.options.
kwargs.update(self.options)
# Set seed
if request.seed > 0:
kwargs["generator"] = torch.Generator(device=self.device).manual_seed(

View File

@@ -109,6 +109,17 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
engine_args.swap_space = request.SwapSpace
if request.MaxModelLen != 0:
engine_args.max_model_len = request.MaxModelLen
if request.DisableLogStatus:
engine_args.disable_log_status = request.DisableLogStatus
if request.DType != "":
engine_args.dtype = request.DType
if request.LimitImagePerPrompt != 0 or request.LimitVideoPerPrompt != 0 or request.LimitAudioPerPrompt != 0:
# limit-mm-per-prompt defaults to 1 per modality, based on vLLM docs
engine_args.limit_mm_per_prompt = {
"image": max(request.LimitImagePerPrompt, 1),
"video": max(request.LimitVideoPerPrompt, 1),
"audio": max(request.LimitAudioPerPrompt, 1)
}
try:
self.llm = AsyncLLMEngine.from_engine_args(engine_args)
@@ -269,7 +280,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
def load_image(self, image_path: str):
"""
Load an image from the given file path or base64 encoded data.
Args:
image_path (str): The path to the image file or base64 encoded data.
@@ -288,7 +299,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
def load_video(self, video_path: str):
"""
Load a video from the given file path.
Args:
video_path (str): The path to the image file.
@@ -335,4 +346,4 @@ if __name__ == "__main__":
)
args = parser.parse_args()
asyncio.run(serve(args.addr))
asyncio.run(serve(args.addr))


View File

@@ -145,13 +145,7 @@ func New(opts ...config.AppOption) (*Application, error) {
if options.LoadToMemory != nil {
for _, m := range options.LoadToMemory {
cfg, err := application.BackendLoader().LoadBackendConfigFileByName(m, options.ModelPath,
config.LoadOptionDebug(options.Debug),
config.LoadOptionThreads(options.Threads),
config.LoadOptionContextSize(options.ContextSize),
config.LoadOptionF16(options.F16),
config.ModelPath(options.ModelPath),
)
cfg, err := application.BackendLoader().LoadBackendConfigFileByNameDefaultOptions(m, options)
if err != nil {
return nil, err
}

View File

@@ -33,7 +33,7 @@ type TokenUsage struct {
TimingTokenGeneration float64
}
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c *config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
modelFile := c.Model
// Check if the modelFile exists, if it doesn't try to load it from the gallery
@@ -48,7 +48,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
}
}
opts := ModelOptions(c, o)
opts := ModelOptions(*c, o)
inferenceModel, err := loader.Load(opts...)
if err != nil {
return nil, err
@@ -84,7 +84,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
fn := func() (LLMResponse, error) {
opts := gRPCPredictOpts(c, loader.ModelPath)
opts := gRPCPredictOpts(*c, loader.ModelPath)
opts.Prompt = s
opts.Messages = protoMessages
opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate

View File

@@ -159,6 +159,12 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
SwapSpace: int32(c.SwapSpace),
MaxModelLen: int32(c.MaxModelLen),
TensorParallelSize: int32(c.TensorParallelSize),
DisableLogStatus: c.DisableLogStatus,
DType: c.DType,
// LimitMMPerPrompt vLLM
LimitImagePerPrompt: int32(c.LimitMMPerPrompt.LimitImagePerPrompt),
LimitVideoPerPrompt: int32(c.LimitMMPerPrompt.LimitVideoPerPrompt),
LimitAudioPerPrompt: int32(c.LimitMMPerPrompt.LimitAudioPerPrompt),
MMProj: c.MMProj,
FlashAttention: c.FlashAttention,
CacheTypeKey: c.CacheTypeK,

View File

@@ -9,10 +9,10 @@ import (
model "github.com/mudler/LocalAI/pkg/model"
)
func Rerank(modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
func Rerank(request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
opts := ModelOptions(backendConfig, appConfig)
rerankModel, err := loader.Load(opts...)
if err != nil {
return nil, err
}

View File

@@ -13,7 +13,6 @@ import (
)
func SoundGeneration(
modelFile string,
text string,
duration *float32,
temperature *float32,
@@ -25,8 +24,9 @@ func SoundGeneration(
backendConfig config.BackendConfig,
) (string, *proto.Result, error) {
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
opts := ModelOptions(backendConfig, appConfig)
soundGenModel, err := loader.Load(opts...)
if err != nil {
return "", nil, err
}
@@ -44,7 +44,7 @@ func SoundGeneration(
res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{
Text: text,
Model: modelFile,
Model: backendConfig.Model,
Dst: filePath,
Sample: doSample,
Duration: duration,

View File

@@ -4,19 +4,17 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/model"
)
func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {
modelFile := backendConfig.Model
var inferenceModel grpc.Backend
var err error
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
opts := ModelOptions(backendConfig, appConfig)
inferenceModel, err = loader.Load(opts...)
if err != nil {
return schema.TokenizeResponse{}, err
}

View File

@@ -47,7 +47,7 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
tks = append(tks, int(t))
}
tr.Segments = append(tr.Segments,
schema.Segment{
schema.TranscriptionSegment{
Text: s.Text,
Id: int(s.Id),
Start: time.Duration(s.Start),

View File

@@ -14,28 +14,22 @@ import (
)
func ModelTTS(
backend,
text,
modelFile,
voice,
language string,
loader *model.ModelLoader,
appConfig *config.ApplicationConfig,
backendConfig config.BackendConfig,
) (string, *proto.Result, error) {
bb := backend
if bb == "" {
bb = model.PiperBackend
}
opts := ModelOptions(backendConfig, appConfig, model.WithBackendString(bb), model.WithModel(modelFile))
opts := ModelOptions(backendConfig, appConfig, model.WithDefaultBackendString(model.PiperBackend))
ttsModel, err := loader.Load(opts...)
if err != nil {
return "", nil, err
}
if ttsModel == nil {
return "", nil, fmt.Errorf("could not load piper model")
return "", nil, fmt.Errorf("could not load tts model %q", backendConfig.Model)
}
if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
@@ -45,22 +39,21 @@ func ModelTTS(
fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
filePath := filepath.Join(appConfig.AudioDir, fileName)
// If the model file is not empty, we pass it joined with the model path
// We join the model name to the model path here. This seems to only be done for TTS and is HIGHLY suspect.
// This should be addressed in a follow up PR soon.
// Copying it over nearly verbatim, as TTS backends are not functional without this.
modelPath := ""
if modelFile != "" {
// If the model file is not empty, we pass it joined with the model path
// Checking first that it exists and is not outside ModelPath
// TODO: we should actually first check if the modelFile is looking like
// a FS path
mp := filepath.Join(loader.ModelPath, modelFile)
if _, err := os.Stat(mp); err == nil {
if err := utils.VerifyPath(mp, appConfig.ModelPath); err != nil {
return "", nil, err
}
modelPath = mp
} else {
modelPath = modelFile
// Checking first that it exists and is not outside ModelPath
// TODO: we should actually first check if the modelFile is looking like
// a FS path
mp := filepath.Join(loader.ModelPath, backendConfig.Model)
if _, err := os.Stat(mp); err == nil {
if err := utils.VerifyPath(mp, appConfig.ModelPath); err != nil {
return "", nil, err
}
modelPath = mp
} else {
modelPath = backendConfig.Model // skip this step if it fails?????
}
res, err := ttsModel.TTS(context.Background(), &proto.TTSRequest{

core/backend/vad.go (new file, 38 lines)
View File

@@ -0,0 +1,38 @@
package backend
import (
"context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
)
func VAD(request *schema.VADRequest,
ctx context.Context,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
backendConfig config.BackendConfig) (*schema.VADResponse, error) {
opts := ModelOptions(backendConfig, appConfig)
vadModel, err := ml.Load(opts...)
if err != nil {
return nil, err
}
req := proto.VADRequest{
Audio: request.Audio,
}
resp, err := vadModel.VAD(ctx, &req)
if err != nil {
return nil, err
}
segments := []schema.VADSegment{}
for _, s := range resp.Segments {
segments = append(segments, schema.VADSegment{Start: s.Start, End: s.End})
}
return &schema.VADResponse{
Segments: segments,
}, nil
}
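
The new core/backend/vad.go above wraps the gRPC voice-activity-detection call behind a single function. A rough, hypothetical call-site sketch follows (detectSpeech, the sample slice, and the already-loaded config are placeholders, not taken from the PR; it assumes schema.VADRequest.Audio carries raw float samples as the proto mapping above suggests).

// Hypothetical caller: run VAD over raw audio samples and print the detected
// speech segments returned by the backend.
func detectSpeech(ctx context.Context, ml *model.ModelLoader, appConfig *config.ApplicationConfig, cfg config.BackendConfig, samples []float32) error {
	resp, err := VAD(&schema.VADRequest{Audio: samples}, ctx, ml, appConfig, cfg)
	if err != nil {
		return err
	}
	for _, seg := range resp.Segments {
		fmt.Printf("speech from %.2fs to %.2fs\n", seg.Start, seg.End)
	}
	return nil
}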

View File

@@ -86,13 +86,14 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
options := config.BackendConfig{}
options.SetDefaults()
options.Backend = t.Backend
options.Model = t.Model
var inputFile *string
if t.InputFile != "" {
inputFile = &t.InputFile
}
filePath, _, err := backend.SoundGeneration(t.Model, text,
filePath, _, err := backend.SoundGeneration(text,
parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)

View File

@@ -52,8 +52,10 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error {
options := config.BackendConfig{}
options.SetDefaults()
options.Backend = t.Backend
options.Model = t.Model
filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, t.Language, ml, opts, options)
filePath, _, err := backend.ModelTTS(text, t.Voice, t.Language, ml, opts, options)
if err != nil {
return err
}

View File

@@ -130,25 +130,28 @@ type LLMConfig struct {
TrimSpace []string `yaml:"trimspace"`
TrimSuffix []string `yaml:"trimsuffix"`
ContextSize *int `yaml:"context_size"`
NUMA bool `yaml:"numa"`
LoraAdapter string `yaml:"lora_adapter"`
LoraBase string `yaml:"lora_base"`
LoraAdapters []string `yaml:"lora_adapters"`
LoraScales []float32 `yaml:"lora_scales"`
LoraScale float32 `yaml:"lora_scale"`
NoMulMatQ bool `yaml:"no_mulmatq"`
DraftModel string `yaml:"draft_model"`
NDraft int32 `yaml:"n_draft"`
Quantization string `yaml:"quantization"`
LoadFormat string `yaml:"load_format"`
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
EnforceEager bool `yaml:"enforce_eager"` // vLLM
SwapSpace int `yaml:"swap_space"` // vLLM
MaxModelLen int `yaml:"max_model_len"` // vLLM
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
MMProj string `yaml:"mmproj"`
ContextSize *int `yaml:"context_size"`
NUMA bool `yaml:"numa"`
LoraAdapter string `yaml:"lora_adapter"`
LoraBase string `yaml:"lora_base"`
LoraAdapters []string `yaml:"lora_adapters"`
LoraScales []float32 `yaml:"lora_scales"`
LoraScale float32 `yaml:"lora_scale"`
NoMulMatQ bool `yaml:"no_mulmatq"`
DraftModel string `yaml:"draft_model"`
NDraft int32 `yaml:"n_draft"`
Quantization string `yaml:"quantization"`
LoadFormat string `yaml:"load_format"`
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
EnforceEager bool `yaml:"enforce_eager"` // vLLM
SwapSpace int `yaml:"swap_space"` // vLLM
MaxModelLen int `yaml:"max_model_len"` // vLLM
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
DisableLogStatus bool `yaml:"disable_log_stats"` // vLLM
DType string `yaml:"dtype"` // vLLM
LimitMMPerPrompt LimitMMPerPrompt `yaml:"limit_mm_per_prompt"` // vLLM
MMProj string `yaml:"mmproj"`
FlashAttention bool `yaml:"flash_attention"`
NoKVOffloading bool `yaml:"no_kv_offloading"`
@@ -166,6 +169,13 @@ type LLMConfig struct {
CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale
}
// LimitMMPerPrompt is a struct that holds the configuration for the limit-mm-per-prompt config in vLLM
type LimitMMPerPrompt struct {
LimitImagePerPrompt int `yaml:"image"`
LimitVideoPerPrompt int `yaml:"video"`
LimitAudioPerPrompt int `yaml:"audio"`
}
// AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend
type AutoGPTQ struct {
ModelBaseName string `yaml:"model_base_name"`
@@ -212,7 +222,15 @@ func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {
return err
}
*c = BackendConfig(aux)
c.KnownUsecases = GetUsecasesFromYAML(c.KnownUsecaseStrings)
// Make sure the usecases are valid, we rewrite with what we identified
c.KnownUsecaseStrings = []string{}
for k, usecase := range GetAllBackendConfigUsecases() {
if c.HasUsecases(usecase) {
c.KnownUsecaseStrings = append(c.KnownUsecaseStrings, k)
}
}
return nil
}
@@ -287,7 +305,8 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
defaultTopP := 0.95
defaultTopK := 40
defaultTemp := 0.9
defaultMirostat := 2
// https://github.com/mudler/LocalAI/issues/2780
defaultMirostat := 0
defaultMirostatTAU := 5.0
defaultMirostatETA := 0.1
defaultTypicalP := 1.0
@@ -436,19 +455,21 @@ func (c *BackendConfig) HasTemplate() bool {
type BackendConfigUsecases int
const (
FLAG_ANY BackendConfigUsecases = 0b000000000
FLAG_CHAT BackendConfigUsecases = 0b000000001
FLAG_COMPLETION BackendConfigUsecases = 0b000000010
FLAG_EDIT BackendConfigUsecases = 0b000000100
FLAG_EMBEDDINGS BackendConfigUsecases = 0b000001000
FLAG_RERANK BackendConfigUsecases = 0b000010000
FLAG_IMAGE BackendConfigUsecases = 0b000100000
FLAG_TRANSCRIPT BackendConfigUsecases = 0b001000000
FLAG_TTS BackendConfigUsecases = 0b010000000
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b100000000
FLAG_ANY BackendConfigUsecases = 0b00000000000
FLAG_CHAT BackendConfigUsecases = 0b00000000001
FLAG_COMPLETION BackendConfigUsecases = 0b00000000010
FLAG_EDIT BackendConfigUsecases = 0b00000000100
FLAG_EMBEDDINGS BackendConfigUsecases = 0b00000001000
FLAG_RERANK BackendConfigUsecases = 0b00000010000
FLAG_IMAGE BackendConfigUsecases = 0b00000100000
FLAG_TRANSCRIPT BackendConfigUsecases = 0b00001000000
FLAG_TTS BackendConfigUsecases = 0b00010000000
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b00100000000
FLAG_TOKENIZE BackendConfigUsecases = 0b01000000000
FLAG_VAD BackendConfigUsecases = 0b10000000000
// Common Subsets
FLAG_LLM BackendConfigUsecases = FLAG_CHAT & FLAG_COMPLETION & FLAG_EDIT
FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
)
func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
@@ -463,10 +484,16 @@ func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
"FLAG_TRANSCRIPT": FLAG_TRANSCRIPT,
"FLAG_TTS": FLAG_TTS,
"FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION,
"FLAG_TOKENIZE": FLAG_TOKENIZE,
"FLAG_VAD": FLAG_VAD,
"FLAG_LLM": FLAG_LLM,
}
}
func stringToFlag(s string) string {
return "FLAG_" + strings.ToUpper(s)
}
func GetUsecasesFromYAML(input []string) *BackendConfigUsecases {
if len(input) == 0 {
return nil
@@ -474,7 +501,7 @@ func GetUsecasesFromYAML(input []string) *BackendConfigUsecases {
result := FLAG_ANY
flags := GetAllBackendConfigUsecases()
for _, str := range input {
flag, exists := flags["FLAG_"+strings.ToUpper(str)]
flag, exists := flags[stringToFlag(str)]
if exists {
result |= flag
}
@@ -548,5 +575,18 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
}
}
if (u & FLAG_TOKENIZE) == FLAG_TOKENIZE {
tokenizeCapableBackends := []string{"llama.cpp", "rwkv"}
if !slices.Contains(tokenizeCapableBackends, c.Backend) {
return false
}
}
if (u & FLAG_VAD) == FLAG_VAD {
if c.Backend != "silero-vad" {
return false
}
}
return true
}
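
The usecase flags above are single-bit values, so composite subsets must be built with bitwise OR: the old FLAG_CHAT & FLAG_COMPLETION & FLAG_EDIT evaluated to 0 (i.e. FLAG_ANY), which is what the change to | fixes. A small illustrative sketch of how such a mask is combined and checked (the names below are stand-ins, not the real constants or methods):

// Illustrative only: combining single-bit usecase flags and testing membership.
const (
	flagChat       = 0b001
	flagCompletion = 0b010
	flagEdit       = 0b100

	// OR builds the union; AND of distinct single-bit flags is always zero.
	flagLLM = flagChat | flagCompletion | flagEdit
)

// hasUsecases reports whether every bit in want is present in have.
func hasUsecases(have, want int) bool {
	return have&want == want
}

// hasUsecases(flagLLM, flagChat) == true
// hasUsecases(flagChat, flagLLM) == false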

View File

@@ -81,10 +81,10 @@ func readMultipleBackendConfigsFromFile(file string, opts ...ConfigLoaderOption)
c := &[]*BackendConfig{}
f, err := os.ReadFile(file)
if err != nil {
return nil, fmt.Errorf("cannot read config file: %w", err)
return nil, fmt.Errorf("readMultipleBackendConfigsFromFile cannot read config file %q: %w", file, err)
}
if err := yaml.Unmarshal(f, c); err != nil {
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
return nil, fmt.Errorf("readMultipleBackendConfigsFromFile cannot unmarshal config file %q: %w", file, err)
}
for _, cc := range *c {
@@ -101,10 +101,10 @@ func readBackendConfigFromFile(file string, opts ...ConfigLoaderOption) (*Backen
c := &BackendConfig{}
f, err := os.ReadFile(file)
if err != nil {
return nil, fmt.Errorf("cannot read config file: %w", err)
return nil, fmt.Errorf("readBackendConfigFromFile cannot read config file %q: %w", file, err)
}
if err := yaml.Unmarshal(f, c); err != nil {
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
return nil, fmt.Errorf("readBackendConfigFromFile cannot unmarshal config file %q: %w", file, err)
}
c.SetDefaults(opts...)
@@ -117,7 +117,9 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
// Load a config file if present after the model name
cfg := &BackendConfig{
PredictionOptions: schema.PredictionOptions{
Model: modelName,
BasicModelRequest: schema.BasicModelRequest{
Model: modelName,
},
},
}
@@ -145,6 +147,15 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
return cfg, nil
}
func (bcl *BackendConfigLoader) LoadBackendConfigFileByNameDefaultOptions(modelName string, appConfig *ApplicationConfig) (*BackendConfig, error) {
return bcl.LoadBackendConfigFileByName(modelName, appConfig.ModelPath,
LoadOptionDebug(appConfig.Debug),
LoadOptionThreads(appConfig.Threads),
LoadOptionContextSize(appConfig.ContextSize),
LoadOptionF16(appConfig.F16),
ModelPath(appConfig.ModelPath))
}
// This format is currently only used when reading a single file at startup, passed in via ApplicationConfig.ConfigFile
func (bcl *BackendConfigLoader) LoadMultipleBackendConfigsSingleFile(file string, opts ...ConfigLoaderOption) error {
bcl.Lock()
@@ -167,7 +178,7 @@ func (bcl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoa
defer bcl.Unlock()
c, err := readBackendConfigFromFile(file, opts...)
if err != nil {
return fmt.Errorf("cannot read config file: %w", err)
return fmt.Errorf("LoadBackendConfig cannot read config file %q: %w", file, err)
}
if c.Validate() {
@@ -324,9 +335,10 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error {
func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
bcl.Lock()
defer bcl.Unlock()
entries, err := os.ReadDir(path)
if err != nil {
return fmt.Errorf("cannot read directory '%s': %w", path, err)
return fmt.Errorf("LoadBackendConfigsFromPath cannot read directory '%s': %w", path, err)
}
files := make([]fs.FileInfo, 0, len(entries))
for _, entry := range entries {
@@ -344,13 +356,13 @@ func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...
}
c, err := readBackendConfigFromFile(filepath.Join(path, file.Name()), opts...)
if err != nil {
log.Error().Err(err).Msgf("cannot read config file: %s", file.Name())
log.Error().Err(err).Str("File Name", file.Name()).Msgf("LoadBackendConfigsFromPath cannot read config file")
continue
}
if c.Validate() {
bcl.configs[c.Name] = *c
} else {
log.Error().Err(err).Msgf("config is not valid")
log.Error().Err(err).Str("Name", c.Name).Msgf("config is not valid")
}
}

View File

@@ -161,10 +161,11 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {
}
// We try to guess only if we don't have a template defined already
f, err := gguf.ParseGGUFFile(filepath.Join(modelPath, cfg.ModelFileName()))
guessPath := filepath.Join(modelPath, cfg.ModelFileName())
f, err := gguf.ParseGGUFFile(guessPath)
if err != nil {
// Only valid for gguf files
log.Debug().Msgf("guessDefaultsFromFile: %s", "not a GGUF file")
log.Debug().Str("filePath", guessPath).Msg("guessDefaultsFromFile: not a GGUF file")
return
}

View File

@@ -29,6 +29,8 @@ func InstallModelFromGallery(galleries []config.Gallery, name string, basePath s
if err != nil {
return err
}
config.Description = model.Description
config.License = model.License
} else if len(model.ConfigFile) > 0 {
// TODO: is this worse than using the override method with a blank cfg yaml?
reYamlConfig, err := yaml.Marshal(model.ConfigFile)
@@ -114,7 +116,7 @@ func FindModel(models []*GalleryModel, name string, basePath string) *GalleryMod
// List available models
// Models galleries are a list of yaml files that are hosted on a remote server (for example github).
// Each yaml file contains a list of models that can be downloaded and optionally overrides to define a new model setting.
func AvailableGalleryModels(galleries []config.Gallery, basePath string) ([]*GalleryModel, error) {
func AvailableGalleryModels(galleries []config.Gallery, basePath string) (GalleryModels, error) {
var models []*GalleryModel
// Get models from galleries

View File

@@ -48,8 +48,10 @@ var _ = Describe("Model test", func() {
defer os.RemoveAll(tempdir)
gallery := []GalleryModel{{
Name: "bert",
URL: bertEmbeddingsURL,
Metadata: Metadata{
Name: "bert",
URL: bertEmbeddingsURL,
},
}}
out, err := yaml.Marshal(gallery)
Expect(err).ToNot(HaveOccurred())

View File

@@ -11,6 +11,14 @@ import (
// It is used to install the model by resolving the URL and downloading the files.
// The other fields are used to override the configuration of the model.
type GalleryModel struct {
Metadata `json:",inline" yaml:",inline"`
// config_file is read in the situation where URL is blank - and therefore this is a base config.
ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"`
// Overrides are used to override the configuration of the model located at URL
Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"`
}
type Metadata struct {
URL string `json:"url,omitempty" yaml:"url,omitempty"`
Name string `json:"name,omitempty" yaml:"name,omitempty"`
Description string `json:"description,omitempty" yaml:"description,omitempty"`
@@ -18,10 +26,6 @@ type GalleryModel struct {
URLs []string `json:"urls,omitempty" yaml:"urls,omitempty"`
Icon string `json:"icon,omitempty" yaml:"icon,omitempty"`
Tags []string `json:"tags,omitempty" yaml:"tags,omitempty"`
// config_file is read in the situation where URL is blank - and therefore this is a base config.
ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"`
// Overrides are used to override the configuration of the model located at URL
Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"`
// AdditionalFiles are used to add additional files to the model
AdditionalFiles []File `json:"files,omitempty" yaml:"files,omitempty"`
// Gallery is a reference to the gallery which contains the model
@@ -58,3 +62,15 @@ func (gm GalleryModels) FindByName(name string) *GalleryModel {
}
return nil
}
func (gm GalleryModels) Paginate(pageNum int, itemsNum int) GalleryModels {
start := (pageNum - 1) * itemsNum
end := start + itemsNum
if start > len(gm) {
start = len(gm)
}
if end > len(gm) {
end = len(gm)
}
return gm[start:end]
}
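
Paginate clamps both ends of the slice window, so pages past the end return an empty (or shortened) slice instead of panicking. A short illustrative fragment with hypothetical entries and a page size of 2:

// Illustrative only: exercising Paginate's clamping behaviour.
func paginateExample() {
	all := GalleryModels{
		&GalleryModel{}, &GalleryModel{}, &GalleryModel{}, &GalleryModel{}, &GalleryModel{},
	}
	first := all.Paginate(1, 2)  // entries 0-1
	last := all.Paginate(3, 2)   // entry 4 only; end clamped to len(all)
	beyond := all.Paginate(4, 2) // empty slice; start clamped to len(all), no panic
	_, _, _ = first, last, beyond
}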

View File

@@ -9,7 +9,11 @@ import (
var _ = Describe("Gallery API tests", func() {
Context("requests", func() {
It("parses github with a branch", func() {
req := GalleryModel{URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main"}
req := GalleryModel{
Metadata: Metadata{
URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main",
},
}
e, err := GetGalleryConfigFromURL(req.URL, "")
Expect(err).ToNot(HaveOccurred())
Expect(e.Name).To(Equal("gpt4all-j"))

View File

@@ -130,7 +130,6 @@ func API(application *application.Application) (*fiber.App, error) {
return metricsService.Shutdown()
})
}
}
// Health Checks should always be exempt from auth, so register these first
routes.HealthRoutes(router)
@@ -167,13 +166,15 @@ func API(application *application.Application) (*fiber.App, error) {
galleryService := services.NewGalleryService(application.ApplicationConfig())
galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader())
routes.RegisterElevenLabsRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
routes.RegisterLocalAIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
routes.RegisterOpenAIRoutes(router, application)
requestExtractor := middleware.NewRequestExtractor(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
routes.RegisterElevenLabsRoutes(router, requestExtractor, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
routes.RegisterLocalAIRoutes(router, requestExtractor, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
routes.RegisterOpenAIRoutes(router, requestExtractor, application)
if !application.ApplicationConfig().DisableWebUI {
routes.RegisterUIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
}
routes.RegisterJINARoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
routes.RegisterJINARoutes(router, requestExtractor, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
httpFS := http.FS(embedDirStatic)

View File

@@ -299,14 +299,18 @@ var _ = Describe("API test", func() {
g := []gallery.GalleryModel{
{
Name: "bert",
URL: bertEmbeddingsURL,
Metadata: gallery.Metadata{
Name: "bert",
URL: bertEmbeddingsURL,
},
},
{
Name: "bert2",
URL: bertEmbeddingsURL,
Overrides: map[string]interface{}{"foo": "bar"},
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
Metadata: gallery.Metadata{
Name: "bert2",
URL: bertEmbeddingsURL,
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
},
Overrides: map[string]interface{}{"foo": "bar"},
},
}
out, err := yaml.Marshal(g)
@@ -522,77 +526,6 @@ var _ = Describe("API test", func() {
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
})
It("runs openllama(llama-ggml backend)", Label("llama"), func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "github:go-skynet/model-gallery/openllama_3b.yaml",
Name: "openllama_3b",
Overrides: map[string]interface{}{"backend": "llama-ggml", "mmap": true, "f16": true, "context_size": 128},
})
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
uuid := response["uuid"].(string)
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
return response["processed"].(bool)
}, "360s", "10s").Should(Equal(true))
By("testing completion")
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "openllama_3b", Prompt: "Count up to five: one, two, three, four, "})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).To(ContainSubstring("five"))
By("testing functions")
resp2, err := client.CreateChatCompletion(
context.TODO(),
openai.ChatCompletionRequest{
Model: "openllama_3b",
Messages: []openai.ChatCompletionMessage{
{
Role: "user",
Content: "What is the weather like in San Francisco (celsius)?",
},
},
Functions: []openai.FunctionDefinition{
openai.FunctionDefinition{
Name: "get_current_weather",
Description: "Get the current weather",
Parameters: jsonschema.Definition{
Type: jsonschema.Object,
Properties: map[string]jsonschema.Definition{
"location": {
Type: jsonschema.String,
Description: "The city and state, e.g. San Francisco, CA",
},
"unit": {
Type: jsonschema.String,
Enum: []string{"celcius", "fahrenheit"},
},
},
Required: []string{"location"},
},
},
},
})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp2.Choices)).To(Equal(1))
Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
var res map[string]string
err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
Expect(err).ToNot(HaveOccurred())
Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res))
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
})
It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")

View File

@@ -1,47 +0,0 @@
package fiberContext
import (
"fmt"
"strings"
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
)
// ModelFromContext returns the model from the context
// If no model is specified, it will take the first available
// Takes a model string as input which should be the one received from the user request.
// It returns the model name resolved from the context and an error if any.
func ModelFromContext(ctx *fiber.Ctx, cl *config.BackendConfigLoader, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) {
if ctx.Params("model") != "" {
modelInput = ctx.Params("model")
}
if ctx.Query("model") != "" {
modelInput = ctx.Query("model")
}
// Set model from bearer token, if available
bearer := strings.TrimLeft(ctx.Get("authorization"), "Bear ") // Reduced duplicate characters of Bearer
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
// If no model was specified, take the first available
if modelInput == "" && !bearerExists && firstModel {
models, _ := services.ListModels(cl, loader, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
if len(models) > 0 {
modelInput = models[0]
log.Debug().Msgf("No model specified, using: %s", modelInput)
} else {
log.Debug().Msgf("No model specified, returning error")
return "", fmt.Errorf("no model specified")
}
}
// If a model is found in bearer token takes precedence
if bearerExists {
log.Debug().Msgf("Using model from bearer token: %s", bearer)
modelInput = bearer
}
return modelInput, nil
}

View File

@@ -13,7 +13,7 @@ func installButton(galleryName string) elem.Node {
attrs.Props{
"data-twe-ripple-init": "",
"data-twe-ripple-color": "light",
"class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
"class": "float-right inline-flex items-center rounded-lg bg-blue-600 hover:bg-blue-700 px-4 py-2 text-sm font-medium text-white transition duration-300 ease-in-out shadow hover:shadow-lg",
"hx-swap": "outerHTML",
// post the Model ID as param
"hx-post": "browse/install/model/" + galleryName,
@@ -52,7 +52,7 @@ func infoButton(m *gallery.GalleryModel) elem.Node {
attrs.Props{
"data-twe-ripple-init": "",
"data-twe-ripple-color": "light",
"class": "float-left inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
"class": "inline-flex items-center rounded-lg bg-gray-700 hover:bg-gray-600 px-4 py-2 text-sm font-medium text-white transition duration-300 ease-in-out",
"data-modal-target": modalName(m),
"data-modal-toggle": modalName(m),
},

View File

@@ -17,7 +17,7 @@ const (
func cardSpan(text, icon string) elem.Node {
return elem.Span(
attrs.Props{
"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2",
"class": "inline-flex items-center px-3 py-1 rounded-lg text-xs font-medium bg-gray-700/70 text-gray-300 border border-gray-600/50 mr-2 mb-2",
},
elem.I(attrs.Props{
"class": icon + " pr-2",
@@ -39,19 +39,20 @@ func searchableElement(text, icon string) elem.Node {
),
elem.Span(
attrs.Props{
"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2",
"class": "inline-flex items-center text-xs px-3 py-1 rounded-full bg-gray-700/60 text-gray-300 border border-gray-600/50 hover:bg-gray-600 hover:text-gray-100 transition duration-200 ease-in-out",
},
elem.A(
attrs.Props{
// "name": "search",
// "value": text,
//"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2",
"href": "#!",
"hx-post": "browse/search/models",
"hx-target": "#search-results",
//"href": "#!",
"href": "browse?term=" + text,
//"hx-post": "browse/search/models",
//"hx-target": "#search-results",
// TODO: this doesn't work
// "hx-vals": `{ \"search\": \"` + text + `\" }`,
"hx-indicator": ".htmx-indicator",
//"hx-indicator": ".htmx-indicator",
},
elem.I(attrs.Props{
"class": icon + " pr-2",
@@ -101,7 +102,7 @@ func modalName(m *gallery.GalleryModel) string {
return m.Name + "-modal"
}
func modelDescription(m *gallery.GalleryModel) elem.Node {
func modelModal(m *gallery.GalleryModel) elem.Node {
urls := []elem.Node{}
for _, url := range m.URLs {
urls = append(urls,
@@ -116,6 +117,125 @@ func modelDescription(m *gallery.GalleryModel) elem.Node {
)
}
return elem.Div(
attrs.Props{
"id": modalName(m),
"tabindex": "-1",
"aria-hidden": "true",
"class": "hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full",
},
elem.Div(
attrs.Props{
"class": "relative p-4 w-full max-w-2xl max-h-full",
},
elem.Div(
attrs.Props{
"class": "relative p-4 w-full max-w-2xl max-h-full bg-white rounded-lg shadow dark:bg-gray-700",
},
// header
elem.Div(
attrs.Props{
"class": "flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600",
},
elem.H3(
attrs.Props{
"class": "text-xl font-semibold text-gray-900 dark:text-white",
},
elem.Text(bluemonday.StrictPolicy().Sanitize(m.Name)),
),
elem.Button( // close button
attrs.Props{
"class": "text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white",
"data-modal-hide": modalName(m),
},
elem.Raw(
`<svg class="w-3 h-3" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 14 14">
<path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6"/>
</svg>`,
),
elem.Span(
attrs.Props{
"class": "sr-only",
},
elem.Text("Close modal"),
),
),
),
// body
elem.Div(
attrs.Props{
"class": "p-4 md:p-5 space-y-4",
},
elem.Div(
attrs.Props{
"class": "flex justify-center items-center",
},
elem.Img(attrs.Props{
// "class": "rounded-t-lg object-fit object-center h-96",
"class": "lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3 entered loaded",
"src": m.Icon,
"loading": "lazy",
}),
),
elem.P(
attrs.Props{
"class": "text-base leading-relaxed text-gray-500 dark:text-gray-400",
},
elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)),
),
elem.Hr(
attrs.Props{},
),
elem.P(
attrs.Props{
"class": "text-sm font-semibold text-gray-900 dark:text-white",
},
elem.Text("Links"),
),
elem.Ul(
attrs.Props{},
urls...,
),
elem.If(
len(m.Tags) > 0,
elem.Div(
attrs.Props{},
elem.P(
attrs.Props{
"class": "text-sm mb-5 font-semibold text-gray-900 dark:text-white",
},
elem.Text("Tags"),
),
elem.Div(
attrs.Props{
"class": "flex flex-row flex-wrap content-center",
},
tagsNodes...,
),
),
elem.Div(attrs.Props{}),
),
),
// Footer
elem.Div(
attrs.Props{
"class": "flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600",
},
elem.Button(
attrs.Props{
"data-modal-hide": modalName(m),
"class": "py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700",
},
elem.Text("Close"),
),
),
),
),
)
}
func modelDescription(m *gallery.GalleryModel) elem.Node {
return elem.Div(
attrs.Props{
"class": "p-6 text-surface dark:text-white",
@@ -132,122 +252,6 @@ func modelDescription(m *gallery.GalleryModel) elem.Node {
},
elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)),
),
elem.Div(
attrs.Props{
"id": modalName(m),
"tabindex": "-1",
"aria-hidden": "true",
"class": "hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full",
},
elem.Div(
attrs.Props{
"class": "relative p-4 w-full max-w-2xl max-h-full",
},
elem.Div(
attrs.Props{
"class": "relative p-4 w-full max-w-2xl max-h-full bg-white rounded-lg shadow dark:bg-gray-700",
},
// header
elem.Div(
attrs.Props{
"class": "flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600",
},
elem.H3(
attrs.Props{
"class": "text-xl font-semibold text-gray-900 dark:text-white",
},
elem.Text(bluemonday.StrictPolicy().Sanitize(m.Name)),
),
elem.Button( // close button
attrs.Props{
"class": "text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white",
"data-modal-hide": modalName(m),
},
elem.Raw(
`<svg class="w-3 h-3" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 14 14">
<path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6"/>
</svg>`,
),
elem.Span(
attrs.Props{
"class": "sr-only",
},
elem.Text("Close modal"),
),
),
),
// body
elem.Div(
attrs.Props{
"class": "p-4 md:p-5 space-y-4",
},
elem.Div(
attrs.Props{
"class": "flex justify-center items-center",
},
elem.Img(attrs.Props{
// "class": "rounded-t-lg object-fit object-center h-96",
"class": "lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3 entered loaded",
"src": m.Icon,
"loading": "lazy",
}),
),
elem.P(
attrs.Props{
"class": "text-base leading-relaxed text-gray-500 dark:text-gray-400",
},
elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)),
),
elem.Hr(
attrs.Props{},
),
elem.P(
attrs.Props{
"class": "text-sm font-semibold text-gray-900 dark:text-white",
},
elem.Text("Links"),
),
elem.Ul(
attrs.Props{},
urls...,
),
elem.If(
len(m.Tags) > 0,
elem.Div(
attrs.Props{},
elem.P(
attrs.Props{
"class": "text-sm mb-5 font-semibold text-gray-900 dark:text-white",
},
elem.Text("Tags"),
),
elem.Div(
attrs.Props{
"class": "flex flex-row flex-wrap content-center",
},
tagsNodes...,
),
),
elem.Div(attrs.Props{}),
),
),
// Footer
elem.Div(
attrs.Props{
"class": "flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600",
},
elem.Button(
attrs.Props{
"data-modal-hide": modalName(m),
"class": "py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700",
},
elem.Text("Close"),
),
),
),
),
),
)
}
@@ -397,7 +401,7 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g
modelsElements = append(modelsElements,
elem.Div(
attrs.Props{
"class": " me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2",
"class": " me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2 bg-gray-800/90 border border-gray-700/50 rounded-xl overflow-hidden transition-all duration-300 hover:shadow-lg hover:shadow-blue-900/20 hover:-translate-y-1 hover:border-blue-700/50",
},
elem.Div(
attrs.Props{
@@ -406,6 +410,7 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g
elems...,
),
),
modelModal(m),
)
}

View File

@@ -4,7 +4,7 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
@@ -17,45 +17,21 @@ import (
// @Router /v1/sound-generation [post]
func SoundGenerationEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.ElevenLabsSoundGenerationRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.ElevenLabsSoundGenerationRequest)
if !ok || input.ModelID == "" {
return fiber.ErrBadRequest
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.ModelID, false)
if err != nil {
modelFile = input.ModelID
log.Warn().Str("ModelID", input.ModelID).Msg("Model not found in context")
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || cfg == nil {
return fiber.ErrBadRequest
}
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
config.LoadOptionDebug(appConfig.Debug),
config.LoadOptionThreads(appConfig.Threads),
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
if err != nil {
modelFile = input.ModelID
log.Warn().Str("Request ModelID", input.ModelID).Err(err).Msg("error during LoadBackendConfigFileByName, using request ModelID")
} else {
if input.ModelID != "" {
modelFile = input.ModelID
} else {
modelFile = cfg.Model
}
}
log.Debug().Str("modelFile", "modelFile").Str("backend", cfg.Backend).Msg("Sound Generation Request about to be sent to backend")
if input.Duration != nil {
log.Debug().Float32("duration", *input.Duration).Msg("duration set")
}
if input.Temperature != nil {
log.Debug().Float32("temperature", *input.Temperature).Msg("temperature set")
}
// TODO: Support uploading files?
filePath, _, err := backend.SoundGeneration(modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
filePath, _, err := backend.SoundGeneration(input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
if err != nil {
return err
}

View File

@@ -3,7 +3,7 @@ package elevenlabs
import (
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
@@ -20,39 +20,21 @@ import (
func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.ElevenLabsTTSRequest)
voiceID := c.Params("voice-id")
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.ElevenLabsTTSRequest)
if !ok || input.ModelID == "" {
return fiber.ErrBadRequest
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.ModelID, false)
if err != nil {
modelFile = input.ModelID
log.Warn().Msgf("Model not found in context: %s", input.ModelID)
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || cfg == nil {
return fiber.ErrBadRequest
}
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
config.LoadOptionDebug(appConfig.Debug),
config.LoadOptionThreads(appConfig.Threads),
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
if err != nil {
modelFile = input.ModelID
log.Warn().Msgf("Model not found in context: %s", input.ModelID)
} else {
if input.ModelID != "" {
modelFile = input.ModelID
} else {
modelFile = cfg.Model
}
}
log.Debug().Msgf("Request for model: %s", modelFile)
log.Debug().Str("modelName", input.ModelID).Msg("elevenlabs TTS request recieved")
filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, "", voiceID, ml, appConfig, *cfg)
filePath, _, err := backend.ModelTTS(input.Text, voiceID, input.LanguageCode, ml, appConfig, *cfg)
if err != nil {
return err
}

View File

@@ -3,9 +3,9 @@ package jina
import (
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/gofiber/fiber/v2"
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
@@ -19,58 +19,32 @@ import (
// @Router /v1/rerank [post]
func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
req := new(schema.JINARerankRequest)
if err := c.BodyParser(req); err != nil {
return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{
"error": "Cannot parse JSON",
})
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.JINARerankRequest)
if !ok || input.Model == "" {
return fiber.ErrBadRequest
}
input := new(schema.TTSRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || cfg == nil {
return fiber.ErrBadRequest
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
if err != nil {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
}
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
config.LoadOptionDebug(appConfig.Debug),
config.LoadOptionThreads(appConfig.Threads),
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
if err != nil {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
} else {
modelFile = cfg.Model
}
log.Debug().Msgf("Request for model: %s", modelFile)
if input.Backend != "" {
cfg.Backend = input.Backend
}
log.Debug().Str("model", input.Model).Msg("JINA Rerank Request recieved")
request := &proto.RerankRequest{
Query: req.Query,
TopN: int32(req.TopN),
Documents: req.Documents,
Query: input.Query,
TopN: int32(input.TopN),
Documents: input.Documents,
}
results, err := backend.Rerank(modelFile, request, ml, appConfig, *cfg)
results, err := backend.Rerank(request, ml, appConfig, *cfg)
if err != nil {
return err
}
response := &schema.JINARerankResponse{
Model: req.Model,
Model: input.Model,
}
for _, r := range results.Results {

View File

@@ -117,19 +117,25 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
// @Router /models/available [get]
func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
models, err := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath)
if err != nil {
return err
}
log.Debug().Msgf("Models found from galleries: %+v", models)
for _, m := range models {
log.Debug().Msgf("Model found from galleries: %+v", m)
log.Debug().Msgf("Available %d models from %d galleries\n", len(models), len(mgs.galleries))
m := []gallery.Metadata{}
for _, mm := range models {
m = append(m, mm.Metadata)
}
dat, err := json.Marshal(models)
log.Debug().Msgf("Models %#v", m)
dat, err := json.Marshal(m)
if err != nil {
return err
return fmt.Errorf("could not marshal models: %w", err)
}
return c.Send(dat)
}

View File

@@ -4,13 +4,15 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/rs/zerolog/log"
"github.com/mudler/LocalAI/pkg/model"
)
// TODO: This is not yet in use. Needs middleware rework, since it is not referenced.
// TokenMetricsEndpoint is an endpoint to get TokensProcessed Per Second for Active SlotID
//
// @Summary Get TokenMetrics for Active Slot.
@@ -29,18 +31,13 @@ func TokenMetricsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader,
return err
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
if err != nil {
modelFile, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
if !ok || modelFile != "" {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
}
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
config.LoadOptionDebug(appConfig.Debug),
config.LoadOptionThreads(appConfig.Threads),
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
cfg, err := cl.LoadBackendConfigFileByNameDefaultOptions(modelFile, appConfig)
if err != nil {
log.Err(err)

View File

@@ -4,10 +4,9 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
)
// TokenizeEndpoint exposes a REST API to tokenize the content
@@ -16,42 +15,21 @@ import (
// @Success 200 {object} schema.TokenizeResponse "Response"
// @Router /v1/tokenize [post]
func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.TokenizeRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
return func(ctx *fiber.Ctx) error {
input, ok := ctx.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.TokenizeRequest)
if !ok || input.Model == "" {
return fiber.ErrBadRequest
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
if err != nil {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
cfg, ok := ctx.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || cfg == nil {
return fiber.ErrBadRequest
}
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
config.LoadOptionDebug(appConfig.Debug),
config.LoadOptionThreads(appConfig.Threads),
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
if err != nil {
log.Err(err)
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
} else {
modelFile = cfg.Model
}
log.Debug().Msgf("Request for model: %s", modelFile)
tokenResponse, err := backend.ModelTokenize(input.Content, ml, *cfg, appConfig)
if err != nil {
return err
}
return c.JSON(tokenResponse)
return ctx.JSON(tokenResponse)
}
}
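
The endpoint rewrites in this PR all follow the same shape: the request-extractor middleware registered in API() parses the body and resolves the backend config up front, and handlers only read two Locals keys. A condensed sketch of that contract, not a literal endpoint from the PR (exampleEndpoint is hypothetical; the Locals keys, schema type, and fiber usage mirror the handlers above):

// Condensed sketch of the new handler contract: by the time the handler runs,
// the middleware has stored the parsed request and the resolved model config.
func exampleEndpoint() func(c *fiber.Ctx) error {
	return func(c *fiber.Ctx) error {
		input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.TokenizeRequest)
		if !ok || input.Model == "" {
			return fiber.ErrBadRequest // middleware did not populate the request
		}
		cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
		if !ok || cfg == nil {
			return fiber.ErrBadRequest // no backend config resolved for the model
		}
		// ... call into core/backend with input and *cfg ...
		return nil
	}
}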

View File

@@ -3,7 +3,7 @@ package localai
import (
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
@@ -24,37 +24,24 @@ import (
// @Router /tts [post]
func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.TTSRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.TTSRequest)
if !ok || input.Model == "" {
return fiber.ErrBadRequest
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
if err != nil {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || cfg == nil {
return fiber.ErrBadRequest
}
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
config.LoadOptionDebug(appConfig.Debug),
config.LoadOptionThreads(appConfig.Threads),
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
log.Debug().Str("model", input.Model).Msg("LocalAI TTS Request recieved")
if err != nil {
log.Err(err)
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
} else {
modelFile = cfg.Model
}
log.Debug().Msgf("Request for model: %s", modelFile)
if input.Backend != "" {
cfg.Backend = input.Backend
if cfg.Backend == "" {
if input.Backend != "" {
cfg.Backend = input.Backend
} else {
cfg.Backend = model.PiperBackend
}
}
if input.Language != "" {
@@ -65,7 +52,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
cfg.Voice = input.Voice
}
filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, cfg.Voice, cfg.Language, ml, appConfig, *cfg)
filePath, _, err := backend.ModelTTS(input.Input, cfg.Voice, cfg.Language, ml, appConfig, *cfg)
if err != nil {
return err
}

View File

@@ -4,9 +4,8 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
)
@@ -19,45 +18,20 @@ import (
// @Router /vad [post]
func VADEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.VADRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.VADRequest)
if !ok || input.Model == "" {
return fiber.ErrBadRequest
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
if err != nil {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || cfg == nil {
return fiber.ErrBadRequest
}
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
config.LoadOptionDebug(appConfig.Debug),
config.LoadOptionThreads(appConfig.Threads),
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
log.Debug().Str("model", input.Model).Msg("LocalAI VAD Request recieved")
if err != nil {
log.Err(err)
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
} else {
modelFile = cfg.Model
}
log.Debug().Msgf("Request for model: %s", modelFile)
resp, err := backend.VAD(input, c.Context(), ml, appConfig, *cfg)
opts := backend.ModelOptions(*cfg, appConfig, model.WithBackendString(cfg.Backend), model.WithModel(modelFile))
vadModel, err := ml.Load(opts...)
if err != nil {
return err
}
req := proto.VADRequest{
Audio: input.Audio,
}
resp, err := vadModel.VAD(c.Context(), &req)
if err != nil {
return err
}

View File

@@ -5,18 +5,19 @@ import (
"bytes"
"encoding/json"
"fmt"
"strings"
"time"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/templates"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
)
@@ -174,26 +175,20 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
textContentToReturn = ""
id = uuid.New().String()
created = int(time.Now().Unix())
// Set CorrelationID
correlationID := c.Get("X-Correlation-ID")
if len(strings.TrimSpace(correlationID)) == 0 {
correlationID = id
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
if !ok || input.Model == "" {
return fiber.ErrBadRequest
}
c.Set("X-Correlation-ID", correlationID)
// Opt-in extra usage flag
extraUsage := c.Get("Extra-Usage", "") != ""
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || config == nil {
return fiber.ErrBadRequest
}
config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, startupOptions.Debug, startupOptions.Threads, startupOptions.ContextSize, startupOptions.F16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
log.Debug().Msgf("Configuration read: %+v", config)
log.Debug().Msgf("Chat endpoint configuration read: %+v", config)
funcs := input.Functions
shouldUseFn := len(input.Functions) > 0 && config.ShouldUseFunctions()
@@ -401,6 +396,11 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
log.Debug().Msgf("Text content to return: %s", textContentToReturn)
noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0
finishReason := "stop"
if len(input.Tools) > 0 {
finishReason = "tool_calls"
}
switch {
case noActionsToRun:
result, err := handleQuestion(config, input, ml, startupOptions, results, s, predInput)
@@ -408,19 +408,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
log.Error().Err(err).Msg("error handling question")
return
}
*c = append(*c, schema.Choice{
Message: &schema.Message{Role: "assistant", Content: &result}})
FinishReason: finishReason,
Message: &schema.Message{Role: "assistant", Content: &result}})
default:
toolChoice := schema.Choice{
FinishReason: finishReason,
Message: &schema.Message{
Role: "assistant",
},
}
if len(input.Tools) > 0 {
toolChoice.FinishReason = "tool_calls"
}
for _, ss := range results {
name, args := ss.Name, ss.Arguments
if len(input.Tools) > 0 {
@@ -438,7 +437,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
},
)
} else {
// otherwise we return more choices directly
// otherwise we return more choices directly (deprecated)
*c = append(*c, schema.Choice{
FinishReason: "function_call",
Message: &schema.Message{
@@ -539,7 +538,7 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m
audios = append(audios, m.StringAudios...)
}
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, *config, o, nil)
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, config, o, nil)
if err != nil {
log.Error().Err(err).Msg("model inference failed")
return "", err

View File

@@ -10,12 +10,13 @@ import (
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/templates"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
@@ -27,10 +28,9 @@ import (
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/completions [post]
func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
id := uuid.New().String()
created := int(time.Now().Unix())
process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) {
process := func(id string, s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) {
ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
usage := schema.OpenAIUsage{
PromptTokens: tokenUsage.Prompt,
@@ -63,22 +63,18 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
}
return func(c *fiber.Ctx) error {
// Add Correlation
c.Set("X-Correlation-ID", id)
// Opt-in extra usage flag
// Handle Correlation
id := c.Get("X-Correlation-ID", uuid.New().String())
extraUsage := c.Get("Extra-Usage", "") != ""
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
if !ok || input.Model == "" {
return fiber.ErrBadRequest
}
log.Debug().Msgf("`input`: %+v", input)
config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || config == nil {
return fiber.ErrBadRequest
}
if config.ResponseFormatMap != nil {
@@ -122,7 +118,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
responses := make(chan schema.OpenAIResponse)
go process(predInput, input, config, ml, responses, extraUsage)
go process(id, predInput, input, config, ml, responses, extraUsage)
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {

View File

@@ -2,16 +2,17 @@ package openai
import (
"encoding/json"
"fmt"
"time"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/schema"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/templates"
"github.com/rs/zerolog/log"
@@ -25,20 +26,21 @@ import (
func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
if !ok || input.Model == "" {
return fiber.ErrBadRequest
}
// Opt-in extra usage flag
extraUsage := c.Get("Extra-Usage", "") != ""
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || config == nil {
return fiber.ErrBadRequest
}
config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
log.Debug().Msgf("Parameter Config: %+v", config)
log.Debug().Msgf("Edit Endpoint Input : %+v", input)
log.Debug().Msgf("Edit Endpoint Config: %+v", *config)
var result []schema.Choice
totalTokenUsage := backend.TokenUsage{}

View File

@@ -2,11 +2,11 @@ package openai
import (
"encoding/json"
"fmt"
"time"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/pkg/model"
"github.com/google/uuid"
@@ -23,14 +23,14 @@ import (
// @Router /v1/embeddings [post]
func EmbeddingsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
model, input, err := readRequest(c, cl, ml, appConfig, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
if !ok || input.Model == "" {
return fiber.ErrBadRequest
}
config, input, err := mergeRequestWithConfig(model, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || config == nil {
return fiber.ErrBadRequest
}
log.Debug().Msgf("Parameter Config: %+v", config)

View File

@@ -15,6 +15,7 @@ import (
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/backend"
@@ -66,25 +67,23 @@ func downloadFile(url string) (string, error) {
// @Router /v1/images/generations [post]
func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
m, input, err := readRequest(c, cl, ml, appConfig, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
if !ok || input.Model == "" {
log.Error().Msg("Image Endpoint - Invalid Input")
return fiber.ErrBadRequest
}
if m == "" {
m = "stablediffusion"
}
log.Debug().Msgf("Loading model: %+v", m)
config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, 0, 0, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || config == nil {
log.Error().Msg("Image Endpoint - Invalid Config")
return fiber.ErrBadRequest
}
src := ""
if input.File != "" {
fileData := []byte{}
var err error
// check if input.File is an URL, if so download it and save it
// to a temporary file
if strings.HasPrefix(input.File, "http://") || strings.HasPrefix(input.File, "https://") {

View File

@@ -37,7 +37,7 @@ func ComputeChoices(
}
// get the model function to call for the result
predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, *config, o, tokenCallback)
predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, config, o, tokenCallback)
if err != nil {
return result, backend.TokenUsage{}, err
}

View File

@@ -1,7 +1,6 @@
package openai
import (
"fmt"
"io"
"net/http"
"os"
@@ -10,6 +9,8 @@ import (
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
@@ -25,15 +26,16 @@ import (
// @Router /v1/audio/transcriptions [post]
func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
m, input, err := readRequest(c, cl, ml, appConfig, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
if !ok || input.Model == "" {
return fiber.ErrBadRequest
}
config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
if err != nil {
return fmt.Errorf("failed reading parameters from request: %w", err)
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || config == nil {
return fiber.ErrBadRequest
}
// retrieve the file data from the request
file, err := c.FormFile("file")
if err != nil {

View File

@@ -1,326 +1,450 @@
package openai
import (
"context"
"encoding/json"
"fmt"
"strconv"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/config"
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/templates"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
)
type correlationIDKeyType string
// CorrelationIDKey to track request across process boundary
const CorrelationIDKey correlationIDKeyType = "correlationID"
func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
input := new(schema.OpenAIRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return "", nil, fmt.Errorf("failed parsing request body: %w", err)
}
received, _ := json.Marshal(input)
// Extract or generate the correlation ID
correlationID := c.Get("X-Correlation-ID", uuid.New().String())
ctx, cancel := context.WithCancel(o.Context)
// Add the correlation ID to the new context
ctxWithCorrelationID := context.WithValue(ctx, CorrelationIDKey, correlationID)
input.Context = ctxWithCorrelationID
input.Cancel = cancel
log.Debug().Msgf("Request received: %s", string(received))
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, firstModel)
return modelFile, input, err
}
func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest) {
if input.Echo {
config.Echo = input.Echo
}
if input.TopK != nil {
config.TopK = input.TopK
}
if input.TopP != nil {
config.TopP = input.TopP
}
if input.Backend != "" {
config.Backend = input.Backend
}
if input.ClipSkip != 0 {
config.Diffusers.ClipSkip = input.ClipSkip
}
if input.ModelBaseName != "" {
config.AutoGPTQ.ModelBaseName = input.ModelBaseName
}
if input.NegativePromptScale != 0 {
config.NegativePromptScale = input.NegativePromptScale
}
if input.UseFastTokenizer {
config.UseFastTokenizer = input.UseFastTokenizer
}
if input.NegativePrompt != "" {
config.NegativePrompt = input.NegativePrompt
}
if input.RopeFreqBase != 0 {
config.RopeFreqBase = input.RopeFreqBase
}
if input.RopeFreqScale != 0 {
config.RopeFreqScale = input.RopeFreqScale
}
if input.Grammar != "" {
config.Grammar = input.Grammar
}
if input.Temperature != nil {
config.Temperature = input.Temperature
}
if input.Maxtokens != nil {
config.Maxtokens = input.Maxtokens
}
if input.ResponseFormat != nil {
switch responseFormat := input.ResponseFormat.(type) {
case string:
config.ResponseFormat = responseFormat
case map[string]interface{}:
config.ResponseFormatMap = responseFormat
}
}
switch stop := input.Stop.(type) {
case string:
if stop != "" {
config.StopWords = append(config.StopWords, stop)
}
case []interface{}:
for _, pp := range stop {
if s, ok := pp.(string); ok {
config.StopWords = append(config.StopWords, s)
}
}
}
if len(input.Tools) > 0 {
for _, tool := range input.Tools {
input.Functions = append(input.Functions, tool.Function)
}
}
if input.ToolsChoice != nil {
var toolChoice functions.Tool
switch content := input.ToolsChoice.(type) {
case string:
_ = json.Unmarshal([]byte(content), &toolChoice)
case map[string]interface{}:
dat, _ := json.Marshal(content)
_ = json.Unmarshal(dat, &toolChoice)
}
input.FunctionCall = map[string]interface{}{
"name": toolChoice.Function.Name,
}
}
// Decode each request's message content
imgIndex, vidIndex, audioIndex := 0, 0, 0
for i, m := range input.Messages {
nrOfImgsInMessage := 0
nrOfVideosInMessage := 0
nrOfAudiosInMessage := 0
switch content := m.Content.(type) {
case string:
input.Messages[i].StringContent = content
case []interface{}:
dat, _ := json.Marshal(content)
c := []schema.Content{}
json.Unmarshal(dat, &c)
textContent := ""
// we will template this at the end
CONTENT:
for _, pp := range c {
switch pp.Type {
case "text":
textContent += pp.Text
//input.Messages[i].StringContent = pp.Text
case "video", "video_url":
// Decode content as base64 either if it's an URL or base64 text
base64, err := utils.GetContentURIAsBase64(pp.VideoURL.URL)
if err != nil {
log.Error().Msgf("Failed encoding video: %s", err)
continue CONTENT
}
input.Messages[i].StringVideos = append(input.Messages[i].StringVideos, base64) // TODO: make sure that we only return base64 stuff
vidIndex++
nrOfVideosInMessage++
case "audio_url", "audio":
// Decode content as base64 either if it's an URL or base64 text
base64, err := utils.GetContentURIAsBase64(pp.AudioURL.URL)
if err != nil {
log.Error().Msgf("Failed encoding image: %s", err)
continue CONTENT
}
input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff
audioIndex++
nrOfAudiosInMessage++
case "image_url", "image":
// Decode content as base64 either if it's an URL or base64 text
base64, err := utils.GetContentURIAsBase64(pp.ImageURL.URL)
if err != nil {
log.Error().Msgf("Failed encoding image: %s", err)
continue CONTENT
}
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
imgIndex++
nrOfImgsInMessage++
}
}
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(config.TemplateConfig.Multimodal, templates.MultiModalOptions{
TotalImages: imgIndex,
TotalVideos: vidIndex,
TotalAudios: audioIndex,
ImagesInMessage: nrOfImgsInMessage,
VideosInMessage: nrOfVideosInMessage,
AudiosInMessage: nrOfAudiosInMessage,
}, textContent)
}
}
if input.RepeatPenalty != 0 {
config.RepeatPenalty = input.RepeatPenalty
}
if input.FrequencyPenalty != 0 {
config.FrequencyPenalty = input.FrequencyPenalty
}
if input.PresencePenalty != 0 {
config.PresencePenalty = input.PresencePenalty
}
if input.Keep != 0 {
config.Keep = input.Keep
}
if input.Batch != 0 {
config.Batch = input.Batch
}
if input.IgnoreEOS {
config.IgnoreEOS = input.IgnoreEOS
}
if input.Seed != nil {
config.Seed = input.Seed
}
if input.TypicalP != nil {
config.TypicalP = input.TypicalP
}
switch inputs := input.Input.(type) {
case string:
if inputs != "" {
config.InputStrings = append(config.InputStrings, inputs)
}
case []interface{}:
for _, pp := range inputs {
switch i := pp.(type) {
case string:
config.InputStrings = append(config.InputStrings, i)
case []interface{}:
tokens := []int{}
for _, ii := range i {
tokens = append(tokens, int(ii.(float64)))
}
config.InputToken = append(config.InputToken, tokens)
}
}
}
// Can be either a string or an object
switch fnc := input.FunctionCall.(type) {
case string:
if fnc != "" {
config.SetFunctionCallString(fnc)
}
case map[string]interface{}:
var name string
n, exists := fnc["name"]
if exists {
nn, e := n.(string)
if e {
name = nn
}
}
config.SetFunctionCallNameString(name)
}
switch p := input.Prompt.(type) {
case string:
config.PromptStrings = append(config.PromptStrings, p)
case []interface{}:
for _, pp := range p {
if s, ok := pp.(string); ok {
config.PromptStrings = append(config.PromptStrings, s)
}
}
}
// If a quality was defined as a number, convert it to steps
if input.Quality != "" {
q, err := strconv.Atoi(input.Quality)
if err == nil {
config.Step = q
}
}
}
func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) {
cfg, err := cm.LoadBackendConfigFileByName(modelFile, loader.ModelPath,
config.LoadOptionDebug(debug),
config.LoadOptionThreads(threads),
config.LoadOptionContextSize(ctx),
config.LoadOptionF16(f16),
)
// Set the parameters for the language model prediction
updateRequestConfig(cfg, input)
if !cfg.Validate() {
return nil, nil, fmt.Errorf("failed to validate config")
}
return cfg, input, err
}
package middleware
import (
"context"
"encoding/json"
"fmt"
"strconv"
"strings"
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/templates"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
)
type correlationIDKeyType string
// CorrelationIDKey to track request across process boundary
const CorrelationIDKey correlationIDKeyType = "correlationID"
type RequestExtractor struct {
backendConfigLoader *config.BackendConfigLoader
modelLoader *model.ModelLoader
applicationConfig *config.ApplicationConfig
}
func NewRequestExtractor(backendConfigLoader *config.BackendConfigLoader, modelLoader *model.ModelLoader, applicationConfig *config.ApplicationConfig) *RequestExtractor {
return &RequestExtractor{
backendConfigLoader: backendConfigLoader,
modelLoader: modelLoader,
applicationConfig: applicationConfig,
}
}
const CONTEXT_LOCALS_KEY_MODEL_NAME = "MODEL_NAME"
const CONTEXT_LOCALS_KEY_LOCALAI_REQUEST = "LOCALAI_REQUEST"
const CONTEXT_LOCALS_KEY_MODEL_CONFIG = "MODEL_CONFIG"
// TODO: Refactor to not return error if unchanged
func (re *RequestExtractor) setModelNameFromRequest(ctx *fiber.Ctx) {
model, ok := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
if ok && model != "" {
return
}
model = ctx.Params("model")
if (model == "") && ctx.Query("model") != "" {
model = ctx.Query("model")
}
if model == "" {
// Set model from bearer token, if available
bearer := strings.TrimLeft(ctx.Get("authorization"), "Bear ") // "Bearer " => "Bear" to please go-staticcheck. It looks dumb but we might as well take free performance on something called for nearly every request.
if bearer != "" {
exists, err := services.CheckIfModelExists(re.backendConfigLoader, re.modelLoader, bearer, services.ALWAYS_INCLUDE)
if err == nil && exists {
model = bearer
}
}
}
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME, model)
}
func (re *RequestExtractor) BuildConstantDefaultModelNameMiddleware(defaultModelName string) fiber.Handler {
return func(ctx *fiber.Ctx) error {
re.setModelNameFromRequest(ctx)
localModelName, ok := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
if !ok || localModelName == "" {
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME, defaultModelName)
log.Debug().Str("defaultModelName", defaultModelName).Msg("context local model name not found, setting to default")
}
return ctx.Next()
}
}
func (re *RequestExtractor) BuildFilteredFirstAvailableDefaultModel(filterFn config.BackendConfigFilterFn) fiber.Handler {
return func(ctx *fiber.Ctx) error {
re.setModelNameFromRequest(ctx)
localModelName := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
if localModelName != "" { // Don't overwrite existing values
return ctx.Next()
}
modelNames, err := services.ListModels(re.backendConfigLoader, re.modelLoader, filterFn, services.SKIP_IF_CONFIGURED)
if err != nil {
log.Error().Err(err).Msg("non-fatal error calling ListModels during SetDefaultModelNameToFirstAvailable()")
return ctx.Next()
}
if len(modelNames) == 0 {
log.Warn().Msg("SetDefaultModelNameToFirstAvailable used with no matching models installed")
// This is non-fatal - making it so was breaking the case of direct installation of raw models
// return errors.New("this endpoint requires at least one model to be installed")
return ctx.Next()
}
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME, modelNames[0])
log.Debug().Str("first model name", modelNames[0]).Msg("context local model name not found, setting to the first model")
return ctx.Next()
}
}
// TODO: If context and cancel above belong on all methods, move that part of above into here!
// Otherwise, it's in its own method below for now
func (re *RequestExtractor) SetModelAndConfig(initializer func() schema.LocalAIRequest) fiber.Handler {
return func(ctx *fiber.Ctx) error {
input := initializer()
if input == nil {
return fmt.Errorf("unable to initialize body")
}
if err := ctx.BodyParser(input); err != nil {
return fmt.Errorf("failed parsing request body: %w", err)
}
// If this request doesn't have an associated model name, fetch it from earlier in the middleware chain
if input.ModelName(nil) == "" {
localModelName, ok := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
if ok && localModelName != "" {
log.Debug().Str("context localModelName", localModelName).Msg("overriding empty model name in request body with value found earlier in middleware chain")
input.ModelName(&localModelName)
}
}
cfg, err := re.backendConfigLoader.LoadBackendConfigFileByNameDefaultOptions(input.ModelName(nil), re.applicationConfig)
if err != nil {
log.Err(err)
log.Warn().Msgf("Model Configuration File not found for %q", input.ModelName(nil))
} else if cfg.Model == "" && input.ModelName(nil) != "" {
log.Debug().Str("input.ModelName", input.ModelName(nil)).Msg("config does not include model, using input")
cfg.Model = input.ModelName(nil)
}
ctx.Locals(CONTEXT_LOCALS_KEY_LOCALAI_REQUEST, input)
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_CONFIG, cfg)
return ctx.Next()
}
}
func (re *RequestExtractor) SetOpenAIRequest(ctx *fiber.Ctx) error {
input, ok := ctx.Locals(CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
if !ok || input.Model == "" {
return fiber.ErrBadRequest
}
cfg, ok := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || cfg == nil {
return fiber.ErrBadRequest
}
// Extract or generate the correlation ID
correlationID := ctx.Get("X-Correlation-ID", uuid.New().String())
ctx.Set("X-Correlation-ID", correlationID)
c1, cancel := context.WithCancel(re.applicationConfig.Context)
// Add the correlation ID to the new context
ctxWithCorrelationID := context.WithValue(c1, CorrelationIDKey, correlationID)
input.Context = ctxWithCorrelationID
input.Cancel = cancel
err := mergeOpenAIRequestAndBackendConfig(cfg, input)
if err != nil {
return err
}
if cfg.Model == "" {
log.Debug().Str("input.Model", input.Model).Msg("replacing empty cfg.Model with input value")
cfg.Model = input.Model
}
ctx.Locals(CONTEXT_LOCALS_KEY_LOCALAI_REQUEST, input)
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_CONFIG, cfg)
return ctx.Next()
}
func mergeOpenAIRequestAndBackendConfig(config *config.BackendConfig, input *schema.OpenAIRequest) error {
if input.Echo {
config.Echo = input.Echo
}
if input.TopK != nil {
config.TopK = input.TopK
}
if input.TopP != nil {
config.TopP = input.TopP
}
if input.Backend != "" {
config.Backend = input.Backend
}
if input.ClipSkip != 0 {
config.Diffusers.ClipSkip = input.ClipSkip
}
if input.ModelBaseName != "" {
config.AutoGPTQ.ModelBaseName = input.ModelBaseName
}
if input.NegativePromptScale != 0 {
config.NegativePromptScale = input.NegativePromptScale
}
if input.UseFastTokenizer {
config.UseFastTokenizer = input.UseFastTokenizer
}
if input.NegativePrompt != "" {
config.NegativePrompt = input.NegativePrompt
}
if input.RopeFreqBase != 0 {
config.RopeFreqBase = input.RopeFreqBase
}
if input.RopeFreqScale != 0 {
config.RopeFreqScale = input.RopeFreqScale
}
if input.Grammar != "" {
config.Grammar = input.Grammar
}
if input.Temperature != nil {
config.Temperature = input.Temperature
}
if input.Maxtokens != nil {
config.Maxtokens = input.Maxtokens
}
if input.ResponseFormat != nil {
switch responseFormat := input.ResponseFormat.(type) {
case string:
config.ResponseFormat = responseFormat
case map[string]interface{}:
config.ResponseFormatMap = responseFormat
}
}
switch stop := input.Stop.(type) {
case string:
if stop != "" {
config.StopWords = append(config.StopWords, stop)
}
case []interface{}:
for _, pp := range stop {
if s, ok := pp.(string); ok {
config.StopWords = append(config.StopWords, s)
}
}
}
if len(input.Tools) > 0 {
for _, tool := range input.Tools {
input.Functions = append(input.Functions, tool.Function)
}
}
if input.ToolsChoice != nil {
var toolChoice functions.Tool
switch content := input.ToolsChoice.(type) {
case string:
_ = json.Unmarshal([]byte(content), &toolChoice)
case map[string]interface{}:
dat, _ := json.Marshal(content)
_ = json.Unmarshal(dat, &toolChoice)
}
input.FunctionCall = map[string]interface{}{
"name": toolChoice.Function.Name,
}
}
// Decode each request's message content
imgIndex, vidIndex, audioIndex := 0, 0, 0
for i, m := range input.Messages {
nrOfImgsInMessage := 0
nrOfVideosInMessage := 0
nrOfAudiosInMessage := 0
switch content := m.Content.(type) {
case string:
input.Messages[i].StringContent = content
case []interface{}:
dat, _ := json.Marshal(content)
c := []schema.Content{}
json.Unmarshal(dat, &c)
textContent := ""
// we will template this at the end
CONTENT:
for _, pp := range c {
switch pp.Type {
case "text":
textContent += pp.Text
//input.Messages[i].StringContent = pp.Text
case "video", "video_url":
// Decode content as base64 either if it's an URL or base64 text
base64, err := utils.GetContentURIAsBase64(pp.VideoURL.URL)
if err != nil {
log.Error().Msgf("Failed encoding video: %s", err)
continue CONTENT
}
input.Messages[i].StringVideos = append(input.Messages[i].StringVideos, base64) // TODO: make sure that we only return base64 stuff
vidIndex++
nrOfVideosInMessage++
case "audio_url", "audio":
// Decode content as base64 either if it's an URL or base64 text
base64, err := utils.GetContentURIAsBase64(pp.AudioURL.URL)
if err != nil {
log.Error().Msgf("Failed encoding image: %s", err)
continue CONTENT
}
input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff
audioIndex++
nrOfAudiosInMessage++
case "image_url", "image":
// Decode content as base64 either if it's an URL or base64 text
base64, err := utils.GetContentURIAsBase64(pp.ImageURL.URL)
if err != nil {
log.Error().Msgf("Failed encoding image: %s", err)
continue CONTENT
}
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
imgIndex++
nrOfImgsInMessage++
}
}
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(config.TemplateConfig.Multimodal, templates.MultiModalOptions{
TotalImages: imgIndex,
TotalVideos: vidIndex,
TotalAudios: audioIndex,
ImagesInMessage: nrOfImgsInMessage,
VideosInMessage: nrOfVideosInMessage,
AudiosInMessage: nrOfAudiosInMessage,
}, textContent)
}
}
if input.RepeatPenalty != 0 {
config.RepeatPenalty = input.RepeatPenalty
}
if input.FrequencyPenalty != 0 {
config.FrequencyPenalty = input.FrequencyPenalty
}
if input.PresencePenalty != 0 {
config.PresencePenalty = input.PresencePenalty
}
if input.Keep != 0 {
config.Keep = input.Keep
}
if input.Batch != 0 {
config.Batch = input.Batch
}
if input.IgnoreEOS {
config.IgnoreEOS = input.IgnoreEOS
}
if input.Seed != nil {
config.Seed = input.Seed
}
if input.TypicalP != nil {
config.TypicalP = input.TypicalP
}
log.Debug().Str("input.Input", fmt.Sprintf("%+v", input.Input))
switch inputs := input.Input.(type) {
case string:
if inputs != "" {
config.InputStrings = append(config.InputStrings, inputs)
}
case []interface{}:
for _, pp := range inputs {
switch i := pp.(type) {
case string:
config.InputStrings = append(config.InputStrings, i)
case []interface{}:
tokens := []int{}
for _, ii := range i {
tokens = append(tokens, int(ii.(float64)))
}
config.InputToken = append(config.InputToken, tokens)
}
}
}
// Can be either a string or an object
switch fnc := input.FunctionCall.(type) {
case string:
if fnc != "" {
config.SetFunctionCallString(fnc)
}
case map[string]interface{}:
var name string
n, exists := fnc["name"]
if exists {
nn, e := n.(string)
if e {
name = nn
}
}
config.SetFunctionCallNameString(name)
}
switch p := input.Prompt.(type) {
case string:
config.PromptStrings = append(config.PromptStrings, p)
case []interface{}:
for _, pp := range p {
if s, ok := pp.(string); ok {
config.PromptStrings = append(config.PromptStrings, s)
}
}
}
// If a quality was defined as a number, convert it to steps
if input.Quality != "" {
q, err := strconv.Atoi(input.Quality)
if err == nil {
config.Step = q
}
}
if config.Validate() {
return nil
}
return fmt.Errorf("unable to validate configuration after merging")
}
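SetOpenAIRequest writes the correlation ID both to the X-Correlation-ID header and into the request context under CorrelationIDKey. A small sketch of reading it back further down the call chain (illustrative, not part of this changeset; assumes code living in the same middleware package and a context derived from this middleware):

// Illustrative helper: recover the correlation ID that SetOpenAIRequest stored
// in the request context.
func correlationIDFromContext(ctx context.Context) (string, bool) {
	id, ok := ctx.Value(CorrelationIDKey).(string)
	return id, ok
}

// Example use inside a handler that received the *schema.OpenAIRequest from Locals:
//   if id, ok := correlationIDFromContext(input.Context); ok {
//       log.Debug().Str("correlationID", id).Msg("processing request")
//   }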

View File

@@ -4,17 +4,26 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/endpoints/elevenlabs"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/model"
)
func RegisterElevenLabsRoutes(app *fiber.App,
re *middleware.RequestExtractor,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig) {
// Elevenlabs
app.Post("/v1/text-to-speech/:voice-id", elevenlabs.TTSEndpoint(cl, ml, appConfig))
app.Post("/v1/text-to-speech/:voice-id",
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TTS)),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.ElevenLabsTTSRequest) }),
elevenlabs.TTSEndpoint(cl, ml, appConfig))
app.Post("/v1/sound-generation", elevenlabs.SoundGenerationEndpoint(cl, ml, appConfig))
app.Post("/v1/sound-generation",
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_SOUND_GENERATION)),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.ElevenLabsSoundGenerationRequest) }),
elevenlabs.SoundGenerationEndpoint(cl, ml, appConfig))
}

View File

@@ -3,16 +3,22 @@ package routes
import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/endpoints/jina"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/pkg/model"
)
func RegisterJINARoutes(app *fiber.App,
re *middleware.RequestExtractor,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig) {
// POST endpoint to mimic the reranking
app.Post("/v1/rerank", jina.JINARerankEndpoint(cl, ml, appConfig))
app.Post("/v1/rerank",
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_RERANK)),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.JINARerankRequest) }),
jina.JINARerankEndpoint(cl, ml, appConfig))
}

View File

@@ -5,13 +5,16 @@ import (
"github.com/gofiber/swagger"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/model"
)
func RegisterLocalAIRoutes(router *fiber.App,
requestExtractor *middleware.RequestExtractor,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
@@ -33,8 +36,18 @@ func RegisterLocalAIRoutes(router *fiber.App,
router.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint())
}
router.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig))
router.Post("/vad", localai.VADEndpoint(cl, ml, appConfig))
router.Post("/tts",
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TTS)),
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TTSRequest) }),
localai.TTSEndpoint(cl, ml, appConfig))
vadChain := []fiber.Handler{
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_VAD)),
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.VADRequest) }),
localai.VADEndpoint(cl, ml, appConfig),
}
router.Post("/vad", vadChain...)
router.Post("/v1/vad", vadChain...)
// Stores
sl := model.NewModelLoader("")
@@ -47,10 +60,14 @@ func RegisterLocalAIRoutes(router *fiber.App,
router.Get("/metrics", localai.LocalAIMetricsEndpoint())
}
// Experimental Backend Statistics Module
// Backend Statistics Module
// TODO: Should these use standard middlewares? Refactor later, they are extremely simple.
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
router.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
router.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
// The v1/* urls are exactly the same as above - makes local e2e testing easier if they are registered.
router.Get("/v1/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
router.Post("/v1/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
// p2p
if p2p.IsP2PEnabled() {
@@ -67,6 +84,9 @@ func RegisterLocalAIRoutes(router *fiber.App,
router.Get("/system", localai.SystemInformations(ml, appConfig))
// misc
router.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig))
router.Post("/v1/tokenize",
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TOKENIZE)),
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TokenizeRequest) }),
localai.TokenizeEndpoint(cl, ml, appConfig))
}
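With /v1/tokenize now registered behind the extractor chain above, a client only needs to send a model name and the content to tokenize. A minimal client sketch; the host, port, and model name are assumptions and not part of this changeset, and the JSON field names follow the TokenizeRequest schema used above:

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Assumed: LocalAI listening on localhost:8080 with a model named "gpt-4o" installed.
	payload, _ := json.Marshal(map[string]string{
		"model":   "gpt-4o",
		"content": "Hello, LocalAI!",
	})
	resp, err := http.Post("http://localhost:8080/v1/tokenize", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Decode into a generic map rather than assuming the exact response field names.
	var out map[string]any
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Printf("tokenize response: %+v\n", out)
}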

View File

@@ -3,51 +3,50 @@ package routes
import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/http/endpoints/openai"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
)
func RegisterOpenAIRoutes(app *fiber.App,
re *middleware.RequestExtractor,
application *application.Application) {
// openAI compatible API endpoint
// chat
app.Post("/v1/chat/completions",
openai.ChatEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
app.Post("/chat/completions",
openai.ChatEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
chatChain := []fiber.Handler{
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
re.SetOpenAIRequest,
openai.ChatEndpoint(application.BackendLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig()),
}
app.Post("/v1/chat/completions", chatChain...)
app.Post("/chat/completions", chatChain...)
// edit
app.Post("/v1/edits",
openai.EditEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
editChain := []fiber.Handler{
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_EDIT)),
re.BuildConstantDefaultModelNameMiddleware("gpt-4o"),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
re.SetOpenAIRequest,
openai.EditEndpoint(application.BackendLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig()),
}
app.Post("/v1/edits", editChain...)
app.Post("/edits", editChain...)
app.Post("/edits",
openai.EditEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
// completion
completionChain := []fiber.Handler{
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_COMPLETION)),
re.BuildConstantDefaultModelNameMiddleware("gpt-4o"),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
re.SetOpenAIRequest,
openai.CompletionEndpoint(application.BackendLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig()),
}
app.Post("/v1/completions", completionChain...)
app.Post("/completions", completionChain...)
app.Post("/v1/engines/:model/completions", completionChain...)
// assistant
app.Get("/v1/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
@@ -81,45 +80,37 @@ func RegisterOpenAIRoutes(app *fiber.App,
app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
// completion
app.Post("/v1/completions",
openai.CompletionEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
app.Post("/completions",
openai.CompletionEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
app.Post("/v1/engines/:model/completions",
openai.CompletionEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
// embeddings
app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
embeddingChain := []fiber.Handler{
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_EMBEDDINGS)),
re.BuildConstantDefaultModelNameMiddleware("gpt-4o"),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
re.SetOpenAIRequest,
openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()),
}
app.Post("/v1/embeddings", embeddingChain...)
app.Post("/embeddings", embeddingChain...)
app.Post("/v1/engines/:model/embeddings", embeddingChain...)
// audio
app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/audio/speech", localai.TTSEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/audio/transcriptions",
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TRANSCRIPT)),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
re.SetOpenAIRequest,
openai.TranscriptEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()),
)
app.Post("/v1/audio/speech",
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TTS)),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TTSRequest) }),
localai.TTSEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
// images
app.Post("/v1/images/generations", openai.ImageEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/images/generations",
re.BuildConstantDefaultModelNameMiddleware("stablediffusion"),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
re.SetOpenAIRequest,
openai.ImageEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
if application.ApplicationConfig().ImageDir != "" {
app.Static("/generated-images", application.ApplicationConfig().ImageDir)

View File

@@ -3,7 +3,9 @@ package routes
import (
"fmt"
"html/template"
"math"
"sort"
"strconv"
"strings"
"github.com/mudler/LocalAI/core/config"
@@ -126,6 +128,8 @@ func RegisterUIRoutes(app *fiber.App,
// Show the Models page (all models)
app.Get("/browse", func(c *fiber.Ctx) error {
term := c.Query("term")
page := c.Query("page")
items := c.Query("items")
models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath)
@@ -164,6 +168,49 @@ func RegisterUIRoutes(app *fiber.App,
// "ApplicationConfig": appConfig,
}
if page == "" {
page = "1"
}
if page != "" {
log.Debug().Msgf("page : %+v\n", page)
// return a subset of the models
pageNum, err := strconv.Atoi(page)
if err != nil {
return c.Status(fiber.StatusBadRequest).SendString("Invalid page number")
}
if pageNum == 0 {
return c.Render("views/models", summary)
}
itemsNum, err := strconv.Atoi(items)
if err != nil {
itemsNum = 21
}
totalPages := int(math.Ceil(float64(len(models)) / float64(itemsNum)))
models = models.Paginate(pageNum, itemsNum)
log.Debug().Msgf("number of models : %+v\n", len(models))
prevPage := pageNum - 1
nextPage := pageNum + 1
if prevPage < 1 {
prevPage = 1
}
if nextPage > totalPages {
nextPage = totalPages
}
if prevPage != pageNum {
summary["PrevPage"] = prevPage
}
summary["NextPage"] = nextPage
summary["TotalPages"] = totalPages
summary["CurrentPage"] = pageNum
summary["Models"] = template.HTML(elements.ListModels(models, processingModels, galleryService))
}
// Render index
return c.Render("views/models", summary)
})
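The Paginate helper relied on above is not shown in this changeset; for the arithmetic to line up with totalPages = ceil(len(models)/items), it has to slice a 1-indexed page of at most items entries. A hypothetical sketch of that slicing (illustrative only):

// Hypothetical sketch of a 1-indexed pagination slice; the real GalleryModels.Paginate
// implementation is not part of this changeset.
func paginate[T any](items []T, page, pageSize int) []T {
	if page < 1 || pageSize < 1 {
		return items
	}
	start := (page - 1) * pageSize
	if start >= len(items) {
		return nil
	}
	end := start + pageSize
	if end > len(items) {
		end = len(items)
	}
	return items[start:end]
}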
@@ -171,6 +218,9 @@ func RegisterUIRoutes(app *fiber.App,
// Show the models, filtered from the user input
// https://htmx.org/examples/active-search/
app.Post("/browse/search/models", func(c *fiber.Ctx) error {
page := c.Query("page")
items := c.Query("items")
form := struct {
Search string `form:"search"`
}{}
@@ -180,7 +230,26 @@ func RegisterUIRoutes(app *fiber.App,
models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath)
return c.SendString(elements.ListModels(gallery.GalleryModels(models).Search(form.Search), processingModels, galleryService))
if page != "" {
// return a subset of the models
pageNum, err := strconv.Atoi(page)
if err != nil {
return c.Status(fiber.StatusBadRequest).SendString("Invalid page number")
}
itemsNum, err := strconv.Atoi(items)
if err != nil {
itemsNum = 21
}
models = models.Paginate(pageNum, itemsNum)
}
if form.Search != "" {
models = models.Search(form.Search)
}
return c.SendString(elements.ListModels(models, processingModels, galleryService))
})
/*
@@ -305,23 +374,6 @@ func RegisterUIRoutes(app *fiber.App,
})
}
// Show the Chat page
app.Get("/chat/:model", func(c *fiber.Ctx) error {
backendConfigs, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
summary := fiber.Map{
"Title": "LocalAI - Chat with " + c.Params("model"),
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
return c.Render("views/chat", summary)
})
app.Get("/talk/", func(c *fiber.Ctx) error {
backendConfigs, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
@@ -344,21 +396,73 @@ func RegisterUIRoutes(app *fiber.App,
})
app.Get("/chat/", func(c *fiber.Ctx) error {
backendConfigs := cl.GetAllBackendConfigs()
modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY)
backendConfigs, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
if len(backendConfigs) == 0 {
if len(backendConfigs)+len(modelsWithoutConfig) == 0 {
// If no model is available redirect to the index which suggests how to install models
return c.Redirect(utils.BaseURL(c))
}
modelThatCanBeUsed := ""
galleryConfigs := map[string]*gallery.Config{}
for _, m := range backendConfigs {
cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name)
if err != nil {
continue
}
galleryConfigs[m.Name] = cfg
}
title := "LocalAI - Chat"
for _, b := range backendConfigs {
if b.HasUsecases(config.FLAG_CHAT) {
modelThatCanBeUsed = b.Name
title = "LocalAI - Chat with " + modelThatCanBeUsed
break
}
}
summary := fiber.Map{
"Title": "LocalAI - Chat with " + backendConfigs[0],
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0],
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
"Title": title,
"BaseURL": utils.BaseURL(c),
"ModelsWithoutConfig": modelsWithoutConfig,
"GalleryConfig": galleryConfigs,
"ModelsConfig": backendConfigs,
"Model": modelThatCanBeUsed,
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
return c.Render("views/chat", summary)
})
// Show the Chat page
app.Get("/chat/:model", func(c *fiber.Ctx) error {
backendConfigs := cl.GetAllBackendConfigs()
modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY)
galleryConfigs := map[string]*gallery.Config{}
for _, m := range backendConfigs {
cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name)
if err != nil {
continue
}
galleryConfigs[m.Name] = cfg
}
summary := fiber.Map{
"Title": "LocalAI - Chat with " + c.Params("model"),
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs,
"GalleryConfig": galleryConfigs,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -367,14 +471,16 @@ func RegisterUIRoutes(app *fiber.App,
app.Get("/text2image/:model", func(c *fiber.Ctx) error {
backendConfigs := cl.GetAllBackendConfigs()
modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY)
summary := fiber.Map{
"Title": "LocalAI - Generate images with " + c.Params("model"),
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
"Title": "LocalAI - Generate images with " + c.Params("model"),
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -382,21 +488,33 @@ func RegisterUIRoutes(app *fiber.App,
})
app.Get("/text2image/", func(c *fiber.Ctx) error {
backendConfigs := cl.GetAllBackendConfigs()
modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY)
if len(backendConfigs) == 0 {
if len(backendConfigs)+len(modelsWithoutConfig) == 0 {
// If no model is available redirect to the index which suggests how to install models
return c.Redirect(utils.BaseURL(c))
}
modelThatCanBeUsed := ""
title := "LocalAI - Generate images"
for _, b := range backendConfigs {
if b.HasUsecases(config.FLAG_IMAGE) {
modelThatCanBeUsed = b.Name
title = "LocalAI - Generate images with " + modelThatCanBeUsed
break
}
}
summary := fiber.Map{
"Title": "LocalAI - Generate images with " + backendConfigs[0].Name,
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0].Name,
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
"Title": title,
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": modelThatCanBeUsed,
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -405,14 +523,16 @@ func RegisterUIRoutes(app *fiber.App,
app.Get("/tts/:model", func(c *fiber.Ctx) error {
backendConfigs := cl.GetAllBackendConfigs()
modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY)
summary := fiber.Map{
"Title": "LocalAI - Generate images with " + c.Params("model"),
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
"Title": "LocalAI - Generate images with " + c.Params("model"),
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -420,21 +540,32 @@ func RegisterUIRoutes(app *fiber.App,
})
app.Get("/tts/", func(c *fiber.Ctx) error {
backendConfigs := cl.GetAllBackendConfigs()
modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY)
if len(backendConfigs) == 0 {
if len(backendConfigs)+len(modelsWithoutConfig) == 0 {
// If no model is available redirect to the index which suggests how to install models
return c.Redirect(utils.BaseURL(c))
}
modelThatCanBeUsed := ""
title := "LocalAI - Generate audio"
for _, b := range backendConfigs {
if b.HasUsecases(config.FLAG_TTS) {
modelThatCanBeUsed = b.Name
title = "LocalAI - Generate audio with " + modelThatCanBeUsed
break
}
}
summary := fiber.Map{
"Title": "LocalAI - Generate audio with " + backendConfigs[0].Name,
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0].Name,
"IsP2PEnabled": p2p.IsP2PEnabled(),
"Version": internal.PrintableVersion(),
"Title": title,
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": modelThatCanBeUsed,
"IsP2PEnabled": p2p.IsP2PEnabled(),
"Version": internal.PrintableVersion(),
}
// Render index
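
Both the /text2image/ and /tts/ index handlers above follow the same shape: gather the configured backends plus the models installed without a config file, redirect to the index when both lists are empty, and otherwise pre-select the first backend whose usecase flags match the page. Below is a self-contained sketch of that selection loop; the usecase and backendConfig types are simplified stand-ins for illustration (not the real config package), and defaultModelFor is not a function in the repository.

// Illustrative sketch only: mirrors the "pick the first model that can
// serve this page" loop used by the /text2image/ and /tts/ routes above.
package main

import "fmt"

type usecase int

const (
	flagChat usecase = 1 << iota
	flagImage
	flagTTS
)

type backendConfig struct {
	name     string
	usecases usecase
}

// hasUsecases reports whether all requested flags are set, loosely
// modelled on the HasUsecases call seen in the handlers.
func (b backendConfig) hasUsecases(f usecase) bool { return b.usecases&f == f }

// defaultModelFor returns the first configured model advertising the
// wanted flag, plus the title suffix the handlers build from it; an
// empty result means the page falls back to a generic title.
func defaultModelFor(want usecase, configs []backendConfig) (model, suffix string) {
	for _, b := range configs {
		if b.hasUsecases(want) {
			return b.name, " with " + b.name
		}
	}
	return "", ""
}

func main() {
	configs := []backendConfig{
		{name: "whisper-base"},                      // no usecase flags set
		{name: "flux.1-dev", usecases: flagImage},   // first image-capable backend wins
	}
	model, suffix := defaultModelFor(flagImage, configs)
	fmt.Println(model, "->", "LocalAI - Generate images"+suffix)
}

In the handlers themselves the chosen name feeds the Model and Title entries of the fiber.Map handed to the template, while ModelsWithoutConfig is passed separately so loosely-installed models still show up in the page's selector.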

View File

@@ -27,6 +27,21 @@ SOFTWARE.
*/
function toggleLoader(show) {
const loader = document.getElementById('loader');
const sendButton = document.getElementById('send-button');
if (show) {
loader.style.display = 'block';
sendButton.style.display = 'none';
document.getElementById("input").disabled = true;
} else {
document.getElementById("input").disabled = false;
loader.style.display = 'none';
sendButton.style.display = 'block';
}
}
function submitKey(event) {
event.preventDefault();
localStorage.setItem("key", document.getElementById("apiKey").value);
@@ -49,7 +64,7 @@ function submitPrompt(event) {
document.getElementById("input").value = "";
const key = localStorage.getItem("key");
const systemPrompt = localStorage.getItem("system_prompt");
Alpine.nextTick(() => { document.getElementById('messages').scrollIntoView(false); });
promptGPT(systemPrompt, key, input);
}
@@ -72,9 +87,8 @@ function readInputImage() {
// Set class "loader" to the element with "loader" id
//document.getElementById("loader").classList.add("loader");
// Make the "loader" visible
document.getElementById("loader").style.display = "block";
document.getElementById("input").disabled = true;
document.getElementById('messages').scrollIntoView(false)
toggleLoader(true);
messages = Alpine.store("chat").messages();
@@ -181,8 +195,8 @@ function readInputImage() {
const chatStore = Alpine.store("chat");
chatStore.add("assistant", token);
// Efficiently scroll into view without triggering multiple reflows
const messages = document.getElementById('messages');
messages.scrollTop = messages.scrollHeight;
// const messages = document.getElementById('messages');
// messages.scrollTop = messages.scrollHeight;
};
let buffer = "";
@@ -244,10 +258,8 @@ function readInputImage() {
}
// Remove class "loader" from the element with "loader" id
//document.getElementById("loader").classList.remove("loader");
document.getElementById("loader").style.display = "none";
// enable input
document.getElementById("input").disabled = false;
toggleLoader(false);
// scroll to the bottom of the chat
document.getElementById('messages').scrollIntoView(false)
// set focus to the input

View File

@@ -10,18 +10,6 @@ body {
.htmx-request .htmx-indicator{
opacity:1
}
/* Loader (https://cssloaders.github.io/) */
.loader {
width: 12px;
height: 12px;
border-radius: 50%;
display: block;
margin:15px auto;
position: relative;
color: #FFF;
box-sizing: border-box;
animation: animloader 2s linear infinite;
}
@keyframes animloader {
0% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 -2px; }

View File

@@ -4,7 +4,7 @@ Part of this page is based on the OpenAI Chatbot example by David Härer:
https://github.com/david-haerer/chatapi
MIT License Copyright (c) 2023 David Härer
Copyright (c) 2024 Ettore Di Giacinto
Copyright (c) 2024-2025 Ettore Di Giacinto
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -29,145 +29,382 @@ SOFTWARE.
<html lang="en">
{{template "views/partials/head" .}}
<script defer src="static/chat.js"></script>
<style>
body {
overflow: hidden;
}
</style>
<body class="bg-gray-900 text-gray-200" x-data="{ key: $store.chat.key }">
<div class="flex flex-col min-h-screen">
{{ $allGalleryConfigs:=.GalleryConfig }}
{{ $model:=.Model}}
<body class="bg-slate-900 text-gray-100 flex flex-col h-screen" x-data="{ key: $store.chat.key, sidebarOpen: true }">
{{template "views/partials/navbar" .}}
<div class="chat-container mt-2 mr-2 ml-2 mb-2 bg-gray-800 shadow-lg rounded-lg" >
<!-- Chat Header -->
<div class="border-b border-gray-700 p-4" x-data="{ component: 'menu' }">
<div class="flex items-center justify-between">
<!-- Main container with sidebar toggle -->
<div class="flex flex-1 overflow-hidden relative">
<!-- Sidebar -->
<div
class="sidebar bg-gray-800 fixed top-16 bottom-0 left-0 w-64 transform transition-transform duration-300 ease-in-out z-30 border-r border-gray-700 overflow-y-auto"
:class="sidebarOpen ? 'translate-x-0' : '-translate-x-full'">
<div class="p-4 flex justify-between items-center border-b border-gray-700">
<h2 class="text-lg font-semibold">Chat Settings</h2>
<button
@click="sidebarOpen = false"
class="text-gray-400 hover:text-white focus:outline-none">
<i class="fa-solid fa-times"></i>
</button>
</div>
<h1 class="text-lg font-semibold"> <i class="fa-solid fa-comments"></i> Chat with {{.Model}} <a href="https://localai.io/features/text-generation/" target="_blank" >
<i class="fas fa-circle-info pr-2"></i>
</a></h1>
<div x-show="component === 'menu'" id="menu">
<button
@click="$store.chat.clear()"
id="clear"
title="Clear chat history"
<!-- Sidebar content -->
<div class="p-4 space-y-6">
<!-- Model selection - Fixed to properly select current model -->
<div class="space-y-2">
<label class="text-sm font-medium text-gray-300">Select Model</label>
<select
id="modelSelector"
class="w-full bg-gray-700 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
onchange="window.location = this.value"
>
<option value="" disabled class="text-gray-400">Select a model</option>
{{ range .ModelsConfig }}
{{ $cfg := . }}
{{ range .KnownUsecaseStrings }}
{{ if eq . "FLAG_CHAT" }}
<option
value="chat/{{$cfg.Name}}"
{{ if eq $cfg.Name $model }} selected {{end}}
class="bg-gray-700 text-white"
>
{{$cfg.Name}}
</option>
{{ end }}
{{ end }}
{{ end }}
{{ range .ModelsWithoutConfig }}
<option
value="chat/{{.}}"
{{ if eq . $model }} selected {{ end }}
class="bg-gray-700 text-white"
>
{{.}}
</option>
{{end}}
</select>
</div>
data-twe-ripple-init
data-twe-ripple-color="light"
class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
>
Clear chat 🔥
</button>
<button @click="component = 'key'" title="Update API key"
class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
>Set API Key🔑</button>
<button @click="component = 'system_prompt'" title="System Prompt"
class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
>Set system prompt</button>
{{ if $model }}
{{ $galleryConfig:= index $allGalleryConfigs $model}}
{{ if $galleryConfig }}
<!-- Model info -->
<div class="space-y-2">
<div class="flex items-center">
{{ if $galleryConfig.Icon }}<img src="{{$galleryConfig.Icon}}" class="rounded-lg w-8 h-8 mr-2">{{end}}
<h3 class="text-md font-medium">{{ $model }}</h3>
</div>
<button data-twe-ripple-init data-twe-ripple-color="light" class="w-full text-left flex items-center px-3 py-2 text-xs rounded text-white bg-gray-700 hover:bg-gray-600 transition-colors" data-modal-target="model-info-modal" data-modal-toggle="model-info-modal">
<i class="fas fa-info-circle mr-2"></i>
Model Information
</button>
</div>
{{ end }}
{{ end }}
<div x-data="{ activeTab: 'actions' }" class="space-y-4">
<!-- Tab navigation -->
<div class="flex border-b border-gray-700">
<button
@click="activeTab = 'actions'"
:class="activeTab === 'actions' ? 'border-b-2 border-blue-500 text-white' : 'text-gray-400 hover:text-white'"
class="py-2 px-4 text-sm font-medium">
Actions
</button>
<button
@click="activeTab = 'settings'"
:class="activeTab === 'settings' ? 'border-b-2 border-blue-500 text-white' : 'text-gray-400 hover:text-white'"
class="py-2 px-4 text-sm font-medium">
Settings
</button>
</div>
<!-- Actions tab -->
<div x-show="activeTab === 'actions'" class="space-y-3">
<button
@click="$store.chat.clear()"
id="clear"
title="Clear chat history"
class="w-full flex items-center px-3 py-2 text-sm rounded text-white bg-gray-700 hover:bg-gray-600 transition-colors"
>
<i class="fa-solid fa-trash-can mr-2"></i> Clear chat
</button>
<a
href="https://localai.io/features/text-generation/"
target="_blank"
class="w-full flex items-center px-3 py-2 text-sm rounded text-white bg-gray-700 hover:bg-gray-600 transition-colors"
>
<i class="fas fa-book mr-2"></i> Documentation
</a>
<a
href="browse?term={{.Model}}"
class="w-full flex items-center px-3 py-2 text-sm rounded text-white bg-gray-700 hover:bg-gray-600 transition-colors"
>
<i class="fas fa-brain mr-2"></i> Browse Model
</a>
</div>
<!-- Settings tab -->
<div x-show="activeTab === 'settings'" x-data="{ showKeyForm: false, showPromptForm: false }" class="space-y-3">
<button
@click="showKeyForm = !showKeyForm; showPromptForm = false"
class="w-full flex items-center justify-between px-3 py-2 text-sm rounded text-white bg-gray-700 hover:bg-gray-600 transition-colors"
>
<span><i class="fa-solid fa-key mr-2"></i> API Key</span>
<i :class="showKeyForm ? 'fa-chevron-up' : 'fa-chevron-down'" class="fa-solid"></i>
</button>
<div x-show="showKeyForm" class="p-3 bg-gray-700 rounded">
<form id="key" class="flex flex-col space-y-2">
<input
type="password"
id="apiKey"
name="apiKey"
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
placeholder="OpenAI API Key"
x-model.lazy="key"
/>
<button
type="submit"
class="px-3 py-2 text-sm rounded text-white bg-blue-600 hover:bg-blue-700 transition-colors"
>
Save API Key
</button>
</form>
</div>
<button
@click="showPromptForm = !showPromptForm; showKeyForm = false"
class="w-full flex items-center justify-between px-3 py-2 text-sm rounded text-white bg-gray-700 hover:bg-gray-600 transition-colors"
>
<span><i class="fa-solid fa-message mr-2"></i> System Prompt</span>
<i :class="showPromptForm ? 'fa-chevron-up' : 'fa-chevron-down'" class="fa-solid"></i>
</button>
<div x-show="showPromptForm" class="p-3 bg-gray-700 rounded">
<form id="system_prompt" class="flex flex-col space-y-2">
<textarea
type="text"
id="systemPrompt"
name="systemPrompt"
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none min-h-24"
placeholder="System prompt"
x-model.lazy="$store.chat.systemPrompt"
></textarea>
<button
type="submit"
class="px-3 py-2 text-sm rounded text-white bg-blue-600 hover:bg-blue-700 transition-colors"
>
Save System Prompt
</button>
</form>
</div>
</div>
</div>
</div>
</div>
<form x-show="component === 'key'" id="key">
<input
type="password"
id="apiKey"
name="apiKey"
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
placeholder="OpenAI API Key"
x-model.lazy="key"
/>
<button @click="component = 'menu'" type="submit" title="Save API key">
<i class="fa-solid fa-arrow-right"></i>
</button>
</form>
<form x-show="component === 'system_prompt'" id="system_prompt">
<textarea
type="text"
id="systemPrompt"
name="systemPrompt"
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
placeholder="System prompt"
x-model.lazy="system_prompt"
></textarea>
<button @click="component = 'menu'" type="submit" title="Save Prompt">
<i class="fa-solid fa-arrow-right"></i>
</button>
</form>
<select x-data="{ link : '' }" x-model="link" x-init="$watch('link', value => window.location = link)"
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
>
<!-- Options -->
<option value="" disabled class="text-gray-400" >Select a model</option>
{{ $model:=.Model}}
{{ range .ModelsConfig }}
{{ if eq . $model }}
<option value="chat/{{.}}" selected class="bg-gray-700 text-white">{{.}}</option>
{{ else }}
<option value="chat/{{.}}" class="bg-gray-700 text-white">{{.}}</option>
{{ end }}
{{ end }}
</select>
<!-- Main chat container (shifts with sidebar) -->
<div
class="flex-1 flex flex-col transition-all duration-300 ease-in-out"
:class="sidebarOpen ? 'ml-64' : 'ml-0'">
<!-- Chat header with toggle button -->
<div class="border-b border-gray-700 p-4 flex items-center">
<!-- Sidebar toggle button moved to be the first element in the header and with clear styling -->
<button
@click="sidebarOpen = !sidebarOpen"
class="mr-4 text-gray-300 hover:text-white focus:outline-none bg-gray-800 hover:bg-gray-700 p-2 rounded"
style="min-width: 36px;"
title="Toggle settings">
<i class="fa-solid" :class="sidebarOpen ? 'fa-times' : 'fa-bars'"></i>
</button>
<div class="flex items-center">
<i class="fa-solid fa-comments mr-2"></i>
{{ if $model }}
{{ $galleryConfig:= index $allGalleryConfigs $model}}
{{ if $galleryConfig }}
{{ if $galleryConfig.Icon }}<img src="{{$galleryConfig.Icon}}" class="rounded-lg w-8 h-8 mr-2">{{end}}
{{ end }}
{{ end }}
<h1 class="text-lg font-semibold">
Chat {{ if .Model }} with {{.Model}} {{ end }}
</h1>
</div>
</div>
</div>
</div>
<div class="chat-messages p-4" id="chat" x-data="{history: $store.chat.history}">
<p id="usage" x-show="history.length === 0">
Start chatting with the AI by typing a prompt in the input field below and pressing Enter.
For models that support images, you can upload an image by clicking the paperclip <i class="fa-solid fa-paperclip"></i> icon.
</p>
<div id="messages">
<template x-for="message in history">
<div class="message flex items-start space-x-2 my-2" >
<!--<img :src="message.role === 'user' ? '/path/to/user-icon.png' : '/path/to/bot-icon.png'" alt="" class="h-6 w-6">-->
<i class="fa-solid h-8 w-8" :class="message.role === 'user' ? 'fa-user' : 'fa-robot'" ></i>
<div class="flex flex-col flex-1">
<span class="text-xs font-semibold text-gray-600" x-text="message.role === 'user' ? 'User' : 'Assistant ({{.Model}})'"></span>
<template x-if="message.role === 'user'">
<div class="p-2 flex-1 rounded" :class="message.role" x-html="message.html"></div>
</template>
<template x-if="message.role === 'assistant'">
<div class="p-2 flex-1 rounded" :class="message.role" x-html="message.html"></div>
</template>
<template x-if="message.image">
<img :src="message.image" alt="Image" class="rounded-lg mt-2 h-36 w-36">
<!-- Chat messages area -->
<div class="flex-1 p-4 overflow-auto" id="chat" x-data="{history: $store.chat.history}">
<p id="usage" x-show="history.length === 0" class="text-gray-300">
Start chatting with the AI by typing a prompt in the input field below and pressing Enter.
For models that support images, you can upload an image by clicking the paperclip
<i class="fa-solid fa-paperclip"></i> icon.
</p>
<div id="messages" class="max-w-3xl mx-auto">
<template x-for="message in history">
<div :class="message.role === 'user' ? 'flex items-start space-x-2 my-2 justify-end' : 'flex items-start space-x-2 my-2'">
{{ if .Model }}
{{ $galleryConfig:= index $allGalleryConfigs .Model}}
<template x-if="message.role === 'user'">
<div class="flex items-center space-x-2">
<div class="flex flex-col flex-1 items-end">
<span class="text-xs font-semibold text-gray-400">You</span>
<div class="p-2 flex-1 rounded bg-gray-700 text-white" x-html="message.html"></div>
<template x-if="message.image">
<img :src="message.image" alt="Image" class="rounded-lg mt-2 max-w-xs">
</template>
</div>
</div>
</template>
<template x-if="message.role != 'user'">
<div class="flex items-center space-x-2">
{{ if $galleryConfig }}
{{ if $galleryConfig.Icon }}<img src="{{$galleryConfig.Icon}}" class="rounded-lg mt-2 max-w-8 max-h-8">{{end}}
{{ end }}
<div class="flex flex-col flex-1">
<span class="text-xs font-semibold text-gray-400">{{if .Model}}{{.Model}}{{else}}Assistant{{end}}</span>
<div class="flex-1 text-white flex items-center space-x-2">
<div x-html="message.html"></div>
<button @click="copyToClipboard(message.html)" title="Copy to clipboard" class="text-gray-400 hover:text-gray-100">
<i class="fa-solid fa-copy"></i>
</button>
</div>
<template x-if="message.image">
<img :src="message.image" alt="Image" class="rounded-lg mt-2 max-w-xs">
</template>
</div>
</div>
</template>
{{ else }}
<i
class="fa-solid h-8 w-8"
:class="message.role === 'user' ? 'fa-user' : 'fa-robot'"
></i>
{{ end }}
</div>
</template>
</div>
</div>
</template>
</div>
</div>
<div class="p-4 border-t border-gray-700" x-data="{ inputValue: '', shiftPressed: false, fileName: '' }">
<div id="loader" class="my-2 loader" style="display: none;"></div>
<input id="chat-model" type="hidden" value="{{.Model}}">
<input id="input_image" type="file" style="display: none;" @change="fileName = $event.target.files[0].name">
<form id="prompt" action="chat/{{.Model}}" method="get" @submit.prevent="submitPrompt">
<div class="relative w-full">
<textarea
<!-- Chat Input -->
<div class="p-4 border-t border-gray-700" x-data="{ inputValue: '', shiftPressed: false, fileName: '', isLoading: false }">
<form id="prompt" action="chat/{{.Model}}" method="get" @submit.prevent="submitPrompt" class="max-w-3xl mx-auto">
<div class="relative w-full bg-gray-800 rounded-xl shadow-md">
<textarea
id="input"
name="input"
x-model="inputValue"
placeholder="Send a message..."
class="p-2 pl-2 border rounded w-full bg-gray-600 text-white placeholder-gray-300"
class="p-4 pr-16 w-full bg-gray-800 text-gray-100 placeholder-gray-400 focus:outline-none resize-none border-0 rounded-xl transition-colors duration-200"
required
@keydown.shift="shiftPressed = true"
@keyup.shift="shiftPressed = false"
@keydown.enter="if (!shiftPressed) { submitPrompt($event); }"
style="padding-right: 4rem;"
></textarea>
<span x-text="fileName" id="fileName" class="absolute right-16 top-5 text-gray-300 text-sm mr-2"></span>
<button type="button" onclick="document.getElementById('input_image').click()" class="fa-solid fa-paperclip text-gray-300 ml-2 absolute right-10 top-3 text-lg p-2">
</button>
<button type=submit><i class="fa-solid fa-circle-up text-gray-300 absolute right-2 top-3 text-lg p-2"></i></button>
rows="3"
style="box-shadow: 0 0 0 1px rgba(75, 85, 99, 0.4) inset;"
></textarea>
<span x-text="fileName" id="fileName" class="absolute right-16 top-4 text-gray-400 text-sm mr-2"></span>
<button
type="button"
onclick="document.getElementById('input_image').click()"
class="fa-solid fa-paperclip text-gray-400 absolute right-12 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200"
title="Attach an image"
></button>
<!-- Send button and loader in the same position -->
<div class="absolute right-3 top-4">
<!-- Loader (hidden by default) -->
<div id="loader" class="text-lg p-2" style="display: none;">
<svg class="animate-spin h-5 w-5 text-blue-500" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
</svg>
</div>
<!-- Send button -->
<button
id="send-button"
type="submit"
class="text-lg p-2 text-gray-400 hover:text-blue-400 transition-colors duration-200"
title="Send message"
>
<i class="fa-solid fa-paper-plane"></i>
</button>
</div>
</div>
</form>
<input id="chat-model" type="hidden" value="{{.Model}}">
<input
id="input_image"
type="file"
style="display: none;"
@change="fileName = $event.target.files[0].name"
/>
</div>
</form>
</div>
</form>
</div>
</div>
</div>
<!-- Modal moved outside of sidebar to appear in center of page -->
{{ if $model }}
{{ $galleryConfig:= index $allGalleryConfigs $model}}
{{ if $galleryConfig }}
<div id="model-info-modal" tabindex="-1" aria-hidden="true" class="hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 flex justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full">
<div class="relative p-4 w-full max-w-2xl max-h-full">
<div class="relative p-4 w-full max-w-2xl max-h-full bg-white rounded-lg shadow dark:bg-gray-700">
<!-- Header -->
<div class="flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600">
<h3 class="text-xl font-semibold text-gray-900 dark:text-white">{{ $model }}</h3>
<button class="text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white" data-modal-hide="model-info-modal">
<svg class="w-3 h-3" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 14 14">
<path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6"/>
</svg>
<span class="sr-only">Close modal</span>
</button>
</div>
<!-- Body -->
<div class="p-4 md:p-5 space-y-4">
<div class="flex justify-center items-center">
{{ if $galleryConfig.Icon }}<img class="lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3 entered loaded" src="{{$galleryConfig.Icon}}" loading="lazy"/>{{end}}
</div>
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">{{ $galleryConfig.Description }}</p>
<hr>
<p class="text-sm font-semibold text-gray-900 dark:text-white">Links</p>
<ul>
{{range $galleryConfig.URLs}}
<li><a href="{{ . }}" target="_blank">{{ . }}</a></li>
{{end}}
</ul>
</div>
<!-- Footer -->
<div class="flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600">
<button data-modal-hide="model-info-modal" class="py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700">
Close
</button>
</div>
</div>
</div>
</div>
{{ end }}
{{ end }}
<!-- Alpine store initialization -->
<script>
document.addEventListener("alpine:init", () => {
Alpine.store("chat", {
history: [],
languages: [undefined],
systemPrompt: "",
clear() {
this.history.length = 0;
},
@@ -175,31 +412,22 @@ SOFTWARE.
const N = this.history.length - 1;
if (this.history.length && this.history[N].role === role) {
this.history[N].content += content;
str = this.history[N].content;
this.history[N].html = DOMPurify.sanitize(
marked.parse(this.history[N].content),
marked.parse(this.history[N].content)
);
} else {
c = ""
// split the content by newlines
let c = "";
const lines = content.split("\n");
// for each line, do DOMPurify.sanitize(marked.parse(line)) and add it to c
lines.forEach((line) => {
c += DOMPurify.sanitize(marked.parse(line));
});
this.history.push({
role: role,
content: content,
html: c,
image: image,
});
this.history.push({ role, content, html: c, image });
}
document.getElementById('messages').scrollIntoView(false);
const parser = new DOMParser();
const html = parser.parseFromString(
this.history[this.history.length - 1].html,
"text/html",
"text/html"
);
const code = html.querySelectorAll("pre code");
if (!code.length) return;
@@ -213,17 +441,26 @@ SOFTWARE.
});
},
messages() {
return this.history.map((message) => {
return {
role: message.role,
content: message.content,
image: message.image,
};
});
return this.history.map((message) => ({
role: message.role,
content: message.content,
image: message.image,
}));
},
});
window.copyToClipboard = (content) => {
const tempElement = document.createElement('div');
tempElement.innerHTML = content;
const text = tempElement.textContent || tempElement.innerText;
navigator.clipboard.writeText(text).then(() => {
alert('Copied to clipboard!');
}).catch(err => {
console.error('Failed to copy: ', err);
});
};
});
</script>
</div>
</body>
</html>
</html>
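
The model selector in this template keeps only the entries whose KnownUsecaseStrings contain "FLAG_CHAT", and the text2image and index templates later apply the same filtering with "FLAG_IMAGE" and "FLAG_TTS". A rough sketch of how such a flag-name list could be produced on the Go side follows, assuming the usecases are a bitmask on the backend config; the types and the knownUsecaseStrings helper are illustrative stand-ins, not the repository's actual implementation.

// Illustrative sketch only: derive the flag-name list the templates
// iterate over with {{ range .KnownUsecaseStrings }}.
package main

import "fmt"

type usecase int

const (
	FLAG_CHAT usecase = 1 << iota
	FLAG_IMAGE
	FLAG_TTS
)

// A slice of pairs keeps the output order stable.
var usecaseNames = []struct {
	flag usecase
	name string
}{
	{FLAG_CHAT, "FLAG_CHAT"},
	{FLAG_IMAGE, "FLAG_IMAGE"},
	{FLAG_TTS, "FLAG_TTS"},
}

type backendConfig struct {
	Name     string
	Usecases usecase
}

// knownUsecaseStrings lists the names of every flag set on the config.
func (b backendConfig) knownUsecaseStrings() []string {
	var out []string
	for _, u := range usecaseNames {
		if b.Usecases&u.flag != 0 {
			out = append(out, u.name)
		}
	}
	return out
}

func main() {
	// Hypothetical model name used only for the example.
	cfg := backendConfig{Name: "llama-3.2-3b", Usecases: FLAG_CHAT | FLAG_TTS}
	fmt.Println(cfg.knownUsecaseStrings()) // [FLAG_CHAT FLAG_TTS]
}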

View File

@@ -2,113 +2,185 @@
<html lang="en">
{{template "views/partials/head" .}}
<body class="bg-gray-900 text-gray-200">
<body class="bg-gradient-to-br from-gray-900 to-gray-950 text-gray-200">
<div class="flex flex-col min-h-screen">
{{template "views/partials/navbar" .}}
<div class="container mx-auto px-4 flex-grow">
<div class="header text-center py-12">
<h1 class="text-5xl font-bold text-gray-100">Welcome to <i>your</i> LocalAI instance!</h1>
<p class="mt-4 text-lg">The FOSS alternative to OpenAI, Claude, ...</p>
<a href="https://localai.io" target="_blank" class="mt-4 inline-block bg-blue-500 text-white py-2 px-4 rounded-lg shadow transition duration-300 ease-in-out hover:bg-blue-700 hover:shadow-lg">
<i class="fas fa-book-reader pr-2"></i>Documentation
</a>
<div class="container mx-auto px-4 py-8 flex-grow">
<!-- Hero Section -->
<div class="bg-gradient-to-r from-blue-900/30 to-indigo-900/30 rounded-2xl shadow-xl p-8 mb-10">
<div class="max-w-4xl mx-auto text-center">
<h1 class="text-4xl md:text-5xl font-bold text-white mb-4">
<span class="bg-clip-text text-transparent bg-gradient-to-r from-blue-400 to-indigo-400">
Welcome to <i>your</i> LocalAI instance!
</span>
</h1>
<p class="text-xl text-gray-300 mb-6">The FOSS alternative to OpenAI, Claude, and more</p>
<div class="flex flex-wrap justify-center gap-4">
<a href="https://localai.io" target="_blank"
class="group flex items-center bg-blue-600 hover:bg-blue-700 text-white py-2 px-6 rounded-lg transition duration-300 ease-in-out transform hover:scale-105 hover:shadow-lg">
<i class="fas fa-book-reader mr-2"></i>
<span>Documentation</span>
<i class="fas fa-arrow-right opacity-0 group-hover:opacity-100 group-hover:translate-x-2 ml-2 transition-all duration-300"></i>
</a>
<a href="browse"
class="group flex items-center bg-indigo-600 hover:bg-indigo-700 text-white py-2 px-6 rounded-lg transition duration-300 ease-in-out transform hover:scale-105 hover:shadow-lg">
<i class="fas fa-images mr-2"></i>
<span>Gallery</span>
<i class="fas fa-arrow-right opacity-0 group-hover:opacity-100 group-hover:translate-x-2 ml-2 transition-all duration-300"></i>
</a>
</div>
</div>
</div>
<div class="models mt-4">
<!-- Models Section -->
<div class="models mt-8">
{{template "views/partials/inprogress" .}}
{{ if eq (len .ModelsConfig) 0 }}
<h2 class="text-center text-3xl font-semibold text-gray-100"> <i class="text-yellow-200 ml-2 fa-solid fa-triangle-exclamation animate-pulse"></i> Ouch! seems you don't have any models installed from the LocalAI gallery!</h2>
<p class="text-center mt-4 text-xl">..install something from the <a class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded" href="browse">🖼️ Gallery</a> or check the <a href="https://localai.io/basics/getting_started/" class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded"> <i class="fa-solid fa-book"></i> Getting started documentation </a></p>
<div class="bg-gray-800/50 border border-gray-700/50 rounded-xl p-8 shadow-md backdrop-blur-sm">
<div class="text-center max-w-3xl mx-auto">
<div class="inline-flex items-center justify-center w-16 h-16 rounded-full bg-yellow-500/20 mb-4">
<i class="text-yellow-400 text-2xl fa-solid fa-triangle-exclamation"></i>
</div>
<h2 class="text-2xl md:text-3xl font-semibold text-gray-100 mb-4">No models installed from the LocalAI gallery</h2>
<p class="text-lg text-gray-300 mb-6">Install models from the <a class="text-blue-400 hover:text-blue-300 underline underline-offset-2" href="browse">🖼️ Gallery</a> or check the <a href="https://localai.io/basics/getting_started/" class="text-blue-400 hover:text-blue-300 underline underline-offset-2"> <i class="fa-solid fa-book"></i> Getting started documentation</a></p>
{{ if ne (len .Models) 0 }}
<hr class="my-4">
<h3 class="text-center text-xl font-semibold text-gray-100">
However, it seems you have some models installed without a configuration file:
</h3>
{{ range .Models }}
<div class="bg-gray-800 border-b border-gray-700 p-4 mt-4">
<h4 class="text-md font-bold text-gray-200">{{.}}</h4>
</div>
{{end}}
{{end}}
{{ if ne (len .Models) 0 }}
<div class="mt-8 pt-8 border-t border-gray-700/50">
<h3 class="text-xl font-semibold text-gray-100 mb-4">Models installed without a configuration file:</h3>
<div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
{{ range .Models }}
<div class="bg-gray-800/80 border border-gray-700 rounded-lg p-4 flex items-center">
<i class="fas fa-brain text-lg text-gray-400 mr-3"></i>
<p class="font-medium text-gray-200">{{.Name}}</p>
</div>
{{end}}
</div>
</div>
{{end}}
</div>
</div>
{{ else }}
{{ $modelsN := len .ModelsConfig}}
{{ $modelsN = add $modelsN (len .Models)}}
<h2 class="text-center text-3xl font-semibold text-gray-100">{{$modelsN}} Installed model(s)</h2>
<table class="table-auto mt-4 w-full text-left text-gray-200">
<thead class="text-xs text-gray-400 uppercase bg-gray-700">
<tr>
<th class="px-4 py-2"></th>
<th class="px-4 py-2">Model Name</th>
<th class="px-4 py-2">Backend</th>
<th class="px-4 py-2 float-right">Actions</th>
</tr>
</thead>
<tbody>
<div class="mb-6 flex flex-col md:flex-row md:items-center md:justify-between">
<h2 class="text-2xl md:text-3xl font-bold text-white mb-4 md:mb-0">
<span class="text-blue-400">{{$modelsN}}</span> Installed Model<span class="{{if gt $modelsN 1}}s{{end}}">
</h2>
<!--
<div class="flex gap-4">
<button class="text-sm bg-gray-800 hover:bg-gray-700 text-gray-300 py-2 px-4 rounded-lg transition flex items-center gap-2">
<i class="fas fa-filter"></i> Filter
</button>
<button class="text-sm bg-gray-800 hover:bg-gray-700 text-gray-300 py-2 px-4 rounded-lg transition flex items-center gap-2">
<i class="fas fa-sort"></i> Sort
</button>
</div>
-->
</div>
<div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-6">
{{$galleryConfig:=.GalleryConfig}}
{{$noicon:="https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg"}}
{{ range .ModelsConfig }}
{{ $backendCfg := . }}
{{ $cfg:= index $galleryConfig .Name}}
<tr class="bg-gray-800 border-b border-gray-700">
<td class="px-4 py-3">
{{ with $cfg }}
<img {{ if $cfg.Icon }}
src="{{$cfg.Icon}}"
{{ else }}
src="{{$noicon}}"
{{ end }}
class="rounded-t-lg max-h-24 max-w-24 object-cover mt-3"
<div class="bg-gray-800/90 border border-gray-700/50 rounded-xl overflow-hidden transition-all duration-300 hover:shadow-lg hover:shadow-blue-900/20 hover:-translate-y-1 hover:border-blue-700/50">
<div class="flex p-5">
<div class="w-20 h-20 rounded-lg overflow-hidden flex-shrink-0 bg-gray-700/50 flex items-center justify-center">
<img {{ if and $cfg $cfg.Icon }}
src="{{$cfg.Icon}}"
{{ else }}
src="{{$noicon}}"
{{ end }}
class="w-full h-full object-contain"
alt="{{.Name}} icon"
>
{{ else}}
<img src="{{$noicon}}" class="rounded-t-lg max-h-24 max-w-24 object-cover mt-3">
{{ end }}
</td>
<td class="px-4 py-3 font-bold">
<p class="font-bold text-white flex items-center"><i class="fas fa-brain pr-2"></i><a href="browse?term={{.Name}}">{{.Name}}</a></p>
</td>
<td class="px-4 py-3 font-bold">
{{ if .Backend }}
<!-- Badge for Backend -->
<span class="inline-block bg-blue-500 text-white py-1 px-3 rounded-full text-xs">
{{.Backend}}
</span>
{{ else }}
<span class="inline-block bg-yellow-500 text-white py-1 px-3 rounded-full text-xs">
auto
</span>
{{ end }}
</td>
<td class="px-4 py-3">
<button
class="float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
data-twe-ripple-color="light" data-twe-ripple-init="" hx-confirm="Are you sure you wish to delete the model?" hx-post="browse/delete/model/{{.Name}}" hx-swap="outerHTML"><i class="fa-solid fa-cancel pr-2"></i>Delete</button>
</td>
</div>
<div class="ml-4 flex-1 min-w-0">
<div class="flex items-center">
<h3 class="font-bold text-lg text-white truncate">{{.Name}}</h3>
<a href="browse?term={{.Name}}" class="ml-2 text-gray-400 hover:text-blue-400 transition" title="Search for similar models">
<i class="fas fa-search text-xs"></i>
</a>
</div>
<div class="mt-2 flex flex-wrap gap-2">
{{ if .Backend }}
<span class="inline-flex items-center px-2.5 py-0.5 rounded-md text-xs font-medium bg-blue-900/50 text-blue-300 border border-blue-700/50">
{{.Backend}}
</span>
{{ else }}
<span class="inline-flex items-center px-2.5 py-0.5 rounded-md text-xs font-medium bg-yellow-900/50 text-yellow-300 border border-yellow-700/50">
auto
</span>
{{ end }}
</div>
</div>
</div>
<div class="px-5 pb-5 pt-2">
<div class="flex flex-wrap gap-2">
{{ range .KnownUsecaseStrings }}
{{ if eq . "FLAG_CHAT" }}
<a href="chat/{{$backendCfg.Name}}" class="inline-flex items-center rounded-full px-4 py-2 text-sm font-medium bg-blue-900/60 text-blue-200 border border-blue-700/50 hover:bg-blue-800 transition duration-200 ease-in-out">
<i class="fas fa-comment-alt text-xs mr-1.5"></i>Chat
</a>
{{ end }}
{{ if eq . "FLAG_IMAGE" }}
<a href="text2image/{{$backendCfg.Name}}" class="inline-flex items-center text-sm bg-green-600/80 hover:bg-green-700 text-white py-1.5 px-3 rounded-lg shadow transition duration-300 ease-in-out">
<i class="fas fa-image text-xs mr-1.5"></i>Image
</a>
{{ end }}
{{ if eq . "FLAG_TTS" }}
<a href="tts/{{$backendCfg.Name}}" class="inline-flex items-center text-sm bg-purple-600/80 hover:bg-purple-700 text-white py-1.5 px-3 rounded-lg shadow transition duration-300 ease-in-out">
<i class="fas fa-microphone text-xs mr-1.5"></i>TTS
</a>
{{ end }}
{{ end }}
</div>
<div class="mt-4 flex justify-end">
<button
class="inline-flex items-center text-xs font-medium text-red-400 hover:text-red-300 hover:bg-red-900/20 rounded-md px-2 py-1 transition-colors duration-200"
data-twe-ripple-init=""
hx-confirm="Are you sure you wish to delete this model?"
hx-post="browse/delete/model/{{.Name}}"
hx-swap="outerHTML">
<i class="fas fa-trash-alt mr-1.5"></i>Delete
</button>
</div>
</div>
</div>
{{ end }}
{{ range .Models }}
<tr class="bg-gray-800 border-b border-gray-700">
<td class="px-4 py-3">
<img src="{{$noicon}}" class="rounded-t-lg max-h-24 max-w-24 object-cover mt-3">
</td>
<td class="px-4 py-3 font-bold">
<p class="font-bold text-white flex items-center"><i class="fas fa-brain pr-2"></i>{{.}}</p>
</td>
<td class="px-4 py-3 font-bold">
<span class="inline-block bg-yellow-500 text-white py-1 px-3 rounded-full text-xs">
auto
</span>
</td>
<td class="px-4 py-3">
<span class="float-right inline-block bg-red-800 text-white py-1 px-3 rounded-full text-xs">
No Configuration
</span>
</td>
<div class="bg-gray-800/90 border border-gray-700/50 rounded-xl overflow-hidden transition-all duration-300 hover:shadow-lg hover:shadow-blue-900/20 hover:-translate-y-1 hover:border-blue-700/50">
<div class="flex p-5">
<div class="w-20 h-20 rounded-lg overflow-hidden flex-shrink-0 bg-gray-700/50 flex items-center justify-center">
<img src="{{$noicon}}" class="w-full h-full object-contain" alt="Model icon">
</div>
<div class="ml-4 flex-1 min-w-0">
<div class="flex items-center">
<h3 class="font-bold text-lg text-white truncate"><i class="fas fa-brain mr-2 text-gray-400"></i>{{.}}</h3>
</div>
<div class="mt-2 flex flex-wrap gap-2">
<span class="inline-flex items-center px-2.5 py-0.5 rounded-md text-xs font-medium bg-yellow-900/50 text-yellow-300 border border-yellow-700/50">
auto
</span>
<span class="inline-flex items-center px-2.5 py-0.5 rounded-md text-xs font-medium bg-red-900/50 text-red-300 border border-red-700/50">
No Configuration
</span>
</div>
</div>
</div>
</div>
{{end}}
</tbody>
</table>
</div>
{{ end }}
</div>
</div>
@@ -117,4 +189,4 @@
</div>
</body>
</html>
</html>

View File

@@ -2,84 +2,183 @@
<html lang="en">
{{template "views/partials/head" .}}
<body class="bg-gray-900 text-gray-200">
<body class="bg-gradient-to-br from-gray-900 to-gray-950 text-gray-200">
<div class="flex flex-col min-h-screen">
{{template "views/partials/navbar" .}}
<div class="container mx-auto px-4 flex-grow">
{{ $numModelsPerPage := 21 }}
<div class="container mx-auto px-4 py-8 flex-grow">
<div class="models mt-12">
<h2 class="text-center text-3xl font-semibold text-gray-100">
🖼️ Available {{.AvailableModels}} models from <i>{{ len .Repositories }}</i> repositories <a href="https://localai.io/models/" target="_blank" >
<i class="fas fa-circle-info pr-2"></i>
</a></h2>
<div class="text-center font-semibold text-gray-100">
<h2>Filter by type:</h2>
<button hx-post="browse/search/models"
class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
hx-target="#search-results"
hx-vals='{"search": "tts"}'
hx-indicator=".htmx-indicator" >TTS</button>
<button hx-post="browse/search/models"
class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
hx-target="#search-results"
hx-vals='{"search": "stablediffusion"}'
hx-indicator=".htmx-indicator" >Image generation</button>
<button hx-post="browse/search/models" \
class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
hx-target="#search-results"
hx-vals='{"search": "llm"}'
hx-indicator=".htmx-indicator" >Text generation</button>
<button hx-post="browse/search/models"
class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
hx-target="#search-results"
hx-vals='{"search": "multimodal"}'
hx-indicator=".htmx-indicator" >Multimodal</button>
<button hx-post="browse/search/models"
class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
hx-target="#search-results"
hx-vals='{"search": "embedding"}'
hx-indicator=".htmx-indicator" >Embeddings</button>
<button hx-post="browse/search/models"
class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
hx-target="#search-results"
hx-vals='{"search": "rerank"}'
hx-indicator=".htmx-indicator" >Rerankers</button>
<button
hx-post="browse/search/models"
class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
hx-target="#search-results"
hx-vals='{"search": "whisper"}'
hx-indicator=".htmx-indicator" >Audio transcription</button>
<!-- Hero Header -->
<div class="bg-gradient-to-r from-indigo-900/30 to-purple-900/30 rounded-2xl shadow-xl p-6 mb-8">
<div class="max-w-4xl mx-auto text-center">
<h1 class="text-3xl md:text-4xl font-bold text-white mb-3">
<span class="bg-clip-text text-transparent bg-gradient-to-r from-indigo-400 to-purple-400">
Model Gallery
</span>
</h1>
<p class="text-lg text-gray-300 mb-2">
<span class="font-semibold text-indigo-300">{{.AvailableModels}}</span> models from
<span class="font-semibold text-purple-300">{{ len .Repositories }}</span> repositories
<a href="https://localai.io/models/" target="_blank" class="ml-2 text-blue-400 hover:text-blue-300 transition">
<i class="fas fa-circle-info"></i>
</a>
</p>
</div>
</div>
<!-- Search and Filter Section -->
<div class="bg-gray-800/70 rounded-xl p-6 mb-8 shadow-lg border border-gray-700/50">
<!-- Search Input -->
<div class="relative mb-6">
<div class="absolute inset-y-0 start-0 flex items-center ps-3 pointer-events-none">
<i class="fas fa-search text-gray-400"></i>
</div>
<input class="form-control block w-full pl-10 px-4 py-3 text-base font-normal text-gray-300 bg-gray-900/80 bg-clip-padding border border-gray-700/70 rounded-lg transition ease-in-out focus:text-gray-200 focus:bg-gray-900 focus:border-blue-500 focus:ring-1 focus:ring-blue-500/50 focus:outline-none"
type="search"
name="search"
placeholder="Search models by name, tag, or description..."
hx-post="browse/search/models"
hx-trigger="input changed delay:500ms, search"
hx-target="#search-results"
oninput="hidePagination()"
onchange="hidePagination()"
onsearch="hidePagination()"
hx-indicator=".htmx-indicator">
<span class="htmx-indicator absolute right-3 top-3">
<svg class="animate-spin h-5 w-5 text-blue-500" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
</svg>
</span>
</div>
<!-- Filter by Type -->
<div class="mb-4">
<h3 class="text-gray-200 font-medium mb-3">Filter by type:</h3>
<div class="flex flex-wrap gap-2">
<button hx-post="browse/search/models"
class="inline-flex items-center rounded-full px-4 py-2 text-sm font-medium bg-indigo-900/60 text-indigo-200 border border-indigo-700/50 hover:bg-indigo-800 transition duration-200 ease-in-out"
hx-target="#search-results"
hx-vals='{"search": "tts"}'
onclick="hidePagination()"
hx-indicator=".htmx-indicator">
<i class="fas fa-microphone mr-2"></i>TTS
</button>
<button hx-post="browse/search/models"
class="inline-flex items-center rounded-full px-4 py-2 text-sm font-medium bg-purple-900/60 text-purple-200 border border-purple-700/50 hover:bg-purple-800 transition duration-200 ease-in-out"
hx-target="#search-results"
hx-vals='{"search": "stablediffusion"}'
onclick="hidePagination()"
hx-indicator=".htmx-indicator">
<i class="fas fa-image mr-2"></i>Image generation
</button>
<button hx-post="browse/search/models"
class="inline-flex items-center rounded-full px-4 py-2 text-sm font-medium bg-blue-900/60 text-blue-200 border border-blue-700/50 hover:bg-blue-800 transition duration-200 ease-in-out"
hx-target="#search-results"
hx-vals='{"search": "llm"}'
onclick="hidePagination()"
hx-indicator=".htmx-indicator">
<i class="fas fa-comment-alt mr-2"></i>Text generation
</button>
<button hx-post="browse/search/models"
class="inline-flex items-center rounded-full px-4 py-2 text-sm font-medium bg-green-900/60 text-green-200 border border-green-700/50 hover:bg-green-800 transition duration-200 ease-in-out"
hx-target="#search-results"
hx-vals='{"search": "multimodal"}'
onclick="hidePagination()"
hx-indicator=".htmx-indicator">
<i class="fas fa-object-group mr-2"></i>Multimodal
</button>
<button hx-post="browse/search/models"
class="inline-flex items-center rounded-full px-4 py-2 text-sm font-medium bg-cyan-900/60 text-cyan-200 border border-cyan-700/50 hover:bg-cyan-800 transition duration-200 ease-in-out"
hx-target="#search-results"
hx-vals='{"search": "embedding"}'
onclick="hidePagination()"
hx-indicator=".htmx-indicator">
<i class="fas fa-vector-square mr-2"></i>Embeddings
</button>
<button hx-post="browse/search/models"
class="inline-flex items-center rounded-full px-4 py-2 text-sm font-medium bg-amber-900/60 text-amber-200 border border-amber-700/50 hover:bg-amber-800 transition duration-200 ease-in-out"
hx-target="#search-results"
hx-vals='{"search": "rerank"}'
onclick="hidePagination()"
hx-indicator=".htmx-indicator">
<i class="fas fa-sort-amount-up mr-2"></i>Rerankers
</button>
<button hx-post="browse/search/models"
class="inline-flex items-center rounded-full px-4 py-2 text-sm font-medium bg-teal-900/60 text-teal-200 border border-teal-700/50 hover:bg-teal-800 transition duration-200 ease-in-out"
hx-target="#search-results"
hx-vals='{"search": "whisper"}'
onclick="hidePagination()"
hx-indicator=".htmx-indicator">
<i class="fas fa-headphones mr-2"></i>Audio transcription
</button>
</div>
</div>
<!-- Filter by Tags -->
<div class="mt-5">
<h3 class="text-gray-200 font-medium mb-2">Filter by tags:</h3>
<div class="flex flex-wrap gap-2 max-h-24 overflow-y-auto scrollbar-thin scrollbar-thumb-gray-700 scrollbar-track-gray-900 pr-2">
{{ range .AllTags }}
<button hx-post="browse/search/models"
class="inline-flex items-center text-xs px-3 py-1 rounded-full bg-gray-700/60 text-gray-300 border border-gray-600/50 hover:bg-gray-600 hover:text-gray-100 transition duration-200 ease-in-out"
hx-target="#search-results"
hx-vals='{"search": "{{.}}"}'
onclick="hidePagination()"
hx-indicator=".htmx-indicator">
<i class="fas fa-tag text-xs mr-1.5"></i>{{.}}
</button>
{{ end }}
</div>
</div>
<div class="text-center text-xs font-semibold text-gray-100">
Filter by tags:
{{ range .AllTags }}
<button hx-post="browse/search/models" class="text-blue-500" hx-target="#search-results"
hx-vals='{"search": "{{.}}"}'
hx-indicator=".htmx-indicator" >{{.}}</button>
{{ end }}
</div>
<span class="htmx-indicator loader"></span>
{{template "views/partials/inprogress" .}}
<input class="form-control appearance-none block w-full mt-5 px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search"
name="search" placeholder="Begin Typing To Search models..."
hx-post="browse/search/models"
hx-trigger="input changed delay:500ms, search"
hx-target="#search-results"
hx-indicator=".htmx-indicator">
<div id="search-results">{{.Models}}</div>
</div>
</div>
<!-- Results Section -->
<div id="search-results" class="transition-all duration-300">
{{.Models}}
</div>
<!-- Pagination -->
{{ if gt .AvailableModels $numModelsPerPage }}
<div id="paginate" class="flex justify-center mt-8">
<div class="flex items-center gap-4">
<button onclick="window.location.href='browse?page={{.PrevPage}}'"
class="flex items-center justify-center h-10 w-10 bg-gray-800/80 text-gray-300 hover:bg-indigo-900/70 hover:text-white rounded-lg shadow transition duration-300 ease-in-out {{if not .PrevPage}}opacity-50 cursor-not-allowed{{end}}"
{{if not .PrevPage}}disabled{{end}}>
<i class="fas fa-chevron-left"></i>
</button>
<div class="text-gray-400 text-sm">
Page <span class="text-white font-medium">{{add .PrevPage 1}}</span>
</div>
<button onclick="window.location.href='browse?page={{.NextPage}}'"
class="flex items-center justify-center h-10 w-10 bg-gray-800/80 text-gray-300 hover:bg-indigo-900/70 hover:text-white rounded-lg shadow transition duration-300 ease-in-out {{if not .NextPage}}opacity-50 cursor-not-allowed{{end}}"
{{if not .NextPage}}disabled{{end}}>
<i class="fas fa-chevron-right"></i>
</button>
</div>
</div>
{{ end }}
</div>
{{template "views/partials/footer" .}}
</div>
<script>
function hidePagination() {
const paginateDiv = document.getElementById('paginate');
if (paginateDiv) {
paginateDiv.style.display = 'none';
}
}
// Listen for the htmx:afterSwap event to handle cases when the search results are updated
document.body.addEventListener('htmx:afterSwap', function(event) {
if (event.detail.target.id === 'search-results') {
hidePagination();
}
});
</script>
</body>
</html>
</html>

View File

@@ -1,5 +1,5 @@
<footer class="text-center py-8">
LocalAI Version {{.Version}}<br>
<a href='https://github.com/mudler/LocalAI' class="text-blue-400 hover:text-blue-600" target="_blank">LocalAI</a> © 2023-2024 <a href='https://mudler.pm' class="text-blue-400 hover:text-blue-600" target="_blank">Ettore Di Giacinto</a>
<a href='https://github.com/mudler/LocalAI' class="text-blue-400 hover:text-blue-600" target="_blank">LocalAI</a> © 2023-2025 <a href='https://mudler.pm' class="text-blue-400 hover:text-blue-600" target="_blank">Ettore Di Giacinto</a>
</footer>
<script src="static/assets/tw-elements.js"></script>

View File

@@ -1,36 +1,20 @@
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{.Title}}</title>
<base href="{{.BaseURL}}" />
<link rel="icon" type="image/x-icon" href="favicon.ico" />
<link
rel="stylesheet"
href="static/assets/highlightjs.css"
/>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{.Title}}</title>
<base href="{{.BaseURL}}" />
<link rel="icon" type="image/x-icon" href="favicon.ico" />
<link rel="stylesheet" href="static/assets/highlightjs.css" />
<script defer src="static/assets/highlightjs.js"></script>
<script
defer
src="static/assets/alpine.js"
></script>
<script
defer
src="static/assets/marked.js"
></script>
<script
defer
src="static/assets/purify.js"
></script>
<script defer src="static/assets/alpine.js"></script>
<script defer src="static/assets/marked.js"></script>
<script defer src="static/assets/purify.js"></script>
<link href="static/general.css" rel="stylesheet" />
<link href="static/assets/font1.css" rel="stylesheet">
<link
href="static/assets/font2.css"
rel="stylesheet" />
<link
rel="stylesheet"
href="static/assets/tw-elements.css" />
<link href="static/assets/font1.css" rel="stylesheet">
<link href="static/assets/font2.css" rel="stylesheet" />
<link rel="stylesheet" href="static/assets/tw-elements.css" />
<script src="static/assets/tailwindcss.js"></script>
<script>
tailwind.config = {
darkMode: "class",
@@ -48,84 +32,75 @@
function copyClipboard(token) {
navigator.clipboard.writeText(token)
.then(() => {
console.log('Text copied to clipboard:', token);
alert('Text copied to clipboard!');
console.log('Text copied to clipboard:', token);
alert('Text copied to clipboard!');
})
.catch(err => {
console.error('Failed to copy token:', err);
console.error('Failed to copy token:', err);
});
}
</script>
<link href="static/assets/fontawesome/css/fontawesome.css" rel="stylesheet" />
<link href="static/assets/fontawesome/css/brands.css" rel="stylesheet" />
<link href="static/assets/fontawesome/css/solid.css" rel="stylesheet" />
<script src="static/assets/flowbite.min.js"></script>
<script src="static/assets/htmx.js" crossorigin="anonymous"></script>
<!-- P2P Animation START -->
<!-- Example responsive styling improvements -->
<style>
.animation-container {
position: relative;
width: 100%;
height: 25vh;
display: flex;
justify-content: center;
align-items: center;
overflow: hidden;
position: relative;
width: 100%;
height: 25vh;
display: flex;
justify-content: center;
align-items: center;
overflow: hidden;
}
canvas {
position: absolute;
top: 0;
left: 0;
position: absolute;
top: 0;
left: 0;
}
.text-overlay {
position: absolute;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
text-align: center;
z-index: 1;
position: absolute;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
text-align: center;
z-index: 1;
}
.fa-circle-nodes {
animation: rotateCircleNodes 8s linear infinite;
display: inline-block;
}
@keyframes rotateCircleNodes {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
.fa-flask {
animation: shakeFlask 3s ease-in-out infinite;
transform-origin: bottom center;
}
@keyframes shakeFlask {
0%, 10% { transform: rotate(0deg); }
20% { transform: rotate(-10deg); }
30% { transform: rotate(10deg); }
40% { transform: rotate(-8deg); }
50% { transform: rotate(8deg); }
60% { transform: rotate(-5deg); }
70% { transform: rotate(5deg); }
80% { transform: rotate(-2deg); }
90% { transform: rotate(2deg); }
100% { transform: rotate(0deg); }
}
</style>
<!-- P2P Animation END -->
<!-- Flask and node animation -->
<style>
.fa-circle-nodes {
/* font-size: 100px; /* Adjust the size as needed */
animation: rotateCircleNodes 8s linear infinite; /* Slow and fluid rotation */
display: inline-block;
}
@keyframes rotateCircleNodes {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
/* Animation for the warning box */
.fa-flask {
/* font-size: 100px; /* Adjust the size as needed */
animation: shakeFlask 3s ease-in-out infinite; /* Smooth easing and longer duration for fluidity */
transform-origin: bottom center;
}
@keyframes shakeFlask {
0%, 10% { transform: rotate(0deg); } /* Start and end still */
20% { transform: rotate(-10deg); } /* Smooth transition to left */
30% { transform: rotate(10deg); } /* Smooth transition to right */
40% { transform: rotate(-8deg); } /* Smooth transition to left */
50% { transform: rotate(8deg); } /* Smooth transition to right */
60% { transform: rotate(-5deg); } /* Smooth transition to left */
70% { transform: rotate(5deg); } /* Smooth transition to right */
80% { transform: rotate(-2deg); } /* Smooth transition to left */
90% { transform: rotate(2deg); } /* Smooth transition to right */
100% { transform: rotate(0deg); } /* Return to center */
}
</style>
<!-- https://stackoverflow.com/questions/76051980/flowbite-component-not-working-when-loaded-via-htmx-django-project -->
<!-- Initialize Flowbite on HTMX content load -->
<script>
htmx.onLoad(function(content) {
initFlowbite();
})
htmx.onLoad(function(content) {
initFlowbite();
});
</script>
</head>

View File

@@ -7,21 +7,31 @@
<div class="flex flex-col min-h-screen">
{{template "views/partials/navbar" .}}
<div class="container mx-auto px-4 flex-grow " x-data="{ component: 'menu' }">
<div class="container mx-auto px-4 py-8 flex-grow " x-data="{ component: 'menu' }">
<!-- Hero Header -->
<div class="bg-gradient-to-r from-indigo-900/30 to-purple-900/30 rounded-2xl shadow-xl p-6 mb-8">
<div class="max-w-4xl mx-auto text-center">
<h1 class="text-3xl md:text-4xl font-bold text-white mb-3">
<span class="bg-clip-text text-transparent bg-gradient-to-r from-indigo-400 to-purple-400">
Image generation {{ if .Model }} with {{.Model}} {{ end }}
</span>
</h1>
<div class="flex flex-wrap justify-center gap-4">
<a href="https://localai.io/features/image-generation/" target="_blank"
class="group flex items-center bg-blue-600 hover:bg-blue-700 text-white py-2 px-6 rounded-lg transition duration-300 ease-in-out transform hover:scale-105 hover:shadow-lg">
<i class="fas fa-book-reader mr-2"></i>
<span>Documentation</span>
<i class="fas fa-arrow-right opacity-0 group-hover:opacity-100 group-hover:translate-x-2 ml-2 transition-all duration-300"></i>
</a>
</div>
</div>
</div>
<div class="mt-12">
<div class="flex items-center justify-center text-center pb-2">
<span class="text-3xl font-semibold text-gray-100">
🖼️ Text to Image
<a href="https://localai.io/features/image-generation" target="_blank" >
<i class="fas fa-circle-info pr-2"></i>
</a>
</span>
</div>
<div class="text-center font-semibold text-gray-100">
<div class="text-center font-semibold text-gray-100">
<div class="flex items-center justify-between">
<div x-show="component === 'menu'" id="menu">
@@ -49,18 +59,22 @@
<option value="" disabled class="text-gray-400" >Select a model</option>
{{ $model:=.Model}}
{{ range .ModelsConfig }}
{{ if eq .Name $model }}
<option value="text2image/{{.Name}}" selected class="bg-gray-700 text-white">{{.Name}}</option>
{{ else }}
<option value="text2image/{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
{{ end }}
{{ $cfg := . }}
{{ range .KnownUsecaseStrings }}
{{ if eq . "FLAG_IMAGE" }}
<option value="text2image/{{$cfg.Name}}" {{ if eq $cfg.Name $model }} selected {{end}} class="bg-gray-700 text-white">{{$cfg.Name}}</option>
{{ end }}
{{ end }}
{{ end }}
{{ range .ModelsWithoutConfig }}
<option value="text2image/{{.}}" {{ if eq . $model }} selected {{ end }} class="bg-gray-700 text-white">{{.}}</option>
{{end}}
</select>
</div>
</div>
<div class="mt-12">
<div class="mt-12 relative">
<input id="image-model" type="hidden" value="{{.Model}}">
<form id="genimage" action="text2image/{{.Model}}" method="get">
<input
@@ -69,13 +83,16 @@
name="input"
placeholder="Prompt…"
autocomplete="off"
class="p-2 border rounded w-full bg-gray-600 text-white placeholder-gray-300"
class="form-control block w-full pl-10 px-4 py-3 text-base font-normal text-gray-300 bg-gray-900/80 bg-clip-padding border border-gray-700/70 rounded-lg transition ease-in-out focus:text-gray-200 focus:bg-gray-900 focus:border-blue-500 focus:ring-1 focus:ring-blue-500/50 focus:outline-none"
required
/>
<span id="loader" class="my-2 loader absolute right-3 top-2" >
<svg class="animate-spin h-5 w-5 text-blue-500" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
</svg>
</span>
</form>
<div class="container max-w-screen-lg mx-auto mt-4 pb-10 flex justify-center">
<div id="loader" class="my-2 loader" ></div>
</div>
<div class="container max-w-screen-lg mx-auto mt-4 pb-10 flex justify-center">
<div id="result" class="mx-auto"></div>
</div>

Some files were not shown because too many files have changed in this diff.