chore(deps): bump llama.cpp to '92bb442ad999a0d52df0af2730cd861012e8ac5c'

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
chore(importers): small logic enhancements (#7262 )
2026-05-19 14:17:21 -04:00 · 2025-11-12 22:20:15 +01:00 · 2025-11-12 22:08:08 +01:00 · 2025-11-12 20:48:56 +01:00 · 2025-11-12 09:20:54 +01:00 · 2025-11-12 09:14:09 +01:00
95 changed files with 5313 additions and 2121 deletions
--- a/.air.toml
+++ b/.air.toml
@@ -0,0 +1,8 @@
+# .air.toml
+[build]
+cmd = "make build"
+bin = "./local-ai"
+args_bin = [ "--debug" ]
+include_ext = ["go", "html", "yaml", "toml", "json", "txt", "md"]
+exclude_dir = ["pkg/grpc/proto"]
+delay = 1000
--- a/.github/gallery-agent/agent.go
+++ b/.github/gallery-agent/agent.go
@@ -7,8 +7,8 @@ import (
 	"slices"
 	"strings"

-	"github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
-	"github.com/mudler/cogito"
+	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
+	cogito "github.com/mudler/cogito"

 	"github.com/mudler/cogito/structures"
 	"github.com/sashabaranov/go-openai/jsonschema"
--- a/.github/gallery-agent/go.mod
+++ b/.github/gallery-agent/go.mod
@@ -1,39 +0,0 @@
-module github.com/go-skynet/LocalAI/.github/gallery-agent
-
-go 1.24.1
-
-require (
-	github.com/mudler/cogito v0.3.0
-	github.com/onsi/ginkgo/v2 v2.25.3
-	github.com/onsi/gomega v1.38.2
-	github.com/sashabaranov/go-openai v1.41.2
-	github.com/tmc/langchaingo v0.1.13
-	gopkg.in/yaml.v3 v3.0.1
-)
-
-require (
-	dario.cat/mergo v1.0.1 // indirect
-	github.com/Masterminds/goutils v1.1.1 // indirect
-	github.com/Masterminds/semver/v3 v3.4.0 // indirect
-	github.com/Masterminds/sprig/v3 v3.3.0 // indirect
-	github.com/go-logr/logr v1.4.3 // indirect
-	github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
-	github.com/google/go-cmp v0.7.0 // indirect
-	github.com/google/jsonschema-go v0.3.0 // indirect
-	github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
-	github.com/google/uuid v1.6.0 // indirect
-	github.com/huandu/xstrings v1.5.0 // indirect
-	github.com/mitchellh/copystructure v1.2.0 // indirect
-	github.com/mitchellh/reflectwalk v1.0.2 // indirect
-	github.com/modelcontextprotocol/go-sdk v1.0.0 // indirect
-	github.com/shopspring/decimal v1.4.0 // indirect
-	github.com/spf13/cast v1.7.0 // indirect
-	github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
-	go.uber.org/automaxprocs v1.6.0 // indirect
-	go.yaml.in/yaml/v3 v3.0.4 // indirect
-	golang.org/x/crypto v0.41.0 // indirect
-	golang.org/x/net v0.43.0 // indirect
-	golang.org/x/sys v0.35.0 // indirect
-	golang.org/x/text v0.28.0 // indirect
-	golang.org/x/tools v0.36.0 // indirect
-)
--- a/.github/gallery-agent/go.sum
+++ b/.github/gallery-agent/go.sum
@@ -1,168 +0,0 @@
-dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
-dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
-github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
-github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
-github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
-github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
-github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
-github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
-github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=
-github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
-github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
-github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
-github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4=
-github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM=
-github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
-github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
-github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
-github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
-github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk=
-github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
-github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
-github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A=
-github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw=
-github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA=
-github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc=
-github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
-github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
-github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
-github.com/docker/docker v28.2.2+incompatible h1:CjwRSksz8Yo4+RmQ339Dp/D2tGO5JxwYeqtMOEe0LDw=
-github.com/docker/docker v28.2.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
-github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c=
-github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
-github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
-github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
-github.com/ebitengine/purego v0.8.4 h1:CF7LEKg5FFOsASUj0+QwaXf8Ht6TlFxg09+S9wz0omw=
-github.com/ebitengine/purego v0.8.4/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
-github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
-github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
-github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
-github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
-github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
-github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
-github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
-github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
-github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
-github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
-github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
-github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
-github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
-github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
-github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
-github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
-github.com/google/jsonschema-go v0.3.0 h1:6AH2TxVNtk3IlvkkhjrtbUc4S8AvO0Xii0DxIygDg+Q=
-github.com/google/jsonschema-go v0.3.0/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE=
-github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8=
-github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
-github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
-github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
-github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
-github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
-github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
-github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
-github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
-github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
-github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
-github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
-github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
-github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
-github.com/magiconair/properties v1.8.10 h1:s31yESBquKXCV9a/ScB3ESkOjUYYv+X0rg8SYxI99mE=
-github.com/magiconair/properties v1.8.10/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
-github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
-github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
-github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
-github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
-github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
-github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
-github.com/moby/go-archive v0.1.0 h1:Kk/5rdW/g+H8NHdJW2gsXyZ7UnzvJNOy6VKJqueWdcQ=
-github.com/moby/go-archive v0.1.0/go.mod h1:G9B+YoujNohJmrIYFBpSd54GTUB4lt9S+xVQvsJyFuo=
-github.com/moby/patternmatcher v0.6.0 h1:GmP9lR19aU5GqSSFko+5pRqHi+Ohk1O69aFiKkVGiPk=
-github.com/moby/patternmatcher v0.6.0/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc=
-github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU=
-github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko=
-github.com/moby/sys/user v0.4.0 h1:jhcMKit7SA80hivmFJcbB1vqmw//wU61Zdui2eQXuMs=
-github.com/moby/sys/user v0.4.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs=
-github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g=
-github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28=
-github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
-github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
-github.com/modelcontextprotocol/go-sdk v1.0.0 h1:Z4MSjLi38bTgLrd/LjSmofqRqyBiVKRyQSJgw8q8V74=
-github.com/modelcontextprotocol/go-sdk v1.0.0/go.mod h1:nYtYQroQ2KQiM0/SbyEPUWQ6xs4B95gJjEalc9AQyOs=
-github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
-github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
-github.com/mudler/cogito v0.3.0 h1:NbVAO3bLkK5oGSY0xq87jlz8C9OIsLW55s+8Hfzeu9s=
-github.com/mudler/cogito v0.3.0/go.mod h1:abMwl+CUjCp87IufA2quZdZt0bbLaHHN79o17HbUKxU=
-github.com/onsi/ginkgo/v2 v2.25.3 h1:Ty8+Yi/ayDAGtk4XxmmfUy4GabvM+MegeB4cDLRi6nw=
-github.com/onsi/ginkgo/v2 v2.25.3/go.mod h1:43uiyQC4Ed2tkOzLsEYm7hnrb7UJTWHYNsuy3bG/snE=
-github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
-github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k=
-github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
-github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
-github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
-github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
-github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
-github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
-github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
-github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw=
-github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
-github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
-github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
-github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
-github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
-github.com/sashabaranov/go-openai v1.41.2 h1:vfPRBZNMpnqu8ELsclWcAvF19lDNgh1t6TVfFFOPiSM=
-github.com/sashabaranov/go-openai v1.41.2/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
-github.com/shirou/gopsutil/v4 v4.25.5 h1:rtd9piuSMGeU8g1RMXjZs9y9luK5BwtnG7dZaQUJAsc=
-github.com/shirou/gopsutil/v4 v4.25.5/go.mod h1:PfybzyydfZcN+JMMjkF6Zb8Mq1A/VcogFFg7hj50W9c=
-github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
-github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
-github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
-github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
-github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w=
-github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
-github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
-github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
-github.com/testcontainers/testcontainers-go v0.38.0 h1:d7uEapLcv2P8AvH8ahLqDMMxda2W9gQN1nRbHS28HBw=
-github.com/testcontainers/testcontainers-go v0.38.0/go.mod h1:C52c9MoHpWO+C4aqmgSU+hxlR5jlEayWtgYrb8Pzz1w=
-github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU=
-github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
-github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk=
-github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
-github.com/tmc/langchaingo v0.1.13 h1:rcpMWBIi2y3B90XxfE4Ao8dhCQPVDMaNPnN5cGB1CaA=
-github.com/tmc/langchaingo v0.1.13/go.mod h1:vpQ5NOIhpzxDfTZK9B6tf2GM/MoaHewPWM5KXXGh7hg=
-github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4=
-github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4=
-github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
-github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
-go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
-go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0 h1:Xs2Ncz0gNihqu9iosIZ5SkBbWo5T8JhhLJFMQL1qmLI=
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0/go.mod h1:vy+2G/6NvVMpwGX/NyLqcC41fxepnuKHk16E6IZUcJc=
-go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8=
-go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM=
-go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA=
-go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI=
-go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE=
-go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs=
-go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
-go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=
-go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
-go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
-golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4=
-golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc=
-golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
-golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
-golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
-golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
-golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
-golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
-golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg=
-golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
-google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
-google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
-gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
-gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
-gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/.github/gallery-agent/main.go
+++ b/.github/gallery-agent/main.go
@@ -9,7 +9,7 @@ import (
 	"strings"
 	"time"

-	"github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
+	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
 )

 // ProcessedModelFile represents a processed model file with additional metadata
--- a/.github/gallery-agent/tools.go
+++ b/.github/gallery-agent/tools.go
@@ -3,9 +3,9 @@ package main
 import (
 	"fmt"

-	"github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
-	"github.com/sashabaranov/go-openai"
-	"github.com/tmc/langchaingo/jsonschema"
+	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
+	openai "github.com/sashabaranov/go-openai"
+	jsonschema "github.com/sashabaranov/go-openai/jsonschema"
 )

 // Get repository README from HF
@@ -13,7 +13,7 @@ type HFReadmeTool struct {
 	client *hfapi.Client
 }

-func (s *HFReadmeTool) Run(args map[string]any) (string, error) {
+func (s *HFReadmeTool) Execute(args map[string]any) (string, error) {
 	q, ok := args["repository"].(string)
 	if !ok {
 		return "", fmt.Errorf("no query")
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -1,10 +1,10 @@
-name: Bump dependencies
+name: Bump Backend dependencies
 on:
  schedule:
    - cron: 0 20 * * *
  workflow_dispatch:
 jobs:
-  bump:
+  bump-backends:
    strategy:
      fail-fast: false
      matrix:
--- a/.github/workflows/bump_docs.yaml
+++ b/.github/workflows/bump_docs.yaml
@@ -1,10 +1,10 @@
-name: Bump dependencies
+name: Bump Documentation
 on:
  schedule:
    - cron: 0 20 * * *
  workflow_dispatch:
 jobs:
-  bump:
+  bump-docs:
    strategy:
      fail-fast: false
      matrix:
--- a/.github/workflows/deploy-explorer.yaml
+++ b/.github/workflows/deploy-explorer.yaml
@@ -33,7 +33,7 @@ jobs:
        run: |
          CGO_ENABLED=0 make build
      - name: rm
-        uses: appleboy/ssh-action@v1.2.2
+        uses: appleboy/ssh-action@v1.2.3
        with:
            host: ${{ secrets.EXPLORER_SSH_HOST }}
            username: ${{ secrets.EXPLORER_SSH_USERNAME }}
@@ -53,7 +53,7 @@ jobs:
            rm: true
            target: ./local-ai
      - name: restarting
-        uses: appleboy/ssh-action@v1.2.2
+        uses: appleboy/ssh-action@v1.2.3
        with:
            host: ${{ secrets.EXPLORER_SSH_HOST }}
            username: ${{ secrets.EXPLORER_SSH_USERNAME }}
--- a/.github/workflows/gallery-agent.yaml
+++ b/.github/workflows/gallery-agent.yaml
@@ -2,7 +2,7 @@ name: Gallery Agent
 on:

  schedule:
-    - cron: '0 */1 * * *'  # Run every 4 hours
+    - cron: '0 */3 * * *'  # Run every 3 hours
  workflow_dispatch:
    inputs:
      search_term:
@@ -39,11 +39,6 @@ jobs:
        with:
          go-version: '1.21'

-      - name: Build gallery agent
-        run: |
-          cd .github/gallery-agent
-          go mod download
-          go build -o gallery-agent .

      - name: Run gallery agent
        env:
@@ -56,9 +51,7 @@ jobs:
          MAX_MODELS: ${{ github.event.inputs.max_models || '1' }}
        run: |
          export GALLERY_INDEX_PATH=$PWD/gallery/index.yaml
-          cd .github/gallery-agent
-          ./gallery-agent
-          rm -rf gallery-agent
+          go run .github/gallery-agent

      - name: Check for changes
        id: check_changes
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -30,6 +30,7 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
 3. Install the required dependencies ( see https://localai.io/basics/build/#build-localai-locally )
 4. Build LocalAI: `make build`
 5. Run LocalAI: `./local-ai`
+6. To build and run with live reload: `make build-dev`

 ## Contributing

@@ -76,7 +77,7 @@ LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio make run-e2e-aio
 ## Documentation

 We are welcome the contribution of the documents, please open new PR or create a new issue. The documentation is available under `docs/` https://github.com/mudler/LocalAI/tree/master/docs
- 
+
 ## Community and Communication

 - You can reach out via the Github issue tracker.
--- a/Makefile
+++ b/Makefile
@@ -103,6 +103,10 @@ build-launcher: ## Build the launcher application

 build-all: build build-launcher ## Build both server and launcher

+build-dev: ## Run LocalAI in dev mode with live reload
+	@command -v air >/dev/null 2>&1 || go install github.com/air-verse/air@latest
+	air -c .air.toml
+
 dev-dist:
 	$(GORELEASER) build --snapshot --clean

--- a/README.md
+++ b/README.md
@@ -43,7 +43,7 @@

 > :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
 >
-> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on 
+> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on 
 [![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white)](https://t.me/localaiofficial_bot)

 [![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)
@@ -116,6 +116,8 @@ For more installation options, see [Installer Options](https://localai.io/docs/a
  <img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
 </a>

+> Note: the DMGs are not signed by Apple, so macOS flags them as quarantined. See https://github.com/mudler/LocalAI/issues/6268 for a workaround; the fix is tracked here: https://github.com/mudler/LocalAI/issues/6244
+
 Or run with docker:

 > **💡 Docker Run vs Docker Start**
@@ -200,7 +202,7 @@ local-ai run oci://localai/phi-2:latest

 > ⚡ **Automatic Backend Detection**: When you install models from the gallery or YAML files, LocalAI automatically detects your system's GPU capabilities (NVIDIA, AMD, Intel) and downloads the appropriate backend. For advanced configuration options, see [GPU Acceleration](https://localai.io/features/gpu-acceleration/#automatic-backend-detection).

-For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html)
+For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html). If you are interested in our roadmap items and future enhancements, see the [Issues labeled as Roadmap here](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap).

 ## 📰 Latest project news

--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -154,6 +154,8 @@ message PredictOptions {
  repeated string Videos = 45;
  repeated string Audios = 46;
  string CorrelationId = 47;
+  string Tools = 48;  // JSON array of available tools/functions for tool calling
+  string ToolChoice = 49;  // JSON string or object specifying tool choice behavior
 }

 // The response message containing the result
@@ -382,6 +384,11 @@ message StatusResponse {
 message Message {
  string role = 1;
  string content = 2;
+  // Optional fields for OpenAI-compatible message format
+  string name = 3;                    // Tool name (for tool messages)
+  string tool_call_id = 4;            // Tool call ID (for tool messages)
+  string reasoning_content = 5;       // Reasoning content (for thinking models)
+  string tool_calls = 6;              // Tool calls as JSON string (for assistant messages with tool calls)
 }

 message DetectOptions {
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,5 +1,5 @@

-LLAMA_VERSION?=5a4ff43e7dd049e35942bc3d12361dab2f155544
+LLAMA_VERSION?=92bb442ad999a0d52df0af2730cd861012e8ac5c
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp

 CMAKE_ARGS?=
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
--- a/backend/go/whisper/Makefile
+++ b/backend/go/whisper/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
-WHISPER_CPP_VERSION?=f16c12f3f55f5bd3d6ac8cf2f31ab90a42c884d5
+WHISPER_CPP_VERSION?=a1867e0dad0b21b35afa43fc815dae60c9a139d6
 SO_TARGET?=libgowhisper.so

 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
--- a/backend/python/chatterbox/requirements-cpu.txt
+++ b/backend/python/chatterbox/requirements-cpu.txt
@@ -2,6 +2,7 @@
 accelerate
 torch
 torchaudio
+numpy>=1.24.0,<1.26.0
 transformers
 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
 chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
--- a/backend/python/chatterbox/requirements-cublas11.txt
+++ b/backend/python/chatterbox/requirements-cublas11.txt
@@ -2,6 +2,7 @@
 torch==2.6.0+cu118
 torchaudio==2.6.0+cu118
 transformers==4.46.3
+numpy>=1.24.0,<1.26.0
 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
 chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
 accelerate
--- a/backend/python/chatterbox/requirements-cublas12.txt
+++ b/backend/python/chatterbox/requirements-cublas12.txt
@@ -1,6 +1,7 @@
 torch
 torchaudio
 transformers
+numpy>=1.24.0,<1.26.0
 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
 chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
 accelerate
--- a/backend/python/chatterbox/requirements-hipblas.txt
+++ b/backend/python/chatterbox/requirements-hipblas.txt
@@ -2,6 +2,7 @@
 torch==2.6.0+rocm6.1
 torchaudio==2.6.0+rocm6.1
 transformers
+numpy>=1.24.0,<1.26.0
 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
 chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
 accelerate
--- a/backend/python/chatterbox/requirements-intel.txt
+++ b/backend/python/chatterbox/requirements-intel.txt
@@ -3,6 +3,7 @@ intel-extension-for-pytorch==2.3.110+xpu
 torch==2.3.1+cxx11.abi
 torchaudio==2.3.1+cxx11.abi
 transformers
+numpy>=1.24.0,<1.26.0
 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
 chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
 accelerate
--- a/backend/python/chatterbox/requirements-l4t.txt
+++ b/backend/python/chatterbox/requirements-l4t.txt
@@ -2,5 +2,6 @@
 torch
 torchaudio
 transformers
+numpy>=1.24.0,<1.26.0
 chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
 accelerate
--- a/backend/python/rerankers/backend.py
+++ b/backend/python/rerankers/backend.py
@@ -61,7 +61,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
            if request.PipelineType != "": # Reuse the PipelineType field for language
                kwargs['lang'] = request.PipelineType
            self.model_name = model_name
-            self.model = Reranker(model_name, **kwargs)  
+            self.model = Reranker(model_name, **kwargs)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

@@ -75,12 +75,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
            documents.append(doc)
        ranked_results=self.model.rank(query=request.query, docs=documents, doc_ids=list(range(len(request.documents))))
        # Prepare results to return
+        cropped_results = ranked_results.top_k(request.top_n) if request.top_n > 0 else ranked_results
        results = [
            backend_pb2.DocumentResult(
                index=res.doc_id,
                text=res.text,
                relevance_score=res.score
-            ) for res in ranked_results.results
+            ) for res in (cropped_results)
        ]

        # Calculate the usage and total tokens
--- a/backend/python/rerankers/test.py
+++ b/backend/python/rerankers/test.py
@@ -76,7 +76,7 @@ class TestBackendServicer(unittest.TestCase):
                )
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
                self.assertTrue(response.success)
-               
+
                rerank_response = stub.Rerank(request)
                print(rerank_response.results[0])
                self.assertIsNotNone(rerank_response.results)
@@ -87,4 +87,60 @@ class TestBackendServicer(unittest.TestCase):
            print(err)
            self.fail("Reranker service failed")
        finally:
-            self.tearDown()
+            self.tearDown()
+
+    def test_rerank_omit_top_n(self):
+        """
+        This method tests that reranking returns every document when top_n is omitted (left at 0)
+        """
+        try:
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                request = backend_pb2.RerankRequest(
+                    query="I love you",
+                    documents=["I hate you", "I really like you"],
+                    top_n=0  # same as omitting top_n: the results must not be cropped
+                )
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
+                self.assertTrue(response.success)
+
+                rerank_response = stub.Rerank(request)
+                print(rerank_response.results[0])
+                self.assertIsNotNone(rerank_response.results)
+                self.assertEqual(len(rerank_response.results), 2)
+                self.assertEqual(rerank_response.results[0].text, "I really like you")
+                self.assertEqual(rerank_response.results[1].text, "I hate you")
+        except Exception as err:
+            print(err)
+            self.fail("Reranker service failed")
+        finally:
+            self.tearDown()
+
+    def test_rerank_crop(self):
+        """
+        This method tests top_n cropping
+        """
+        try:
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                request = backend_pb2.RerankRequest(
+                    query="I love you",
+                    documents=["I hate you", "I really like you", "I hate ignoring top_n"],
+                    top_n=2
+                )
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
+                self.assertTrue(response.success)
+
+                rerank_response = stub.Rerank(request)
+                print(rerank_response.results[0])
+                self.assertIsNotNone(rerank_response.results)
+                self.assertEqual(len(rerank_response.results), 2)
+                self.assertEqual(rerank_response.results[0].text, "I really like you")
+                self.assertEqual(rerank_response.results[1].text, "I hate you")
+        except Exception as err:
+            print(err)
+            self.fail("Reranker service failed")
+        finally:
+            self.tearDown()
--- a/core/application/startup.go
+++ b/core/application/startup.go
@@ -22,9 +22,15 @@ func New(opts ...config.AppOption) (*Application, error) {

 	log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.SystemState.Model.ModelsPath)
 	log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
+
+	if err := application.start(); err != nil {
+		return nil, err
+	}
+
 	caps, err := xsysinfo.CPUCapabilities()
 	if err == nil {
 		log.Debug().Msgf("CPU capabilities: %v", caps)
+
 	}
 	gpus, err := xsysinfo.GPUs()
 	if err == nil {
@@ -56,7 +62,7 @@ func New(opts ...config.AppOption) (*Application, error) {
 		}
 	}

-	if err := coreStartup.InstallModels(options.Galleries, options.BackendGalleries, options.SystemState, application.ModelLoader(), options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil {
+	if err := coreStartup.InstallModels(application.GalleryService(), options.Galleries, options.BackendGalleries, options.SystemState, application.ModelLoader(), options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil {
 		log.Error().Err(err).Msg("error installing models")
 	}

@@ -152,10 +158,6 @@ func New(opts ...config.AppOption) (*Application, error) {
 	// Watch the configuration directory
 	startWatcher(options)

-	if err := application.start(); err != nil {
-		return nil, err
-	}
-
 	log.Info().Msg("core/startup process completed!")
 	return application, nil
 }
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@@ -2,8 +2,6 @@ package backend

 import (
 	"context"
-	"encoding/json"
-	"fmt"
 	"regexp"
 	"slices"
 	"strings"
@@ -35,7 +33,7 @@ type TokenUsage struct {
 	TimingTokenGeneration  float64
 }

-func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c *config.ModelConfig, cl *config.ModelConfigLoader, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
+func ModelInference(ctx context.Context, s string, messages schema.Messages, images, videos, audios []string, loader *model.ModelLoader, c *config.ModelConfig, cl *config.ModelConfigLoader, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool, tools string, toolChoice string) (func() (LLMResponse, error), error) {
 	modelFile := c.Model

 	// Check if the modelFile exists, if it doesn't try to load it from the gallery
@@ -65,29 +63,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
 	var protoMessages []*proto.Message
 	// if we are using the tokenizer template, we need to convert the messages to proto messages
 	// unless the prompt has already been tokenized (non-chat endpoints + functions)
-	if c.TemplateConfig.UseTokenizerTemplate && s == "" {
-		protoMessages = make([]*proto.Message, len(messages), len(messages))
-		for i, message := range messages {
-			protoMessages[i] = &proto.Message{
-				Role: message.Role,
-			}
-			switch ct := message.Content.(type) {
-			case string:
-				protoMessages[i].Content = ct
-			case []interface{}:
-				// If using the tokenizer template, in case of multimodal we want to keep the multimodal content as and return only strings here
-				data, _ := json.Marshal(ct)
-				resultData := []struct {
-					Text string `json:"text"`
-				}{}
-				json.Unmarshal(data, &resultData)
-				for _, r := range resultData {
-					protoMessages[i].Content += r.Text
-				}
-			default:
-				return nil, fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct)
-			}
-		}
+	if c.TemplateConfig.UseTokenizerTemplate && len(messages) > 0 {
+		protoMessages = messages.ToProto()
 	}

 	// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
@@ -99,6 +76,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
 		opts.Images = images
 		opts.Videos = videos
 		opts.Audios = audios
+		opts.Tools = tools
+		opts.ToolChoice = toolChoice

 		tokenUsage := TokenUsage{}

--- a/core/cli/models.go
+++ b/core/cli/models.go
@@ -1,12 +1,14 @@
 package cli

 import (
+	"context"
 	"encoding/json"
 	"errors"
 	"fmt"

 	cliContext "github.com/mudler/LocalAI/core/cli/context"
 	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/services"

 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/core/startup"
@@ -78,6 +80,12 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
 		return err
 	}

+	galleryService := services.NewGalleryService(&config.ApplicationConfig{}, model.NewModelLoader(systemState, true))
+	err = galleryService.Start(context.Background(), config.NewModelConfigLoader(mi.ModelsPath), systemState)
+	if err != nil {
+		return err
+	}
+
 	var galleries []config.Gallery
 	if err := json.Unmarshal([]byte(mi.Galleries), &galleries); err != nil {
 		log.Error().Err(err).Msg("unable to load galleries")
@@ -127,7 +135,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
 		}

 		modelLoader := model.NewModelLoader(systemState, true)
-		err = startup.InstallModels(galleries, backendGalleries, systemState, modelLoader, !mi.DisablePredownloadScan, mi.AutoloadBackendGalleries, progressCallback, modelName)
+		err = startup.InstallModels(galleryService, galleries, backendGalleries, systemState, modelLoader, !mi.DisablePredownloadScan, mi.AutoloadBackendGalleries, progressCallback, modelName)
 		if err != nil {
 			return err
 		}
--- a/core/config/gguf.go
+++ b/core/config/gguf.go
@@ -1,151 +1,17 @@
 package config

 import (
-	"strings"
-
 	"github.com/mudler/LocalAI/pkg/xsysinfo"
 	"github.com/rs/zerolog/log"

 	gguf "github.com/gpustack/gguf-parser-go"
 )

-type familyType uint8
-
-const (
-	Unknown familyType = iota
-	LLaMa3
-	CommandR
-	Phi3
-	ChatML
-	Mistral03
-	Gemma
-	DeepSeek2
-)
-
 const (
 	defaultContextSize = 1024
 	defaultNGPULayers  = 99999999
 )

-type settingsConfig struct {
-	StopWords      []string
-	TemplateConfig TemplateConfig
-	RepeatPenalty  float64
-}
-
-// default settings to adopt with a given model family
-var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{
-	Gemma: {
-		RepeatPenalty: 1.0,
-		StopWords:     []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
-		TemplateConfig: TemplateConfig{
-			Chat:        "{{.Input }}\n<start_of_turn>model\n",
-			ChatMessage: "<start_of_turn>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<end_of_turn>",
-			Completion:  "{{.Input}}",
-		},
-	},
-	DeepSeek2: {
-		StopWords: []string{"<｜end▁of▁sentence｜>"},
-		TemplateConfig: TemplateConfig{
-			ChatMessage: `{{if eq .RoleName "user" -}}User: {{.Content }}
-{{ end -}}
-{{if eq .RoleName "assistant" -}}Assistant: {{.Content}}<｜end▁of▁sentence｜>{{end}}
-{{if eq .RoleName "system" -}}{{.Content}}
-{{end -}}`,
-			Chat: "{{.Input -}}\nAssistant: ",
-		},
-	},
-	LLaMa3: {
-		StopWords: []string{"<|eot_id|>"},
-		TemplateConfig: TemplateConfig{
-			Chat:        "<|begin_of_text|>{{.Input }}\n<|start_header_id|>assistant<|end_header_id|>",
-			ChatMessage: "<|start_header_id|>{{ .RoleName }}<|end_header_id|>\n\n{{.Content }}<|eot_id|>",
-		},
-	},
-	CommandR: {
-		TemplateConfig: TemplateConfig{
-			Chat: "{{.Input -}}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
-			Functions: `<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>
-You are a function calling AI model, you can call the following functions:
-## Available Tools
-{{range .Functions}}
- {"type": "function", "function": {"name": "{{.Name}}", "description": "{{.Description}}", "parameters": {{toJson .Parameters}} }}
-{{end}}
-When using a tool, reply with JSON, for instance {"name": "tool_name", "arguments": {"param1": "value1", "param2": "value2"}}
-<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{.Input -}}`,
-			ChatMessage: `{{if eq .RoleName "user" -}}
-<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
-{{- else if eq .RoleName "system" -}}
-<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
-{{- else if eq .RoleName "assistant" -}}
-<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
-{{- else if eq .RoleName "tool" -}}
-<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
-{{- else if .FunctionCall -}}
-<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{toJson .FunctionCall}}}<|END_OF_TURN_TOKEN|>
-{{- end -}}`,
-		},
-		StopWords: []string{"<|END_OF_TURN_TOKEN|>"},
-	},
-	Phi3: {
-		TemplateConfig: TemplateConfig{
-			Chat:        "{{.Input}}\n<|assistant|>",
-			ChatMessage: "<|{{ .RoleName }}|>\n{{.Content}}<|end|>",
-			Completion:  "{{.Input}}",
-		},
-		StopWords: []string{"<|end|>", "<|endoftext|>"},
-	},
-	ChatML: {
-		TemplateConfig: TemplateConfig{
-			Chat: "{{.Input -}}\n<|im_start|>assistant",
-			Functions: `<|im_start|>system
-You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
-{{range .Functions}}
-{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
-{{end}}
-For each function call return a json object with function name and arguments
-<|im_end|>
-{{.Input -}}
-<|im_start|>assistant`,
-			ChatMessage: `<|im_start|>{{ .RoleName }}
-{{ if .FunctionCall -}}
-Function call:
-{{ else if eq .RoleName "tool" -}}
-Function response:
-{{ end -}}
-{{ if .Content -}}
-{{.Content }}
-{{ end -}}
-{{ if .FunctionCall -}}
-{{toJson .FunctionCall}}
-{{ end -}}<|im_end|>`,
-		},
-		StopWords: []string{"<|im_end|>", "<dummy32000>", "</s>"},
-	},
-	Mistral03: {
-		TemplateConfig: TemplateConfig{
-			Chat:      "{{.Input -}}",
-			Functions: `[AVAILABLE_TOOLS] [{{range .Functions}}{"type": "function", "function": {"name": "{{.Name}}", "description": "{{.Description}}", "parameters": {{toJson .Parameters}} }}{{end}} ] [/AVAILABLE_TOOLS]{{.Input }}`,
-			ChatMessage: `{{if eq .RoleName "user" -}}
-[INST] {{.Content }} [/INST]
-{{- else if .FunctionCall -}}
-[TOOL_CALLS] {{toJson .FunctionCall}} [/TOOL_CALLS]
-{{- else if eq .RoleName "tool" -}}
-[TOOL_RESULTS] {{.Content}} [/TOOL_RESULTS]
-{{- else -}}
-{{ .Content -}}
-{{ end -}}`,
-		},
-		StopWords: []string{"<|im_end|>", "<dummy32000>", "</tool_call>", "<|eot_id|>", "<|end_of_text|>", "</s>", "[/TOOL_CALLS]", "[/ACTIONS]"},
-	},
-}
-
-// this maps well known template used in HF to model families defined above
-var knownTemplates = map[string]familyType{
-	`{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}`:                              ChatML,
-	`{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}`: Mistral03,
-}
-
 func guessGGUFFromFile(cfg *ModelConfig, f *gguf.GGUFFile, defaultCtx int) {

 	if defaultCtx == 0 && cfg.ContextSize == nil {
@@ -216,81 +82,9 @@ func guessGGUFFromFile(cfg *ModelConfig, f *gguf.GGUFFile, defaultCtx int) {
 		cfg.Name = f.Metadata().Name
 	}

-	family := identifyFamily(f)
-
-	if family == Unknown {
-		log.Debug().Msgf("guessDefaultsFromFile: %s", "family not identified")
-		return
-	}
-
-	// identify template
-	settings, ok := defaultsSettings[family]
-	if ok {
-		cfg.TemplateConfig = settings.TemplateConfig
-		log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: guessed template %+v", cfg.TemplateConfig)
-		if len(cfg.StopWords) == 0 {
-			cfg.StopWords = settings.StopWords
-		}
-		if cfg.RepeatPenalty == 0.0 {
-			cfg.RepeatPenalty = settings.RepeatPenalty
-		}
-	} else {
-		log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family")
-	}
-
-	if cfg.HasTemplate() {
-		return
-	}
-
-	// identify from well known templates first, otherwise use the raw jinja template
-	chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
-	if found {
-		// try to use the jinja template
-		cfg.TemplateConfig.JinjaTemplate = true
-		cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString()
-	}
-
-}
-
-func identifyFamily(f *gguf.GGUFFile) familyType {
-
-	// identify from well known templates first
-	chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
-	if found && chatTemplate.ValueString() != "" {
-		if family, ok := knownTemplates[chatTemplate.ValueString()]; ok {
-			return family
-		}
-	}
-
-	// otherwise try to identify from the model properties
-	arch := f.Architecture().Architecture
-	eosTokenID := f.Tokenizer().EOSTokenID
-	bosTokenID := f.Tokenizer().BOSTokenID
-
-	isYI := arch == "llama" && bosTokenID == 1 && eosTokenID == 2
-	// WTF! Mistral0.3 and isYi have same bosTokenID and eosTokenID
-
-	llama3 := arch == "llama" && eosTokenID == 128009
-	commandR := arch == "command-r" && eosTokenID == 255001
-	qwen2 := arch == "qwen2"
-	phi3 := arch == "phi-3"
-	gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Metadata().Name), "gemma")
-	deepseek2 := arch == "deepseek2"
-
-	switch {
-	case deepseek2:
-		return DeepSeek2
-	case gemma:
-		return Gemma
-	case llama3:
-		return LLaMa3
-	case commandR:
-		return CommandR
-	case phi3:
-		return Phi3
-	case qwen2, isYI:
-		return ChatML
-	default:
-		return Unknown
-	}
+	// Delegate chat templating to llama.cpp (tokenizer template, jinja enabled) rather than guessing a template here
+	cfg.TemplateConfig.UseTokenizerTemplate = true
+	cfg.FunctionsConfig.GrammarConfig.NoGrammar = true
+	cfg.Options = append(cfg.Options, "use_jinja:true")
+	cfg.KnownUsecaseStrings = append(cfg.KnownUsecaseStrings, "FLAG_CHAT")
 }
--- a/core/config/model_config.go
+++ b/core/config/model_config.go
@@ -16,30 +16,31 @@ const (
 	RAND_SEED = -1
 )

+// @Description TTS configuration
 type TTSConfig struct {

 	// Voice wav path or id
-	Voice string `yaml:"voice" json:"voice"`
+	Voice string `yaml:"voice,omitempty" json:"voice,omitempty"`

-	AudioPath string `yaml:"audio_path" json:"audio_path"`
+	AudioPath string `yaml:"audio_path,omitempty" json:"audio_path,omitempty"`
 }

-// ModelConfig represents a model configuration
+// @Description ModelConfig represents a model configuration
 type ModelConfig struct {
 	modelConfigFile          string `yaml:"-" json:"-"`
-	schema.PredictionOptions `yaml:"parameters" json:"parameters"`
-	Name                     string `yaml:"name" json:"name"`
+	schema.PredictionOptions `yaml:"parameters,omitempty" json:"parameters,omitempty"`
+	Name                     string `yaml:"name,omitempty" json:"name,omitempty"`

-	F16                 *bool                `yaml:"f16" json:"f16"`
-	Threads             *int                 `yaml:"threads" json:"threads"`
-	Debug               *bool                `yaml:"debug" json:"debug"`
-	Roles               map[string]string    `yaml:"roles" json:"roles"`
-	Embeddings          *bool                `yaml:"embeddings" json:"embeddings"`
-	Backend             string               `yaml:"backend" json:"backend"`
-	TemplateConfig      TemplateConfig       `yaml:"template" json:"template"`
-	KnownUsecaseStrings []string             `yaml:"known_usecases" json:"known_usecases"`
+	F16                 *bool                `yaml:"f16,omitempty" json:"f16,omitempty"`
+	Threads             *int                 `yaml:"threads,omitempty" json:"threads,omitempty"`
+	Debug               *bool                `yaml:"debug,omitempty" json:"debug,omitempty"`
+	Roles               map[string]string    `yaml:"roles,omitempty" json:"roles,omitempty"`
+	Embeddings          *bool                `yaml:"embeddings,omitempty" json:"embeddings,omitempty"`
+	Backend             string               `yaml:"backend,omitempty" json:"backend,omitempty"`
+	TemplateConfig      TemplateConfig       `yaml:"template,omitempty" json:"template,omitempty"`
+	KnownUsecaseStrings []string             `yaml:"known_usecases,omitempty" json:"known_usecases,omitempty"`
 	KnownUsecases       *ModelConfigUsecases `yaml:"-" json:"-"`
-	Pipeline            Pipeline             `yaml:"pipeline" json:"pipeline"`
+	Pipeline            Pipeline             `yaml:"pipeline,omitempty" json:"pipeline,omitempty"`

 	PromptStrings, InputStrings                []string               `yaml:"-" json:"-"`
 	InputToken                                 [][]int                `yaml:"-" json:"-"`
@@ -47,96 +48,101 @@ type ModelConfig struct {
 	ResponseFormat                             string                 `yaml:"-" json:"-"`
 	ResponseFormatMap                          map[string]interface{} `yaml:"-" json:"-"`

-	FunctionsConfig functions.FunctionsConfig `yaml:"function" json:"function"`
+	FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"`

-	FeatureFlag FeatureFlag `yaml:"feature_flags" json:"feature_flags"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
+	FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
 	// LLM configs (GPT4ALL, Llama.cpp, ...)
 	LLMConfig `yaml:",inline" json:",inline"`

 	// Diffusers
-	Diffusers Diffusers `yaml:"diffusers" json:"diffusers"`
-	Step      int       `yaml:"step" json:"step"`
+	Diffusers Diffusers `yaml:"diffusers,omitempty" json:"diffusers,omitempty"`
+	Step      int       `yaml:"step,omitempty" json:"step,omitempty"`

 	// GRPC Options
-	GRPC GRPC `yaml:"grpc" json:"grpc"`
+	GRPC GRPC `yaml:"grpc,omitempty" json:"grpc,omitempty"`

 	// TTS specifics
-	TTSConfig `yaml:"tts" json:"tts"`
+	TTSConfig `yaml:"tts,omitempty" json:"tts,omitempty"`

 	// CUDA
 	// Explicitly enable CUDA or not (some backends might need it)
-	CUDA bool `yaml:"cuda" json:"cuda"`
+	CUDA bool `yaml:"cuda,omitempty" json:"cuda,omitempty"`

-	DownloadFiles []File `yaml:"download_files" json:"download_files"`
+	DownloadFiles []File `yaml:"download_files,omitempty" json:"download_files,omitempty"`

-	Description string `yaml:"description" json:"description"`
-	Usage       string `yaml:"usage" json:"usage"`
+	Description string `yaml:"description,omitempty" json:"description,omitempty"`
+	Usage       string `yaml:"usage,omitempty" json:"usage,omitempty"`

-	Options   []string `yaml:"options" json:"options"`
-	Overrides []string `yaml:"overrides" json:"overrides"`
+	Options   []string `yaml:"options,omitempty" json:"options,omitempty"`
+	Overrides []string `yaml:"overrides,omitempty" json:"overrides,omitempty"`

-	MCP   MCPConfig   `yaml:"mcp" json:"mcp"`
-	Agent AgentConfig `yaml:"agent" json:"agent"`
+	MCP   MCPConfig   `yaml:"mcp,omitempty" json:"mcp,omitempty"`
+	Agent AgentConfig `yaml:"agent,omitempty" json:"agent,omitempty"`
 }

+// @Description MCP configuration
 type MCPConfig struct {
-	Servers string `yaml:"remote" json:"remote"`
-	Stdio   string `yaml:"stdio" json:"stdio"`
+	Servers string `yaml:"remote,omitempty" json:"remote,omitempty"`
+	Stdio   string `yaml:"stdio,omitempty" json:"stdio,omitempty"`
 }

+// @Description Agent configuration
 type AgentConfig struct {
-	MaxAttempts           int  `yaml:"max_attempts" json:"max_attempts"`
-	MaxIterations         int  `yaml:"max_iterations" json:"max_iterations"`
-	EnableReasoning       bool `yaml:"enable_reasoning" json:"enable_reasoning"`
-	EnablePlanning        bool `yaml:"enable_planning" json:"enable_planning"`
-	EnableMCPPrompts      bool `yaml:"enable_mcp_prompts" json:"enable_mcp_prompts"`
-	EnablePlanReEvaluator bool `yaml:"enable_plan_re_evaluator" json:"enable_plan_re_evaluator"`
+	MaxAttempts           int  `yaml:"max_attempts,omitempty" json:"max_attempts,omitempty"`
+	MaxIterations         int  `yaml:"max_iterations,omitempty" json:"max_iterations,omitempty"`
+	EnableReasoning       bool `yaml:"enable_reasoning,omitempty" json:"enable_reasoning,omitempty"`
+	EnablePlanning        bool `yaml:"enable_planning,omitempty" json:"enable_planning,omitempty"`
+	EnableMCPPrompts      bool `yaml:"enable_mcp_prompts,omitempty" json:"enable_mcp_prompts,omitempty"`
+	EnablePlanReEvaluator bool `yaml:"enable_plan_re_evaluator,omitempty" json:"enable_plan_re_evaluator,omitempty"`
 }

-func (c *MCPConfig) MCPConfigFromYAML() (MCPGenericConfig[MCPRemoteServers], MCPGenericConfig[MCPSTDIOServers]) {
+func (c *MCPConfig) MCPConfigFromYAML() (MCPGenericConfig[MCPRemoteServers], MCPGenericConfig[MCPSTDIOServers], error) {
 	var remote MCPGenericConfig[MCPRemoteServers]
 	var stdio MCPGenericConfig[MCPSTDIOServers]

 	if err := yaml.Unmarshal([]byte(c.Servers), &remote); err != nil {
-		return remote, stdio
+		return remote, stdio, err
 	}

 	if err := yaml.Unmarshal([]byte(c.Stdio), &stdio); err != nil {
-		return remote, stdio
+		return remote, stdio, err
 	}
-
-	return remote, stdio
+	return remote, stdio, nil
 }

+// @Description MCP generic configuration
 type MCPGenericConfig[T any] struct {
-	Servers T `yaml:"mcpServers" json:"mcpServers"`
+	Servers T `yaml:"mcpServers,omitempty" json:"mcpServers,omitempty"`
 }
 type MCPRemoteServers map[string]MCPRemoteServer
 type MCPSTDIOServers map[string]MCPSTDIOServer

+// @Description MCP remote server configuration
 type MCPRemoteServer struct {
-	URL   string `json:"url"`
-	Token string `json:"token"`
+	URL   string `json:"url,omitempty"`
+	Token string `json:"token,omitempty"`
 }

+// @Description MCP STDIO server configuration
 type MCPSTDIOServer struct {
-	Args    []string          `json:"args"`
-	Env     map[string]string `json:"env"`
-	Command string            `json:"command"`
+	Args    []string          `json:"args,omitempty"`
+	Env     map[string]string `json:"env,omitempty"`
+	Command string            `json:"command,omitempty"`
 }

-// Pipeline defines other models to use for audio-to-audio
+// @Description Pipeline defines other models to use for audio-to-audio
 type Pipeline struct {
-	TTS           string `yaml:"tts" json:"tts"`
-	LLM           string `yaml:"llm" json:"llm"`
-	Transcription string `yaml:"transcription" json:"transcription"`
-	VAD           string `yaml:"vad" json:"vad"`
+	TTS           string `yaml:"tts,omitempty" json:"tts,omitempty"`
+	LLM           string `yaml:"llm,omitempty" json:"llm,omitempty"`
+	Transcription string `yaml:"transcription,omitempty" json:"transcription,omitempty"`
+	VAD           string `yaml:"vad,omitempty" json:"vad,omitempty"`
 }

+// @Description File configuration for model downloads
 type File struct {
-	Filename string         `yaml:"filename" json:"filename"`
-	SHA256   string         `yaml:"sha256" json:"sha256"`
-	URI      downloader.URI `yaml:"uri" json:"uri"`
+	Filename string         `yaml:"filename,omitempty" json:"filename,omitempty"`
+	SHA256   string         `yaml:"sha256,omitempty" json:"sha256,omitempty"`
+	URI      downloader.URI `yaml:"uri,omitempty" json:"uri,omitempty"`
 }

 type FeatureFlag map[string]*bool
@@ -148,126 +154,136 @@ func (ff FeatureFlag) Enabled(s string) bool {
 	return false
 }

+// @Description GRPC configuration
 type GRPC struct {
-	Attempts          int `yaml:"attempts" json:"attempts"`
-	AttemptsSleepTime int `yaml:"attempts_sleep_time" json:"attempts_sleep_time"`
+	Attempts          int `yaml:"attempts,omitempty" json:"attempts,omitempty"`
+	AttemptsSleepTime int `yaml:"attempts_sleep_time,omitempty" json:"attempts_sleep_time,omitempty"`
 }

+// @Description Diffusers configuration
 type Diffusers struct {
-	CUDA             bool   `yaml:"cuda" json:"cuda"`
-	PipelineType     string `yaml:"pipeline_type" json:"pipeline_type"`
-	SchedulerType    string `yaml:"scheduler_type" json:"scheduler_type"`
-	EnableParameters string `yaml:"enable_parameters" json:"enable_parameters"` // A list of comma separated parameters to specify
-	IMG2IMG          bool   `yaml:"img2img" json:"img2img"`                     // Image to Image Diffuser
-	ClipSkip         int    `yaml:"clip_skip" json:"clip_skip"`                 // Skip every N frames
-	ClipModel        string `yaml:"clip_model" json:"clip_model"`               // Clip model to use
-	ClipSubFolder    string `yaml:"clip_subfolder" json:"clip_subfolder"`       // Subfolder to use for clip model
-	ControlNet       string `yaml:"control_net" json:"control_net"`
+	CUDA             bool   `yaml:"cuda,omitempty" json:"cuda,omitempty"`
+	PipelineType     string `yaml:"pipeline_type,omitempty" json:"pipeline_type,omitempty"`
+	SchedulerType    string `yaml:"scheduler_type,omitempty" json:"scheduler_type,omitempty"`
+	EnableParameters string `yaml:"enable_parameters,omitempty" json:"enable_parameters,omitempty"` // A list of comma separated parameters to specify
+	IMG2IMG          bool   `yaml:"img2img,omitempty" json:"img2img,omitempty"`                     // Image to Image Diffuser
+	ClipSkip         int    `yaml:"clip_skip,omitempty" json:"clip_skip,omitempty"`                 // Skip every N frames
+	ClipModel        string `yaml:"clip_model,omitempty" json:"clip_model,omitempty"`               // Clip model to use
+	ClipSubFolder    string `yaml:"clip_subfolder,omitempty" json:"clip_subfolder,omitempty"`       // Subfolder to use for clip model
+	ControlNet       string `yaml:"control_net,omitempty" json:"control_net,omitempty"`
 }

-// LLMConfig is a struct that holds the configuration that are
-// generic for most of the LLM backends.
+// @Description LLMConfig is a struct that holds the configuration that are generic for most of the LLM backends.
 type LLMConfig struct {
-	SystemPrompt    string   `yaml:"system_prompt" json:"system_prompt"`
-	TensorSplit     string   `yaml:"tensor_split" json:"tensor_split"`
-	MainGPU         string   `yaml:"main_gpu" json:"main_gpu"`
-	RMSNormEps      float32  `yaml:"rms_norm_eps" json:"rms_norm_eps"`
-	NGQA            int32    `yaml:"ngqa" json:"ngqa"`
-	PromptCachePath string   `yaml:"prompt_cache_path" json:"prompt_cache_path"`
-	PromptCacheAll  bool     `yaml:"prompt_cache_all" json:"prompt_cache_all"`
-	PromptCacheRO   bool     `yaml:"prompt_cache_ro" json:"prompt_cache_ro"`
-	MirostatETA     *float64 `yaml:"mirostat_eta" json:"mirostat_eta"`
-	MirostatTAU     *float64 `yaml:"mirostat_tau" json:"mirostat_tau"`
-	Mirostat        *int     `yaml:"mirostat" json:"mirostat"`
-	NGPULayers      *int     `yaml:"gpu_layers" json:"gpu_layers"`
-	MMap            *bool    `yaml:"mmap" json:"mmap"`
-	MMlock          *bool    `yaml:"mmlock" json:"mmlock"`
-	LowVRAM         *bool    `yaml:"low_vram" json:"low_vram"`
-	Reranking       *bool    `yaml:"reranking" json:"reranking"`
-	Grammar         string   `yaml:"grammar" json:"grammar"`
-	StopWords       []string `yaml:"stopwords" json:"stopwords"`
-	Cutstrings      []string `yaml:"cutstrings" json:"cutstrings"`
-	ExtractRegex    []string `yaml:"extract_regex" json:"extract_regex"`
-	TrimSpace       []string `yaml:"trimspace" json:"trimspace"`
-	TrimSuffix      []string `yaml:"trimsuffix" json:"trimsuffix"`
+	SystemPrompt    string   `yaml:"system_prompt,omitempty" json:"system_prompt,omitempty"`
+	TensorSplit     string   `yaml:"tensor_split,omitempty" json:"tensor_split,omitempty"`
+	MainGPU         string   `yaml:"main_gpu,omitempty" json:"main_gpu,omitempty"`
+	RMSNormEps      float32  `yaml:"rms_norm_eps,omitempty" json:"rms_norm_eps,omitempty"`
+	NGQA            int32    `yaml:"ngqa,omitempty" json:"ngqa,omitempty"`
+	PromptCachePath string   `yaml:"prompt_cache_path,omitempty" json:"prompt_cache_path,omitempty"`
+	PromptCacheAll  bool     `yaml:"prompt_cache_all,omitempty" json:"prompt_cache_all,omitempty"`
+	PromptCacheRO   bool     `yaml:"prompt_cache_ro,omitempty" json:"prompt_cache_ro,omitempty"`
+	MirostatETA     *float64 `yaml:"mirostat_eta,omitempty" json:"mirostat_eta,omitempty"`
+	MirostatTAU     *float64 `yaml:"mirostat_tau,omitempty" json:"mirostat_tau,omitempty"`
+	Mirostat        *int     `yaml:"mirostat,omitempty" json:"mirostat,omitempty"`
+	NGPULayers      *int     `yaml:"gpu_layers,omitempty" json:"gpu_layers,omitempty"`
+	MMap            *bool    `yaml:"mmap,omitempty" json:"mmap,omitempty"`
+	MMlock          *bool    `yaml:"mmlock,omitempty" json:"mmlock,omitempty"`
+	LowVRAM         *bool    `yaml:"low_vram,omitempty" json:"low_vram,omitempty"`
+	Reranking       *bool    `yaml:"reranking,omitempty" json:"reranking,omitempty"`
+	Grammar         string   `yaml:"grammar,omitempty" json:"grammar,omitempty"`
+	StopWords       []string `yaml:"stopwords,omitempty" json:"stopwords,omitempty"`
+	Cutstrings      []string `yaml:"cutstrings,omitempty" json:"cutstrings,omitempty"`
+	ExtractRegex    []string `yaml:"extract_regex,omitempty" json:"extract_regex,omitempty"`
+	TrimSpace       []string `yaml:"trimspace,omitempty" json:"trimspace,omitempty"`
+	TrimSuffix      []string `yaml:"trimsuffix,omitempty" json:"trimsuffix,omitempty"`

-	ContextSize          *int             `yaml:"context_size" json:"context_size"`
-	NUMA                 bool             `yaml:"numa" json:"numa"`
-	LoraAdapter          string           `yaml:"lora_adapter" json:"lora_adapter"`
-	LoraBase             string           `yaml:"lora_base" json:"lora_base"`
-	LoraAdapters         []string         `yaml:"lora_adapters" json:"lora_adapters"`
-	LoraScales           []float32        `yaml:"lora_scales" json:"lora_scales"`
-	LoraScale            float32          `yaml:"lora_scale" json:"lora_scale"`
-	NoMulMatQ            bool             `yaml:"no_mulmatq" json:"no_mulmatq"`
-	DraftModel           string           `yaml:"draft_model" json:"draft_model"`
-	NDraft               int32            `yaml:"n_draft" json:"n_draft"`
-	Quantization         string           `yaml:"quantization" json:"quantization"`
-	LoadFormat           string           `yaml:"load_format" json:"load_format"`
-	GPUMemoryUtilization float32          `yaml:"gpu_memory_utilization" json:"gpu_memory_utilization"` // vLLM
-	TrustRemoteCode      bool             `yaml:"trust_remote_code" json:"trust_remote_code"`           // vLLM
-	EnforceEager         bool             `yaml:"enforce_eager" json:"enforce_eager"`                   // vLLM
-	SwapSpace            int              `yaml:"swap_space" json:"swap_space"`                         // vLLM
-	MaxModelLen          int              `yaml:"max_model_len" json:"max_model_len"`                   // vLLM
-	TensorParallelSize   int              `yaml:"tensor_parallel_size" json:"tensor_parallel_size"`     // vLLM
-	DisableLogStatus     bool             `yaml:"disable_log_stats" json:"disable_log_stats"`           // vLLM
-	DType                string           `yaml:"dtype" json:"dtype"`                                   // vLLM
-	LimitMMPerPrompt     LimitMMPerPrompt `yaml:"limit_mm_per_prompt" json:"limit_mm_per_prompt"`       // vLLM
-	MMProj               string           `yaml:"mmproj" json:"mmproj"`
+	ContextSize          *int             `yaml:"context_size,omitempty" json:"context_size,omitempty"`
+	NUMA                 bool             `yaml:"numa,omitempty" json:"numa,omitempty"`
+	LoraAdapter          string           `yaml:"lora_adapter,omitempty" json:"lora_adapter,omitempty"`
+	LoraBase             string           `yaml:"lora_base,omitempty" json:"lora_base,omitempty"`
+	LoraAdapters         []string         `yaml:"lora_adapters,omitempty" json:"lora_adapters,omitempty"`
+	LoraScales           []float32        `yaml:"lora_scales,omitempty" json:"lora_scales,omitempty"`
+	LoraScale            float32          `yaml:"lora_scale,omitempty" json:"lora_scale,omitempty"`
+	NoMulMatQ            bool             `yaml:"no_mulmatq,omitempty" json:"no_mulmatq,omitempty"`
+	DraftModel           string           `yaml:"draft_model,omitempty" json:"draft_model,omitempty"`
+	NDraft               int32            `yaml:"n_draft,omitempty" json:"n_draft,omitempty"`
+	Quantization         string           `yaml:"quantization,omitempty" json:"quantization,omitempty"`
+	LoadFormat           string           `yaml:"load_format,omitempty" json:"load_format,omitempty"`
+	GPUMemoryUtilization float32          `yaml:"gpu_memory_utilization,omitempty" json:"gpu_memory_utilization,omitempty"` // vLLM
+	TrustRemoteCode      bool             `yaml:"trust_remote_code,omitempty" json:"trust_remote_code,omitempty"`           // vLLM
+	EnforceEager         bool             `yaml:"enforce_eager,omitempty" json:"enforce_eager,omitempty"`                   // vLLM
+	SwapSpace            int              `yaml:"swap_space,omitempty" json:"swap_space,omitempty"`                         // vLLM
+	MaxModelLen          int              `yaml:"max_model_len,omitempty" json:"max_model_len,omitempty"`                   // vLLM
+	TensorParallelSize   int              `yaml:"tensor_parallel_size,omitempty" json:"tensor_parallel_size,omitempty"`     // vLLM
+	DisableLogStatus     bool             `yaml:"disable_log_stats,omitempty" json:"disable_log_stats,omitempty"`           // vLLM
+	DType                string           `yaml:"dtype,omitempty" json:"dtype,omitempty"`                                   // vLLM
+	LimitMMPerPrompt     LimitMMPerPrompt `yaml:"limit_mm_per_prompt,omitempty" json:"limit_mm_per_prompt,omitempty"`       // vLLM
+	MMProj               string           `yaml:"mmproj,omitempty" json:"mmproj,omitempty"`

-	FlashAttention *string `yaml:"flash_attention" json:"flash_attention"`
-	NoKVOffloading bool    `yaml:"no_kv_offloading" json:"no_kv_offloading"`
-	CacheTypeK     string  `yaml:"cache_type_k" json:"cache_type_k"`
-	CacheTypeV     string  `yaml:"cache_type_v" json:"cache_type_v"`
+	FlashAttention *string `yaml:"flash_attention,omitempty" json:"flash_attention,omitempty"`
+	NoKVOffloading bool    `yaml:"no_kv_offloading,omitempty" json:"no_kv_offloading,omitempty"`
+	CacheTypeK     string  `yaml:"cache_type_k,omitempty" json:"cache_type_k,omitempty"`
+	CacheTypeV     string  `yaml:"cache_type_v,omitempty" json:"cache_type_v,omitempty"`

-	RopeScaling string `yaml:"rope_scaling" json:"rope_scaling"`
-	ModelType   string `yaml:"type" json:"type"`
+	RopeScaling string `yaml:"rope_scaling,omitempty" json:"rope_scaling,omitempty"`
+	ModelType   string `yaml:"type,omitempty" json:"type,omitempty"`

-	YarnExtFactor  float32 `yaml:"yarn_ext_factor" json:"yarn_ext_factor"`
-	YarnAttnFactor float32 `yaml:"yarn_attn_factor" json:"yarn_attn_factor"`
-	YarnBetaFast   float32 `yaml:"yarn_beta_fast" json:"yarn_beta_fast"`
-	YarnBetaSlow   float32 `yaml:"yarn_beta_slow" json:"yarn_beta_slow"`
+	YarnExtFactor  float32 `yaml:"yarn_ext_factor,omitempty" json:"yarn_ext_factor,omitempty"`
+	YarnAttnFactor float32 `yaml:"yarn_attn_factor,omitempty" json:"yarn_attn_factor,omitempty"`
+	YarnBetaFast   float32 `yaml:"yarn_beta_fast,omitempty" json:"yarn_beta_fast,omitempty"`
+	YarnBetaSlow   float32 `yaml:"yarn_beta_slow,omitempty" json:"yarn_beta_slow,omitempty"`

-	CFGScale float32 `yaml:"cfg_scale" json:"cfg_scale"` // Classifier-Free Guidance Scale
+	CFGScale float32 `yaml:"cfg_scale,omitempty" json:"cfg_scale,omitempty"` // Classifier-Free Guidance Scale
 }

-// LimitMMPerPrompt is a struct that holds the configuration for the limit-mm-per-prompt config in vLLM
+// @Description LimitMMPerPrompt is a struct that holds the configuration for the limit-mm-per-prompt config in vLLM
 type LimitMMPerPrompt struct {
-	LimitImagePerPrompt int `yaml:"image" json:"image"`
-	LimitVideoPerPrompt int `yaml:"video" json:"video"`
-	LimitAudioPerPrompt int `yaml:"audio" json:"audio"`
+	LimitImagePerPrompt int `yaml:"image,omitempty" json:"image,omitempty"`
+	LimitVideoPerPrompt int `yaml:"video,omitempty" json:"video,omitempty"`
+	LimitAudioPerPrompt int `yaml:"audio,omitempty" json:"audio,omitempty"`
 }

-// TemplateConfig is a struct that holds the configuration of the templating system
+// @Description TemplateConfig is a struct that holds the configuration of the templating system
 type TemplateConfig struct {
 	// Chat is the template used in the chat completion endpoint
-	Chat string `yaml:"chat" json:"chat"`
+	Chat string `yaml:"chat,omitempty" json:"chat,omitempty"`

 	// ChatMessage is the template used for chat messages
-	ChatMessage string `yaml:"chat_message" json:"chat_message"`
+	ChatMessage string `yaml:"chat_message,omitempty" json:"chat_message,omitempty"`

 	// Completion is the template used for completion requests
-	Completion string `yaml:"completion" json:"completion"`
+	Completion string `yaml:"completion,omitempty" json:"completion,omitempty"`

 	// Edit is the template used for edit completion requests
-	Edit string `yaml:"edit" json:"edit"`
+	Edit string `yaml:"edit,omitempty" json:"edit,omitempty"`

 	// Functions is the template used when tools are present in the client requests
-	Functions string `yaml:"function" json:"function"`
+	Functions string `yaml:"function,omitempty" json:"function,omitempty"`

 	// UseTokenizerTemplate is a flag that indicates if the tokenizer template should be used.
 	// Note: this is mostly consumed for backends such as vllm and transformers
 	// that can use the tokenizers specified in the JSON config files of the models
-	UseTokenizerTemplate bool `yaml:"use_tokenizer_template" json:"use_tokenizer_template"`
+	UseTokenizerTemplate bool `yaml:"use_tokenizer_template,omitempty" json:"use_tokenizer_template,omitempty"`

 	// JoinChatMessagesByCharacter is a string that will be used to join chat messages together.
 	// It defaults to \n
-	JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character" json:"join_chat_messages_by_character"`
+	JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character,omitempty" json:"join_chat_messages_by_character,omitempty"`

-	Multimodal string `yaml:"multimodal" json:"multimodal"`
+	Multimodal string `yaml:"multimodal,omitempty" json:"multimodal,omitempty"`

-	JinjaTemplate bool `yaml:"jinja_template" json:"jinja_template"`
+	ReplyPrefix string `yaml:"reply_prefix,omitempty" json:"reply_prefix,omitempty"`
+}

-	ReplyPrefix string `yaml:"reply_prefix" json:"reply_prefix"`
+func (c *ModelConfig) syncKnownUsecasesFromString() {
+	c.KnownUsecases = GetUsecasesFromYAML(c.KnownUsecaseStrings)
+	// Make sure the usecases are valid, we rewrite with what we identified
+	c.KnownUsecaseStrings = []string{}
+	for k, usecase := range GetAllModelConfigUsecases() {
+		if c.HasUsecases(usecase) {
+			c.KnownUsecaseStrings = append(c.KnownUsecaseStrings, k)
+		}
+	}
 }

 func (c *ModelConfig) UnmarshalYAML(value *yaml.Node) error {
@@ -278,14 +294,7 @@ func (c *ModelConfig) UnmarshalYAML(value *yaml.Node) error {
 	}
 	*c = ModelConfig(aux)

-	c.KnownUsecases = GetUsecasesFromYAML(c.KnownUsecaseStrings)
-	// Make sure the usecases are valid, we rewrite with what we identified
-	c.KnownUsecaseStrings = []string{}
-	for k, usecase := range GetAllModelConfigUsecases() {
-		if c.HasUsecases(usecase) {
-			c.KnownUsecaseStrings = append(c.KnownUsecaseStrings, k)
-		}
-	}
+	c.syncKnownUsecasesFromString()
 	return nil
 }

@@ -462,6 +471,7 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
 	}

 	guessDefaultsFromFile(cfg, lo.modelPath, ctx)
+	cfg.syncKnownUsecasesFromString()
 }

 func (c *ModelConfig) Validate() bool {
@@ -492,7 +502,7 @@ func (c *ModelConfig) Validate() bool {
 }

 func (c *ModelConfig) HasTemplate() bool {
-	return c.TemplateConfig.Completion != "" || c.TemplateConfig.Edit != "" || c.TemplateConfig.Chat != "" || c.TemplateConfig.ChatMessage != ""
+	return c.TemplateConfig.Completion != "" || c.TemplateConfig.Edit != "" || c.TemplateConfig.Chat != "" || c.TemplateConfig.ChatMessage != "" || c.TemplateConfig.UseTokenizerTemplate
 }

 func (c *ModelConfig) GetModelConfigFile() string {
@@ -573,7 +583,7 @@ func (c *ModelConfig) HasUsecases(u ModelConfigUsecases) bool {
 // This avoids the maintenance burden of updating this list for each new backend - but unfortunately, that's the best option for some services currently.
 func (c *ModelConfig) GuessUsecases(u ModelConfigUsecases) bool {
 	if (u & FLAG_CHAT) == FLAG_CHAT {
-		if c.TemplateConfig.Chat == "" && c.TemplateConfig.ChatMessage == "" {
+		if c.TemplateConfig.Chat == "" && c.TemplateConfig.ChatMessage == "" && !c.TemplateConfig.UseTokenizerTemplate {
 			return false
 		}
 	}
--- a/core/gallery/backends.go
+++ b/core/gallery/backends.go
@@ -4,6 +4,7 @@ package gallery

 import (
 	"encoding/json"
+	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
@@ -310,8 +311,10 @@ func ListSystemBackends(systemState *system.SystemState) (SystemBackends, error)
 				}
 			}
 		}
-	} else {
+	} else if !errors.Is(err, os.ErrNotExist) {
 		log.Warn().Err(err).Msg("Failed to read system backends, proceeding with user-managed backends")
+	} else if errors.Is(err, os.ErrNotExist) {
+		log.Debug().Msg("No system backends found")
 	}

 	// User-managed backends and alias collection
--- a/core/gallery/gallery.go
+++ b/core/gallery/gallery.go
@@ -61,12 +61,15 @@ func (gm GalleryElements[T]) Search(term string) GalleryElements[T] {
 	term = strings.ToLower(term)
 	for _, m := range gm {
 		if fuzzy.Match(term, strings.ToLower(m.GetName())) ||
-			fuzzy.Match(term, strings.ToLower(m.GetDescription())) ||
 			fuzzy.Match(term, strings.ToLower(m.GetGallery().Name)) ||
+			strings.Contains(strings.ToLower(m.GetName()), term) ||
+			strings.Contains(strings.ToLower(m.GetDescription()), term) ||
+			strings.Contains(strings.ToLower(m.GetGallery().Name), term) ||
 			strings.Contains(strings.ToLower(strings.Join(m.GetTags(), ",")), term) {
 			filteredModels = append(filteredModels, m)
 		}
 	}
+
 	return filteredModels
 }

--- a/core/gallery/importers/importers.go
+++ b/core/gallery/importers/importers.go
@@ -0,0 +1,65 @@
+package importers
+
+import (
+	"encoding/json"
+	"fmt"
+	"strings"
+
+	"github.com/mudler/LocalAI/core/gallery"
+	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
+	"github.com/rs/zerolog/log"
+)
+
+var DefaultImporters = []Importer{
+	&LlamaCPPImporter{}, // DiscoverModelConfig walks this list in order, so this one is tried first
+	&MLXImporter{},
+}
+
+type Details struct {
+	HuggingFace *hfapi.ModelDetails // Hugging Face lookup result set by DiscoverModelConfig; LlamaCPPImporter.Match nil-checks it
+	URI         string              // model URI as passed to DiscoverModelConfig
+	Preferences json.RawMessage     // raw JSON preferences, handed to the importers undecoded
+}
+
+type Importer interface {
+	Match(details Details) bool                          // reports whether this importer wants to handle details
+	Import(details Details) (gallery.ModelConfig, error) // builds the gallery model configuration for details
+}
+
+// DiscoverModelConfig builds the gallery model configuration for uri using the
+// first importer in DefaultImporters that both matches and imports successfully.
+// The Hugging Face lookup is best-effort: its failure is only logged. An error is
+// returned when no importer matches or when every matching importer fails.
+func DiscoverModelConfig(uri string, preferences json.RawMessage) (gallery.ModelConfig, error) {
+	hfrepoID := strings.ReplaceAll(uri, "huggingface://", "")
+	hfrepoID = strings.ReplaceAll(hfrepoID, "hf://", "")
+	hfrepoID = strings.ReplaceAll(hfrepoID, "https://huggingface.co/", "")
+
+	hfDetails, err := hfapi.NewClient().GetModelDetails(hfrepoID)
+	if err != nil {
+		// maybe not a HF repository
+		// TODO: maybe we can check if the URI is a valid HF repository
+		log.Debug().Str("uri", uri).Msg("Failed to get model details, maybe not a HF repository")
+	} else {
+		log.Debug().Str("uri", uri).Msg("Got model details")
+		log.Debug().Any("details", hfDetails).Msg("Model details")
+	}
+
+	details := Details{HuggingFace: hfDetails, URI: uri, Preferences: preferences}
+	// Importer errors are tracked apart from the tolerated lookup error above.
+	var lastErr error
+	for _, importer := range DefaultImporters {
+		if !importer.Match(details) {
+			continue
+		}
+		cfg, importErr := importer.Import(details)
+		if importErr == nil {
+			return cfg, nil
+		}
+		lastErr = importErr
+	}
+	if lastErr == nil {
+		lastErr = fmt.Errorf("no importer matched %q", uri)
+	}
+	return gallery.ModelConfig{}, lastErr
+}
--- a/core/gallery/importers/importers_suite_test.go
+++ b/core/gallery/importers/importers_suite_test.go
@@ -0,0 +1,13 @@
+package importers_test
+
+import (
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+func TestImporters(t *testing.T) { // go test entry point that runs the Ginkgo specs of this package
+	RegisterFailHandler(Fail) // hand Gomega assertion failures to Ginkgo's Fail
+	RunSpecs(t, "Importers test suite")
+}
--- a/core/gallery/importers/importers_test.go
+++ b/core/gallery/importers/importers_test.go
@@ -0,0 +1,215 @@
+package importers_test
+
+import (
+	"encoding/json"
+	"fmt"
+
+	"github.com/mudler/LocalAI/core/gallery/importers"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("DiscoverModelConfig", func() {
+
+	Context("With only a repository URI", func() {
+		It("should discover and import using LlamaCPPImporter", func() {
+			uri := "https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"
+			preferences := json.RawMessage(`{}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
+			Expect(modelConfig.Name).To(Equal("LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(len(modelConfig.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[0].Filename).To(Equal("localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[0].URI).To(Equal("https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/resolve/main/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[0].SHA256).To(Equal("4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4"), fmt.Sprintf("Model config: %+v", modelConfig))
+		})
+
+		It("should discover and import using LlamaCPPImporter", func() {
+			uri := "https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF"
+			preferences := json.RawMessage(`{}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
+			Expect(modelConfig.Name).To(Equal("Qwen3-VL-2B-Instruct-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: mmproj/mmproj-Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("model: Qwen3VL-2B-Instruct-Q4_K_M.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(len(modelConfig.Files)).To(Equal(2), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[0].Filename).To(Equal("Qwen3VL-2B-Instruct-Q4_K_M.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[0].URI).To(Equal("https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF/resolve/main/Qwen3VL-2B-Instruct-Q4_K_M.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[0].SHA256).ToNot(BeEmpty(), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[1].Filename).To(Equal("mmproj/mmproj-Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[1].URI).To(Equal("https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF/resolve/main/mmproj-Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[1].SHA256).ToNot(BeEmpty(), fmt.Sprintf("Model config: %+v", modelConfig))
+		})
+
+		It("should discover and import using LlamaCPPImporter", func() {
+			uri := "https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF"
+			preferences := json.RawMessage(`{ "quantizations": "Q8_0", "mmproj_quantizations": "f16" }`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
+			Expect(modelConfig.Name).To(Equal("Qwen3-VL-2B-Instruct-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: mmproj/mmproj-Qwen3VL-2B-Instruct-F16.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("model: Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(len(modelConfig.Files)).To(Equal(2), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[0].Filename).To(Equal("Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[0].URI).To(Equal("https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF/resolve/main/Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[0].SHA256).ToNot(BeEmpty(), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[1].Filename).To(Equal("mmproj/mmproj-Qwen3VL-2B-Instruct-F16.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[1].URI).To(Equal("https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF/resolve/main/mmproj-Qwen3VL-2B-Instruct-F16.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[1].SHA256).ToNot(BeEmpty(), fmt.Sprintf("Model config: %+v", modelConfig))
+		})
+	})
+
+	Context("with .gguf URI", func() {
+		It("should discover and import using LlamaCPPImporter", func() {
+			uri := "https://example.com/my-model.gguf"
+			preferences := json.RawMessage(`{}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("my-model.gguf"))
+			Expect(modelConfig.Description).To(Equal("Imported from https://example.com/my-model.gguf"))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
+		})
+
+		It("should use custom preferences when provided", func() {
+			uri := "https://example.com/my-model.gguf"
+			preferences := json.RawMessage(`{"name": "custom-name", "description": "Custom description"}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("custom-name"))
+			Expect(modelConfig.Description).To(Equal("Custom description"))
+		})
+	})
+
+	Context("with mlx-community URI", func() {
+		It("should discover and import using MLXImporter", func() {
+			uri := "https://huggingface.co/mlx-community/test-model"
+			preferences := json.RawMessage(`{}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("test-model"))
+			Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/mlx-community/test-model"))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx"))
+		})
+
+		It("should use custom preferences when provided", func() {
+			uri := "https://huggingface.co/mlx-community/test-model"
+			preferences := json.RawMessage(`{"name": "custom-mlx", "description": "Custom MLX description"}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("custom-mlx"))
+			Expect(modelConfig.Description).To(Equal("Custom MLX description"))
+		})
+	})
+
+	Context("with backend preference", func() {
+		It("should use llama-cpp backend when specified", func() {
+			uri := "https://example.com/model"
+			preferences := json.RawMessage(`{"backend": "llama-cpp"}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
+		})
+
+		It("should use mlx backend when specified", func() {
+			uri := "https://example.com/model"
+			preferences := json.RawMessage(`{"backend": "mlx"}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx"))
+		})
+
+		It("should use mlx-vlm backend when specified", func() {
+			uri := "https://example.com/model"
+			preferences := json.RawMessage(`{"backend": "mlx-vlm"}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx-vlm"))
+		})
+	})
+
+	Context("with HuggingFace URI formats", func() {
+		It("should handle huggingface:// prefix", func() {
+			uri := "huggingface://mlx-community/test-model"
+			preferences := json.RawMessage(`{}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("test-model"))
+		})
+
+		It("should handle hf:// prefix", func() {
+			uri := "hf://mlx-community/test-model"
+			preferences := json.RawMessage(`{}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("test-model"))
+		})
+
+		It("should handle https://huggingface.co/ prefix", func() {
+			uri := "https://huggingface.co/mlx-community/test-model"
+			preferences := json.RawMessage(`{}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("test-model"))
+		})
+	})
+
+	Context("with invalid or non-matching URI", func() {
+		It("should return error when no importer matches", func() {
+			uri := "https://example.com/unknown-model.bin"
+			preferences := json.RawMessage(`{}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			// When no importer matches, the function returns empty config and error
+			// The exact behavior depends on implementation, but typically an error is returned
+			Expect(modelConfig.Name).To(BeEmpty())
+			Expect(err).To(HaveOccurred())
+		})
+	})
+
+	Context("with invalid JSON preferences", func() {
+		It("should return error when JSON is invalid even if URI matches", func() {
+			uri := "https://example.com/model.gguf"
+			preferences := json.RawMessage(`invalid json`)
+
+			// LlamaCPPImporter.Match() already returns false when the preferences
+			// cannot be unmarshalled, so the .gguf extension alone does not match
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).To(HaveOccurred())
+			Expect(modelConfig.Name).To(BeEmpty())
+		})
+	})
+})
--- a/core/gallery/importers/llama-cpp.go
+++ b/core/gallery/importers/llama-cpp.go
@@ -0,0 +1,201 @@
+package importers
+
+import (
+	"encoding/json"
+	"path/filepath"
+	"slices"
+	"strings"
+
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/functions"
+	"go.yaml.in/yaml/v2"
+)
+
+var _ Importer = &LlamaCPPImporter{}
+
+type LlamaCPPImporter struct{}
+// Match reports whether the llama-cpp importer can handle the given details.
+func (i *LlamaCPPImporter) Match(details Details) bool {
+	// A nil RawMessage marshals to "null", so absent preferences still decode.
+	raw, err := details.Preferences.MarshalJSON()
+	if err != nil {
+		return false
+	}
+	prefs := make(map[string]any)
+	if err := json.Unmarshal(raw, &prefs); err != nil {
+		// Undecodable preferences never match, whatever the URI looks like.
+		return false
+	}
+
+	switch {
+	case prefs["backend"] == "llama-cpp":
+		return true
+	case strings.HasSuffix(details.URI, ".gguf"):
+		return true
+	case details.HuggingFace == nil:
+		return false
+	}
+
+	// Last resort: the Hugging Face file listing contains at least one .gguf file.
+	for _, f := range details.HuggingFace.Files {
+		if strings.HasSuffix(f.Path, ".gguf") {
+			return true
+		}
+	}
+	return false
+}
+// Import builds the gallery entry for a llama-cpp model: a URI ending in .gguf becomes the
+// only file, otherwise files are chosen from details.HuggingFace using the "quantizations"
+// (default q4_k_m) and "mmproj_quantizations" (default fp16) preferences.
+func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) {
+	preferences, err := details.Preferences.MarshalJSON()
+	if err != nil {
+		return gallery.ModelConfig{}, err
+	}
+	preferencesMap := make(map[string]any)
+	err = json.Unmarshal(preferences, &preferencesMap)
+	if err != nil {
+		return gallery.ModelConfig{}, err
+	}
+
+	name, ok := preferencesMap["name"].(string)
+	if !ok {
+		name = filepath.Base(details.URI)
+	}
+
+	description, ok := preferencesMap["description"].(string)
+	if !ok {
+		description = "Imported from " + details.URI
+	}
+
+	// Split on commas and blanks; empty entries are dropped since they would match every file.
+	isListSep := func(r rune) bool { return r == ',' || r == ' ' || r == '\t' }
+	preferedQuantizations, _ := preferencesMap["quantizations"].(string)
+	quants := strings.FieldsFunc(preferedQuantizations, isListSep)
+	if len(quants) == 0 {
+		quants = []string{"q4_k_m"}
+	}
+
+	mmprojQuants, _ := preferencesMap["mmproj_quantizations"].(string)
+	mmprojQuantsList := strings.FieldsFunc(mmprojQuants, isListSep)
+	if len(mmprojQuantsList) == 0 {
+		mmprojQuantsList = []string{"fp16"}
+	}
+
+	modelConfig := config.ModelConfig{
+		Name:                name,
+		Description:         description,
+		KnownUsecaseStrings: []string{"chat"},
+		Backend:             "llama-cpp",
+		TemplateConfig: config.TemplateConfig{
+			UseTokenizerTemplate: true,
+		},
+		FunctionsConfig: functions.FunctionsConfig{
+			GrammarConfig: functions.GrammarConfig{
+				NoGrammar: true,
+			},
+		},
+	}
+
+	cfg := gallery.ModelConfig{
+		Name:        name,
+		Description: description,
+	}
+
+	if strings.HasSuffix(details.URI, ".gguf") {
+		cfg.Files = append(cfg.Files, gallery.File{
+			URI:      details.URI,
+			Filename: filepath.Base(details.URI),
+		})
+		modelConfig.PredictionOptions = schema.PredictionOptions{
+			BasicModelRequest: schema.BasicModelRequest{
+				Model: filepath.Base(details.URI),
+			},
+		}
+	} else if details.HuggingFace != nil {
+		// Prefer files matching a requested quant; else fall back to the last mmproj/gguf seen.
+		var lastMMProjFile *gallery.File
+		var lastGGUFFile *gallery.File
+		foundPreferedQuant := false
+		foundPreferedMMprojQuant := false
+
+		for _, file := range details.HuggingFace.Files {
+			// Collect mmproj files matching the preferred mmproj quants
+			if strings.Contains(strings.ToLower(file.Path), "mmproj") {
+				lastMMProjFile = &gallery.File{
+					URI:      file.URL,
+					Filename: filepath.Join("mmproj", filepath.Base(file.Path)),
+					SHA256:   file.SHA256,
+				}
+				if slices.ContainsFunc(mmprojQuantsList, func(quant string) bool {
+					return strings.Contains(strings.ToLower(file.Path), strings.ToLower(quant))
+				}) {
+					cfg.Files = append(cfg.Files, *lastMMProjFile)
+					foundPreferedMMprojQuant = true
+				}
+			} else if strings.HasSuffix(strings.ToLower(file.Path), ".gguf") {
+				lastGGUFFile = &gallery.File{
+					URI:      file.URL,
+					Filename: filepath.Base(file.Path),
+					SHA256:   file.SHA256,
+				}
+				// Collect model files matching the preferred quants
+				if slices.ContainsFunc(quants, func(quant string) bool {
+					return strings.Contains(strings.ToLower(file.Path), strings.ToLower(quant))
+				}) {
+					foundPreferedQuant = true
+					cfg.Files = append(cfg.Files, *lastGGUFFile)
+				}
+			}
+		}
+
+		// Make sure to add at least one file if not already present (which is the latest one)
+		if lastMMProjFile != nil && !foundPreferedMMprojQuant {
+			if !slices.ContainsFunc(cfg.Files, func(f gallery.File) bool {
+				return f.Filename == lastMMProjFile.Filename
+			}) {
+				cfg.Files = append(cfg.Files, *lastMMProjFile)
+			}
+		}
+
+		if lastGGUFFile != nil && !foundPreferedQuant {
+			if !slices.ContainsFunc(cfg.Files, func(f gallery.File) bool {
+				return f.Filename == lastGGUFFile.Filename
+			}) {
+				cfg.Files = append(cfg.Files, *lastGGUFFile)
+			}
+		}
+
+		// Find first mmproj file and configure it in the config file
+		for _, file := range cfg.Files {
+			if !strings.Contains(strings.ToLower(file.Filename), "mmproj") {
+				continue
+			}
+			modelConfig.MMProj = file.Filename
+			break
+		}
+
+		// Find first non-mmproj file and configure it in the config file
+		for _, file := range cfg.Files {
+			if strings.Contains(strings.ToLower(file.Filename), "mmproj") {
+				continue
+			}
+			modelConfig.PredictionOptions = schema.PredictionOptions{
+				BasicModelRequest: schema.BasicModelRequest{
+					Model: file.Filename,
+				},
+			}
+			break
+		}
+	}
+
+	data, err := yaml.Marshal(modelConfig)
+	if err != nil {
+		return gallery.ModelConfig{}, err
+	}
+
+	cfg.ConfigFile = string(data)
+	return cfg, nil
+}
--- a/core/gallery/importers/llama-cpp_test.go
+++ b/core/gallery/importers/llama-cpp_test.go
@@ -0,0 +1,131 @@
+package importers_test
+
+import (
+	"encoding/json"
+	"fmt"
+
+	"github.com/mudler/LocalAI/core/gallery/importers"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("LlamaCPPImporter", func() {
+	var imp *importers.LlamaCPPImporter
+
+	BeforeEach(func() {
+		imp = new(importers.LlamaCPPImporter)
+	})
+
+	Context("Match", func() {
+		It("should match when URI ends with .gguf", func() {
+			// No preferences at all: the file extension alone decides.
+			input := importers.Details{
+				URI: "https://example.com/model.gguf",
+			}
+
+			Expect(imp.Match(input)).To(BeTrue())
+		})
+
+		It("should match when backend preference is llama-cpp", func() {
+			// The URI has no .gguf suffix, so only the preference can match.
+			input := importers.Details{
+				URI:         "https://example.com/model",
+				Preferences: json.RawMessage(`{"backend": "llama-cpp"}`),
+			}
+
+			matched := imp.Match(input)
+			Expect(matched).To(BeTrue())
+		})
+
+		It("should not match when URI does not end with .gguf and no backend preference", func() {
+			// Neither an extension nor a preference points at llama-cpp.
+			input := importers.Details{
+				URI: "https://example.com/model.bin",
+			}
+
+			Expect(imp.Match(input)).To(BeFalse())
+		})
+
+		It("should not match when backend preference is different", func() {
+			// A preference for another backend must not be claimed.
+			input := importers.Details{
+				URI:         "https://example.com/model",
+				Preferences: json.RawMessage(`{"backend": "mlx"}`),
+			}
+
+			matched := imp.Match(input)
+			Expect(matched).To(BeFalse())
+		})
+
+		It("should return false when JSON preferences are invalid", func() {
+			// The preferences are decoded before the URI is inspected, so a
+			// matching .gguf suffix does not rescue undecodable JSON.
+			input := importers.Details{
+				URI:         "https://example.com/model.gguf",
+				Preferences: json.RawMessage(`invalid json`),
+			}
+
+			matched := imp.Match(input)
+			Expect(matched).To(BeFalse())
+		})
+	})
+
+	Context("Import", func() {
+		It("should import model config with default name and description", func() {
+			const uri = "https://example.com/my-model.gguf"
+
+			got, err := imp.Import(importers.Details{URI: uri})
+			Expect(err).ToNot(HaveOccurred())
+			dump := fmt.Sprintf("Model config: %+v", got)
+
+			// Name and description fall back to values derived from the URI.
+			Expect(got.Name).To(Equal("my-model.gguf"))
+			Expect(got.Description).To(Equal("Imported from " + uri))
+			Expect(got.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
+			Expect(len(got.Files)).To(Equal(1), dump)
+			Expect(got.Files[0].URI).To(Equal(uri), dump)
+			Expect(got.Files[0].Filename).To(Equal("my-model.gguf"), dump)
+		})
+
+		It("should import model config with custom name and description from preferences", func() {
+			const uri = "https://example.com/my-model.gguf"
+			input := importers.Details{
+				URI:         uri,
+				Preferences: json.RawMessage(`{"name": "custom-model", "description": "Custom description"}`),
+			}
+
+			got, err := imp.Import(input)
+			Expect(err).ToNot(HaveOccurred())
+			dump := fmt.Sprintf("Model config: %+v", got)
+			Expect(got.Name).To(Equal("custom-model"))
+			Expect(got.Description).To(Equal("Custom description"))
+			Expect(len(got.Files)).To(Equal(1), dump)
+			Expect(got.Files[0].URI).To(Equal(uri), dump)
+			Expect(got.Files[0].Filename).To(Equal("my-model.gguf"), dump)
+		})
+
+		It("should handle invalid JSON preferences", func() {
+			// Undecodable preferences must surface as an error.
+			input := importers.Details{
+				URI:         "https://example.com/my-model.gguf",
+				Preferences: json.RawMessage(`invalid json`),
+			}
+
+			_, err := imp.Import(input)
+			Expect(err).To(HaveOccurred())
+		})
+
+		It("should extract filename correctly from URI with path", func() {
+			const uri = "https://example.com/path/to/model.gguf"
+
+			got, err := imp.Import(importers.Details{URI: uri})
+			Expect(err).ToNot(HaveOccurred())
+			dump := fmt.Sprintf("Model config: %+v", got)
+
+			// Only the last path element is kept as the local file name.
+			Expect(len(got.Files)).To(Equal(1), dump)
+			Expect(got.Files[0].URI).To(Equal(uri), dump)
+			Expect(got.Files[0].Filename).To(Equal("model.gguf"), dump)
+		})
+	})
+})
--- a/core/gallery/importers/mlx.go
+++ b/core/gallery/importers/mlx.go
@@ -0,0 +1,94 @@
+package importers
+
+import (
+	"encoding/json"
+	"path/filepath"
+	"strings"
+
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/schema"
+	"go.yaml.in/yaml/v2"
+)
+
+var _ Importer = &MLXImporter{}
+
+type MLXImporter struct{}
+// Match reports whether details should be handled by the MLX importer: either the
+// "backend" preference is "mlx" or "mlx-vlm", or the URI contains "mlx-community/".
+// Preferences that fail to decode never match.
+func (i *MLXImporter) Match(details Details) bool {
+	raw, err := details.Preferences.MarshalJSON()
+	if err != nil {
+		return false
+	}
+	prefs := make(map[string]any)
+	if err := json.Unmarshal(raw, &prefs); err != nil {
+		return false
+	}
+
+	// A missing or non-string "backend" yields "", which matches neither case.
+	backend, _ := prefs["backend"].(string)
+	switch backend {
+	case "mlx", "mlx-vlm":
+		return true
+	}
+
+	// Otherwise accept any URI that mentions the mlx-community organisation,
+	// e.g. https://huggingface.co/mlx-community/<model>.
+	return strings.Contains(details.URI, "mlx-community/")
+}
+// Import builds the gallery entry for an MLX model. The name defaults to the last
+// element of the URI, the description to "Imported from <URI>" and the backend to
+// "mlx"; each can be overridden through the matching string preference.
+// Errors come from decoding the preferences or from marshalling the configuration.
+func (i *MLXImporter) Import(details Details) (gallery.ModelConfig, error) {
+	raw, err := details.Preferences.MarshalJSON()
+	if err != nil {
+		return gallery.ModelConfig{}, err
+	}
+	prefs := make(map[string]any)
+	if err := json.Unmarshal(raw, &prefs); err != nil {
+		return gallery.ModelConfig{}, err
+	}
+
+	// stringPref returns the preference stored under key, or fallback when it is
+	// absent or not a string.
+	stringPref := func(key, fallback string) string {
+		if v, ok := prefs[key].(string); ok {
+			return v
+		}
+		return fallback
+	}
+
+	name := stringPref("name", filepath.Base(details.URI))
+	description := stringPref("description", "Imported from "+details.URI)
+	backend := stringPref("backend", "mlx")
+
+	// The generated configuration references the URI itself as the model; the
+	// returned entry lists no files.
+	rendered, err := yaml.Marshal(config.ModelConfig{
+		Name:                name,
+		Description:         description,
+		KnownUsecaseStrings: []string{"chat"},
+		Backend:             backend,
+		PredictionOptions: schema.PredictionOptions{
+			BasicModelRequest: schema.BasicModelRequest{
+				Model: details.URI,
+			},
+		},
+		TemplateConfig: config.TemplateConfig{
+			UseTokenizerTemplate: true,
+		},
+	})
+	if err != nil {
+		return gallery.ModelConfig{}, err
+	}
+
+	// The YAML rendered above becomes the entry's configuration file.
+	return gallery.ModelConfig{
+		Name:        name,
+		Description: description,
+		ConfigFile:  string(rendered),
+	}, nil
+}
--- a/core/gallery/importers/mlx_test.go
+++ b/core/gallery/importers/mlx_test.go
@@ -0,0 +1,147 @@
+package importers_test
+
+import (
+	"encoding/json"
+
+	"github.com/mudler/LocalAI/core/gallery/importers"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("MLXImporter", func() {
+	var imp *importers.MLXImporter
+
+	BeforeEach(func() {
+		imp = new(importers.MLXImporter)
+	})
+
+	Context("Match", func() {
+		It("should match when URI contains mlx-community/", func() {
+			// No preferences: the organisation in the URI is enough.
+			input := importers.Details{
+				URI: "https://huggingface.co/mlx-community/test-model",
+			}
+
+			Expect(imp.Match(input)).To(BeTrue())
+		})
+
+		It("should match when backend preference is mlx", func() {
+			// The URI is not under mlx-community/, so only the preference can match.
+			input := importers.Details{
+				URI:         "https://example.com/model",
+				Preferences: json.RawMessage(`{"backend": "mlx"}`),
+			}
+
+			matched := imp.Match(input)
+			Expect(matched).To(BeTrue())
+		})
+
+		It("should match when backend preference is mlx-vlm", func() {
+			// The vision-language variant of the backend is accepted as well.
+			input := importers.Details{
+				URI:         "https://example.com/model",
+				Preferences: json.RawMessage(`{"backend": "mlx-vlm"}`),
+			}
+
+			matched := imp.Match(input)
+			Expect(matched).To(BeTrue())
+		})
+
+		It("should not match when URI does not contain mlx-community/ and no backend preference", func() {
+			// Another organisation and no preference: nothing points at MLX.
+			input := importers.Details{
+				URI: "https://huggingface.co/other-org/test-model",
+			}
+
+			Expect(imp.Match(input)).To(BeFalse())
+		})
+
+		It("should not match when backend preference is different", func() {
+			// A preference for another backend must not be claimed.
+			input := importers.Details{
+				URI:         "https://example.com/model",
+				Preferences: json.RawMessage(`{"backend": "llama-cpp"}`),
+			}
+
+			matched := imp.Match(input)
+			Expect(matched).To(BeFalse())
+		})
+
+		It("should return false when JSON preferences are invalid", func() {
+			// The preferences are decoded before the URI is inspected, so a
+			// matching mlx-community/ URI does not rescue undecodable JSON.
+			input := importers.Details{
+				URI:         "https://huggingface.co/mlx-community/test-model",
+				Preferences: json.RawMessage(`invalid json`),
+			}
+
+			matched := imp.Match(input)
+			Expect(matched).To(BeFalse())
+		})
+	})
+
+	Context("Import", func() {
+		It("should import model config with default name and description", func() {
+			const uri = "https://huggingface.co/mlx-community/test-model"
+
+			got, err := imp.Import(importers.Details{URI: uri})
+			Expect(err).ToNot(HaveOccurred())
+
+			// Name and description are derived from the URI; the generated
+			// configuration targets the mlx backend and references the URI as model.
+			Expect(got.Name).To(Equal("test-model"))
+			Expect(got.Description).To(Equal("Imported from " + uri))
+			Expect(got.ConfigFile).To(ContainSubstring("backend: mlx"))
+			Expect(got.ConfigFile).To(ContainSubstring("model: " + uri))
+		})
+
+		It("should import model config with custom name and description from preferences", func() {
+			// Both values come from the preferences instead of the URI.
+			input := importers.Details{
+				URI:         "https://huggingface.co/mlx-community/test-model",
+				Preferences: json.RawMessage(`{"name": "custom-mlx-model", "description": "Custom MLX description"}`),
+			}
+
+			got, err := imp.Import(input)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(got.Name).To(Equal("custom-mlx-model"))
+			Expect(got.Description).To(Equal("Custom MLX description"))
+		})
+
+		It("should use custom backend from preferences", func() {
+			// The requested backend replaces the default in the generated config.
+			input := importers.Details{
+				URI:         "https://huggingface.co/mlx-community/test-model",
+				Preferences: json.RawMessage(`{"backend": "mlx-vlm"}`),
+			}
+
+			got, err := imp.Import(input)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(got.ConfigFile).To(ContainSubstring("backend: mlx-vlm"))
+		})
+
+		It("should handle invalid JSON preferences", func() {
+			// Undecodable preferences must surface as an error.
+			input := importers.Details{
+				URI:         "https://huggingface.co/mlx-community/test-model",
+				Preferences: json.RawMessage(`invalid json`),
+			}
+
+			_, err := imp.Import(input)
+			Expect(err).To(HaveOccurred())
+		})
+
+		It("should extract filename correctly from URI with path", func() {
+			// Only the last path element becomes the model name.
+			input := importers.Details{
+				URI: "https://huggingface.co/mlx-community/path/to/model",
+			}
+
+			got, err := imp.Import(input)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(got.Name).To(Equal("model"))
+		})
+	})
+})
--- a/core/http/app.go
+++ b/core/http/app.go
@@ -200,11 +200,16 @@ func API(application *application.Application) (*fiber.App, error) {
 	requestExtractor := middleware.NewRequestExtractor(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())

 	routes.RegisterElevenLabsRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
-	routes.RegisterLocalAIRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService())
+	
+	// Create opcache for tracking UI operations (used by both UI and LocalAI routes)
+	var opcache *services.OpCache
+	if !application.ApplicationConfig().DisableWebUI {
+		opcache = services.NewOpCache(application.GalleryService())
+	}
+	
+	routes.RegisterLocalAIRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache)
 	routes.RegisterOpenAIRoutes(router, requestExtractor, application)
 	if !application.ApplicationConfig().DisableWebUI {
-		// Create opcache for tracking UI operations
-		opcache := services.NewOpCache(application.GalleryService())
 		routes.RegisterUIAPIRoutes(router, application.ModelConfigLoader(), application.ApplicationConfig(), application.GalleryService(), opcache)
 		routes.RegisterUIRoutes(router, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService())
 	}
--- a/core/http/endpoints/localai/backend.go
+++ b/core/http/endpoints/localai/backend.go
@@ -76,7 +76,7 @@ func (mgs *BackendEndpointService) ApplyBackendEndpoint() func(c *fiber.Ctx) err
 		if err != nil {
 			return err
 		}
-		mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend]{
+		mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend, any]{
 			ID:                 uuid.String(),
 			GalleryElementName: input.ID,
 			Galleries:          mgs.galleries,
@@ -95,7 +95,7 @@ func (mgs *BackendEndpointService) DeleteBackendEndpoint() func(c *fiber.Ctx) er
 	return func(c *fiber.Ctx) error {
 		backendName := c.Params("name")

-		mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend]{
+		mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend, any]{
 			Delete:             true,
 			GalleryElementName: backendName,
 			Galleries:          mgs.galleries,
--- a/core/http/endpoints/localai/edit_model.go
+++ b/core/http/endpoints/localai/edit_model.go
@@ -155,7 +155,7 @@ func EditModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.Applicati
 		}

 		// Reload configurations
-		if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath); err != nil {
+		if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath, appConfig.ToConfigLoaderOptions()...); err != nil {
 			response := ModelResponse{
 				Success: false,
 				Error:   "Failed to reload configurations: " + err.Error(),
--- a/core/http/endpoints/localai/gallery.go
+++ b/core/http/endpoints/localai/gallery.go
@@ -77,7 +77,7 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
 		if err != nil {
 			return err
 		}
-		mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{
+		mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
 			Req:                input.GalleryModel,
 			ID:                 uuid.String(),
 			GalleryElementName: input.ID,
@@ -98,7 +98,7 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
 	return func(c *fiber.Ctx) error {
 		modelName := c.Params("name")

-		mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{
+		mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
 			Delete:             true,
 			GalleryElementName: modelName,
 		}
--- a/core/http/endpoints/localai/import_model.go
+++ b/core/http/endpoints/localai/import_model.go
@@ -2,16 +2,72 @@ package localai

 import (
 	"encoding/json"
+	"fmt"
 	"os"
 	"path/filepath"
 	"strings"

 	"github.com/gofiber/fiber/v2"
+	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/gallery/importers"
+	httpUtils "github.com/mudler/LocalAI/core/http/utils"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/pkg/utils"
+
 	"gopkg.in/yaml.v3"
 )

+// ImportModelURIEndpoint returns the handler that imports a model from a URI: it
+// discovers a model configuration for the request's URI and preferences, queues a
+// gallery operation for it and replies with the job ID and its status URL.
+func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache) fiber.Handler {
+	return func(c *fiber.Ctx) error {
+		req := new(schema.ImportModelRequest)
+		if err := c.BodyParser(req); err != nil {
+			return err
+		}
+
+		discovered, err := importers.DiscoverModelConfig(req.URI, req.Preferences)
+		if err != nil {
+			return fmt.Errorf("failed to discover model config: %w", err)
+		}
+
+		jobUUID, err := uuid.NewUUID()
+		if err != nil {
+			return err
+		}
+		jobID := jobUUID.String()
+
+		// Track the job under the discovered model name, or under the URI when there is none.
+		trackingKey := discovered.Name
+		if trackingKey == "" {
+			trackingKey = req.URI
+		}
+
+		// The cache is optional; when present the job is registered for UI progress tracking.
+		if opcache != nil {
+			opcache.Set(trackingKey, jobID)
+		}
+
+		galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
+			Req:                gallery.GalleryModel{Overrides: map[string]any{}},
+			ID:                 jobID,
+			GalleryElementName: trackingKey,
+			GalleryElement:     &discovered,
+			BackendGalleries:   appConfig.BackendGalleries,
+		}
+
+		statusURL := fmt.Sprintf("%smodels/jobs/%s", httpUtils.BaseURL(c), jobID)
+		return c.JSON(schema.GalleryResponse{
+			ID:        jobID,
+			StatusURL: statusURL,
+		})
+	}
+}
+
 // ImportModelEndpoint handles creating new model configurations
 func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) fiber.Handler {
 	return func(c *fiber.Ctx) error {
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -2,9 +2,10 @@ package openai

 import (
 	"bufio"
-	"bytes"
+	"context"
 	"encoding/json"
 	"fmt"
+	"net"
 	"time"

 	"github.com/gofiber/fiber/v2"
@@ -22,6 +23,59 @@ import (
 	"github.com/valyala/fasthttp"
 )

+// handleConnectionCancellation starts a goroutine that polls the request's underlying
+// connection and calls cancelFunc once a read fails with a non-timeout error (for
+// example because the client went away). The goroutine stops when requestCtx is done.
+//
+// NOTE: this is a bad WORKAROUND and should be replaced: fasthttp does not propagate
+// client disconnects to the caller's context for non-streaming requests.
+// see: https://github.com/mudler/LocalAI/pull/7187#issuecomment-3506720906
+func handleConnectionCancellation(c *fiber.Ctx, cancelFunc func(), requestCtx context.Context) {
+	var conn net.Conn = c.Context().Conn()
+	if conn == nil {
+		return // no connection to watch
+	}
+
+	go func() {
+		defer func() {
+			// Clear the read deadline when the goroutine exits (error deliberately ignored)
+			conn.SetReadDeadline(time.Time{})
+		}()
+
+		buf := make([]byte, 1)
+		// Use a short read deadline to periodically check if connection is closed
+		// Without a deadline, Read() would block indefinitely waiting for data
+		// that will never come (client is waiting for response, not sending more data)
+		ticker := time.NewTicker(100 * time.Millisecond)
+		defer ticker.Stop()
+
+		for {
+			select {
+			case <-requestCtx.Done():
+				// Request completed or was cancelled - exit goroutine
+				return
+			case <-ticker.C:
+				// Set a short deadline - if connection is closed, read will fail immediately
+				// If connection is open but no data, it will timeout and we check again
+				conn.SetReadDeadline(time.Now().Add(50 * time.Millisecond))
+				_, err := conn.Read(buf) // NOTE(review): a successful read consumes and drops one byte
+				if err != nil {
+					// Check if it's a timeout (connection still open, just no data)
+					if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
+						// Timeout is expected - connection is still open, just no data to read
+						// Continue the loop to check again
+						continue
+					}
+					// Connection closed or other error - cancel the context to stop gRPC call
+					log.Debug().Msgf("Calling cancellation function")
+					cancelFunc()
+					return
+				}
+			}
+		}
+	}()
+}
+
 // ChatEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/chat/create
 // @Summary Generate a chat completions for a given prompt and model.
 // @Param request body schema.OpenAIRequest true "query params"
@@ -36,7 +90,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 			ID:      id,
 			Created: created,
 			Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
-			Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}},
+			Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant"}, Index: 0, FinishReason: nil}},
 			Object:  "chat.completion.chunk",
 		}
 		responses <- initialMessage
@@ -56,7 +110,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 				ID:      id,
 				Created: created,
 				Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
-				Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
+				Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0, FinishReason: nil}},
 				Object:  "chat.completion.chunk",
 				Usage:   usage,
 			}
@@ -90,7 +144,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 				ID:      id,
 				Created: created,
 				Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
-				Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}},
+				Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant"}, Index: 0, FinishReason: nil}},
 				Object:  "chat.completion.chunk",
 			}
 			responses <- initialMessage
@@ -114,7 +168,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 				ID:      id,
 				Created: created,
 				Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
-				Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}},
+				Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0, FinishReason: nil}},
 				Object:  "chat.completion.chunk",
 				Usage:   usage,
 			}
@@ -142,7 +196,10 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 									},
 								},
 							},
-						}}},
+						},
+						Index:        0,
+						FinishReason: nil,
+					}},
 					Object: "chat.completion.chunk",
 				}
 				responses <- initialMessage
@@ -165,7 +222,10 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 									},
 								},
 							},
-						}}},
+						},
+						Index:        0,
+						FinishReason: nil,
+					}},
 					Object: "chat.completion.chunk",
 				}
 			}
@@ -217,6 +277,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 			noActionDescription = config.FunctionsConfig.NoActionDescriptionName
 		}

+		// If we are using a response format, we need to generate a grammar for it
 		if config.ResponseFormatMap != nil {
 			d := schema.ChatCompletionResponseFormat{}
 			dat, err := json.Marshal(config.ResponseFormatMap)
@@ -260,6 +321,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 		}

 		switch {
+		// Generate the grammar with LocalAI's internal engine
 		case (!config.FunctionsConfig.GrammarConfig.NoGrammar || strictMode) && shouldUseFn:
 			noActionGrammar := functions.Function{
 				Name:        noActionName,
@@ -283,7 +345,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 				funcs = funcs.Select(config.FunctionToCall())
 			}

-			// Update input grammar
+			// Update input grammar or json_schema based on use_llama_grammar option
 			jsStruct := funcs.ToJSONStructure(config.FunctionsConfig.FunctionNameKey, config.FunctionsConfig.FunctionNameKey)
 			g, err := jsStruct.Grammar(config.FunctionsConfig.GrammarOptions()...)
 			if err == nil {
@@ -298,6 +360,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 			} else {
 				log.Error().Err(err).Msg("Failed generating grammar")
 			}
+
 		default:
 			// Force picking one of the functions by the request
 			if config.FunctionToCall() != "" {
@@ -316,7 +379,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator

 		// If we are using the tokenizer template, we don't need to process the messages
 		// unless we are processing functions
-		if !config.TemplateConfig.UseTokenizerTemplate || shouldUseFn {
+		if !config.TemplateConfig.UseTokenizerTemplate {
 			predInput = evaluator.TemplateMessages(*input, input.Messages, config, funcs, shouldUseFn)

 			log.Debug().Msgf("Prompt (after templating): %s", predInput)
@@ -355,6 +418,11 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 			LOOP:
 				for {
 					select {
+					case <-input.Context.Done():
+						// Context was cancelled (client disconnected or request cancelled)
+						log.Debug().Msgf("Request context cancelled, stopping stream")
+						input.Cancel()
+						break LOOP
 					case ev := <-responses:
 						if len(ev.Choices) == 0 {
 							log.Debug().Msgf("No choices in the response, skipping")
@@ -364,11 +432,14 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 						if len(ev.Choices[0].Delta.ToolCalls) > 0 {
 							toolsCalled = true
 						}
-						var buf bytes.Buffer
-						enc := json.NewEncoder(&buf)
-						enc.Encode(ev)
-						log.Debug().Msgf("Sending chunk: %s", buf.String())
-						_, err := fmt.Fprintf(w, "data: %v\n", buf.String())
+						respData, err := json.Marshal(ev)
+						if err != nil {
+							log.Debug().Msgf("Failed to marshal response: %v", err)
+							input.Cancel()
+							continue
+						}
+						log.Debug().Msgf("Sending chunk: %s", string(respData))
+						_, err = fmt.Fprintf(w, "data: %s\n\n", string(respData))
 						if err != nil {
 							log.Debug().Msgf("Sending chunk failed: %v", err)
 							input.Cancel()
@@ -380,22 +451,28 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 						}
 						log.Error().Msgf("Stream ended with error: %v", err)

+						stopReason := FinishReasonStop
 						resp := &schema.OpenAIResponse{
 							ID:      id,
 							Created: created,
 							Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
 							Choices: []schema.Choice{
 								{
-									FinishReason: "stop",
+									FinishReason: &stopReason,
 									Index:        0,
 									Delta:        &schema.Message{Content: "Internal error: " + err.Error()},
 								}},
 							Object: "chat.completion.chunk",
 							Usage:  *usage,
 						}
-						respData, _ := json.Marshal(resp)
-
-						w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
+						respData, marshalErr := json.Marshal(resp)
+						if marshalErr != nil {
+							log.Error().Msgf("Failed to marshal error response: %v", marshalErr)
+							// Send a simple error message as fallback
+							w.WriteString("data: {\"error\":\"Internal error\"}\n\n")
+						} else {
+							w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
+						}
 						w.WriteString("data: [DONE]\n\n")
 						w.Flush()

@@ -403,11 +480,11 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 					}
 				}

-				finishReason := "stop"
+				finishReason := FinishReasonStop
 				if toolsCalled && len(input.Tools) > 0 {
-					finishReason = "tool_calls"
+					finishReason = FinishReasonToolCalls
 				} else if toolsCalled {
-					finishReason = "function_call"
+					finishReason = FinishReasonFunctionCall
 				}

 				resp := &schema.OpenAIResponse{
@@ -416,9 +493,9 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 					Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
 					Choices: []schema.Choice{
 						{
-							FinishReason: finishReason,
+							FinishReason: &finishReason,
 							Index:        0,
-							Delta:        &schema.Message{Content: &textContentToReturn},
+							Delta:        &schema.Message{},
 						}},
 					Object: "chat.completion.chunk",
 					Usage:  *usage,
@@ -439,7 +516,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 			tokenCallback := func(s string, c *[]schema.Choice) {
 				if !shouldUseFn {
 					// no function is called, just reply and use stop as finish reason
-					*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
+					stopReason := FinishReasonStop
+					*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
 					return
 				}

@@ -457,12 +535,14 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 						return
 					}

+					stopReason := FinishReasonStop
 					*c = append(*c, schema.Choice{
-						FinishReason: "stop",
+						FinishReason: &stopReason,
 						Message:      &schema.Message{Role: "assistant", Content: &result}})
 				default:
+					toolCallsReason := FinishReasonToolCalls
 					toolChoice := schema.Choice{
-						FinishReason: "tool_calls",
+						FinishReason: &toolCallsReason,
 						Message: &schema.Message{
 							Role: "assistant",
 						},
@@ -486,8 +566,9 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 							)
 						} else {
 							// otherwise we return more choices directly (deprecated)
+							functionCallReason := FinishReasonFunctionCall
 							*c = append(*c, schema.Choice{
-								FinishReason: "function_call",
+								FinishReason: &functionCallReason,
 								Message: &schema.Message{
 									Role:    "assistant",
 									Content: &textContentToReturn,
@@ -508,6 +589,10 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator

 			}

+			// NOTE: this is a workaround as fasthttp
+			// context cancellation does not fire in non-streaming requests
+			handleConnectionCancellation(c, input.Cancel, input.Context)
+
 			result, tokenUsage, err := ComputeChoices(
 				input,
 				predInput,
@@ -597,7 +682,23 @@ func handleQuestion(config *config.ModelConfig, cl *config.ModelConfigLoader, in
 		audios = append(audios, m.StringAudios...)
 	}

-	predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, config, cl, o, nil)
+	// Serialize tools and tool_choice to JSON strings
+	toolsJSON := ""
+	if len(input.Tools) > 0 {
+		toolsBytes, err := json.Marshal(input.Tools)
+		if err == nil {
+			toolsJSON = string(toolsBytes)
+		}
+	}
+	toolChoiceJSON := ""
+	if input.ToolsChoice != nil {
+		toolChoiceBytes, err := json.Marshal(input.ToolsChoice)
+		if err == nil {
+			toolChoiceJSON = string(toolChoiceBytes)
+		}
+	}
+
+	predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, config, cl, o, nil, toolsJSON, toolChoiceJSON)
 	if err != nil {
 		log.Error().Err(err).Msg("model inference failed")
 		return "", err
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@@ -2,7 +2,6 @@ package openai

 import (
 	"bufio"
-	"bytes"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -47,8 +46,9 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
 				Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
 				Choices: []schema.Choice{
 					{
-						Index: 0,
-						Text:  s,
+						Index:        0,
+						Text:         s,
+						FinishReason: nil,
 					},
 				},
 				Object: "text_completion",
@@ -140,24 +140,49 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
 							log.Debug().Msgf("No choices in the response, skipping")
 							continue
 						}
-						var buf bytes.Buffer
-						enc := json.NewEncoder(&buf)
-						enc.Encode(ev)
+						respData, err := json.Marshal(ev)
+						if err != nil {
+							log.Debug().Msgf("Failed to marshal response: %v", err)
+							continue
+						}

-						log.Debug().Msgf("Sending chunk: %s", buf.String())
-						fmt.Fprintf(w, "data: %v\n", buf.String())
+						log.Debug().Msgf("Sending chunk: %s", string(respData))
+						fmt.Fprintf(w, "data: %s\n\n", string(respData))
 						w.Flush()
 					case err := <-ended:
 						if err == nil {
 							break LOOP
 						}
 						log.Error().Msgf("Stream ended with error: %v", err)
-						fmt.Fprintf(w, "data: %v\n", "Internal error: "+err.Error())
+
+						stopReason := FinishReasonStop
+						errorResp := schema.OpenAIResponse{
+							ID:      id,
+							Created: created,
+							Model:   input.Model,
+							Choices: []schema.Choice{
+								{
+									Index:        0,
+									FinishReason: &stopReason,
+									Text:         "Internal error: " + err.Error(),
+								},
+							},
+							Object: "text_completion",
+						}
+						errorData, marshalErr := json.Marshal(errorResp)
+						if marshalErr != nil {
+							log.Error().Msgf("Failed to marshal error response: %v", marshalErr)
+							// Send a simple error message as fallback
+							fmt.Fprintf(w, "data: {\"error\":\"Internal error\"}\n\n")
+						} else {
+							fmt.Fprintf(w, "data: %s\n\n", string(errorData))
+						}
 						w.Flush()
 						break LOOP
 					}
 				}

+				stopReason := FinishReasonStop
 				resp := &schema.OpenAIResponse{
 					ID:      id,
 					Created: created,
@@ -165,7 +190,7 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
 					Choices: []schema.Choice{
 						{
 							Index:        0,
-							FinishReason: "stop",
+							FinishReason: &stopReason,
 						},
 					},
 					Object: "text_completion",
@@ -197,7 +222,8 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva

 			r, tokenUsage, err := ComputeChoices(
 				input, i, config, cl, appConfig, ml, func(s string, c *[]schema.Choice) {
-					*c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k})
+					stopReason := FinishReasonStop
+					*c = append(*c, schema.Choice{Text: s, FinishReason: &stopReason, Index: k})
 				}, nil)
 			if err != nil {
 				return err
--- a/core/http/endpoints/openai/constants.go
+++ b/core/http/endpoints/openai/constants.go
@@ -0,0 +1,8 @@
+package openai
+
+// Finish reason values assigned to the FinishReason field of schema.Choice by the endpoints in this package.
+const (
+	FinishReasonStop         = "stop"          // default; used when the response does not report a tool or function call
+	FinishReasonToolCalls    = "tool_calls"    // tool calls were produced and the request supplied tools
+	FinishReasonFunctionCall = "function_call" // calls were produced without request tools (deprecated function-call path)
+)
--- a/core/http/endpoints/openai/inference.go
+++ b/core/http/endpoints/openai/inference.go
@@ -1,6 +1,8 @@
 package openai

 import (
+	"encoding/json"
+
 	"github.com/mudler/LocalAI/core/backend"
 	"github.com/mudler/LocalAI/core/config"

@@ -37,8 +39,25 @@ func ComputeChoices(
 		audios = append(audios, m.StringAudios...)
 	}

+	// Serialize tools and tool_choice to JSON strings
+	toolsJSON := ""
+	if len(req.Tools) > 0 {
+		toolsBytes, err := json.Marshal(req.Tools)
+		if err == nil {
+			toolsJSON = string(toolsBytes)
+		}
+	}
+	toolChoiceJSON := ""
+	if req.ToolsChoice != nil {
+		toolChoiceBytes, err := json.Marshal(req.ToolsChoice)
+		if err == nil {
+			toolChoiceJSON = string(toolChoiceBytes)
+		}
+	}
+
 	// get the model function to call for the result
-	predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, config, bcl, o, tokenCallback)
+	predFunc, err := backend.ModelInference(
+		req.Context, predInput, req.Messages, images, videos, audios, loader, config, bcl, o, tokenCallback, toolsJSON, toolChoiceJSON)
 	if err != nil {
 		return result, backend.TokenUsage{}, err
 	}
--- a/core/http/endpoints/openai/mcp.go
+++ b/core/http/endpoints/openai/mcp.go
@@ -1,6 +1,7 @@
 package openai

 import (
+	"context"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -50,12 +51,15 @@ func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
 		}

 		// Get MCP config from model config
-		remote, stdio := config.MCP.MCPConfigFromYAML()
+		remote, stdio, err := config.MCP.MCPConfigFromYAML()
+		if err != nil {
+			return fmt.Errorf("failed to get MCP config: %w", err)
+		}

 		// Check if we have tools in cache, or we have to have an initial connection
 		sessions, err := mcpTools.SessionsFromMCPConfig(config.Name, remote, stdio)
 		if err != nil {
-			return err
+			return fmt.Errorf("failed to get MCP sessions: %w", err)
 		}

 		if len(sessions) == 0 {
@@ -73,6 +77,10 @@ func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
 		if appConfig.ApiKeys != nil {
 			apiKey = appConfig.ApiKeys[0]
 		}
+
+		ctxWithCancellation, cancel := context.WithCancel(ctx)
+		defer cancel()
+		handleConnectionCancellation(c, cancel, ctxWithCancellation)
 		// TODO: instead of connecting to the API, we should just wire this internally
 		// and act like completion.go.
 		// We can do this as cogito expects an interface and we can create one that
@@ -83,7 +91,7 @@ func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
 			cogito.WithStatusCallback(func(s string) {
 				log.Debug().Msgf("[model agent] [model: %s] Status: %s", config.Name, s)
 			}),
-			cogito.WithContext(ctx),
+			cogito.WithContext(ctxWithCancellation),
 			cogito.WithMCPs(sessions...),
 			cogito.WithIterations(3),  // default to 3 iterations
 			cogito.WithMaxAttempts(3), // default to 3 attempts
--- a/core/http/endpoints/openai/realtime.go
+++ b/core/http/endpoints/openai/realtime.go
@@ -1072,7 +1072,8 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st
 		result, tokenUsage, err := ComputeChoices(input, prompt, config, startupOptions, ml, func(s string, c *[]schema.Choice) {
 			if !shouldUseFn {
 				// no function is called, just reply and use stop as finish reason
-				*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
+				stopReason := FinishReasonStop
+				*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
 				return
 			}

@@ -1099,7 +1100,8 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st
 				}

 				if len(input.Tools) > 0 {
-					toolChoice.FinishReason = "tool_calls"
+					toolCallsReason := FinishReasonToolCalls
+					toolChoice.FinishReason = &toolCallsReason
 				}

 				for _, ss := range results {
@@ -1120,8 +1122,9 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st
 						)
 					} else {
 						// otherwise we return more choices directly
+						functionCallReason := FinishReasonFunctionCall
 						*c = append(*c, schema.Choice{
-							FinishReason: "function_call",
+							FinishReason: &functionCallReason,
 							Message: &schema.Message{
 								Role:    "assistant",
 								Content: &textContentToReturn,
--- a/core/http/middleware/request.go
+++ b/core/http/middleware/request.go
@@ -15,6 +15,7 @@ import (
 	"github.com/mudler/LocalAI/pkg/functions"
 	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/mudler/LocalAI/pkg/utils"
+	"github.com/valyala/fasthttp"

 	"github.com/gofiber/fiber/v2"
 	"github.com/rs/zerolog/log"
@@ -161,7 +162,18 @@ func (re *RequestExtractor) SetOpenAIRequest(ctx *fiber.Ctx) error {
 	correlationID := ctx.Get("X-Correlation-ID", uuid.New().String())
 	ctx.Set("X-Correlation-ID", correlationID)

+	// NOTE(review): the goroutine below returns only after the Fiber context's Done() fires; confirm it cannot outlive the request.
+	// Use the application context as parent to ensure cancellation on app shutdown
+	// We'll monitor the Fiber context separately and cancel our context when the request is canceled
 	c1, cancel := context.WithCancel(re.applicationConfig.Context)
+	// Monitor the Fiber context and cancel our context when it's canceled
+	// This ensures we respect request cancellation without causing panics
+	go func(fiberCtx *fasthttp.RequestCtx) {
+		if fiberCtx != nil {
+			<-fiberCtx.Done()
+			cancel()
+		}
+	}(ctx.Context())
 	// Add the correlation ID to the new context
 	ctxWithCorrelationID := context.WithValue(c1, CorrelationIDKey, correlationID)

--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -18,7 +18,8 @@ func RegisterLocalAIRoutes(router *fiber.App,
 	cl *config.ModelConfigLoader,
 	ml *model.ModelLoader,
 	appConfig *config.ApplicationConfig,
-	galleryService *services.GalleryService) {
+	galleryService *services.GalleryService,
+	opcache *services.OpCache) {

 	router.Get("/swagger/*", swagger.HandlerDefault) // default

@@ -57,6 +58,9 @@ func RegisterLocalAIRoutes(router *fiber.App,
 		// Custom model import endpoint
 		router.Post("/models/import", localai.ImportModelEndpoint(cl, appConfig))

+		// URI model import endpoint
+		router.Post("/models/import-uri", localai.ImportModelURIEndpoint(cl, appConfig, galleryService, opcache))
+
 		// Custom model edit endpoint
 		router.Post("/models/edit/:name", localai.EditModelEndpoint(cl, appConfig))

--- a/core/http/routes/ui.go
+++ b/core/http/routes/ui.go
@@ -91,11 +91,15 @@ func RegisterUIRoutes(app *fiber.App,
 		}

 		title := "LocalAI - Chat"
+		var modelContextSize *int

 		for _, b := range modelConfigs {
 			if b.HasUsecases(config.FLAG_CHAT) {
 				modelThatCanBeUsed = b.Name
 				title = "LocalAI - Chat with " + modelThatCanBeUsed
+				if b.LLMConfig.ContextSize != nil {
+					modelContextSize = b.LLMConfig.ContextSize
+				}
 				break
 			}
 		}
@@ -107,6 +111,7 @@ func RegisterUIRoutes(app *fiber.App,
 			"GalleryConfig":       galleryConfigs,
 			"ModelsConfig":        modelConfigs,
 			"Model":               modelThatCanBeUsed,
+			"ContextSize":         modelContextSize,
 			"Version":             internal.PrintableVersion(),
 		}

@@ -120,6 +125,8 @@ func RegisterUIRoutes(app *fiber.App,
 		modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY)

 		galleryConfigs := map[string]*gallery.ModelConfig{}
+		modelName := c.Params("model")
+		var modelContextSize *int

 		for _, m := range modelConfigs {
 			cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name)
@@ -127,15 +134,19 @@ func RegisterUIRoutes(app *fiber.App,
 				continue
 			}
 			galleryConfigs[m.Name] = cfg
+			if m.Name == modelName && m.LLMConfig.ContextSize != nil {
+				modelContextSize = m.LLMConfig.ContextSize
+			}
 		}

 		summary := fiber.Map{
-			"Title":               "LocalAI - Chat with " + c.Params("model"),
+			"Title":               "LocalAI - Chat with " + modelName,
 			"BaseURL":             utils.BaseURL(c),
 			"ModelsConfig":        modelConfigs,
 			"GalleryConfig":       galleryConfigs,
 			"ModelsWithoutConfig": modelsWithoutConfig,
-			"Model":               c.Params("model"),
+			"Model":               modelName,
+			"ContextSize":         modelContextSize,
 			"Version":             internal.PrintableVersion(),
 		}

--- a/core/http/routes/ui_api.go
+++ b/core/http/routes/ui_api.go
@@ -200,6 +200,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
 				"jobID":           jobID,
 				"isDeletion":      isDeletionOp,
 				"trustRemoteCode": trustRemoteCodeExists,
+				"additionalFiles": m.AdditionalFiles,
 			})
 		}

@@ -247,7 +248,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
 		uid := id.String()
 		opcache.Set(galleryID, uid)

-		op := services.GalleryOp[gallery.GalleryModel]{
+		op := services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
 			ID:                 uid,
 			GalleryElementName: galleryID,
 			Galleries:          appConfig.Galleries,
@@ -290,7 +291,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig

 		opcache.Set(galleryID, uid)

-		op := services.GalleryOp[gallery.GalleryModel]{
+		op := services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
 			ID:                 uid,
 			Delete:             true,
 			GalleryElementName: galleryName,
@@ -525,7 +526,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
 		uid := id.String()
 		opcache.Set(backendID, uid)

-		op := services.GalleryOp[gallery.GalleryBackend]{
+		op := services.GalleryOp[gallery.GalleryBackend, any]{
 			ID:                 uid,
 			GalleryElementName: backendID,
 			Galleries:          appConfig.BackendGalleries,
@@ -567,7 +568,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig

 		opcache.Set(backendID, uid)

-		op := services.GalleryOp[gallery.GalleryBackend]{
+		op := services.GalleryOp[gallery.GalleryBackend, any]{
 			ID:                 uid,
 			Delete:             true,
 			GalleryElementName: backendName,
--- a/core/http/static/chat.js
+++ b/core/http/static/chat.js
@@ -27,21 +27,85 @@ SOFTWARE.

 */

+// Global variable to store the current AbortController
+let currentAbortController = null;
+let currentReader = null;
+let requestStartTime = null;
+let tokensReceived = 0;
+let tokensPerSecondInterval = null;
+let lastTokensPerSecond = null; // Store the last calculated rate
+
 function toggleLoader(show) {
-  const loader = document.getElementById('loader');
  const sendButton = document.getElementById('send-button');
+  const stopButton = document.getElementById('stop-button');
+  const headerLoadingIndicator = document.getElementById('header-loading-indicator');
+  const tokensPerSecondDisplay = document.getElementById('tokens-per-second');
  
  if (show) {
-    loader.style.display = 'block';
    sendButton.style.display = 'none';
-    document.getElementById("input").disabled = true;
+    stopButton.style.display = 'block';
+    if (headerLoadingIndicator) headerLoadingIndicator.style.display = 'block';
+    // Reset token tracking
+    requestStartTime = Date.now();
+    tokensReceived = 0;
+    
+    // Start updating tokens/second display
+    if (tokensPerSecondDisplay) {
+      tokensPerSecondDisplay.textContent = '-';
+      updateTokensPerSecond();
+      tokensPerSecondInterval = setInterval(updateTokensPerSecond, 500); // Update every 500ms
+    }
  } else {
-    document.getElementById("input").disabled = false;
-    loader.style.display = 'none';
    sendButton.style.display = 'block';
+    stopButton.style.display = 'none';
+    if (headerLoadingIndicator) headerLoadingIndicator.style.display = 'none';
+    // Stop updating but keep the last value visible
+    if (tokensPerSecondInterval) {
+      clearInterval(tokensPerSecondInterval);
+      tokensPerSecondInterval = null;
+    }
+    // Keep the last calculated rate visible
+    if (tokensPerSecondDisplay && lastTokensPerSecond !== null) {
+      tokensPerSecondDisplay.textContent = lastTokensPerSecond;
+    }
+    currentAbortController = null;
+    currentReader = null;
+    requestStartTime = null;
+    tokensReceived = 0;
  }
 }

+// Refreshes the tokens/second readout from the running token estimate and the elapsed request time.
+function updateTokensPerSecond() {
+  const display = document.getElementById('tokens-per-second');
+  if (!display || !requestStartTime) return;
+  const seconds = (Date.now() - requestStartTime) / 1000;
+  if (!(seconds > 0)) return; // no measurable time elapsed yet: leave the readout untouched
+  if (tokensReceived > 0) {
+    // Remember the formatted rate so it can stay visible after the request ends
+    lastTokensPerSecond = `${(tokensReceived / seconds).toFixed(1)} tokens/s`;
+    display.textContent = lastTokensPerSecond;
+  } else {
+    display.textContent = '-';
+  }
+}
+
+// Aborts the in-flight request (pending fetch and/or stream reader, whichever exist),
+// restores the idle UI state and records a cancellation notice in the chat.
+function stopRequest() {
+  if (currentAbortController) {
+    currentAbortController.abort();
+    currentAbortController = null;
+  }
+  if (currentReader) {
+    // cancel() returns a promise that may reject (e.g. stream already errored by the abort); swallow it
+    currentReader.cancel().catch(() => {});
+    currentReader = null;
+  }
+  toggleLoader(false);
+  Alpine.store("chat").add("assistant", `<span class='error'>Request cancelled by user</span>`);
+}
+
 function processThinkingTags(content) {
  const thinkingRegex = /<thinking>(.*?)<\/thinking>|<think>(.*?)<\/think>/gs;
  const parts = content.split(thinkingRegex);
@@ -149,9 +213,30 @@ function readInputFile() {

 function submitPrompt(event) {
  event.preventDefault();
+  
+  const input = document.getElementById("input");
+  if (!input) return;

-  const input = document.getElementById("input").value;
-  let fullInput = input;
+  const inputValue = input.value;
+  if (!inputValue.trim()) return; // Don't send empty messages
+
+  // If already processing, abort the current request and send the new one
+  if (currentAbortController || currentReader) {
+    // Abort current request
+    stopRequest();
+    // Small delay to ensure cleanup completes
+    setTimeout(() => {
+      // Continue with new request
+      processAndSendMessage(inputValue);
+    }, 100);
+    return;
+  }
+  
+  processAndSendMessage(inputValue);
+}
+
+function processAndSendMessage(inputValue) {
+  let fullInput = inputValue;
  
  // If there are file contents, append them to the input for the LLM
  if (fileContents.length > 0) {
@@ -162,7 +247,7 @@ function submitPrompt(event) {
  }
  
  // Show file icons in chat if there are files
-  let displayContent = input;
+  let displayContent = inputValue;
  if (currentFileNames.length > 0) {
    displayContent += "\n\n";
    currentFileNames.forEach(fileName => {
@@ -179,9 +264,15 @@ function submitPrompt(event) {
    history[history.length - 1].content = fullInput;
  }
  
-  document.getElementById("input").value = "";
+  const input = document.getElementById("input");
+  if (input) input.value = "";
  const systemPrompt = localStorage.getItem("system_prompt");
  Alpine.nextTick(() => { document.getElementById('messages').scrollIntoView(false); });
+  
+  // Reset token tracking before starting new request
+  requestStartTime = Date.now();
+  tokensReceived = 0;
+  
  promptGPT(systemPrompt, fullInput);
  
  // Reset file contents and names after sending
@@ -220,6 +311,12 @@ function readInputAudio() {
 async function promptGPT(systemPrompt, input) {
  const model = document.getElementById("chat-model").value;
  const mcpMode = Alpine.store("chat").mcpMode;
+  
+  // Reset current request usage tracking for new request
+  if (Alpine.store("chat")) {
+    Alpine.store("chat").tokenUsage.currentRequest = null;
+  }
+  
  toggleLoader(true);

  messages = Alpine.store("chat").messages();
@@ -295,8 +392,9 @@ async function promptGPT(systemPrompt, input) {
  
  let response;
  try {
-    // Create AbortController for timeout handling
+    // Create AbortController for timeout handling and stop button
    const controller = new AbortController();
+    currentAbortController = controller; // Store globally so stop button can abort it
    const timeoutId = setTimeout(() => controller.abort(), mcpMode ? 300000 : 30000); // 5 minutes for MCP, 30 seconds for regular
    
    response = await fetch(endpoint, {
@@ -311,11 +409,20 @@ async function promptGPT(systemPrompt, input) {
    
    clearTimeout(timeoutId);
  } catch (error) {
+    // Don't show error if request was aborted by user (stop button)
    if (error.name === 'AbortError') {
-      Alpine.store("chat").add(
-        "assistant",
-        `<span class='error'>Request timeout: MCP processing is taking longer than expected. Please try again.</span>`,
-      );
+      // Check if this was a user-initiated abort (stop button was clicked)
+      // If currentAbortController is null, it means stopRequest() was called and already handled the UI
+      if (!currentAbortController) {
+        // User clicked stop button - error message already shown by stopRequest()
+        return;
+      } else {
+        // Timeout error (controller was aborted by timeout, not user)
+        Alpine.store("chat").add(
+          "assistant",
+          `<span class='error'>Request timeout: MCP processing is taking longer than expected. Please try again.</span>`,
+        );
+      }
    } else {
      Alpine.store("chat").add(
        "assistant",
@@ -323,6 +430,7 @@ async function promptGPT(systemPrompt, input) {
      );
    }
    toggleLoader(false);
+    currentAbortController = null;
    return;
  }

@@ -332,6 +440,7 @@ async function promptGPT(systemPrompt, input) {
      `<span class='error'>Error: POST ${endpoint} ${response.status}</span>`,
    );
    toggleLoader(false);
+    currentAbortController = null;
    return;
  }

@@ -339,10 +448,35 @@ async function promptGPT(systemPrompt, input) {
    // Handle MCP non-streaming response
    try {
      const data = await response.json();
-      // MCP endpoint returns content in choices[0].text, not choices[0].message.content
-      const content = data.choices[0]?.text || "";
+      
+      // Update token usage if present
+      if (data.usage) {
+        Alpine.store("chat").updateTokenUsage(data.usage);
+      }
+      
+      // MCP endpoint returns content in choices[0].message.content (chat completion format)
+      // Fallback to choices[0].text for backward compatibility (completion format)
+      const content = data.choices[0]?.message?.content || data.choices[0]?.text || "";
+      
+      if (!content && (!data.choices || data.choices.length === 0)) {
+        Alpine.store("chat").add(
+          "assistant",
+          `<span class='error'>Error: Empty response from MCP endpoint</span>`,
+        );
+        toggleLoader(false);
+        return;
+      }
      
      if (content) {
+        // Count tokens for rate calculation (MCP mode - full content at once)
+        // Prefer actual token count from API if available
+        if (data.usage && data.usage.completion_tokens) {
+          tokensReceived = data.usage.completion_tokens;
+        } else {
+          tokensReceived += Math.ceil(content.length / 4);
+        }
+        updateTokensPerSecond();
+        
        // Process thinking tags using shared function
        const { regularContent, thinkingContent } = processThinkingTags(content);
        
@@ -360,10 +494,15 @@ async function promptGPT(systemPrompt, input) {
      // Highlight all code blocks
      hljs.highlightAll();
    } catch (error) {
-      Alpine.store("chat").add(
-        "assistant",
-        `<span class='error'>Error: Failed to parse MCP response</span>`,
-      );
+      // Don't show error if request was aborted by user
+      if (error.name !== 'AbortError' || currentAbortController) {
+        Alpine.store("chat").add(
+          "assistant",
+          `<span class='error'>Error: Failed to parse MCP response</span>`,
+        );
+      }
+    } finally {
+      currentAbortController = null;
    }
  } else {
    // Handle regular streaming response
@@ -376,13 +515,20 @@ async function promptGPT(systemPrompt, input) {
        "assistant",
        `<span class='error'>Error: Failed to decode API response</span>`,
      );
+      toggleLoader(false);
      return;
    }

+    // Store reader globally so stop button can cancel it
+    currentReader = reader;
+
    // Function to add content to the chat and handle DOM updates efficiently
    const addToChat = (token) => {
      const chatStore = Alpine.store("chat");
      chatStore.add("assistant", token);
+      // Count tokens for rate calculation (rough estimate: count characters/4)
+      tokensReceived += Math.ceil(token.length / 4);
+      updateTokensPerSecond();
      // Efficiently scroll into view without triggering multiple reflows
      // const messages = document.getElementById('messages');
      // messages.scrollTop = messages.scrollHeight;
@@ -413,6 +559,12 @@ async function promptGPT(systemPrompt, input) {
          if (line.startsWith("data: ")) {
            try {
              const jsonData = JSON.parse(line.substring(6));
+              
+              // Update token usage if present
+              if (jsonData.usage) {
+                Alpine.store("chat").updateTokenUsage(jsonData.usage);
+              }
+              
              const token = jsonData.choices[0].delta.content;

              if (token) {
@@ -437,6 +589,9 @@ async function promptGPT(systemPrompt, input) {
                // Handle content based on thinking state
                if (isThinking) {
                  thinkingContent += token;
+                  // Count tokens for rate calculation
+                  tokensReceived += Math.ceil(token.length / 4);
+                  updateTokensPerSecond();
                  // Update the last thinking message or create a new one
                  if (lastThinkingMessageIndex === -1) {
                    // Create new thinking message
@@ -479,13 +634,20 @@ async function promptGPT(systemPrompt, input) {
      // Highlight all code blocks once at the end
      hljs.highlightAll();
    } catch (error) {
-      Alpine.store("chat").add(
-        "assistant",
-        `<span class='error'>Error: Failed to process stream</span>`,
-      );
+      // Stay silent only on a user abort: stopRequest() has already cleared currentAbortController and reported it
+      if (error.name !== 'AbortError' || currentAbortController) {
+        Alpine.store("chat").add(
+          "assistant",
+          `<span class='error'>Error: Failed to process stream</span>`,
+        );
+      }
    } finally {
      // Perform any cleanup if necessary
-      reader.releaseLock();
+      if (reader) {
+        reader.releaseLock();
+      }
+      currentReader = null;
+      currentAbortController = null;
    }
  }

@@ -518,14 +680,71 @@ marked.setOptions({
  },
 });

+// Alpine store is now initialized in chat.html inline script to ensure it's available before Alpine processes the DOM
+// Only initialize if not already initialized (to avoid duplicate initialization)
 document.addEventListener("alpine:init", () => {
-  Alpine.store("chat", {
+  // Check if store already exists (initialized in chat.html)
+  if (!Alpine.store("chat")) {
+    // Fallback initialization (should not be needed if chat.html loads correctly)
+    Alpine.store("chat", {
    history: [],
    languages: [undefined],
    systemPrompt: "",
    mcpMode: false,
+    contextSize: null,
+    tokenUsage: {
+      promptTokens: 0,
+      completionTokens: 0,
+      totalTokens: 0,
+      currentRequest: null
+    },
    clear() {
      this.history.length = 0;
+      this.tokenUsage = {
+        promptTokens: 0,
+        completionTokens: 0,
+        totalTokens: 0,
+        currentRequest: null
+      };
+    },
+    updateTokenUsage(usage) {
+      // Usage values in streaming responses are cumulative totals for the current request
+      // We track session totals separately and only update when we see new (higher) values
+      if (usage) {
+        const currentRequest = this.tokenUsage.currentRequest || {
+          promptTokens: 0,
+          completionTokens: 0,
+          totalTokens: 0
+        };
+        
+        // Check if this is a new/updated usage (values increased)
+        const isNewUsage = 
+          (usage.prompt_tokens !== undefined && usage.prompt_tokens > currentRequest.promptTokens) ||
+          (usage.completion_tokens !== undefined && usage.completion_tokens > currentRequest.completionTokens) ||
+          (usage.total_tokens !== undefined && usage.total_tokens > currentRequest.totalTokens);
+        
+        if (isNewUsage) {
+          // Update session totals: subtract old request usage, add new
+          this.tokenUsage.promptTokens = this.tokenUsage.promptTokens - currentRequest.promptTokens + (usage.prompt_tokens || 0);
+          this.tokenUsage.completionTokens = this.tokenUsage.completionTokens - currentRequest.completionTokens + (usage.completion_tokens || 0);
+          this.tokenUsage.totalTokens = this.tokenUsage.totalTokens - currentRequest.totalTokens + (usage.total_tokens || 0);
+          
+          // Store current request usage
+          this.tokenUsage.currentRequest = {
+            promptTokens: usage.prompt_tokens || 0,
+            completionTokens: usage.completion_tokens || 0,
+            totalTokens: usage.total_tokens || 0
+          };
+        }
+      }
+    },
+    getRemainingTokens() {
+      if (!this.contextSize) return null;
+      return Math.max(0, this.contextSize - this.tokenUsage.totalTokens);
+    },
+    getContextUsagePercent() {
+      if (!this.contextSize) return null;
+      return Math.min(100, (this.tokenUsage.totalTokens / this.contextSize) * 100);
    },
    add(role, content, image, audio) {
      const N = this.history.length - 1;
@@ -590,5 +809,6 @@ document.addEventListener("alpine:init", () => {
        audio: message.audio,
      }));
    },
-  });
+    });
+  }
 });
--- a/core/http/views/backends.html
+++ b/core/http/views/backends.html
@@ -305,7 +305,7 @@
                                     class="rounded-t-lg max-h-48 max-w-96 object-cover mt-3" 
                                     loading="lazy">
                            </div>
-                            <p class="text-base leading-relaxed text-gray-500 dark:text-gray-400" x-text="selectedBackend?.description"></p>
+                            <div class="text-base leading-relaxed text-gray-500 dark:text-gray-400 break-words max-w-full markdown-content" x-html="renderMarkdown(selectedBackend?.description)"></div>
                            <template x-if="selectedBackend?.tags && selectedBackend.tags.length > 0">
                                <div>
                                    <p class="text-sm mb-3 font-semibold text-gray-900 dark:text-white">Tags</p>
@@ -439,6 +439,42 @@ tbody tr:last-child td:first-child {
 tbody tr:last-child td:last-child {
    border-bottom-right-radius: 1rem;
 }
+
+/* Markdown content overflow handling */
+.markdown-content {
+    word-wrap: break-word;
+    overflow-wrap: anywhere;
+    max-width: 100%;
+}
+
+.markdown-content pre {
+    overflow-x: auto;
+    max-width: 100%;
+    white-space: pre-wrap;
+    word-wrap: break-word;
+}
+
+.markdown-content code {
+    word-wrap: break-word;
+    overflow-wrap: break-word;
+}
+
+.markdown-content pre code {
+    white-space: pre;
+    overflow-x: auto;
+    display: block;
+}
+
+.markdown-content table {
+    max-width: 100%;
+    overflow-x: auto;
+    display: block;
+}
+
+.markdown-content img {
+    max-width: 100%;
+    height: auto;
+}
 </style>

 <script>
@@ -599,6 +635,20 @@ function backendsGallery() {
            }
        },
        
+        // Converts markdown to sanitized HTML. The result is bound with x-html, so the
+        // fallback paths return an HTML-escaped copy of the text instead of the raw input.
+        renderMarkdown(text) {
+            if (!text) return '';
+            const escaped = String(text).replace(/[&<>"']/g, (ch) => '&#' + ch.charCodeAt(0) + ';');
+            try {
+                if (typeof marked === 'undefined' || typeof DOMPurify === 'undefined') return escaped;
+                return DOMPurify.sanitize(marked.parse(text));
+            } catch (error) {
+                console.error('Error rendering markdown:', error);
+                return escaped;
+            }
+        },
+        
        openModal(backend) {
            this.selectedBackend = backend;
        },
--- a/core/http/views/chat.html
+++ b/core/http/views/chat.html
@@ -28,12 +28,167 @@ SOFTWARE.
 <!doctype html>
 <html lang="en">
  {{template "views/partials/head" .}}
-  <script defer src="static/chat.js"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
  <script>
    // Initialize PDF.js worker
    pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
  </script>
+  <script>
+    // Initialize Alpine store - must run before Alpine processes DOM
+    // Get context size from template
+    var __chatContextSize = null;
+    {{ if .ContextSize }}
+    __chatContextSize = {{ .ContextSize }};
+    {{ end }}
+
+    // Function to initialize store
+    // Creates the shared Alpine "chat" store (history, system prompt, token accounting).
+    // If the store already exists, only its contextSize is refreshed from the template value.
+    function __initChatStore() {
+      if (!window.Alpine) return;
+      if (Alpine.store("chat")) {
+        Alpine.store("chat").contextSize = __chatContextSize;
+        return;
+      }
+
+      Alpine.store("chat", {
+        history: [],
+        // Highlight.js grammars already requested; `undefined` covers code blocks with no language class.
+        languages: [undefined],
+        systemPrompt: "",
+        mcpMode: false,
+        contextSize: __chatContextSize,
+        tokenUsage: {
+          promptTokens: 0,
+          completionTokens: 0,
+          totalTokens: 0,
+          currentRequest: null
+        },
+        clear() {
+          this.history.length = 0;
+          this.tokenUsage = {
+            promptTokens: 0,
+            completionTokens: 0,
+            totalTokens: 0,
+            currentRequest: null
+          };
+        },
+        updateTokenUsage(usage) {
+          // Usage values in streaming responses are cumulative totals for the current request
+          // We track session totals separately and only update when we see new (higher) values
+          if (usage) {
+            const currentRequest = this.tokenUsage.currentRequest || {
+              promptTokens: 0,
+              completionTokens: 0,
+              totalTokens: 0
+            };
+
+            // Check if this is a new/updated usage (values increased)
+            const isNewUsage =
+              (usage.prompt_tokens !== undefined && usage.prompt_tokens > currentRequest.promptTokens) ||
+              (usage.completion_tokens !== undefined && usage.completion_tokens > currentRequest.completionTokens) ||
+              (usage.total_tokens !== undefined && usage.total_tokens > currentRequest.totalTokens);
+
+            if (isNewUsage) {
+              // Update session totals: subtract old request usage, add new
+              this.tokenUsage.promptTokens = this.tokenUsage.promptTokens - currentRequest.promptTokens + (usage.prompt_tokens || 0);
+              this.tokenUsage.completionTokens = this.tokenUsage.completionTokens - currentRequest.completionTokens + (usage.completion_tokens || 0);
+              this.tokenUsage.totalTokens = this.tokenUsage.totalTokens - currentRequest.totalTokens + (usage.total_tokens || 0);
+
+              // Store current request usage
+              this.tokenUsage.currentRequest = {
+                promptTokens: usage.prompt_tokens || 0,
+                completionTokens: usage.completion_tokens || 0,
+                totalTokens: usage.total_tokens || 0
+              };
+            }
+          }
+        },
+        // Tokens left in the context window (never negative); null when the context size is unknown.
+        getRemainingTokens() {
+          if (!this.contextSize) return null;
+          return Math.max(0, this.contextSize - this.tokenUsage.totalTokens);
+        },
+        // Share of the context window in use, as a percentage capped at 100; null when the size is unknown.
+        getContextUsagePercent() {
+          if (!this.contextSize) return null;
+          return Math.min(100, (this.tokenUsage.totalTokens / this.contextSize) * 100);
+        },
+        // Appends a message, or merges it into the last one when the role repeats ("thinking" is never
+        // merged), then requests highlight.js grammars for code languages not requested before.
+        add(role, content, image, audio) {
+          const N = this.history.length - 1;
+          // For thinking messages, always create a new message
+          if (role === "thinking") {
+            let c = "";
+            const lines = content.split("\n");
+            lines.forEach((line) => {
+              c += DOMPurify.sanitize(marked.parse(line));
+            });
+            this.history.push({ role, content, html: c, image, audio });
+          }
+          // For other messages, merge if same role
+          else if (this.history.length && this.history[N].role === role) {
+            this.history[N].content += content;
+            this.history[N].html = DOMPurify.sanitize(marked.parse(this.history[N].content));
+            // Merge new images and audio with existing ones
+            if (image && image.length > 0) {
+              this.history[N].image = [...(this.history[N].image || []), ...image];
+            }
+            if (audio && audio.length > 0) {
+              this.history[N].audio = [...(this.history[N].audio || []), ...audio];
+            }
+          } else {
+            let c = "";
+            const lines = content.split("\n");
+            lines.forEach((line) => {
+              c += DOMPurify.sanitize(marked.parse(line));
+            });
+            this.history.push({ role, content, html: c, image: image || [], audio: audio || [] });
+          }
+          document.getElementById('messages').scrollIntoView(false);
+          const parser = new DOMParser();
+          const html = parser.parseFromString(this.history[this.history.length - 1].html, "text/html");
+          const code = html.querySelectorAll("pre code");
+          if (!code.length) return;
+          code.forEach((el) => {
+            const language = el.className.split("language-")[1];
+            if (this.languages.includes(language)) return;
+            // The name comes from message content (fenced code info string) and ends up in a script
+            // URL: accept plain identifiers only, so "../" or "/" cannot redirect the request.
+            if (!/^[A-Za-z0-9_+-]+$/.test(language)) return;
+            const script = document.createElement("script");
+            script.src = `https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/languages/${language}.min.js`;
+            document.head.appendChild(script);
+            this.languages.push(language);
+          });
+        },
+        // Returns the history entries without their rendered `html` field.
+        messages() {
+          return this.history.map((message) => ({
+            role: message.role,
+            content: message.content,
+            image: message.image,
+            audio: message.audio,
+          }));
+        },
+      });
+    }
+
+    // Register listener immediately (before Alpine loads)
+    document.addEventListener("alpine:init", __initChatStore);
+    
+    // Fallback in case Alpine is already loaded: initialize now, or on DOMContentLoaded if the document is still loading
+    if (document.readyState === 'loading') {
+      document.addEventListener('DOMContentLoaded', function() {
+        if (window.Alpine) __initChatStore();
+      });
+    } else {
+      // DOM already loaded, try immediately
+      if (window.Alpine) __initChatStore();
+    }
+  </script>
+  <script defer src="static/chat.js"></script>
  {{ $allGalleryConfigs:=.GalleryConfig }}
  {{ $model:=.Model}}
  <body class="bg-[#101827] text-[#E5E7EB] flex flex-col h-screen" x-data="{ sidebarOpen: true }">
@@ -42,13 +197,22 @@ SOFTWARE.
    <!-- Main container with sidebar toggle -->
    <div class="flex flex-1 overflow-hidden relative">
      <!-- Sidebar -->
-      <div 
+      <div
        class="sidebar bg-[#1E293B] fixed top-16 bottom-0 left-0 w-64 transform transition-transform duration-300 ease-in-out z-30 border-r border-[#101827] overflow-y-auto"
        :class="sidebarOpen ? 'translate-x-0' : '-translate-x-full'">
-        
+
        <div class="p-4 flex justify-between items-center border-b border-[#101827]">
-          <h2 class="text-lg font-semibold text-[#E5E7EB]">Chat Settings</h2>
-          <button 
+          <div class="flex items-center gap-2">
+            <h2 class="text-lg font-semibold text-[#E5E7EB]">Chat Settings</h2>
+            <a
+              href="https://localai.io/features/text-generation/"
+              target="_blank"
+              class="text-[#94A3B8] hover:text-[#38BDF8] transition-colors"
+              title="Documentation">
+              <i class="fas fa-book text-sm"></i>
+            </a>
+          </div>
+          <button
            @click="sidebarOpen = false"
            class="text-[#94A3B8] hover:text-[#E5E7EB] focus:outline-none">
            <i class="fa-solid fa-times"></i>
@@ -66,7 +230,7 @@ SOFTWARE.
              onchange="window.location = this.value"
            >
              <option value="" disabled class="text-[#94A3B8]">Select a model</option>
-             
+
              {{ range .ModelsConfig }}
                {{ $cfg := . }}
                {{ range .KnownUsecaseStrings }}
@@ -101,61 +265,98 @@ SOFTWARE.
            <div class="flex items-center">
              {{ if $galleryConfig.Icon }}<img src="{{$galleryConfig.Icon}}" class="rounded-lg w-8 h-8 mr-2">{{end}}
              <h3 class="text-md font-medium">{{ $model }}</h3>
-            </div>
-            <button data-twe-ripple-init data-twe-ripple-color="light" class="w-full text-left flex items-center px-3 py-2 text-xs rounded text-[#E5E7EB] bg-[#101827] hover:bg-[#101827]/80 border border-[#38BDF8]/20 transition-colors" data-modal-target="model-info-modal" data-modal-toggle="model-info-modal">
-              <i class="fas fa-info-circle mr-2 text-[#38BDF8]"></i>
-              Model Information
-            </button>
-          </div>
-          {{ end }}
-          {{ end }}
-
-          <div x-data="{ activeTab: 'actions' }" class="space-y-4">
-            <!-- Tab navigation -->
-            <div class="flex border-b border-[#101827]">
              <button 
-                @click="activeTab = 'actions'" 
-                :class="activeTab === 'actions' ? 'border-b-2 border-[#38BDF8] text-[#E5E7EB]' : 'text-[#94A3B8] hover:text-[#E5E7EB]'"
-                class="py-2 px-4 text-sm font-medium">
-                Actions
+                data-twe-ripple-init 
+                data-twe-ripple-color="light" 
+                class="ml-2 text-[#94A3B8] hover:text-[#38BDF8] transition-colors" 
+                data-modal-target="model-info-modal" 
+                data-modal-toggle="model-info-modal"
+                title="Model Information">
+                <i class="fas fa-info-circle text-sm"></i>
              </button>
-              <button 
-                @click="activeTab = 'settings'" 
-                :class="activeTab === 'settings' ? 'border-b-2 border-[#38BDF8] text-[#E5E7EB]' : 'text-[#94A3B8] hover:text-[#E5E7EB]'"
-                class="py-2 px-4 text-sm font-medium">
-                Settings
-              </button>
-            </div>
-
-            <!-- Actions tab -->
-            <div x-show="activeTab === 'actions'" class="space-y-3">
              <button
                @click="$store.chat.clear()"
                id="clear"
                title="Clear chat history"
-                class="w-full flex items-center px-3 py-2 text-sm rounded text-[#E5E7EB] bg-[#101827] hover:bg-[#101827]/80 border border-[#1E293B] transition-colors"
-              >
-                <i class="fa-solid fa-trash-can mr-2"></i> Clear chat
+                class="ml-2 text-[#94A3B8] hover:text-[#38BDF8] transition-colors">
+                <i class="fa-solid fa-trash-can text-sm"></i>
              </button>
-              
-              <a 
-                href="https://localai.io/features/text-generation/" 
-                target="_blank"
-                class="w-full flex items-center px-3 py-2 text-sm rounded text-white bg-gray-700 hover:bg-gray-600 transition-colors"
-              >
-                <i class="fas fa-book mr-2"></i> Documentation
-              </a>
-              
-              <a 
-                href="browse?term={{.Model}}" 
-                class="w-full flex items-center px-3 py-2 text-sm rounded text-white bg-gray-700 hover:bg-gray-600 transition-colors"
-              >
-                <i class="fas fa-brain mr-2"></i> Browse Model
-              </a>
            </div>
+          </div>
+          {{ end }}
+          {{ end }}
+
+          <div x-data="{ showPromptForm: false }" class="space-y-3">
+              <!-- Token Usage Statistics -->
+              <div class="bg-[#1E293B] border border-[#38BDF8]/20 rounded-lg p-3 space-y-2">
+                <div class="flex items-center justify-between mb-2">
+                  <h4 class="text-sm font-semibold text-[#E5E7EB] flex items-center">
+                    <i class="fas fa-chart-line mr-2 text-[#38BDF8]"></i>
+                    Token Usage
+                  </h4>
+                </div>
+                <div class="space-y-1.5 text-xs">
+                  <div class="flex justify-between text-[#94A3B8]">
+                    <span>Prompt:</span>
+                    <span class="text-[#E5E7EB] font-medium" x-text="new Intl.NumberFormat().format($store.chat.tokenUsage.promptTokens)"></span>
+                  </div>
+                  <div class="flex justify-between text-[#94A3B8]">
+                    <span>Completion:</span>
+                    <span class="text-[#E5E7EB] font-medium" x-text="new Intl.NumberFormat().format($store.chat.tokenUsage.completionTokens)"></span>
+                  </div>
+                  <div class="flex justify-between text-[#94A3B8] border-t border-[#101827] pt-1.5">
+                    <span class="font-semibold text-[#38BDF8]">Total:</span>
+                    <span class="text-[#E5E7EB] font-bold" x-text="new Intl.NumberFormat().format($store.chat.tokenUsage.totalTokens)"></span>
+                  </div>
+                </div>
+              </div>
+
+              <!-- Context Size Indicator -->
+              <template x-if="$store.chat.contextSize && $store.chat.contextSize > 0">
+                <div class="bg-[#1E293B] border border-[#38BDF8]/20 rounded-lg p-3 space-y-2">
+                  <div class="flex items-center justify-between mb-2">
+                    <h4 class="text-sm font-semibold text-[#E5E7EB] flex items-center">
+                      <i class="fas fa-database mr-2 text-[#38BDF8]"></i>
+                      Context Window
+                    </h4>
+                  </div>
+                  <div class="space-y-2">
+                    <div class="flex justify-between text-xs text-[#94A3B8] mb-1">
+                      <span>Used / Available</span>
+                      <span class="text-[#E5E7EB] font-medium">
+                        <span x-text="new Intl.NumberFormat().format($store.chat.tokenUsage.totalTokens)"></span>
+                        / 
+                        <span x-text="new Intl.NumberFormat().format($store.chat.contextSize)"></span>
+                      </span>
+                    </div>
+                    <div class="w-full bg-[#101827] rounded-full h-2 overflow-hidden border border-[#1E293B]">
+                      <div class="h-full rounded-full transition-all duration-300 ease-out"
+                           :class="{
+                             'bg-gradient-to-r from-[#38BDF8] to-[#8B5CF6]': $store.chat.getContextUsagePercent() < 80,
+                             'bg-gradient-to-r from-yellow-500 to-orange-500': $store.chat.getContextUsagePercent() >= 80 && $store.chat.getContextUsagePercent() < 95,
+                             'bg-gradient-to-r from-red-500 to-red-600': $store.chat.getContextUsagePercent() >= 95
+                           }"
+                           :style="'width: ' + Math.min(100, $store.chat.getContextUsagePercent()) + '%'">
+                      </div>
+                    </div>
+                    <div class="flex justify-between text-xs">
+                      <span class="text-[#94A3B8]">
+                        Remaining: 
+                        <span class="text-[#E5E7EB] font-medium" x-text="new Intl.NumberFormat().format($store.chat.getRemainingTokens())"></span>
+                      </span>
+                      <span class="text-[#94A3B8]">
+                        <span x-text="Math.round($store.chat.getContextUsagePercent())"></span>%
+                      </span>
+                    </div>
+                    <div x-show="$store.chat.getContextUsagePercent() >= 80" class="mt-2 p-2 bg-yellow-500/10 border border-yellow-500/30 rounded text-yellow-300 text-xs">
+                      <i class="fas fa-exclamation-triangle mr-1"></i>
+                      <span x-show="$store.chat.getContextUsagePercent() >= 95">Context window nearly full!</span>
+                      <span x-show="$store.chat.getContextUsagePercent() >= 80 && $store.chat.getContextUsagePercent() < 95">Approaching context limit</span>
+                    </div>
+                  </div>
+                </div>
+              </template>

-            <!-- Settings tab -->
-            <div x-show="activeTab === 'settings'" x-data="{ showPromptForm: false }" class="space-y-3">           
              {{ if $model }}
              {{ $galleryConfig:= index $allGalleryConfigs $model}}
              {{ if $galleryConfig }}
@@ -167,97 +368,120 @@ SOFTWARE.
              {{ end }}
              {{ if and $modelConfig (or (ne $modelConfig.MCP.Servers "") (ne $modelConfig.MCP.Stdio "")) }}
              <!-- MCP Toggle -->
-              <div class="flex items-center justify-between px-3 py-2 text-sm rounded text-white bg-gray-700">
-                <span><i class="fa-solid fa-plug mr-2"></i> Agentic MCP Mode</span>
+              <div class="flex items-center justify-between px-3 py-2 text-sm rounded text-[#E5E7EB] bg-[#1E293B] border border-[#38BDF8]/20">
+                <span><i class="fa-solid fa-plug mr-2 text-[#38BDF8]"></i> Agentic MCP Mode</span>
                <label class="relative inline-flex items-center cursor-pointer">
                  <input type="checkbox" id="mcp-toggle" class="sr-only peer" x-model="$store.chat.mcpMode">
-                  <div class="w-11 h-6 bg-gray-600 peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-blue-300 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-blue-600"></div>
+                  <div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#38BDF8]/30 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-[#1E293B] after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#38BDF8]"></div>
                </label>
              </div>
-              
+
              <!-- MCP Mode Notification -->
-              <div x-show="$store.chat.mcpMode" class="p-3 bg-blue-900/20 border border-blue-700/50 rounded text-blue-100 text-xs">
+              <div x-show="$store.chat.mcpMode" class="p-3 bg-[#38BDF8]/10 border border-[#38BDF8]/30 rounded text-[#94A3B8] text-xs">
                <div class="flex items-start space-x-2">
-                  <i class="fa-solid fa-info-circle text-blue-400 mt-0.5"></i>
+                  <i class="fa-solid fa-info-circle text-[#38BDF8] mt-0.5"></i>
                  <div>
-                    <p class="font-medium text-blue-200 mb-1">Non-streaming Mode Active</p>
-                    <p class="text-blue-300">Responses will be processed in full before display. This may take significantly longer (up to 5 minutes), especially on CPU-only systems.</p>
+                    <p class="font-medium text-[#E5E7EB] mb-1">Non-streaming Mode Active</p>
+                    <p class="text-[#94A3B8]">Responses will be processed in full before display. This may take significantly longer (up to 5 minutes), especially on CPU-only systems.</p>
                  </div>
                </div>
              </div>
              {{ end }}
              {{ end }}
              {{ end }}
-              
-              <button 
-                @click="showPromptForm = !showPromptForm" 
-                class="w-full flex items-center justify-between px-3 py-2 text-sm rounded text-white bg-gray-700 hover:bg-gray-600 transition-colors"
+
+              <button
+                @click="showPromptForm = !showPromptForm"
+                class="w-full flex items-center justify-between px-3 py-2 text-sm rounded text-[#E5E7EB] bg-[#1E293B] hover:bg-[#1E293B]/80 border border-[#38BDF8]/20 hover:border-[#38BDF8]/40 transition-colors glow-on-hover"
              >
-                <span><i class="fa-solid fa-message mr-2"></i> System Prompt</span>
+                <span><i class="fa-solid fa-message mr-2 text-[#38BDF8]"></i> System Prompt</span>
                <i :class="showPromptForm ? 'fa-chevron-up' : 'fa-chevron-down'" class="fa-solid"></i>
              </button>
-              
-              <div x-show="showPromptForm" class="p-3 bg-gray-700 rounded">
-                <form id="system_prompt" class="flex flex-col space-y-2">
+
+              <div x-show="showPromptForm" x-data="{
+                showToast: false,
+                previousPrompt: $store.chat.systemPrompt,
+                isUpdated() {
+                  if (this.previousPrompt !== $store.chat.systemPrompt) {
+                    this.showToast = true;
+                    this.previousPrompt = $store.chat.systemPrompt;
+                    setTimeout(() => {this.showToast = false;}, 2000);
+                  }
+                } 
+              }" class="p-3 bg-[#1E293B] border border-[#38BDF8]/20 rounded-lg">
+                <form id="system_prompt" @submit.prevent="isUpdated" class="flex flex-col space-y-2">
                  <textarea
                    type="text"
                    id="systemPrompt"
                    name="systemPrompt"
-                    class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none min-h-24"
+                    class="bg-[#101827] text-[#E5E7EB] border border-[#1E293B] focus:border-[#38BDF8] focus:ring focus:ring-[#38BDF8] focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none min-h-24 placeholder-[#94A3B8]"
                    placeholder="System prompt"
                    x-model.lazy="$store.chat.systemPrompt"
                  ></textarea>
+                  <div
+                    x-show="showToast"
+                    x-transition
+                    class="mb-2 text-green-400 px-4 py-2 text-sm text-center bg-green-500/10 border border-green-500/30 rounded"
+                  >
+                    System prompt updated!
+                  </div>
                  <button
                    type="submit"
-                    class="px-3 py-2 text-sm rounded text-white bg-blue-600 hover:bg-blue-700 transition-colors"
+                    class="px-3 py-2 text-sm rounded text-[#101827] bg-[#38BDF8] hover:bg-[#38BDF8]/90 transition-colors font-medium"
                  >
                    Save System Prompt
                  </button>
                </form>
              </div>
-            </div>
          </div>
        </div>
      </div>

      <!-- Main chat container (shifts with sidebar) -->
-      <div 
+      <div
        class="flex-1 flex flex-col transition-all duration-300 ease-in-out"
        :class="sidebarOpen ? 'ml-64' : 'ml-0'">
-        
+
        <!-- Chat header with toggle button -->
-        <div class="border-b border-gray-700 p-4 flex items-center">
-          <!-- Sidebar toggle button moved to be the first element in the header and with clear styling -->
-          <button 
-            @click="sidebarOpen = !sidebarOpen" 
-            class="mr-4 text-gray-300 hover:text-white focus:outline-none bg-gray-800 hover:bg-gray-700 p-2 rounded"
-            style="min-width: 36px;"
-            title="Toggle settings">
-            <i class="fa-solid" :class="sidebarOpen ? 'fa-times' : 'fa-bars'"></i>
-          </button>
-          
+        <div class="border-b border-[#1E293B] p-4 flex items-center justify-between">
          <div class="flex items-center">
-            <i class="fa-solid fa-comments mr-2"></i>
-            {{ if $model }}
-            {{ $galleryConfig:= index $allGalleryConfigs $model}}
-            {{ if $galleryConfig }}
-            {{ if $galleryConfig.Icon }}<img src="{{$galleryConfig.Icon}}" class="rounded-lg w-8 h-8 mr-2">{{end}}
-            {{ end }}
-            {{ end }}
-            <h1 class="text-lg font-semibold">
-              Chat {{ if .Model }} with {{.Model}} {{ end }}
-            </h1>
+            <!-- Sidebar toggle button moved to be the first element in the header and with clear styling -->
+            <button
+              @click="sidebarOpen = !sidebarOpen"
+              class="mr-4 text-[#94A3B8] hover:text-[#E5E7EB] focus:outline-none bg-[#1E293B] hover:bg-[#1E293B]/80 p-2 rounded transition-colors"
+              style="min-width: 36px;"
+              title="Toggle settings">
+              <i class="fa-solid" :class="sidebarOpen ? 'fa-times' : 'fa-bars'"></i>
+            </button>
+
+            <div class="flex items-center">
+              <i class="fa-solid fa-comments mr-2 text-[#38BDF8]"></i>
+              {{ if $model }}
+              {{ $galleryConfig:= index $allGalleryConfigs $model}}
+              {{ if $galleryConfig }}
+              {{ if $galleryConfig.Icon }}<img src="{{$galleryConfig.Icon}}" class="rounded-lg w-8 h-8 mr-2">{{end}}
+              {{ end }}
+              {{ end }}
+              <h1 class="text-lg font-semibold text-[#E5E7EB]">
+                Chat {{ if .Model }} with {{.Model}} {{ end }}
+              </h1>
+              <!-- Loading indicator next to model name -->
+              <div id="header-loading-indicator" class="ml-3 text-[#38BDF8]" style="display: none;">
+                <i class="fas fa-spinner fa-spin text-sm"></i>
+              </div>
+            </div>
          </div>
+
        </div>

        <!-- Chat messages area -->
        <div class="flex-1 p-4 overflow-auto" id="chat" x-data="{history: $store.chat.history}">
-          <p id="usage" x-show="history.length === 0" class="text-gray-300">
+          <p id="usage" x-show="history.length === 0" class="text-[#94A3B8]">
            Start chatting with the AI by typing a prompt in the input field below and pressing Enter.<br>
-            <ul class="list-disc list-inside">
-              <li>For models that support images, you can upload an image by clicking the <i class="fa-solid fa-image"></i> icon.</li>
-              <li>For models that support audio, you can upload an audio file by clicking the <i class="fa-solid fa-microphone"></i> icon.</li>
-              <li>To send a text, markdown or PDF file, click the <i class="fa-solid fa-file"></i> icon.</li>
+            <ul class="list-disc list-inside mt-2 space-y-1">
+              <li>For models that support images, you can upload an image by clicking the <i class="fa-solid fa-image text-[#38BDF8]"></i> icon.</li>
+              <li>For models that support audio, you can upload an audio file by clicking the <i class="fa-solid fa-microphone text-[#38BDF8]"></i> icon.</li>
+              <li>To send a text, markdown or PDF file, click the <i class="fa-solid fa-file text-[#38BDF8]"></i> icon.</li>
            </ul>
          </p>
          <div id="messages" class="max-w-3xl mx-auto">
@@ -268,8 +492,8 @@ SOFTWARE.
                <template x-if="message.role === 'user'">
                  <div class="flex items-center space-x-2">
                    <div class="flex flex-col flex-1 items-end">
-                      <span class="text-xs font-semibold text-gray-400">You</span>
-                      <div class="p-2 flex-1 rounded bg-gray-700 text-white" x-html="message.html"></div>
+                      <span class="text-xs font-semibold text-[#94A3B8] mb-1">You</span>
+                      <div class="p-3 flex-1 rounded-lg bg-gradient-to-br from-[#1E293B] to-[#101827] text-[#E5E7EB] border border-[#38BDF8]/20 shadow-lg" x-html="message.html"></div>
                      <template x-if="message.image && message.image.length > 0">
                        <div class="mt-2 space-y-2">
                          <template x-for="(img, index) in message.image" :key="index">
@@ -293,12 +517,12 @@ SOFTWARE.
                <template x-if="message.role === 'thinking'">
                  <div class="flex items-center space-x-2 w-full">
                    <div class="flex flex-col flex-1">
-                      <div class="p-2 flex-1 rounded bg-blue-900/50 text-blue-100 border border-blue-700/50">
-                        <div class="flex items-center space-x-2">
-                          <i class="fa-solid fa-brain text-blue-400"></i>
-                          <span class="text-xs font-semibold text-blue-300">Thinking</span>
+                      <div class="p-3 flex-1 rounded-lg bg-[#38BDF8]/10 text-[#94A3B8] border border-[#38BDF8]/30">
+                        <div class="flex items-center space-x-2 mb-2">
+                          <i class="fa-solid fa-brain text-[#38BDF8]"></i>
+                          <span class="text-xs font-semibold text-[#38BDF8]">Thinking</span>
                        </div>
-                        <div class="mt-1" x-html="message.html"></div>
+                        <div class="mt-1 text-[#E5E7EB]" x-html="message.html"></div>
                      </div>
                    </div>
                  </div>
@@ -306,13 +530,13 @@ SOFTWARE.
                <template x-if="message.role != 'user' && message.role != 'thinking'">
                  <div class="flex items-center space-x-2">
                    {{ if $galleryConfig }}
-                    {{ if $galleryConfig.Icon }}<img src="{{$galleryConfig.Icon}}" class="rounded-lg mt-2 max-w-8 max-h-8">{{end}}
+                    {{ if $galleryConfig.Icon }}<img src="{{$galleryConfig.Icon}}" class="rounded-lg mt-2 max-w-8 max-h-8 border border-[#38BDF8]/20">{{end}}
                    {{ end }}
                    <div class="flex flex-col flex-1">
-                      <span class="text-xs font-semibold text-gray-400">{{if .Model}}{{.Model}}{{else}}Assistant{{end}}</span>
-                      <div class="flex-1 text-white flex items-center space-x-2">
-                        <div x-html="message.html"></div>
-                        <button @click="copyToClipboard(message.html)" title="Copy to clipboard" class="text-gray-400 hover:text-gray-100">
+                      <span class="text-xs font-semibold text-[#94A3B8] mb-1">{{if .Model}}{{.Model}}{{else}}Assistant{{end}}</span>
+                      <div class="flex-1 text-[#E5E7EB] flex items-center space-x-2">
+                        <div class="p-3 rounded-lg bg-gradient-to-br from-[#1E293B] to-[#101827] border border-[#8B5CF6]/20 shadow-lg" x-html="message.html"></div>
+                        <button @click="copyToClipboard(message.html)" title="Copy to clipboard" class="text-[#94A3B8] hover:text-[#38BDF8] transition-colors p-1">
                          <i class="fa-solid fa-copy"></i>
                        </button>
                      </div>
@@ -339,7 +563,7 @@ SOFTWARE.
                {{ else }}
                <i
                  class="fa-solid h-8 w-8"
-                  :class="message.role === 'user' ? 'fa-user' : 'fa-robot'"
+                  :class="message.role === 'user' ? 'fa-user text-[#38BDF8]' : 'fa-robot text-[#8B5CF6]'"
                ></i>
                {{ end }}
              </div>
@@ -349,65 +573,119 @@ SOFTWARE.


          <!-- Chat Input -->
-          <div class="p-4 border-t border-gray-700" x-data="{ inputValue: '', shiftPressed: false, fileName: '', isLoading: false }">
+          <div class="p-4 border-t border-[#1E293B]" x-data="{ inputValue: '', shiftPressed: false, fileName: '' }">
            <form id="prompt" action="chat/{{.Model}}" method="get" @submit.prevent="submitPrompt" class="max-w-3xl mx-auto">
-              <div class="relative w-full bg-gray-800 rounded-xl shadow-md">
+              <!-- Token Usage and Context Window - Compact above input -->
+              <div class="mb-3 flex items-center justify-between gap-4 text-xs">
+                <!-- Token Usage -->
+                <div class="flex items-center gap-3 text-[#94A3B8]">
+                  <div class="flex items-center gap-1">
+                    <i class="fas fa-chart-line text-[#38BDF8]"></i>
+                    <span>Prompt:</span>
+                    <span class="text-[#E5E7EB] font-medium" x-text="new Intl.NumberFormat().format($store.chat.tokenUsage.promptTokens)"></span>
+                  </div>
+                  <div class="flex items-center gap-1">
+                    <span>Completion:</span>
+                    <span class="text-[#E5E7EB] font-medium" x-text="new Intl.NumberFormat().format($store.chat.tokenUsage.completionTokens)"></span>
+                  </div>
+                  <div class="flex items-center gap-1 border-l border-[#1E293B] pl-3">
+                    <span class="text-[#38BDF8] font-semibold">Total:</span>
+                    <span class="text-[#E5E7EB] font-bold" x-text="new Intl.NumberFormat().format($store.chat.tokenUsage.totalTokens)"></span>
+                  </div>
+                  <!-- Tokens per second display -->
+                  <div id="tokens-per-second-container" class="flex items-center gap-1 border-l border-[#1E293B] pl-3">
+                    <i class="fas fa-tachometer-alt text-[#38BDF8]"></i>
+                    <span id="tokens-per-second" class="text-[#E5E7EB] font-medium">-</span>
+                  </div>
+                </div>
+
+                <!-- Context Window -->
+                <template x-if="$store.chat.contextSize && $store.chat.contextSize > 0">
+                  <div class="flex items-center gap-2 text-[#94A3B8]">
+                    <i class="fas fa-database text-[#38BDF8]"></i>
+                    <span>
+                      <span class="text-[#E5E7EB] font-medium" x-text="new Intl.NumberFormat().format($store.chat.tokenUsage.totalTokens)"></span>
+                      / 
+                      <span class="text-[#E5E7EB] font-medium" x-text="new Intl.NumberFormat().format($store.chat.contextSize)"></span>
+                    </span>
+                    <div class="w-16 bg-[#101827] rounded-full h-1.5 overflow-hidden border border-[#1E293B]">
+                      <div class="h-full rounded-full transition-all duration-300 ease-out"
+                           :class="{
+                             'bg-gradient-to-r from-[#38BDF8] to-[#8B5CF6]': $store.chat.getContextUsagePercent() < 80,
+                             'bg-gradient-to-r from-yellow-500 to-orange-500': $store.chat.getContextUsagePercent() >= 80 && $store.chat.getContextUsagePercent() < 95,
+                             'bg-gradient-to-r from-red-500 to-red-600': $store.chat.getContextUsagePercent() >= 95
+                           }"
+                           :style="'width: ' + Math.min(100, $store.chat.getContextUsagePercent()) + '%'">
+                      </div>
+                    </div>
+                    <span class="text-[#94A3B8]" x-text="Math.round($store.chat.getContextUsagePercent()) + '%'"></span>
+                    <span x-show="$store.chat.getContextUsagePercent() >= 80" class="text-yellow-400">
+                      <i class="fas fa-exclamation-triangle"></i>
+                    </span>
+                  </div>
+                </template>
+              </div>
+
+              <div class="relative w-full bg-[#1E293B] border border-[#38BDF8]/20 rounded-xl shadow-lg">
                <textarea
                  id="input"
                  name="input"
                  x-model="inputValue"
                  placeholder="Send a message..."
-                  class="p-4 pr-16 w-full bg-gray-800 text-gray-100 placeholder-gray-400 focus:outline-none resize-none border-0 rounded-xl transition-colors duration-200"
+                  class="p-3 pr-16 w-full bg-[#1E293B] text-[#E5E7EB] placeholder-[#94A3B8] focus:outline-none resize-none border-0 rounded-xl transition-colors duration-200 focus:ring-2 focus:ring-[#38BDF8]/50"
                  required
                  @keydown.shift="shiftPressed = true"
                  @keyup.shift="shiftPressed = false"
-                  @keydown.enter="if (!shiftPressed) { submitPrompt($event); }"
-                  rows="3"
-                  style="box-shadow: 0 0 0 1px rgba(75, 85, 99, 0.4) inset;"
+                  @keydown.enter="if (!$event.shiftKey) { $event.preventDefault(); submitPrompt($event); }"
+                  rows="2"
                ></textarea>
-                <span x-text="fileName" id="fileName" class="absolute right-16 top-4 text-gray-400 text-sm mr-2"></span>
+                <span x-text="fileName" id="fileName" class="absolute right-16 top-3 text-[#94A3B8] text-xs mr-2"></span>
                <button
                  type="button"
                  onclick="document.getElementById('input_image').click()"
-                  class="fa-solid fa-image text-gray-400 absolute right-12 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200"
+                  class="fa-solid fa-image text-[#94A3B8] absolute right-12 top-3 text-base p-1.5 hover:text-[#38BDF8] transition-colors duration-200"
                  title="Attach images"
                ></button>
                <button
                  type="button"
                  onclick="document.getElementById('input_audio').click()"
-                  class="fa-solid fa-microphone text-gray-400 absolute right-20 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200"
+                  class="fa-solid fa-microphone text-[#94A3B8] absolute right-20 top-3 text-base p-1.5 hover:text-[#38BDF8] transition-colors duration-200"
                  title="Attach an audio file"
                ></button>
                <button
                  type="button"
                  onclick="document.getElementById('input_file').click()"
-                  class="fa-solid fa-file text-gray-400 absolute right-28 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200"
+                  class="fa-solid fa-file text-[#94A3B8] absolute right-28 top-3 text-base p-1.5 hover:text-[#38BDF8] transition-colors duration-200"
                  title="Upload text, markdown or PDF file"
                ></button>
-                
-                <!-- Send button and loader in the same position -->
-                <div class="absolute right-3 top-4">
-                  <!-- Loader (hidden by default) -->
-                  <div id="loader" class="text-lg p-2" style="display: none;">
-                    <svg class="animate-spin h-5 w-5 text-blue-500" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
-                      <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
-                      <path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
-                    </svg>
-                  </div>
-                  
+
+                <!-- Send button and stop button in the same position -->
+                <div class="absolute right-3 top-3 flex items-center">
+                  <!-- Stop button (hidden by default, shown when request is in progress) -->
+                  <button
+                    id="stop-button"
+                    type="button"
+                    onclick="stopRequest()"
+                    class="text-lg p-2 text-red-400 hover:text-red-500 transition-colors duration-200"
+                    style="display: none;"
+                    title="Stop request"
+                  >
+                    <i class="fa-solid fa-stop"></i>
+                  </button>
+
                  <!-- Send button -->
                  <button
                    id="send-button"
                    type="submit"
-                    class="text-lg p-2 text-gray-400 hover:text-blue-400 transition-colors duration-200"
-                    title="Send message"
+                    class="text-lg p-2 text-[#94A3B8] hover:text-[#38BDF8] transition-colors duration-200"
+                    title="Send message (Enter)"
                  >
                    <i class="fa-solid fa-paper-plane"></i>
                  </button>
                </div>
              </div>
            </form>
-            <input id="chat-model" type="hidden" value="{{.Model}}">
+            <input id="chat-model" type="hidden" value="{{.Model}}" {{ if .ContextSize }}data-context-size="{{.ContextSize}}"{{ end }}>
            <input
              id="input_image"
              type="file"
@@ -437,7 +715,7 @@ SOFTWARE.
        </div>
      </div>
    </div>
-    
+
    <!-- Modal moved outside of sidebar to appear in center of page -->
    {{ if $model }}
    {{ $galleryConfig:= index $allGalleryConfigs $model}}
@@ -455,22 +733,22 @@ SOFTWARE.
              <span class="sr-only">Close modal</span>
            </button>
          </div>
-                  
+
          <!-- Body -->
          <div class="p-4 md:p-5 space-y-4">
            <div class="flex justify-center items-center">
              {{ if $galleryConfig.Icon }}<img class="lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3 entered loaded" src="{{$galleryConfig.Icon}}" loading="lazy"/>{{end}}
            </div>
-            <p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">{{ $galleryConfig.Description }}</p>
+            <div id="model-info-description" class="text-base leading-relaxed text-gray-500 dark:text-gray-400 break-words max-w-full">{{ $galleryConfig.Description }}</div>
            <hr>
            <p class="text-sm font-semibold text-gray-900 dark:text-white">Links</p>
            <ul>
              {{range $galleryConfig.URLs}}
              <li><a href="{{ . }}" target="_blank">{{ . }}</a></li>
              {{end}}
-            </ul>                   
+            </ul>
          </div>
-                  
+
          <!-- Footer -->
          <div class="flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600">
            <button data-modal-hide="model-info-modal" class="py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700">
@@ -483,59 +761,9 @@ SOFTWARE.
    {{ end }}
    {{ end }}

-    <!-- Alpine store initialization -->
+    <!-- Alpine store initialization and utilities -->
    <script>
      document.addEventListener("alpine:init", () => {
-        Alpine.store("chat", {
-          history: [],
-          languages: [undefined],
-          systemPrompt: "",
-          mcpMode: false,
-          clear() {
-            this.history.length = 0;
-          },
-          add(role, content, image, audio) {
-            const N = this.history.length - 1;
-            if (this.history.length && this.history[N].role === role) {
-              this.history[N].content += content;
-              this.history[N].html = DOMPurify.sanitize(
-                marked.parse(this.history[N].content)
-              );
-            } else {
-              let c = "";
-              const lines = content.split("\n");
-              lines.forEach((line) => {
-                c += DOMPurify.sanitize(marked.parse(line));
-              });
-              this.history.push({ role, content, html: c, image, audio });
-            }
-            document.getElementById('messages').scrollIntoView(false);
-            const parser = new DOMParser();
-            const html = parser.parseFromString(
-              this.history[this.history.length - 1].html,
-              "text/html"
-            );
-            const code = html.querySelectorAll("pre code");
-            if (!code.length) return;
-            code.forEach((el) => {
-              const language = el.className.split("language-")[1];
-              if (this.languages.includes(language)) return;
-              const script = document.createElement("script");
-              script.src = `https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/languages/${language}.min.js`;
-              document.head.appendChild(script);
-              this.languages.push(language);
-            });
-          },
-          messages() {
-            return this.history.map((message) => ({
-              role: message.role,
-              content: message.content,
-              image: message.image,
-              audio: message.audio,
-            }));
-          },
-        });
-
        window.copyToClipboard = (content) => {
          const tempElement = document.createElement('div');
          tempElement.innerHTML = content;
@@ -548,6 +776,134 @@ SOFTWARE.
          });
        };
      });
+
+      // Context size is now initialized in the Alpine store initialization above
+
+      // Render the model-info modal description (#model-info-description) as sanitized markdown via marked + DOMPurify
+      function initMarkdownProcessing() {
+        // Libraries not defined yet: poll again in 100 ms (there is no retry limit)
+        if (typeof marked === 'undefined' || typeof DOMPurify === 'undefined') {
+          setTimeout(initMarkdownProcessing, 100);
+          return;
+        }
+
+        const modalElement = document.getElementById('model-info-modal');
+        const descriptionElement = document.getElementById('model-info-description');
+        
+        if (!modalElement || !descriptionElement) {
+          return;
+        }
+
+        // Cache the raw description text in data-original-text so every render starts from the unrendered source
+        let originalText = descriptionElement.dataset.originalText;
+        if (!originalText) {
+          originalText = descriptionElement.textContent || descriptionElement.innerText;
+          descriptionElement.dataset.originalText = originalText;
+        }
+
+        // Replaces the description's innerHTML with DOMPurify-sanitized marked output; invoked by each trigger below
+        const processMarkdown = () => {
+          if (!descriptionElement || !originalText) return;
+          
+          try {
+            // Heuristic "already rendered" check: innerHTML starts with '<' and contains <p>, a tag starting with <h, <ul> or <ol>
+            const currentContent = descriptionElement.innerHTML.trim();
+            if (currentContent.startsWith('<') && (currentContent.includes('<p>') || currentContent.includes('<h') || currentContent.includes('<ul>') || currentContent.includes('<ol>'))) {
+              return; // Already processed
+            }
+            
+            // Always render from the cached raw text, never from the current innerHTML
+            const textToProcess = descriptionElement.dataset.originalText || originalText;
+            if (textToProcess && textToProcess.trim()) {
+              const html = marked.parse(textToProcess);
+              descriptionElement.innerHTML = DOMPurify.sanitize(html);
+            }
+          } catch (error) {
+            console.error('Error rendering markdown:', error);
+          }
+        };
+
+        // Render right away when the modal does not carry the 'hidden' class
+        if (!modalElement.classList.contains('hidden')) {
+          processMarkdown();
+        }
+
+        // Watch the modal's own class / aria-hidden attributes and render whenever a change leaves it not hidden
+        const observer = new MutationObserver((mutations) => {
+          mutations.forEach((mutation) => {
+            if (mutation.type === 'attributes') {
+              const isHidden = modalElement.classList.contains('hidden') || 
+                              modalElement.getAttribute('aria-hidden') === 'true';
+              if (!isHidden) {
+                // Neither 'hidden' class nor aria-hidden="true": treat as visible and render after 150 ms
+                setTimeout(processMarkdown, 150);
+              }
+            }
+          });
+        });
+
+        observer.observe(modalElement, {
+          attributes: true,
+          attributeFilter: ['aria-hidden', 'class'],
+          childList: false,
+          subtree: false
+        });
+
+        // Also render 300 ms after a click on any [data-modal-toggle="model-info-modal"] button
+        document.querySelectorAll('[data-modal-toggle="model-info-modal"]').forEach(button => {
+          button.addEventListener('click', () => {
+            setTimeout(processMarkdown, 300);
+          });
+        });
+
+        // Render once 200 ms after initialization, regardless of modal visibility
+        setTimeout(processMarkdown, 200);
+      }
+
+      // Run now if the DOM is already parsed, otherwise wait for DOMContentLoaded
+      if (document.readyState === 'loading') {
+        document.addEventListener('DOMContentLoaded', initMarkdownProcessing);
+      } else {
+        initMarkdownProcessing();
+      }
    </script>
+
+    <style>
+    /* Markdown content overflow handling */
+    #model-info-description {
+        word-wrap: break-word;
+        overflow-wrap: anywhere;
+        max-width: 100%;
+    }
+
+    #model-info-description pre {
+        overflow-x: auto;
+        max-width: 100%;
+        white-space: pre-wrap;
+        word-wrap: break-word;
+    }
+
+    #model-info-description code {
+        word-wrap: break-word;
+        overflow-wrap: break-word;
+    }
+
+    #model-info-description pre code {
+        white-space: pre;
+        overflow-x: auto;
+        display: block;
+    }
+
+    #model-info-description table {
+        max-width: 100%;
+        overflow-x: auto;
+        display: block;
+    }
+
+    #model-info-description img {
+        max-width: 100%;
+        height: auto;
+    }
+    </style>
  </body>
-</html>
+</html>
--- a/core/http/views/model-editor.html
+++ b/core/http/views/model-editor.html
@@ -3,9 +3,10 @@
 {{template "views/partials/head" .}}

 <body class="bg-[#101827] text-[#E5E7EB]">
-<div class="flex flex-col min-h-screen">
+<div class="flex flex-col min-h-screen" x-data="importModel()">

    {{template "views/partials/navbar" .}}
+    {{template "views/partials/inprogress" .}}

    <div class="container mx-auto px-4 py-8 flex-grow">
        <!-- Hero Header -->
@@ -24,19 +25,44 @@
                                {{if .ModelName}}Edit Model: {{.ModelName}}{{else}}Import New Model{{end}}
                            </span>
                        </h1>
-                        <p class="text-lg text-gray-300 font-light">Configure your model settings using YAML</p>
+                        <p class="text-lg text-gray-300 font-light" x-text="isAdvancedMode ? 'Configure your model settings using YAML' : 'Import a model from URI with preferences'"></p>
                    </div>
                    <div class="flex gap-3">
-                        <button id="validateBtn" class="group relative inline-flex items-center bg-gradient-to-r from-blue-600 to-blue-700 hover:from-blue-700 hover:to-blue-800 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-blue-500/25">
-                            <i class="fas fa-check mr-2 group-hover:animate-pulse"></i>
-                            <span>Validate</span>
-                            <div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
-                        </button>
-                        <button id="saveBtn" class="group relative inline-flex items-center bg-gradient-to-r from-green-600 to-emerald-600 hover:from-green-700 hover:to-emerald-700 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-green-500/25">
-                            <i class="fas fa-save mr-2 group-hover:animate-pulse"></i>
-                            <span>{{if .ModelName}}Update{{else}}Create{{end}}</span>
-                            <div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
-                        </button>
+                        <!-- Mode Toggle (only show when not in edit mode) -->
+                        <template x-if="!isEditMode">
+                            <button @click="toggleMode()" 
+                                    class="group relative inline-flex items-center bg-gradient-to-r from-gray-600 to-gray-700 hover:from-gray-700 hover:to-gray-800 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl">
+                                <i class="fas group-hover:animate-pulse" :class="isAdvancedMode ? 'fa-magic mr-2' : 'fa-code mr-2'"></i>
+                                <span x-text="isAdvancedMode ? 'Simple Mode' : 'Advanced Mode'"></span>
+                                <div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
+                            </button>
+                        </template>
+                        <!-- Advanced Mode Buttons -->
+                        <template x-if="isAdvancedMode">
+                            <div class="flex gap-3">
+                                <button id="validateBtn" class="group relative inline-flex items-center bg-gradient-to-r from-blue-600 to-blue-700 hover:from-blue-700 hover:to-blue-800 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-blue-500/25">
+                                    <i class="fas fa-check mr-2 group-hover:animate-pulse"></i>
+                                    <span>Validate</span>
+                                    <div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
+                                </button>
+                                <button id="saveBtn" class="group relative inline-flex items-center bg-gradient-to-r from-green-600 to-emerald-600 hover:from-green-700 hover:to-emerald-700 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-green-500/25">
+                                    <i class="fas fa-save mr-2 group-hover:animate-pulse"></i>
+                                    <span>{{if .ModelName}}Update{{else}}Create{{end}}</span>
+                                    <div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
+                                </button>
+                            </div>
+                        </template>
+                        <!-- Simple Mode Button -->
+                        <template x-if="!isAdvancedMode && !isEditMode">
+                            <button @click="submitImport()" 
+                                    :disabled="isSubmitting || !importUri.trim()"
+                                    :class="(isSubmitting || !importUri.trim()) ? 'opacity-50 cursor-not-allowed' : ''"
+                                    class="group relative inline-flex items-center bg-gradient-to-r from-green-600 to-emerald-600 hover:from-green-700 hover:to-emerald-700 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-green-500/25">
+                                <i class="fas group-hover:animate-pulse" :class="isSubmitting ? 'fa-spinner fa-spin mr-2' : 'fa-upload mr-2'"></i>
+                                <span x-text="isSubmitting ? 'Importing...' : 'Import Model'"></span>
+                                <div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
+                            </button>
+                        </template>
                    </div>
                </div>
            </div>
@@ -45,8 +71,187 @@
        <!-- Alert Messages -->
        <div id="alertContainer" class="mb-6"></div>

-        <!-- YAML Editor Panel -->
-        <div class="relative bg-gradient-to-br from-gray-800/90 to-gray-900/90 border border-gray-700/50 rounded-2xl overflow-hidden shadow-xl backdrop-blur-sm h-[calc(100vh-250px)]">
+        <!-- Simple Import Mode -->
+        <div x-show="!isAdvancedMode && !isEditMode" 
+             x-transition:enter="transition ease-out duration-300"
+             x-transition:enter-start="opacity-0 transform translate-y-4"
+             x-transition:enter-end="opacity-100 transform translate-y-0"
+             class="relative bg-gradient-to-br from-gray-800/90 to-gray-900/90 border border-gray-700/50 rounded-2xl overflow-hidden shadow-xl backdrop-blur-sm p-8">
+            <div class="absolute inset-0 rounded-2xl bg-gradient-to-br from-green-500/5 to-emerald-500/5"></div>
+            
+            <div class="relative space-y-6">
+                <h2 class="text-2xl font-semibold text-white flex items-center gap-3 mb-6">
+                    <div class="w-10 h-10 rounded-lg bg-green-500/20 flex items-center justify-center">
+                        <i class="fas fa-link text-green-400"></i>
+                    </div>
+                    Import from URI
+                </h2>
+
+                <!-- URI Input -->
+                <div>
+                    <label class="block text-sm font-medium text-gray-300 mb-2">
+                        <i class="fas fa-link mr-2"></i>Model URI
+                    </label>
+                    <input 
+                        x-model="importUri"
+                        type="text" 
+                        placeholder="https://example.com/model.gguf or file:///path/to/model.gguf"
+                        class="w-full px-4 py-3 bg-gray-900/90 border border-gray-700/70 rounded-xl text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
+                        :disabled="isSubmitting">
+                    <p class="mt-2 text-xs text-gray-400">
+                        Enter the URI or path to the model file you want to import
+                    </p>
+                </div>
+
+                <!-- Preferences Section -->
+                <div>
+                    <div class="flex items-center justify-between mb-4">
+                        <label class="block text-sm font-medium text-gray-300">
+                            <i class="fas fa-cog mr-2"></i>Preferences (Optional)
+                        </label>
+                    </div>
+                    
+                    <!-- Common Preferences -->
+                    <div class="space-y-4 mb-6 p-4 bg-gray-900/50 rounded-xl border border-gray-700/50">
+                        <h3 class="text-sm font-semibold text-gray-300 mb-3 flex items-center">
+                            <i class="fas fa-star mr-2 text-yellow-400"></i>Common Preferences
+                        </h3>
+                        
+                        <!-- Backend Selection -->
+                        <div>
+                            <label class="block text-sm font-medium text-gray-300 mb-2">
+                                <i class="fas fa-server mr-2"></i>Backend
+                            </label>
+                            <select 
+                                x-model="commonPreferences.backend"
+                                class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
+                                :disabled="isSubmitting">
+                                <option value="">Auto-detect (based on URI)</option>
+                                <option value="llama-cpp">llama-cpp</option>
+                                <option value="mlx">mlx</option>
+                                <option value="mlx-vlm">mlx-vlm</option>
+                            </select>
+                            <p class="mt-1 text-xs text-gray-400">
+                                Force a specific backend. Leave empty to auto-detect from URI.
+                            </p>
+                        </div>
+                        
+                        <!-- Model Name -->
+                        <div>
+                            <label class="block text-sm font-medium text-gray-300 mb-2">
+                                <i class="fas fa-tag mr-2"></i>Model Name
+                            </label>
+                            <input 
+                                x-model="commonPreferences.name"
+                                type="text" 
+                                placeholder="Leave empty to use filename"
+                                class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
+                                :disabled="isSubmitting">
+                            <p class="mt-1 text-xs text-gray-400">
+                                Custom name for the model. If empty, the filename will be used.
+                            </p>
+                        </div>
+                        
+                        <!-- Description -->
+                        <div>
+                            <label class="block text-sm font-medium text-gray-300 mb-2">
+                                <i class="fas fa-align-left mr-2"></i>Description
+                            </label>
+                            <textarea 
+                                x-model="commonPreferences.description"
+                                rows="3"
+                                placeholder="Leave empty to use default description"
+                                class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all resize-none"
+                                :disabled="isSubmitting"></textarea>
+                            <p class="mt-1 text-xs text-gray-400">
+                                Custom description for the model. If empty, a default description will be generated.
+                            </p>
+                        </div>
+                        
+                        <!-- Quantizations -->
+                        <div>
+                            <label class="block text-sm font-medium text-gray-300 mb-2">
+                                <i class="fas fa-layer-group mr-2"></i>Quantizations
+                            </label>
+                            <input 
+                                x-model="commonPreferences.quantizations"
+                                type="text" 
+                                placeholder="q4_k_m,q4_k_s,q3_k_m (comma-separated)"
+                                class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
+                                :disabled="isSubmitting">
+                            <p class="mt-1 text-xs text-gray-400">
+                                Preferred quantizations (comma-separated). Examples: q4_k_m, q4_k_s, q3_k_m, q2_k. Leave empty to use default (q4_k_m).
+                            </p>
+                        </div>
+                        
+                        <!-- MMProj Quantizations -->
+                        <div>
+                            <label class="block text-sm font-medium text-gray-300 mb-2">
+                                <i class="fas fa-image mr-2"></i>MMProj Quantizations
+                            </label>
+                            <input 
+                                x-model="commonPreferences.mmproj_quantizations"
+                                type="text" 
+                                placeholder="fp16,fp32 (comma-separated)"
+                                class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
+                                :disabled="isSubmitting">
+                            <p class="mt-1 text-xs text-gray-400">
+                                Preferred MMProj quantizations (comma-separated). Examples: fp16, fp32. Leave empty to use default (fp16).
+                            </p>
+                        </div>
+                    </div>
+                    
+                    <!-- Custom Preferences -->
+                    <div class="space-y-3">
+                        <div class="flex items-center justify-between mb-3">
+                            <label class="block text-sm font-medium text-gray-300">
+                                <i class="fas fa-sliders-h mr-2"></i>Custom Preferences
+                            </label>
+                            <button @click="addPreference()" 
+                                    :disabled="isSubmitting"
+                                    class="text-sm px-3 py-1.5 rounded-lg bg-green-600/20 hover:bg-green-600/30 text-green-300 border border-green-500/30 transition-all">
+                                <i class="fas fa-plus mr-1"></i>Add Custom
+                            </button>
+                        </div>
+                        
+                        <div class="space-y-3" x-show="preferences.length > 0">
+                            <template x-for="(pref, index) in preferences" :key="index">
+                                <div class="flex gap-3 items-center">
+                                    <input 
+                                        x-model="pref.key"
+                                        type="text" 
+                                        placeholder="Key"
+                                        class="flex-1 px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
+                                        :disabled="isSubmitting">
+                                    <span class="text-gray-400">:</span>
+                                    <input 
+                                        x-model="pref.value"
+                                        type="text" 
+                                        placeholder="Value"
+                                        class="flex-1 px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
+                                        :disabled="isSubmitting">
+                                    <button @click="removePreference(index)" 
+                                            :disabled="isSubmitting"
+                                            class="px-3 py-2 rounded-lg bg-red-600/20 hover:bg-red-600/30 text-red-300 border border-red-500/30 transition-all">
+                                        <i class="fas fa-trash"></i>
+                                    </button>
+                                </div>
+                            </template>
+                        </div>
+                        <p class="mt-2 text-xs text-gray-400">
+                            Add custom key-value pairs for advanced configuration
+                        </p>
+                    </div>
+                </div>
+            </div>
+        </div>
+
+        <!-- Advanced YAML Editor Panel -->
+        <div x-show="isAdvancedMode || isEditMode" 
+             x-transition:enter="transition ease-out duration-300"
+             x-transition:enter-start="opacity-0 transform translate-y-4"
+             x-transition:enter-end="opacity-100 transform translate-y-0"
+             class="relative bg-gradient-to-br from-gray-800/90 to-gray-900/90 border border-gray-700/50 rounded-2xl overflow-hidden shadow-xl backdrop-blur-sm h-[calc(100vh-250px)]">
            <div class="absolute inset-0 rounded-2xl bg-gradient-to-br from-fuchsia-500/5 to-purple-500/5"></div>
            
            <div class="relative sticky top-0 bg-gray-800/95 border-b border-gray-700/50 p-6 flex items-center justify-between z-10 backdrop-blur-sm">
@@ -144,22 +349,22 @@
 }

 /* Enhanced YAML Syntax Highlighting */
-.cm-keyword { color: #8b5cf6 !important; font-weight: 600 !important; } /* Purple for YAML keys */
-.cm-string { color: #10b981 !important; } /* Emerald for strings */
-.cm-number { color: #f59e0b !important; } /* Amber for numbers */
-.cm-comment { color: #6b7280 !important; font-style: italic !important; } /* Gray for comments */
-.cm-property { color: #ec4899 !important; } /* Pink for properties */
-.cm-operator { color: #ef4444 !important; } /* Red for operators */
-.cm-variable { color: #06b6d4 !important; } /* Cyan for variables */
-.cm-tag { color: #8b5cf6 !important; font-weight: 600 !important; } /* Purple for tags */
-.cm-attribute { color: #f59e0b !important; } /* Amber for attributes */
-.cm-def { color: #ec4899 !important; font-weight: 600 !important; } /* Pink for definitions */
-.cm-bracket { color: #d1d5db !important; } /* Light gray for brackets */
-.cm-punctuation { color: #d1d5db !important; } /* Light gray for punctuation */
-.cm-quote { color: #10b981 !important; } /* Emerald for quotes */
-.cm-meta { color: #6b7280 !important; } /* Gray for meta */
-.cm-builtin { color: #f472b6 !important; } /* Pink for builtins */
-.cm-atom { color: #f59e0b !important; } /* Amber for atoms like true/false/null */
+.cm-keyword { color: #8b5cf6 !important; font-weight: 600 !important; }
+.cm-string { color: #10b981 !important; }
+.cm-number { color: #f59e0b !important; }
+.cm-comment { color: #6b7280 !important; font-style: italic !important; }
+.cm-property { color: #ec4899 !important; }
+.cm-operator { color: #ef4444 !important; }
+.cm-variable { color: #06b6d4 !important; }
+.cm-tag { color: #8b5cf6 !important; font-weight: 600 !important; }
+.cm-attribute { color: #f59e0b !important; }
+.cm-def { color: #ec4899 !important; font-weight: 600 !important; }
+.cm-bracket { color: #d1d5db !important; }
+.cm-punctuation { color: #d1d5db !important; }
+.cm-quote { color: #10b981 !important; }
+.cm-meta { color: #6b7280 !important; }
+.cm-builtin { color: #f472b6 !important; }
+.cm-atom { color: #f59e0b !important; }

 /* Enhanced scrollbar styling */
 .CodeMirror-scrollbar-filler, .CodeMirror-gutter-filler {
@@ -242,22 +447,221 @@
 </style>

 <script>
-class ModelEditor {
-    constructor() {
-        this.modelName = '{{.ModelName}}';
-        this.isEditMode = !!this.modelName;
-        this.yamlEditor = null;
+function importModel() {
+    return {
+        isAdvancedMode: false,
+        isEditMode: {{if .ModelName}}true{{else}}false{{end}},
+        importUri: '',
+        preferences: [],
+        commonPreferences: {
+            backend: '',
+            name: '',
+            description: '',
+            quantizations: '',
+            mmproj_quantizations: ''
+        },
+        isSubmitting: false,
+        currentJobId: null,
+        jobPollInterval: null,
+        yamlEditor: null,
+        modelEditor: null,
        
-        this.init();
-    }
-
-    init() {
-        this.initializeCodeMirror();
-        this.bindEvents();
-    }
-
-    getDefaultConfig() {
-        return `# Model Configuration
+        // Initial setup: editing an existing model always forces the advanced (YAML) view.
+        init() {
+            this.isAdvancedMode = this.isAdvancedMode || this.isEditMode;
+
+            // The simple import form needs no editor, so there is nothing more to set up.
+            if (!this.isAdvancedMode) {
+                return;
+            }
+            // Build the editor and wire its buttons from a $nextTick callback.
+            this.$nextTick(() => {
+                this.initializeCodeMirror();
+                this.bindAdvancedEvents();
+            });
+        },
+        
+        // Flip between the simple form and the YAML editor; set the editor up on entry.
+        toggleMode() {
+            this.isAdvancedMode = !this.isAdvancedMode;
+            if (!this.isAdvancedMode) return;
+            this.$nextTick(() => {
+                this.initializeCodeMirror();
+                this.bindAdvancedEvents();
+            });
+        },
+        
+        addPreference() { // Append a blank key/value row to the custom preferences list.
+            this.preferences.push({ key: '', value: '' });
+        },
+        
+        removePreference(index) { // Remove the custom preference row at position `index`.
+            this.preferences.splice(index, 1);
+        },
+        
+        // Submit the simple-mode import form.
+        //
+        // Collects the non-empty common and custom preferences, POSTs them as JSON
+        // together with the URI to /models/import-uri, and starts polling the job
+        // whose id the server returns.
+        // isSubmitting stays true once the job is started; it is reset here only
+        // when starting the import fails.
+        async submitImport() {
+            const uri = this.importUri.trim();
+            if (!uri) {
+                this.showAlert('error', 'Please enter a model URI');
+                return;
+            }
+
+            this.isSubmitting = true;
+
+            try {
+                const prefsObj = {};
+
+                // Common preferences: copy only the fields the user actually filled in.
+                for (const field of ['backend', 'name', 'description', 'quantizations', 'mmproj_quantizations']) {
+                    const value = (this.commonPreferences[field] || '').trim();
+                    if (value) {
+                        prefsObj[field] = value;
+                    }
+                }
+
+                // Custom preferences are applied last so they can override common ones.
+                // Trim before testing emptiness: a whitespace-only key or value must be
+                // skipped rather than sent as an empty string.
+                this.preferences.forEach(pref => {
+                    const key = (pref.key || '').trim();
+                    const value = (pref.value || '').trim();
+                    if (key && value) {
+                        prefsObj[key] = value;
+                    }
+                });
+
+                const requestBody = {
+                    uri,
+                    preferences: Object.keys(prefsObj).length > 0 ? prefsObj : null
+                };
+
+                const response = await fetch('/models/import-uri', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json',
+                    },
+                    body: JSON.stringify(requestBody)
+                });
+
+                if (!response.ok) {
+                    // The error body may not be JSON; fall back to a generic message.
+                    const error = await response.json().catch(() => ({ error: 'Failed to start import' }));
+                    throw new Error(error.error || 'Failed to start import');
+                }
+
+                const result = await response.json();
+
+                // The job id is accepted under either "uuid" or "ID".
+                const jobId = result.uuid || result.ID;
+                if (!jobId) {
+                    throw new Error('No job ID returned from server');
+                }
+
+                this.currentJobId = jobId;
+                this.showAlert('success', 'Import started! Tracking progress...');
+                this.startJobPolling();
+            } catch (error) {
+                this.showAlert('error', 'Failed to start import: ' + error.message);
+                this.isSubmitting = false;
+            }
+        },
+        
+        // Poll /models/jobs/<id> every second until the job fails or completes.
+        startJobPolling() {
+            if (this.jobPollInterval) {
+                clearInterval(this.jobPollInterval);
+            }
+
+            // Shared teardown for both terminal outcomes.
+            const finish = (type, message) => {
+                clearInterval(this.jobPollInterval);
+                this.isSubmitting = false;
+                this.currentJobId = null;
+                this.showAlert(type, message);
+            };
+
+            this.jobPollInterval = setInterval(async () => {
+                const jobId = this.currentJobId;
+                if (!jobId) {
+                    clearInterval(this.jobPollInterval);
+                    return;
+                }
+                try {
+                    const response = await fetch(`/models/jobs/${jobId}`);
+                    if (!response.ok) return;
+
+                    const jobData = await response.json();
+                    // Ticks may overlap on slow requests; drop replies for a finished job.
+                    if (this.currentJobId !== jobId) return;
+
+                    // Error first: a job flagged failed and completed is not a success.
+                    if (jobData.error) {
+                        finish('error', 'Import failed: ' + jobData.error);
+                    } else if (jobData.completed) {
+                        finish('success', 'Model imported successfully! Refreshing page...');
+                        setTimeout(() => window.location.reload(), 2000);
+                    }
+                } catch (error) {
+                    console.error('Error polling job status:', error);
+                }
+            }, 1000);
+        },
+        
+        // Create the CodeMirror YAML editor once; later calls are no-ops.
+        initializeCodeMirror() {
+            if (this.yamlEditor) return;
+
+            // Seed with the template-provided ConfigYAML when present, else the default config.
+            const initialValue = {{if .ConfigYAML}}`{{.ConfigYAML}}`{{else}}this.getDefaultConfig(){{end}};
+            const host = document.getElementById('yamlCodeMirror');
+            this.yamlEditor = CodeMirror(host, {
+                mode: 'yaml',
+                theme: 'default',
+                lineNumbers: true,
+                autoRefresh: true,
+                indentUnit: 2,
+                tabSize: 2,
+                indentWithTabs: false,
+                lineWrapping: true,
+                styleActiveLine: true,
+                matchBrackets: true,
+                autoCloseBrackets: true,
+                value: initialValue
+            });
+        },
+        
+        // Attach click handlers to the advanced-mode buttons.
+        // toggleMode() calls this on every switch to advanced mode, so each element is
+        // marked once bound: a button that is still in the DOM is not given a duplicate
+        // listener, while a freshly rendered one carries no marker and is bound as usual.
+        bindAdvancedEvents() {
+            if (!this.yamlEditor) return;
+
+            const handlers = {
+                saveBtn: () => this.saveConfig(),
+                validateBtn: () => this.validateConfig(),
+                formatYamlBtn: () => this.formatYaml(),
+                copyYamlBtn: () => this.copyYaml()
+            };
+
+            Object.entries(handlers).forEach(([id, handler]) => {
+                const button = document.getElementById(id);
+                // Buttons missing from the DOM are skipped, as are already-bound ones.
+                if (!button || button.dataset.advancedBound) return;
+                button.dataset.advancedBound = 'true';
+                button.addEventListener('click', handler);
+            });
+        },
+        
+        getDefaultConfig() {
+            return `# Model Configuration
 name: my-model
 backend: llama-cpp
 parameters:
@@ -286,186 +690,150 @@ parameters:
 #   - chat
 #   - completion
 `;
-    }
-
-    initializeCodeMirror() {
-        const initialValue = {{if .ConfigYAML}}`{{.ConfigYAML}}`{{else}}this.getDefaultConfig(){{end}};
+        },
        
-        this.yamlEditor = CodeMirror(document.getElementById('yamlCodeMirror'), {
-            mode: 'yaml',
-            theme: 'default',
-            lineNumbers: true,
-            autoRefresh: true,
-            indentUnit: 2,
-            tabSize: 2,
-            indentWithTabs: false,
-            lineWrapping: true,
-            styleActiveLine: true,
-            matchBrackets: true,
-            autoCloseBrackets: true,
-            value: initialValue
-        });
-    }
-
-    bindEvents() {
-        // Button events
-        document.getElementById('saveBtn').addEventListener('click', () => this.saveConfig());
-        document.getElementById('validateBtn').addEventListener('click', () => this.validateConfig());
-        document.getElementById('formatYamlBtn').addEventListener('click', () => this.formatYaml());
-        document.getElementById('copyYamlBtn').addEventListener('click', () => this.copyYaml());
-    }
-
-    validateConfig() {
-        try {
-            const yamlContent = this.yamlEditor.getValue();
-            const config = jsyaml.load(yamlContent);
-            
-            if (!config || typeof config !== 'object') {
-                throw new Error('Invalid YAML structure');
-            }
-            
-            if (!config.name) {
-                throw new Error('Model name is required');
-            }
-            if (!config.backend) {
-                throw new Error('Backend is required');
-            }
-            if (!config.parameters || !config.parameters.model) {
-                throw new Error('Model file/path is required in parameters.model');
-            }
-            
-            this.showAlert('success', 'Configuration is valid!');
-        } catch (error) {
-            this.showAlert('error', 'Validation failed: ' + error.message);
-        }
-    }
-
-    async saveConfig() {
-        try {
-            // Validate before saving
-            const yamlContent = this.yamlEditor.getValue();
-            const config = jsyaml.load(yamlContent);
-            
-            if (!config || typeof config !== 'object') {
-                throw new Error('Invalid YAML structure');
-            }
-            
-            if (!config.name) {
-                throw new Error('Model name is required');
-            }
-            if (!config.backend) {
-                throw new Error('Backend is required');
-            }
-            if (!config.parameters || !config.parameters.model) {
-                throw new Error('Model file/path is required in parameters.model');
-            }
-            
-            const endpoint = this.isEditMode ? `/models/edit/${this.modelName}` : '/models/import';
-            
-            const response = await fetch(endpoint, {
-                method: 'POST',
-                headers: {
-                    'Content-Type': 'application/x-yaml',
-                },
-                body: yamlContent
-            });
-
-            const result = await response.json();
-            
-            if (result.success) {
-                this.showAlert('success', result.message || (this.isEditMode ? 'Model updated successfully!' : 'Model created successfully!'));
-                if (!this.isEditMode && config.name) {
-                    setTimeout(() => {
-                        window.location.href = `/models/edit/${config.name}`;
-                    }, 2000);
+        validateConfig() { // Parse the editor's YAML, check the required fields, and report the result via showAlert.
+            try {
+                const yamlContent = this.yamlEditor.getValue();
+                const config = jsyaml.load(yamlContent);
+                
+                if (!config || typeof config !== 'object') {
+                    throw new Error('Invalid YAML structure');
                 }
-            } else {
-                this.showAlert('error', result.error || 'Failed to save configuration');
+                
+                if (!config.name) {
+                    throw new Error('Model name is required');
+                }
+                if (!config.backend) {
+                    throw new Error('Backend is required');
+                }
+                if (!config.parameters || !config.parameters.model) {
+                    throw new Error('Model file/path is required in parameters.model');
+                }
+                
+                this.showAlert('success', 'Configuration is valid!');
+            } catch (error) { // Any exception thrown above, including a failed check, is shown as an alert.
+                this.showAlert('error', 'Validation failed: ' + error.message);
             }
-        } catch (error) {
-            this.showAlert('error', 'Failed to save: ' + error.message);
-        }
-    }
+        },
+        
+        // Validate the editor's YAML and POST it to the edit endpoint (edit mode) or to
+        // /models/import (create mode); a successful create redirects to the edit page.
+        async saveConfig() {
+            try {
+                const yamlContent = this.yamlEditor.getValue();
+                const config = jsyaml.load(yamlContent);
+
+                // Validate before saving; the first failed check aborts with its message.
+                if (!config || typeof config !== 'object') {
+                    throw new Error('Invalid YAML structure');
+                }
+                if (!config.name) {
+                    throw new Error('Model name is required');
+                }
+                if (!config.backend) {
+                    throw new Error('Backend is required');
+                }
+                if (!config.parameters || !config.parameters.model) {
+                    throw new Error('Model file/path is required in parameters.model');
+                }
+
+                const endpoint = this.isEditMode ? `/models/edit/{{.ModelName}}` : '/models/import';
+                const response = await fetch(endpoint, {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/x-yaml',
+                    },
+                    body: yamlContent
+                });

-    formatYaml() {
-        try {
+                const result = await response.json();
+                if (result.success) {
+                    this.showAlert('success', result.message || (this.isEditMode ? 'Model updated successfully!' : 'Model created successfully!'));
+                    if (!this.isEditMode && config.name) {
+                        // The name comes from user-written YAML: encode it so characters such
+                        // as "/", "?" or "#" cannot change the path of the redirect target.
+                        const editUrl = `/models/edit/${encodeURIComponent(config.name)}`;
+                        setTimeout(() => { window.location.href = editUrl; }, 2000);
+                    }
+                } else {
+                    this.showAlert('error', result.error || 'Failed to save configuration');
+                }
+            } catch (error) {
+                this.showAlert('error', 'Failed to save: ' + error.message);
+            }
+        },
+        
+        // Re-serialize the editor content through js-yaml; any failure is shown as an alert.
+        formatYaml() {
+            const dumpOptions = {
+                indent: 2,
+                lineWidth: 120,
+                noRefs: true,
+                sortKeys: false
+            };
+            try {
+                const doc = jsyaml.load(this.yamlEditor.getValue());
+                this.yamlEditor.setValue(jsyaml.dump(doc, dumpOptions));
+                this.showAlert('success', 'YAML formatted successfully');
+            } catch (error) {
+                this.showAlert('error', 'Failed to format YAML: ' + error.message);
+            }
+        },
+        
+        copyYaml() { // Copy the editor's YAML to the clipboard and confirm with an alert.
             const yamlContent = this.yamlEditor.getValue();
-            const parsed = jsyaml.load(yamlContent);
-            const formatted = jsyaml.dump(parsed, {
-                indent: 2,
-                lineWidth: 120,
-                noRefs: true,
-                sortKeys: false
+            // Promise.resolve() routes a missing navigator.clipboard (TypeError) to the fallback.
+            Promise.resolve().then(() => navigator.clipboard.writeText(yamlContent)).then(() => {
+                this.showAlert('success', 'YAML copied to clipboard');
+            }).catch(() => { // Legacy path: copy through a temporary textarea and execCommand.
+                const textArea = document.createElement('textarea');
+                textArea.value = yamlContent;
+                document.body.appendChild(textArea);
+                textArea.select();
+                document.execCommand('copy');
+                document.body.removeChild(textArea);
+                this.showAlert('success', 'YAML copied to clipboard');
             });
-            this.yamlEditor.setValue(formatted);
-            this.showAlert('success', 'YAML formatted successfully');
-        } catch (error) {
-            this.showAlert('error', 'Failed to format YAML: ' + error.message);
-        }
-    }
-
-    copyYaml() {
-        const yamlContent = this.yamlEditor.getValue();
-        navigator.clipboard.writeText(yamlContent).then(() => {
-            this.showAlert('success', 'YAML copied to clipboard');
-        }).catch(err => {
-            // Fallback for older browsers
-            const textArea = document.createElement('textarea');
-            textArea.value = yamlContent;
-            document.body.appendChild(textArea);
-            textArea.select();
-            document.execCommand('copy');
-            document.body.removeChild(textArea);
-            this.showAlert('success', 'YAML copied to clipboard');
-        });
-    }
-
-    showAlert(type, message) {
-        const container = document.getElementById('alertContainer');
-        const alertClasses = {
-            success: 'alert alert-success',
-            error: 'alert alert-error',
-            warning: 'alert alert-warning',
-            info: 'alert alert-info'
-        };
+        },
        
-        const alertIcons = {
-            success: 'fas fa-check-circle',
-            error: 'fas fa-exclamation-triangle',
-            warning: 'fas fa-exclamation-circle',
-            info: 'fas fa-info-circle'
-        };
-        
-        container.innerHTML = `
-            <div class="${alertClasses[type]}">
-                <div class="flex items-center">
-                    <i class="${alertIcons[type]} mr-3 text-lg"></i>
-                    <span class="flex-1">${message}</span>
-                    <button onclick="this.parentElement.parentElement.remove()" class="ml-4 text-current hover:opacity-70 transition-opacity">
-                        <i class="fas fa-times"></i>
-                    </button>
+        // Show an alert in #alertContainer, replacing any previous one. `message` is HTML-escaped before insertion.
+        showAlert(type, message) {
+            const container = document.getElementById('alertContainer');
+            const alertClasses = {
+                success: 'alert alert-success',
+                error: 'alert alert-error',
+                warning: 'alert alert-warning',
+                info: 'alert alert-info'
+            };
+            const alertIcons = {
+                success: 'fas fa-check-circle',
+                error: 'fas fa-exclamation-triangle',
+                warning: 'fas fa-exclamation-circle',
+                info: 'fas fa-info-circle'
+            };
+            const safeMessage = String(message).replace(/[&<>"']/g, (c) => `&#${c.charCodeAt(0)};`);
+            container.innerHTML = `
+                <div class="${alertClasses[type]}">
+                    <div class="flex items-center">
+                        <i class="${alertIcons[type]} mr-3 text-lg"></i>
+                        <span class="flex-1">${safeMessage}</span>
+                        <button onclick="this.parentElement.parentElement.remove()" class="ml-4 text-current hover:opacity-70 transition-opacity">
+                            <i class="fas fa-times"></i>
+                        </button>
+                    </div>
                 </div>
-            </div>
-        `;
-        
-        if (type === 'success' || type === 'info') {
-            setTimeout(() => {
-                const alert = container.querySelector('div');
-                if (alert) alert.remove();
-            }, 5000);
+            `;
+            // Only success/info alerts auto-dismiss; errors and warnings stay until closed.
+            if (type === 'success' || type === 'info') {
+                setTimeout(() => {
+                    const alert = container.querySelector('div');
+                    if (alert) alert.remove();
+                }, 5000);
+            }
         }
    }
-
-    clearAlert() {
-        document.getElementById('alertContainer').innerHTML = '';
-    }
 }
-
-// Initialize the editor when the page loads
-let modelEditor;
-document.addEventListener('DOMContentLoaded', () => {
-    modelEditor = new ModelEditor();
-});
 </script>

 </body>
--- a/core/http/views/models.html
+++ b/core/http/views/models.html
@@ -4,13 +4,13 @@

 <body class="bg-[#101827] text-[#E5E7EB]">
 <div class="flex flex-col min-h-screen" x-data="modelsGallery()">
-   
+
    {{template "views/partials/navbar" .}}
-    
+
    <!-- Notifications -->
    <div class="fixed top-20 right-4 z-50 space-y-2" style="max-width: 400px;">
        <template x-for="notification in notifications" :key="notification.id">
-            <div x-show="true" 
+            <div x-show="true"
                 x-transition:enter="transform ease-out duration-300 transition"
                 x-transition:enter-start="translate-x-full opacity-0"
                 x-transition:enter-end="translate-x-0 opacity-100"
@@ -31,7 +31,7 @@
            </div>
        </template>
    </div>
-    
+
    <div class="container mx-auto px-4 py-8 flex-grow">

        <!-- Hero Header -->
@@ -41,7 +41,7 @@
                <div class="absolute inset-0 bg-gradient-to-r from-[#38BDF8]/20 to-[#8B5CF6]/20"></div>
                <div class="absolute top-0 left-0 w-full h-full" style="background-image: radial-gradient(circle at 1px 1px, rgba(56,189,248,0.15) 1px, transparent 0); background-size: 20px 20px;"></div>
            </div>
-            
+
            <div class="relative max-w-5xl mx-auto text-center">
                <h1 class="text-4xl md:text-5xl font-bold text-[#E5E7EB] mb-4">
                    <span class="bg-clip-text text-transparent bg-gradient-to-r from-[#38BDF8] via-[#8B5CF6] to-[#38BDF8]">
@@ -62,7 +62,7 @@
                        <span class="font-semibold text-purple-300" x-text="repositories.length"></span>
                        <span class="text-gray-300 ml-1">repositories</span>
                    </div>
-                    <a href="https://localai.io/models/" target="_blank" 
+                    <a href="https://localai.io/models/" target="_blank"
                       class="flex items-center bg-blue-600/80 hover:bg-blue-600 text-white px-4 py-2 rounded-full transition-all duration-300 hover:scale-105">
                        <i class="fas fa-info-circle mr-2"></i>
                        <span>Documentation</span>
@@ -71,13 +71,13 @@
                </div>
            </div>
        </div>
-        
+
        {{template "views/partials/inprogress" .}}

        <!-- Search and Filter Section -->
        <div class="relative bg-gradient-to-br from-gray-800/80 to-gray-900/80 rounded-2xl p-8 mb-8 shadow-xl border border-gray-700/50 backdrop-blur-sm">
            <div class="absolute inset-0 rounded-2xl bg-gradient-to-br from-blue-500/5 to-purple-500/5"></div>
-            
+
            <div class="relative">
                <!-- Search Input -->
                <div class="mb-8">
@@ -89,11 +89,11 @@
                        <div class="absolute inset-y-0 start-0 flex items-center ps-4 pointer-events-none">
                            <i class="fas fa-search text-gray-400"></i>
                        </div>
-                        <input 
+                        <input
                            x-model="searchTerm"
                            @input.debounce.500ms="fetchModels()"
-                            class="w-full pl-12 pr-16 py-4 text-base font-normal text-gray-300 bg-gray-900/90 border border-gray-700/70 rounded-xl transition-all duration-300 focus:text-gray-200 focus:bg-gray-900 focus:border-blue-500 focus:ring-2 focus:ring-blue-500/50 focus:outline-none" 
-                            type="search" 
+                            class="w-full pl-12 pr-16 py-4 text-base font-normal text-gray-300 bg-gray-900/90 border border-gray-700/70 rounded-xl transition-all duration-300 focus:text-gray-200 focus:bg-gray-900 focus:border-blue-500 focus:ring-2 focus:ring-blue-500/50 focus:outline-none"
+                            type="search"
                            placeholder="Search models by name, tag, or description...">
                        <span class="absolute right-4 top-4" x-show="loading">
                            <svg class="animate-spin h-6 w-6 text-blue-500" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
@@ -103,7 +103,7 @@
                        </span>
                    </div>
                </div>
-                
+
                <!-- Filter by Type -->
                <div class="mb-8">
                    <h3 class="text-lg font-semibold text-white mb-4 flex items-center">
@@ -153,7 +153,7 @@
                        </button>
                    </div>
                </div>
-                
+
                <!-- Filter by Tags -->
                <div x-show="allTags.length > 0">
                    <h3 class="text-lg font-semibold text-white mb-4 flex items-center">
@@ -210,12 +210,12 @@
                                <tr class="hover:bg-[#38BDF8]/10 transition-colors duration-200">
                                    <!-- Icon -->
                                    <td class="px-6 py-4">
-                                        <img :src="model.icon || 'https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg'" 
+                                        <img :src="model.icon || 'https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg'"
                                             class="w-12 h-12 object-cover rounded-lg border border-[#38BDF8]/30"
                                             loading="lazy"
                                             :alt="model.name">
                                    </td>
-                                    
+
                                    <!-- Model Name -->
                                    <td class="px-6 py-4">
                                        <div class="flex flex-col">
@@ -228,12 +228,12 @@
                                            </div>
                                        </div>
                                    </td>
-                                    
+
                                    <!-- Description -->
                                    <td class="px-6 py-4">
                                        <div class="text-sm text-[#94A3B8] max-w-xs truncate" x-text="model.description" :title="model.description"></div>
                                    </td>
-                                    
+
                                    <!-- Repository -->
                                    <td class="px-6 py-4">
                                        <span class="inline-flex items-center text-xs px-2 py-1 rounded bg-[#38BDF8]/10 text-[#E5E7EB] border border-[#38BDF8]/30">
@@ -241,7 +241,7 @@
                                            <span x-text="model.gallery"></span>
                                        </span>
                                    </td>
-                                    
+
                                    <!-- License -->
                                    <td class="px-6 py-4">
                                        <span x-show="model.license" class="inline-flex items-center text-xs px-2 py-1 rounded bg-[#8B5CF6]/10 text-[#E5E7EB] border border-[#8B5CF6]/30">
@@ -250,7 +250,7 @@
                                        </span>
                                        <span x-show="!model.license" class="text-xs text-[#94A3B8]">-</span>
                                    </td>
-                                    
+
                                    <!-- Status -->
                                    <td class="px-6 py-4">
                                        <!-- Processing State -->
@@ -265,7 +265,7 @@
                                                <div class="progress-bar-table" :style="'width:' + (jobProgress[model.jobID] || 0) + '%'"></div>
                                            </div>
                                        </div>
-                                        
+
                                        <!-- Installed State -->
                                        <div x-show="!model.processing && model.installed">
                                            <span class="inline-flex items-center text-xs px-2 py-1 rounded bg-green-500/20 text-green-300 border border-green-500/30">
@@ -273,7 +273,7 @@
                                                Installed
                                            </span>
                                        </div>
-                                        
+
                                        <!-- Not Installed State -->
                                        <div x-show="!model.processing && !model.installed">
                                            <span class="inline-flex items-center text-xs px-2 py-1 rounded bg-[#1E293B] text-[#94A3B8] border border-[#38BDF8]/30">
@@ -282,42 +282,42 @@
                                            </span>
                                        </div>
                                    </td>
-                                    
+
                                    <!-- Actions -->
                                    <td class="px-6 py-4">
                                        <div class="flex items-center justify-end gap-2">
                                            <!-- Info Button -->
-                                            <button @click="openModal(model)" 
+                                            <button @click="openModal(model)"
                                                    class="inline-flex items-center px-3 py-1.5 rounded-lg bg-[#1E293B] hover:bg-[#38BDF8]/20 text-xs font-medium text-[#E5E7EB] transition duration-200 border border-[#38BDF8]/30"
                                                    title="View details">
                                                <i class="fas fa-info-circle"></i>
                                            </button>
-                                            
+
                                            <!-- Installed State Actions -->
                                            <template x-if="!model.processing && model.installed">
                                                <div class="flex gap-2">
-                                                    <button @click="reinstallModel(model.id)" 
+                                                    <button @click="reinstallModel(model.id)"
                                                            class="inline-flex items-center px-3 py-1.5 rounded-lg bg-[#38BDF8] hover:bg-[#38BDF8]/80 text-xs font-medium text-white transition duration-200"
                                                            title="Reinstall">
                                                        <i class="fa-solid fa-arrow-rotate-right"></i>
                                                    </button>
-                                                    <button @click="deleteModel(model.id)" 
+                                                    <button @click="deleteModel(model.id)"
                                                            class="inline-flex items-center px-3 py-1.5 rounded-lg bg-red-600 hover:bg-red-700 text-xs font-medium text-white transition duration-200"
                                                            title="Delete">
                                                        <i class="fa-solid fa-trash"></i>
                                                    </button>
                                                </div>
                                            </template>
-                                            
+
                                            <!-- Not Installed State Actions -->
                                            <template x-if="!model.processing && !model.installed">
                                                <div class="flex gap-2">
-                                                    <button @click="getConfig(model.id)" 
+                                                    <button @click="getConfig(model.id)"
                                                            class="inline-flex items-center px-3 py-1.5 rounded-lg bg-[#8B5CF6]/20 hover:bg-[#8B5CF6]/40 text-xs font-medium text-[#E5E7EB] transition duration-200 border border-[#8B5CF6]/30"
                                                            title="Get config">
                                                        <i class="fa-solid fa-file-code"></i>
                                                    </button>
-                                                    <button @click="installModel(model.id)" 
+                                                    <button @click="installModel(model.id)"
                                                            class="inline-flex items-center px-3 py-1.5 rounded-lg bg-[#38BDF8] hover:bg-[#38BDF8]/80 text-xs font-medium text-white transition duration-200"
                                                            title="Install">
                                                        <i class="fa-solid fa-download"></i>
@@ -334,7 +334,7 @@
            </div>

            <!-- Modal -->
-            <div x-show="selectedModel" 
+            <div x-show="selectedModel"
                 x-transition
                 @click.away="closeModal()"
                 class="fixed top-0 right-0 left-0 z-50 flex justify-center items-center w-full md:inset-0 h-full max-h-full bg-gray-900/50"
@@ -344,7 +344,7 @@
                        <!-- Modal Header -->
                        <div class="flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600">
                            <h3 class="text-xl font-semibold text-gray-900 dark:text-white" x-text="selectedModel?.name"></h3>
-                            <button @click="closeModal()" 
+                            <button @click="closeModal()"
                                    class="text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white">
                                <svg class="w-3 h-3" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 14 14">
                                    <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6"/>
@@ -355,11 +355,11 @@
                        <!-- Modal Body -->
                        <div class="p-4 md:p-5 space-y-4 overflow-y-auto flex-1 min-h-0">
                            <div class="flex justify-center items-center">
-                                <img :src="selectedModel?.icon || 'https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg'" 
-                                     class="lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3" 
+                                <img :src="selectedModel?.icon || 'https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg'"
+                                     class="lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3"
                                     loading="lazy">
                            </div>
-                            <p class="text-base leading-relaxed text-gray-500 dark:text-gray-400" x-text="selectedModel?.description"></p>
+                            <div class="text-base leading-relaxed text-gray-500 dark:text-gray-400 break-words max-w-full markdown-content" x-html="renderMarkdown(selectedModel?.description)"></div>
                            <hr>
                            <template x-if="selectedModel?.urls && selectedModel.urls.length > 0">
                                <div>
@@ -376,6 +376,21 @@
                                    </ul>
                                </div>
                            </template>
+                            <template x-if="selectedModel?.additionalFiles && selectedModel.additionalFiles.length > 0">
+                                <div>
+                                    <p class="text-sm font-semibold text-gray-900 dark:text-white mb-2">Files</p>
+                                    <ul> <!-- entries are objects, so the loop is keyed by index (a primitive) rather than by the object itself -->
+                                        <template x-for="(file, index) in selectedModel.additionalFiles" :key="index">
+                                            <li class="mb-0">
+                                                <p class="text-base leading-tight text-gray-500 dark:text-gray-400">
+                                                    <i class="fas fa-file pr-2"></i>
+                                                    <span x-text="file.filename"></span>
+                                                </p>
+                                            </li>
+                                        </template>
+                                    </ul>
+                                </div>
+                            </template>
                            <template x-if="selectedModel?.tags && selectedModel.tags.length > 0">
                                <div>
                                    <p class="text-sm mb-3 font-semibold text-gray-900 dark:text-white">Tags</p>
@@ -393,7 +408,7 @@
                        </div>
                        <!-- Modal Footer -->
                        <div class="flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600">
-                            <button @click="closeModal()" 
+                            <button @click="closeModal()"
                                    class="py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700">
                                Close
                            </button>
@@ -406,7 +421,7 @@
        <!-- Pagination -->
        <div x-show="totalPages > 1" class="flex justify-center mt-12">
            <div class="flex items-center gap-4 bg-gray-800/60 rounded-2xl p-4 backdrop-blur-sm border border-gray-700/50">
-                <button @click="goToPage(currentPage - 1)" 
+                <button @click="goToPage(currentPage - 1)"
                    :disabled="currentPage <= 1"
                    :class="currentPage <= 1 ? 'opacity-50 cursor-not-allowed' : ''"
                    class="group flex items-center justify-center h-12 w-12 bg-gray-700/80 hover:bg-indigo-600 text-gray-300 hover:text-white rounded-xl shadow-lg transition-all duration-300 ease-in-out transform hover:scale-110">
@@ -418,7 +433,7 @@
                    <span class="text-gray-400">of</span>
                    <span class="text-white font-bold text-lg mx-2" x-text="totalPages"></span>
                </div>
-                <button @click="goToPage(currentPage + 1)" 
+                <button @click="goToPage(currentPage + 1)"
                    :disabled="currentPage >= totalPages"
                    :class="currentPage >= totalPages ? 'opacity-50 cursor-not-allowed' : ''"
                    class="group flex items-center justify-center h-12 w-12 bg-gray-700/80 hover:bg-indigo-600 text-gray-300 hover:text-white rounded-xl shadow-lg transition-all duration-300 ease-in-out transform hover:scale-110">
@@ -495,6 +510,42 @@ tbody tr:last-child td:first-child {
 tbody tr:last-child td:last-child {
    border-bottom-right-radius: 1rem;
 }
+
+/* Markdown content overflow handling */
+.markdown-content {
+    word-wrap: break-word;
+    overflow-wrap: anywhere;
+    max-width: 100%;
+}
+
+.markdown-content pre {
+    overflow-x: auto;
+    max-width: 100%;
+    white-space: pre-wrap;
+    word-wrap: break-word;
+}
+
+.markdown-content code {
+    word-wrap: break-word;
+    overflow-wrap: break-word;
+}
+
+.markdown-content pre code {
+    white-space: pre;
+    overflow-x: auto;
+    display: block;
+}
+
+.markdown-content table {
+    max-width: 100%;
+    overflow-x: auto;
+    display: block;
+}
+
+.markdown-content img {
+    max-width: 100%;
+    height: auto;
+}
 </style>

 <script>
@@ -511,24 +562,24 @@ function modelsGallery() {
        selectedModel: null,
        jobProgress: {},
        notifications: [],
-        
+
        init() {
            this.fetchModels();
            // Poll for job progress every 600ms
            setInterval(() => this.pollJobs(), 600);
        },
-        
+
        addNotification(message, type = 'error') {
            const id = Date.now();
            this.notifications.push({ id, message, type });
            // Auto-dismiss after 10 seconds
            setTimeout(() => this.dismissNotification(id), 10000);
        },
-        
+
        dismissNotification(id) {
            this.notifications = this.notifications.filter(n => n.id !== id);
        },
-        
+
        async fetchModels() {
            this.loading = true;
            try {
@@ -539,7 +590,7 @@ function modelsGallery() {
                });
                const response = await fetch(`/api/models?${params}`);
                const data = await response.json();
-                
+
                this.models = data.models || [];
                this.allTags = data.allTags || [];
                this.repositories = data.repositories || [];
@@ -552,20 +603,20 @@ function modelsGallery() {
                this.loading = false;
            }
        },
-        
+
        filterByTerm(term) {
            this.searchTerm = term;
            this.currentPage = 1;
            this.fetchModels();
        },
-        
+
        goToPage(page) {
            if (page >= 1 && page <= this.totalPages) {
                this.currentPage = page;
                this.fetchModels();
            }
        },
-        
+
        async installModel(modelId) {
            try {
                const response = await fetch(`/api/models/install/${encodeURIComponent(modelId)}`, {
@@ -586,12 +637,12 @@ function modelsGallery() {
                alert('Failed to start installation');
            }
        },
-        
+
        async deleteModel(modelId) {
            if (!confirm('Are you sure you wish to delete the model?')) {
                return;
            }
-            
+
            try {
                const response = await fetch(`/api/models/delete/${encodeURIComponent(modelId)}`, {
                    method: 'POST'
@@ -610,11 +661,11 @@ function modelsGallery() {
                alert('Failed to start deletion');
            }
        },
-        
+
        async reinstallModel(modelId) {
            this.installModel(modelId);
        },
-        
+
        async getConfig(modelId) {
            try {
                const response = await fetch(`/api/models/config/${encodeURIComponent(modelId)}`, {
@@ -627,24 +678,24 @@ function modelsGallery() {
                alert('Failed to get configuration');
            }
        },
-        
+
        async pollJobs() {
            const processingModels = this.models.filter(m => m.processing && m.jobID);
-            
+
            for (const model of processingModels) {
                try {
                    const response = await fetch(`/api/models/job/${model.jobID}`);
                    const jobData = await response.json();
-                    
+
                    // Handle queued status
                    if (jobData.queued) {
                        this.jobProgress[model.jobID] = 0;
                        // Keep processing state but don't show error
                        continue;
                    }
-                    
+
                    this.jobProgress[model.jobID] = jobData.progress || 0;
-                    
+
                    if (jobData.completed) {
                        model.processing = false;
                        model.installed = !jobData.deletion;
@@ -655,7 +706,7 @@ function modelsGallery() {
                        // Refresh the models list to get updated state
                        this.fetchModels();
                    }
-                    
+
                    if (jobData.error) {
                        model.processing = false;
                        delete this.jobProgress[model.jobID];
@@ -668,11 +719,25 @@ function modelsGallery() {
                }
            }
        },
-        
+
+        // Convert markdown to sanitized HTML; the result is bound with x-html.
+        // Every fallback path returns HTML-escaped text, never the raw input.
+        renderMarkdown(text) {
+            if (!text) return '';
+            const escapeHtml = (s) => String(s).replace(/[&<>"']/g, (c) => `&#${c.charCodeAt(0)};`);
+            try {
+                if (typeof marked === 'undefined' || typeof DOMPurify === 'undefined') return escapeHtml(text);
+                return DOMPurify.sanitize(marked.parse(text));
+            } catch (error) {
+                console.error('Error rendering markdown:', error);
+                return escapeHtml(text);
+            }
+        },
+
        openModal(model) {
            this.selectedModel = model;
        },
-        
+
        closeModal() {
            this.selectedModel = null;
        }
--- a/core/http/views/p2p.html
+++ b/core/http/views/p2p.html
@@ -10,6 +10,110 @@
    {{template "views/partials/inprogress" .}}
    
    <div class="container mx-auto px-4 py-8 flex-grow">
+        {{ if eq .P2PToken "" }}
+        <!-- P2P Disabled - Wizard Guide -->
+        <div class="relative bg-[#1E293B]/80 border border-[#8B5CF6]/20 rounded-2xl p-12 shadow-xl backdrop-blur-sm">
+            <div class="absolute inset-0 rounded-2xl bg-gradient-to-br from-purple-500/5 to-cyan-500/5"></div>
+            <div class="relative text-center max-w-4xl mx-auto">
+                <div class="inline-flex items-center justify-center w-20 h-20 rounded-full bg-[#8B5CF6]/10 border border-[#8B5CF6]/20 mb-6">
+                    <i class="text-[#8B5CF6] text-3xl fas fa-circle-nodes"></i>
+                </div>
+                <h2 class="text-3xl md:text-4xl font-bold text-[#E5E7EB] mb-6">
+                    <span class="bg-clip-text text-transparent bg-gradient-to-r from-[#38BDF8] to-[#8B5CF6]">
+                        P2P Distribution Not Enabled
+                    </span>
+                </h2>
+                <p class="text-xl text-[#94A3B8] mb-8 leading-relaxed">
+                    Enable peer-to-peer distribution to scale your AI workloads across multiple devices. Share instances, shard models, and pool computational resources across your network.
+                </p>
+                
+                <!-- Features Preview -->
+                <div class="grid grid-cols-1 md:grid-cols-3 gap-4 mb-10">
+                    <div class="bg-[#101827] border border-[#38BDF8]/20 rounded-xl p-4">
+                        <div class="w-10 h-10 bg-blue-500/20 rounded-lg flex items-center justify-center mx-auto mb-3">
+                            <i class="fas fa-network-wired text-[#38BDF8] text-xl"></i>
+                        </div>
+                        <h3 class="text-sm font-semibold text-[#E5E7EB] mb-2">Instance Federation</h3>
+                        <p class="text-xs text-[#94A3B8]">Load balance across multiple instances</p>
+                    </div>
+                    <div class="bg-[#101827] border border-[#8B5CF6]/20 rounded-xl p-4">
+                        <div class="w-10 h-10 bg-purple-500/20 rounded-lg flex items-center justify-center mx-auto mb-3">
+                            <i class="fas fa-puzzle-piece text-[#8B5CF6] text-xl"></i>
+                        </div>
+                        <h3 class="text-sm font-semibold text-[#E5E7EB] mb-2">Model Sharding</h3>
+                        <p class="text-xs text-[#94A3B8]">Split large models across workers</p>
+                    </div>
+                    <div class="bg-[#101827] border border-green-500/20 rounded-xl p-4">
+                        <div class="w-10 h-10 bg-green-500/20 rounded-lg flex items-center justify-center mx-auto mb-3">
+                            <i class="fas fa-share-alt text-green-400 text-xl"></i>
+                        </div>
+                        <h3 class="text-sm font-semibold text-[#E5E7EB] mb-2">Resource Sharing</h3>
+                        <p class="text-xs text-[#94A3B8]">Pool resources from multiple devices</p>
+                    </div>
+                </div>
+
+                <!-- Setup Instructions -->
+                <div class="bg-[#101827] border border-[#8B5CF6]/20 rounded-xl p-6 mb-8 text-left">
+                    <h3 class="text-lg font-bold text-[#E5E7EB] mb-4 flex items-center">
+                        <i class="fas fa-rocket text-[#8B5CF6] mr-2"></i>
+                        How to Enable P2P
+                    </h3>
+                    <div class="space-y-4">
+                        <div class="flex items-start">
+                            <div class="flex-shrink-0 w-8 h-8 rounded-full bg-[#8B5CF6]/20 flex items-center justify-center mr-3 mt-0.5">
+                                <span class="text-[#8B5CF6] font-bold text-sm">1</span>
+                            </div>
+                            <div class="flex-1">
+                                <p class="text-[#E5E7EB] font-medium mb-2">Start LocalAI with P2P enabled</p>
+                                <code class="block bg-[#1E293B] text-[#38BDF8] p-3 rounded-lg text-sm border border-[#38BDF8]/20">
+                                    local-ai run --p2p
+                                </code>
+                                <p class="text-[#94A3B8] text-sm mt-2">This will automatically generate a network token for you.</p>
+                            </div>
+                        </div>
+                        <div class="flex items-start">
+                            <div class="flex-shrink-0 w-8 h-8 rounded-full bg-[#8B5CF6]/20 flex items-center justify-center mr-3 mt-0.5">
+                                <span class="text-[#8B5CF6] font-bold text-sm">2</span>
+                            </div>
+                            <div class="flex-1">
+                                <p class="text-[#E5E7EB] font-medium mb-2">Or use an existing token</p>
+                                <code class="block bg-[#1E293B] text-[#38BDF8] p-3 rounded-lg text-sm border border-[#38BDF8]/20">
+                                    export TOKEN="your-token-here"<br>
+                                    local-ai run --p2p
+                                </code>
+                                <p class="text-[#94A3B8] text-sm mt-2">If you already have a token from another instance, you can reuse it.</p>
+                            </div>
+                        </div>
+                        <div class="flex items-start">
+                            <div class="flex-shrink-0 w-8 h-8 rounded-full bg-[#8B5CF6]/20 flex items-center justify-center mr-3 mt-0.5">
+                                <span class="text-[#8B5CF6] font-bold text-sm">3</span>
+                            </div>
+                            <div class="flex-1">
+                                <p class="text-[#E5E7EB] font-medium mb-2">Access the P2P dashboard</p>
+                                <p class="text-[#94A3B8] text-sm">Once enabled, refresh this page to see your network token and start connecting nodes.</p>
+                            </div>
+                        </div>
+                    </div>
+                </div>
+                
+                <div class="flex flex-wrap justify-center gap-4">
+                    <a href="https://localai.io/features/distribute/" target="_blank" 
+                       class="inline-flex items-center bg-[#8B5CF6] hover:bg-[#8B5CF6]/90 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 transform hover:scale-105 hover:shadow-[0_0_20px_rgba(139,92,246,0.4)]">
+                        <i class="fas fa-book mr-2"></i>
+                        Documentation
+                        <i class="fas fa-external-link-alt ml-2 text-sm opacity-70"></i>
+                    </a>
+                    <a href="https://localai.io/basics/getting_started/" target="_blank" 
+                       class="inline-flex items-center bg-[#1E293B] hover:bg-[#1E293B]/80 border border-[#8B5CF6]/20 text-[#E5E7EB] py-3 px-6 rounded-xl font-semibold transition-all duration-300 transform hover:scale-105">
+                        <i class="fas fa-graduation-cap mr-2"></i>
+                        Getting Started
+                        <i class="fas fa-external-link-alt ml-2 text-sm opacity-70"></i>
+                    </a>
+                </div>
+            </div>
+        </div>
+        {{ else }}
+        <!-- P2P Enabled - Full Dashboard -->
        <div class="workers mt-8">
            <!-- Hero Section with Network Animation -->
            <div class="animation-container mb-8">
@@ -129,22 +233,6 @@
                    The network token can be used to either share the instance or join a federation or a worker network. Below you will find examples on how to start a new instance or a worker with this token.
                </p>
            </div>
-        
-            <!-- Warning box if p2p token is empty and p2p is enabled -->
-            {{ if eq .P2PToken "" }}
-            <div class="bg-gradient-to-r from-red-800/70 to-red-700/70 border border-red-600/50 p-6 rounded-xl shadow-lg mb-10 text-left">
-                <div class="flex items-center mb-2">
-                    <i class="fa-solid fa-exclamation-triangle text-red-300 text-2xl mr-3"></i>
-                    <h3 class="text-xl font-bold text-white">Warning: P2P token was not specified</h3>
-                </div>
-                <p class="mb-4 text-red-200">
-                    You have to enable P2P mode by starting LocalAI with <code class="bg-red-900/50 px-2 py-0.5 rounded">--p2p</code>. Please restart the server with <code class="bg-red-900/50 px-2 py-0.5 rounded">--p2p</code> to generate a new token automatically that can be used to discover other nodes. If you already have a token, specify it with <code class="bg-red-900/50 px-2 py-0.5 rounded">export TOKEN=".."</code>
-                    <a href="https://localai.io/features/distribute/" target="_blank" class="text-red-300 hover:text-red-200 underline underline-offset-2">
-                       Check out the documentation for more information.
-                    </a>
-                </p>
-            </div>
-            {{ else }}

            <!-- Network Status Overview -->
            <div class="grid grid-cols-1 lg:grid-cols-3 gap-6 mb-10">
@@ -507,13 +595,15 @@ docker run -ti --net host -e TOKEN="<span class="token">{{.P2PToken}}</span>" --
                </div>
            </div>
            <!-- Llama.cpp Box END -->    
-            {{ end }}   
        </div>
+        {{ end }}   
    </div>

    {{template "views/partials/footer" .}}
 </div>
+{{ if ne .P2PToken "" }}
 <script src="static/p2panimation.js"></script>
+{{ end }}

 <style>
    .token {
@@ -575,6 +665,7 @@ docker run -ti --net host -e TOKEN="<span class="token">{{.P2PToken}}</span>" --
    }
 </style>

+{{ if ne .P2PToken "" }}
 <script>
 function p2pNetwork() {
    return {
@@ -623,6 +714,15 @@ function p2pNetwork() {
    }
 }
 </script>
+{{ else }}
+<script>
+function p2pNetwork() {
+    return {
+        // Empty component when P2P is disabled
+    }
+}
+</script>
+{{ end }}

 </body>
 </html>
--- a/core/http/views/partials/inprogress.html
+++ b/core/http/views/partials/inprogress.html
@@ -120,8 +120,17 @@ function operationsStatus() {
                    throw new Error('Failed to fetch operations');
                }
                const data = await response.json();
+                const previousCount = this.operations.length;
                this.operations = data.operations || [];
                
+                // If we had operations before and now we don't, refresh the page
+                if (previousCount > 0 && this.operations.length === 0) {
+                    // Small delay to ensure the user sees the completion
+                    setTimeout(() => {
+                        window.location.reload();
+                    }, 1000);
+                }
+                
                // Auto-collapse if there are many operations
                if (this.operations.length > 5 && !this.collapsed) {
                    // Don't auto-collapse, let user control it
--- a/core/p2p/sync.go
+++ b/core/p2p/sync.go
@@ -73,7 +73,7 @@ func syncState(ctx context.Context, n *node.Node, app *application.Application)
 			continue
 		}

-		app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{
+		app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
 			ID:                 uuid.String(),
 			GalleryElementName: model,
 			Galleries:          app.ApplicationConfig().Galleries,
--- a/core/schema/localai.go
+++ b/core/schema/localai.go
@@ -1,6 +1,7 @@
 package schema

 import (
+	"encoding/json"
 	"time"

 	gopsutil "github.com/shirou/gopsutil/v3/process"
@@ -157,3 +158,8 @@ type Detection struct {
 	Height    float32 `json:"height"`
 	ClassName string  `json:"class_name"`
 }
+
+type ImportModelRequest struct {
+	URI         string          `json:"uri"`
+	Preferences json.RawMessage `json:"preferences,omitempty"` // kept as raw JSON (not decoded here); omitted from output when empty
+}
--- a/core/schema/message.go
+++ b/core/schema/message.go
@@ -0,0 +1,85 @@
+package schema
+
+import (
+	"encoding/json"
+
+	"github.com/rs/zerolog/log"
+
+	"github.com/mudler/LocalAI/pkg/grpc/proto"
+)
+
+type Message struct {
+	// Role is the message role, e.g. "user", "assistant" or "tool".
+	Role string `json:"role,omitempty" yaml:"role"`
+
+	// Name is the message name (used for tool calls).
+	Name string `json:"name,omitempty" yaml:"name"`
+
+	// Content is the message content; ToProto handles a plain string or a []interface{} of parts.
+	Content interface{} `json:"content" yaml:"content"`
+
+	StringContent string   `json:"string_content,omitempty" yaml:"string_content,omitempty"`
+	StringImages  []string `json:"string_images,omitempty" yaml:"string_images,omitempty"`
+	StringVideos  []string `json:"string_videos,omitempty" yaml:"string_videos,omitempty"`
+	StringAudios  []string `json:"string_audios,omitempty" yaml:"string_audios,omitempty"`
+
+	// FunctionCall is the result of a function call.
+	FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
+
+	ToolCalls []ToolCall `json:"tool_calls,omitempty" yaml:"tool_call,omitempty"` // NOTE(review): YAML key is singular ("tool_call") while the JSON key is "tool_calls" — confirm this is intended
+}
+
+type ToolCall struct {
+	Index        int          `json:"index"`
+	ID           string       `json:"id"`
+	Type         string       `json:"type"`
+	FunctionCall FunctionCall `json:"function"` // the called function's name and arguments, under the "function" JSON key
+}
+
+type FunctionCall struct {
+	Name      string `json:"name,omitempty"`
+	Arguments string `json:"arguments"` // held as a string; the package tests pass JSON text such as {"query": "test"}
+}
+
+type Messages []Message // Messages is a list of Message values; ToProto converts it to proto messages
+
+// ToProto converts the messages into a slice of proto.Message, one entry per input message, copying Role and Name.
+// String content is copied, []interface{} content is reduced to its text parts, any other content (including nil) is left empty; ToolCalls are serialized to JSON.
+func (messages Messages) ToProto() []*proto.Message {
+	protoMessages := make([]*proto.Message, len(messages))
+	for i, message := range messages {
+		protoMessages[i] = &proto.Message{
+			Role: message.Role,
+			Name: message.Name, // needed by function calls
+		}
+
+		switch ct := message.Content.(type) {
+		case string:
+			protoMessages[i].Content = ct
+		case []interface{}:
+			// Multimodal content: round-trip the parts through JSON and concatenate only their "text" fields; every other part field is dropped.
+			data, _ := json.Marshal(ct) // NOTE(review): marshal error is discarded
+			resultData := []struct {
+				Text string `json:"text"`
+			}{}
+			json.Unmarshal(data, &resultData) // NOTE(review): returned error is not checked
+			for _, r := range resultData {
+				protoMessages[i].Content += r.Text
+			}
+		}
+
+		// Serialize tool_calls to a JSON string if present; on failure a warning is logged and ToolCalls stays empty.
+		if len(message.ToolCalls) > 0 {
+			toolCallsJSON, err := json.Marshal(message.ToolCalls)
+			if err != nil {
+				log.Warn().Err(err).Msg("failed to marshal tool_calls to JSON")
+			} else {
+				protoMessages[i].ToolCalls = string(toolCallsJSON)
+			}
+		}
+
+		// Note: tool_call_id and reasoning_content are not in schema.Message yet.
+		// They may need to be added to schema.Message if needed in the future.
+	}
+	return protoMessages
+}
--- a/core/schema/message_test.go
+++ b/core/schema/message_test.go
@@ -0,0 +1,265 @@
+package schema_test
+
+import (
+	"encoding/json"
+
+	. "github.com/mudler/LocalAI/core/schema"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("LLM tests", func() {
+
+	Context("ToProtoMessages conversion", func() {
+		It("should convert basic message with string content", func() {
+			messages := Messages{
+				{
+					Role:    "user",
+					Content: "Hello, world!",
+				},
+			}
+
+			protoMessages := messages.ToProto()
+
+			Expect(protoMessages).To(HaveLen(1))
+			Expect(protoMessages[0].Role).To(Equal("user"))
+			Expect(protoMessages[0].Content).To(Equal("Hello, world!"))
+			Expect(protoMessages[0].Name).To(BeEmpty())
+			Expect(protoMessages[0].ToolCalls).To(BeEmpty())
+		})
+
+		It("should convert message with nil content to empty string", func() {
+			messages := Messages{
+				{
+					Role:    "assistant",
+					Content: nil,
+				},
+			}
+
+			protoMessages := messages.ToProto()
+
+			Expect(protoMessages).To(HaveLen(1))
+			Expect(protoMessages[0].Role).To(Equal("assistant"))
+			Expect(protoMessages[0].Content).To(Equal(""))
+		})
+
+		It("should convert message with array content (multimodal)", func() {
+			messages := Messages{
+				{
+					Role: "user",
+					Content: []interface{}{
+						map[string]interface{}{
+							"type": "text",
+							"text": "Hello",
+						},
+						map[string]interface{}{
+							"type": "text",
+							"text": " World",
+						},
+					},
+				},
+			}
+
+			protoMessages := messages.ToProto()
+
+			Expect(protoMessages).To(HaveLen(1))
+			Expect(protoMessages[0].Role).To(Equal("user"))
+			Expect(protoMessages[0].Content).To(Equal("Hello World"))
+		})
+
+		It("should convert message with tool_calls", func() {
+			messages := Messages{
+				{
+					Role:    "assistant",
+					Content: "I'll call a function",
+					ToolCalls: []ToolCall{
+						{
+							Index: 0,
+							ID:    "call_123",
+							Type:  "function",
+							FunctionCall: FunctionCall{
+								Name:      "get_weather",
+								Arguments: `{"location": "San Francisco"}`,
+							},
+						},
+					},
+				},
+			}
+
+			protoMessages := messages.ToProto()
+
+			Expect(protoMessages).To(HaveLen(1))
+			Expect(protoMessages[0].Role).To(Equal("assistant"))
+			Expect(protoMessages[0].Content).To(Equal("I'll call a function"))
+			Expect(protoMessages[0].ToolCalls).NotTo(BeEmpty())
+
+			// Verify tool_calls JSON is valid
+			var toolCalls []ToolCall
+			err := json.Unmarshal([]byte(protoMessages[0].ToolCalls), &toolCalls)
+			Expect(err).NotTo(HaveOccurred())
+			Expect(toolCalls).To(HaveLen(1))
+			Expect(toolCalls[0].ID).To(Equal("call_123"))
+			Expect(toolCalls[0].FunctionCall.Name).To(Equal("get_weather"))
+		})
+
+		It("should convert message with name field", func() {
+			messages := Messages{
+				{
+					Role:    "tool",
+					Content: "Function result",
+					Name:    "get_weather",
+				},
+			}
+
+			protoMessages := messages.ToProto()
+
+			Expect(protoMessages).To(HaveLen(1))
+			Expect(protoMessages[0].Role).To(Equal("tool"))
+			Expect(protoMessages[0].Content).To(Equal("Function result"))
+			Expect(protoMessages[0].Name).To(Equal("get_weather"))
+		})
+
+		It("should convert message with tool_calls and nil content", func() {
+			messages := Messages{
+				{
+					Role:    "assistant",
+					Content: nil,
+					ToolCalls: []ToolCall{
+						{
+							Index: 0,
+							ID:    "call_456",
+							Type:  "function",
+							FunctionCall: FunctionCall{
+								Name:      "search",
+								Arguments: `{"query": "test"}`,
+							},
+						},
+					},
+				},
+			}
+
+			protoMessages := messages.ToProto()
+
+			Expect(protoMessages).To(HaveLen(1))
+			Expect(protoMessages[0].Role).To(Equal("assistant"))
+			Expect(protoMessages[0].Content).To(Equal(""))
+			Expect(protoMessages[0].ToolCalls).NotTo(BeEmpty())
+
+			var toolCalls []ToolCall
+			err := json.Unmarshal([]byte(protoMessages[0].ToolCalls), &toolCalls)
+			Expect(err).NotTo(HaveOccurred())
+			Expect(toolCalls).To(HaveLen(1))
+			Expect(toolCalls[0].FunctionCall.Name).To(Equal("search"))
+		})
+
+		It("should convert multiple messages", func() {
+			messages := Messages{
+				{
+					Role:    "user",
+					Content: "Hello",
+				},
+				{
+					Role:    "assistant",
+					Content: "Hi there!",
+				},
+				{
+					Role:    "user",
+					Content: "How are you?",
+				},
+			}
+
+			protoMessages := messages.ToProto()
+
+			Expect(protoMessages).To(HaveLen(3))
+			Expect(protoMessages[0].Role).To(Equal("user"))
+			Expect(protoMessages[0].Content).To(Equal("Hello"))
+			Expect(protoMessages[1].Role).To(Equal("assistant"))
+			Expect(protoMessages[1].Content).To(Equal("Hi there!"))
+			Expect(protoMessages[2].Role).To(Equal("user"))
+			Expect(protoMessages[2].Content).To(Equal("How are you?"))
+		})
+
+		It("should handle empty messages slice", func() {
+			messages := Messages{}
+
+			protoMessages := messages.ToProto()
+
+			Expect(protoMessages).To(HaveLen(0))
+		})
+
+		It("should handle message with all optional fields", func() {
+			messages := Messages{
+				{
+					Role:    "assistant",
+					Content: "I'll help you",
+					Name:    "test_tool",
+					ToolCalls: []ToolCall{
+						{
+							Index: 0,
+							ID:    "call_789",
+							Type:  "function",
+							FunctionCall: FunctionCall{
+								Name:      "test_function",
+								Arguments: `{"param": "value"}`,
+							},
+						},
+					},
+				},
+			}
+
+			protoMessages := messages.ToProto()
+
+			Expect(protoMessages).To(HaveLen(1))
+			Expect(protoMessages[0].Role).To(Equal("assistant"))
+			Expect(protoMessages[0].Content).To(Equal("I'll help you"))
+			Expect(protoMessages[0].Name).To(Equal("test_tool"))
+			Expect(protoMessages[0].ToolCalls).NotTo(BeEmpty())
+
+			var toolCalls []ToolCall
+			err := json.Unmarshal([]byte(protoMessages[0].ToolCalls), &toolCalls)
+			Expect(err).NotTo(HaveOccurred())
+			Expect(toolCalls).To(HaveLen(1))
+		})
+
+		It("should handle message with empty string content", func() {
+			messages := Messages{
+				{
+					Role:    "user",
+					Content: "",
+				},
+			}
+
+			protoMessages := messages.ToProto()
+
+			Expect(protoMessages).To(HaveLen(1))
+			Expect(protoMessages[0].Role).To(Equal("user"))
+			Expect(protoMessages[0].Content).To(Equal(""))
+		})
+
+		It("should handle message with array content containing non-text parts", func() {
+			messages := Messages{
+				{
+					Role: "user",
+					Content: []interface{}{
+						map[string]interface{}{
+							"type": "text",
+							"text": "Hello",
+						},
+						map[string]interface{}{
+							"type": "image",
+							"url":  "https://example.com/image.jpg",
+						},
+					},
+				},
+			}
+
+			protoMessages := messages.ToProto()
+
+			Expect(protoMessages).To(HaveLen(1))
+			Expect(protoMessages[0].Role).To(Equal("user"))
+			// Should only extract text parts
+			Expect(protoMessages[0].Content).To(Equal("Hello"))
+		})
+	})
+})
--- a/core/schema/openai.go
+++ b/core/schema/openai.go
@@ -50,7 +50,7 @@ type OpenAIResponse struct {

 type Choice struct {
 	Index        int      `json:"index"`
-	FinishReason string   `json:"finish_reason"`
+	FinishReason *string  `json:"finish_reason"`
 	Message      *Message `json:"message,omitempty"`
 	Delta        *Message `json:"delta,omitempty"`
 	Text         string   `json:"text,omitempty"`
@@ -76,39 +76,6 @@ type InputAudio struct {
 	Data string `json:"data" yaml:"data"`
 }

-type Message struct {
-	// The message role
-	Role string `json:"role,omitempty" yaml:"role"`
-
-	// The message name (used for tools calls)
-	Name string `json:"name,omitempty" yaml:"name"`
-
-	// The message content
-	Content interface{} `json:"content" yaml:"content"`
-
-	StringContent string   `json:"string_content,omitempty" yaml:"string_content,omitempty"`
-	StringImages  []string `json:"string_images,omitempty" yaml:"string_images,omitempty"`
-	StringVideos  []string `json:"string_videos,omitempty" yaml:"string_videos,omitempty"`
-	StringAudios  []string `json:"string_audios,omitempty" yaml:"string_audios,omitempty"`
-
-	// A result of a function call
-	FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
-
-	ToolCalls []ToolCall `json:"tool_calls,omitempty" yaml:"tool_call,omitempty"`
-}
-
-type ToolCall struct {
-	Index        int          `json:"index"`
-	ID           string       `json:"id"`
-	Type         string       `json:"type"`
-	FunctionCall FunctionCall `json:"function"`
-}
-
-type FunctionCall struct {
-	Name      string `json:"name,omitempty"`
-	Arguments string `json:"arguments"`
-}
-
 type OpenAIModel struct {
 	ID     string `json:"id"`
 	Object string `json:"object"`
--- a/core/schema/prediction.go
+++ b/core/schema/prediction.go
@@ -1,50 +1,51 @@
 package schema

+// @Description PredictionOptions contains prediction parameters for model inference
 type PredictionOptions struct {

 	// Also part of the OpenAI official spec
 	BasicModelRequest `yaml:",inline"`

 	// Also part of the OpenAI official spec
-	Language string `json:"language"`
+	Language string `json:"language,omitempty" yaml:"language,omitempty"`

 	// Only for audio transcription
-	Translate bool `json:"translate"`
+	Translate bool `json:"translate,omitempty" yaml:"translate,omitempty"`

 	// Also part of the OpenAI official spec. use it for returning multiple results
-	N int `json:"n"`
+	N int `json:"n,omitempty" yaml:"n,omitempty"`

 	// Common options between all the API calls, part of the OpenAI spec
-	TopP        *float64 `json:"top_p" yaml:"top_p"`
-	TopK        *int     `json:"top_k" yaml:"top_k"`
-	Temperature *float64 `json:"temperature" yaml:"temperature"`
-	Maxtokens   *int     `json:"max_tokens" yaml:"max_tokens"`
-	Echo        bool     `json:"echo"`
+	TopP        *float64 `json:"top_p,omitempty" yaml:"top_p,omitempty"`
+	TopK        *int     `json:"top_k,omitempty" yaml:"top_k,omitempty"`
+	Temperature *float64 `json:"temperature,omitempty" yaml:"temperature,omitempty"`
+	Maxtokens   *int     `json:"max_tokens,omitempty" yaml:"max_tokens,omitempty"`
+	Echo        bool     `json:"echo,omitempty" yaml:"echo,omitempty"`

 	// Custom parameters - not present in the OpenAI API
-	Batch         int     `json:"batch" yaml:"batch"`
-	IgnoreEOS     bool    `json:"ignore_eos" yaml:"ignore_eos"`
-	RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
+	Batch         int     `json:"batch,omitempty" yaml:"batch,omitempty"`
+	IgnoreEOS     bool    `json:"ignore_eos,omitempty" yaml:"ignore_eos,omitempty"`
+	RepeatPenalty float64 `json:"repeat_penalty,omitempty" yaml:"repeat_penalty,omitempty"`

-	RepeatLastN int `json:"repeat_last_n" yaml:"repeat_last_n"`
+	RepeatLastN int `json:"repeat_last_n,omitempty" yaml:"repeat_last_n,omitempty"`

-	Keep int `json:"n_keep" yaml:"n_keep"`
+	Keep int `json:"n_keep,omitempty" yaml:"n_keep,omitempty"`

-	FrequencyPenalty float64  `json:"frequency_penalty" yaml:"frequency_penalty"`
-	PresencePenalty  float64  `json:"presence_penalty" yaml:"presence_penalty"`
-	TFZ              *float64 `json:"tfz" yaml:"tfz"`
+	FrequencyPenalty float64  `json:"frequency_penalty,omitempty" yaml:"frequency_penalty,omitempty"`
+	PresencePenalty  float64  `json:"presence_penalty,omitempty" yaml:"presence_penalty,omitempty"`
+	TFZ              *float64 `json:"tfz,omitempty" yaml:"tfz,omitempty"`

-	TypicalP *float64 `json:"typical_p" yaml:"typical_p"`
-	Seed     *int     `json:"seed" yaml:"seed"`
+	TypicalP *float64 `json:"typical_p,omitempty" yaml:"typical_p,omitempty"`
+	Seed     *int     `json:"seed,omitempty" yaml:"seed,omitempty"`

-	NegativePrompt      string  `json:"negative_prompt" yaml:"negative_prompt"`
-	RopeFreqBase        float32 `json:"rope_freq_base" yaml:"rope_freq_base"`
-	RopeFreqScale       float32 `json:"rope_freq_scale" yaml:"rope_freq_scale"`
-	NegativePromptScale float32 `json:"negative_prompt_scale" yaml:"negative_prompt_scale"`
+	NegativePrompt      string  `json:"negative_prompt,omitempty" yaml:"negative_prompt,omitempty"`
+	RopeFreqBase        float32 `json:"rope_freq_base,omitempty" yaml:"rope_freq_base,omitempty"`
+	RopeFreqScale       float32 `json:"rope_freq_scale,omitempty" yaml:"rope_freq_scale,omitempty"`
+	NegativePromptScale float32 `json:"negative_prompt_scale,omitempty" yaml:"negative_prompt_scale,omitempty"`

 	// Diffusers
-	ClipSkip int `json:"clip_skip" yaml:"clip_skip"`
+	ClipSkip int `json:"clip_skip,omitempty" yaml:"clip_skip,omitempty"`

 	// RWKV (?)
-	Tokenizer string `json:"tokenizer" yaml:"tokenizer"`
+	Tokenizer string `json:"tokenizer,omitempty" yaml:"tokenizer,omitempty"`
 }
--- a/core/schema/request.go
+++ b/core/schema/request.go
@@ -5,8 +5,9 @@ type LocalAIRequest interface {
 	ModelName(*string) string
 }

+// @Description BasicModelRequest contains the basic model request fields
 type BasicModelRequest struct {
-	Model string `json:"model" yaml:"model"`
+	Model string `json:"model,omitempty" yaml:"model,omitempty"`
 	// TODO: Should this also include the following fields from the OpenAI side of the world?
 	// If so, changes should be made to core/http/middleware/request.go to match

--- a/core/schema/schema_suite_test.go
+++ b/core/schema/schema_suite_test.go
@@ -0,0 +1,13 @@
+package schema_test
+
+import (
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+func TestSchema(t *testing.T) {
+	RegisterFailHandler(Fail) // route Gomega assertion failures to Ginkgo's Fail
+	RunSpecs(t, "LocalAI Schema test suite")
+}
--- a/core/services/backends.go
+++ b/core/services/backends.go
@@ -8,7 +8,7 @@ import (
 	"github.com/rs/zerolog/log"
 )

-func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend], systemState *system.SystemState) error {
+func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend, any], systemState *system.SystemState) error {
 	utils.ResetDownloadTimers()
 	g.UpdateStatus(op.ID, &GalleryOpStatus{Message: "processing", Progress: 0})

--- a/core/services/gallery.go
+++ b/core/services/gallery.go
@@ -14,8 +14,8 @@ import (
 type GalleryService struct {
 	appConfig *config.ApplicationConfig
 	sync.Mutex
-	ModelGalleryChannel   chan GalleryOp[gallery.GalleryModel]
-	BackendGalleryChannel chan GalleryOp[gallery.GalleryBackend]
+	ModelGalleryChannel   chan GalleryOp[gallery.GalleryModel, gallery.ModelConfig]
+	BackendGalleryChannel chan GalleryOp[gallery.GalleryBackend, any]

 	modelLoader *model.ModelLoader
 	statuses    map[string]*GalleryOpStatus
@@ -24,8 +24,8 @@ type GalleryService struct {
 func NewGalleryService(appConfig *config.ApplicationConfig, ml *model.ModelLoader) *GalleryService {
 	return &GalleryService{
 		appConfig:             appConfig,
-		ModelGalleryChannel:   make(chan GalleryOp[gallery.GalleryModel]),
-		BackendGalleryChannel: make(chan GalleryOp[gallery.GalleryBackend]),
+		ModelGalleryChannel:   make(chan GalleryOp[gallery.GalleryModel, gallery.ModelConfig]),
+		BackendGalleryChannel: make(chan GalleryOp[gallery.GalleryBackend, any]),
 		modelLoader:           ml,
 		statuses:              make(map[string]*GalleryOpStatus),
 	}
--- a/core/services/models.go
+++ b/core/services/models.go
@@ -9,10 +9,11 @@ import (
 	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/mudler/LocalAI/pkg/system"
 	"github.com/mudler/LocalAI/pkg/utils"
+	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v2"
 )

-func (g *GalleryService) modelHandler(op *GalleryOp[gallery.GalleryModel], cl *config.ModelConfigLoader, systemState *system.SystemState) error {
+func (g *GalleryService) modelHandler(op *GalleryOp[gallery.GalleryModel, gallery.ModelConfig], cl *config.ModelConfigLoader, systemState *system.SystemState) error {
 	utils.ResetDownloadTimers()

 	g.UpdateStatus(op.ID, &GalleryOpStatus{Message: "processing", Progress: 0})
@@ -118,28 +119,32 @@ func ApplyGalleryFromString(systemState *system.SystemState, modelLoader *model.

 // processModelOperation handles the installation or deletion of a model
 func processModelOperation(
-	op *GalleryOp[gallery.GalleryModel],
+	op *GalleryOp[gallery.GalleryModel, gallery.ModelConfig],
 	systemState *system.SystemState,
 	modelLoader *model.ModelLoader,
 	enforcePredownloadScans bool,
 	automaticallyInstallBackend bool,
 	progressCallback func(string, string, string, float64),
 ) error {
-	// delete a model
-	if op.Delete {
+	switch {
+	case op.Delete:
 		return gallery.DeleteModelFromSystem(systemState, op.GalleryElementName)
-	}
-
-	// if the request contains a gallery name, we apply the gallery from the gallery list
-	if op.GalleryElementName != "" {
+	case op.GalleryElement != nil:
+		installedModel, err := gallery.InstallModel(
+			systemState, op.GalleryElement.Name, op.GalleryElement,
+			op.Req.Overrides, progressCallback, enforcePredownloadScans)
+		if err != nil {
+			// installedModel must not be read when the install failed.
+			return err
+		}
+		if automaticallyInstallBackend && installedModel.Backend != "" {
+			log.Debug().Msgf("Installing backend %q", installedModel.Backend)
+			return gallery.InstallBackendFromGallery(op.BackendGalleries, systemState, modelLoader, installedModel.Backend, progressCallback, false)
+		}
+		return nil
+	case op.GalleryElementName != "":
 		return gallery.InstallModelFromGallery(op.Galleries, op.BackendGalleries, systemState, modelLoader, op.GalleryElementName, op.Req, progressCallback, enforcePredownloadScans, automaticallyInstallBackend)
-		// } else if op.ConfigURL != "" {
-		// 	err := startup.InstallModels(op.Galleries, modelPath, enforcePredownloadScans, progressCallback, op.ConfigURL)
-		// 	if err != nil {
-		// 		return err
-		// 	}
-		// 	return cl.Preload(modelPath)
-	} else {
+	default:
 		return installModelFromRemoteConfig(systemState, modelLoader, op.Req, progressCallback, enforcePredownloadScans, automaticallyInstallBackend, op.BackendGalleries)
 	}
 }
--- a/core/services/operation.go
+++ b/core/services/operation.go
@@ -5,12 +5,16 @@ import (
 	"github.com/mudler/LocalAI/pkg/xsync"
 )

-type GalleryOp[T any] struct {
+type GalleryOp[T any, E any] struct {
 	ID                 string
 	GalleryElementName string
 	Delete             bool

-	Req              T
+	Req T
+
+	// GalleryElement, when non-nil, is installed directly instead of looking GalleryElementName up in Galleries.
+	GalleryElement *E
+
 	Galleries        []config.Gallery
 	BackendGalleries []config.Gallery
 }
--- a/core/startup/model_preload.go
+++ b/core/startup/model_preload.go
@@ -1,15 +1,20 @@
 package startup

 import (
+	"encoding/json"
 	"errors"
 	"fmt"
 	"os"
 	"path"
 	"path/filepath"
 	"strings"
+	"time"

+	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/gallery/importers"
+	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/pkg/downloader"
 	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/mudler/LocalAI/pkg/system"
@@ -25,7 +30,7 @@ const (
 // InstallModels will preload models from the given list of URLs and galleries
 // It will download the model if it is not already present in the model path
 // It will also try to resolve if the model is an embedded model YAML configuration
-func InstallModels(galleries, backendGalleries []config.Gallery, systemState *system.SystemState, modelLoader *model.ModelLoader, enforceScan, autoloadBackendGalleries bool, downloadStatus func(string, string, string, float64), models ...string) error {
+func InstallModels(galleryService *services.GalleryService, galleries, backendGalleries []config.Gallery, systemState *system.SystemState, modelLoader *model.ModelLoader, enforceScan, autoloadBackendGalleries bool, downloadStatus func(string, string, string, float64), models ...string) error {
 	// create an error that groups all errors
 	var err error

@@ -154,7 +159,50 @@ func InstallModels(galleries, backendGalleries []config.Gallery, systemState *sy
 					err = errors.Join(err, e)
 				} else if !found {
 					log.Warn().Msgf("[startup] failed resolving model '%s'", url)
-					err = errors.Join(err, fmt.Errorf("failed resolving model '%s'", url))
+
+					if galleryService == nil {
+						err = errors.Join(err, fmt.Errorf("cannot start autoimporter, not sure how to handle this uri"))
+						continue
+					}
+
+					// TODO: we should just use the discoverModelConfig here and default to this.
+					modelConfig, discoverErr := importers.DiscoverModelConfig(url, json.RawMessage{})
+					if discoverErr != nil {
+						err = errors.Join(discoverErr, fmt.Errorf("failed to discover model config: %w", err))
+						continue
+					}
+
+					uuid, uuidErr := uuid.NewUUID()
+					if uuidErr != nil {
+						err = errors.Join(uuidErr, fmt.Errorf("failed to generate UUID: %w", uuidErr))
+						continue
+					}
+
+					galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
+						Req: gallery.GalleryModel{
+							Overrides: map[string]interface{}{},
+						},
+						ID:                 uuid.String(),
+						GalleryElementName: modelConfig.Name,
+						GalleryElement:     &modelConfig,
+						BackendGalleries:   backendGalleries,
+					}
+
+					var status *services.GalleryOpStatus
+					// wait for op to finish
+					for {
+						status = galleryService.GetStatus(uuid.String())
+						if status != nil && status.Processed {
+							break
+						}
+						time.Sleep(1 * time.Second)
+					}
+
+					if status.Error != nil {
+						return status.Error
+					}
+
+					log.Info().Msgf("[startup] imported model '%s' from '%s'", modelConfig.Name, url)
 				}
 			}
 		}
--- a/core/startup/model_preload_test.go
+++ b/core/startup/model_preload_test.go
@@ -33,7 +33,7 @@ var _ = Describe("Preload test", func() {
 			url := "https://raw.githubusercontent.com/mudler/LocalAI-examples/main/configurations/phi-2.yaml"
 			fileName := fmt.Sprintf("%s.yaml", "phi-2")

-			InstallModels([]config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
+			InstallModels(nil, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)

 			resultFile := filepath.Join(tmpdir, fileName)

@@ -46,7 +46,7 @@ var _ = Describe("Preload test", func() {
 			url := "huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
 			fileName := fmt.Sprintf("%s.gguf", "tinyllama-1.1b-chat-v0.3.Q2_K")

-			err := InstallModels([]config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
+			err := InstallModels(nil, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
 			Expect(err).ToNot(HaveOccurred())

 			resultFile := filepath.Join(tmpdir, fileName)
--- a/core/templates/cache.go
+++ b/core/templates/cache.go
@@ -11,9 +11,6 @@ import (
 	"github.com/mudler/LocalAI/pkg/utils"

 	"github.com/Masterminds/sprig/v3"
-
-	"github.com/nikolalohinski/gonja/v2"
-	"github.com/nikolalohinski/gonja/v2/exec"
 )

 // Keep this in sync with config.TemplateConfig. Is there a more idiomatic way to accomplish this in go?
@@ -21,17 +18,15 @@ import (
 type TemplateType int

 type templateCache struct {
-	mu             sync.Mutex
-	templatesPath  string
-	templates      map[TemplateType]map[string]*template.Template
-	jinjaTemplates map[TemplateType]map[string]*exec.Template
+	mu            sync.Mutex
+	templatesPath string
+	templates     map[TemplateType]map[string]*template.Template
 }

 func newTemplateCache(templatesPath string) *templateCache {
 	tc := &templateCache{
-		templatesPath:  templatesPath,
-		templates:      make(map[TemplateType]map[string]*template.Template),
-		jinjaTemplates: make(map[TemplateType]map[string]*exec.Template),
+		templatesPath: templatesPath,
+		templates:     make(map[TemplateType]map[string]*template.Template),
 	}
 	return tc
 }
@@ -85,78 +80,6 @@ func (tc *templateCache) loadTemplateIfExists(templateType TemplateType, templat
 	return nil
 }

-func (tc *templateCache) initializeJinjaTemplateMapKey(tt TemplateType) {
-	if _, ok := tc.jinjaTemplates[tt]; !ok {
-		tc.jinjaTemplates[tt] = make(map[string]*exec.Template)
-	}
-}
-
-func (tc *templateCache) loadJinjaTemplateIfExists(templateType TemplateType, templateName string) error {
-	// Check if the template was already loaded
-	if _, ok := tc.jinjaTemplates[templateType][templateName]; ok {
-		return nil
-	}
-
-	// Check if the model path exists
-	// skip any error here - we run anyway if a template does not exist
-	modelTemplateFile := fmt.Sprintf("%s.tmpl", templateName)
-
-	dat := ""
-	file := filepath.Join(tc.templatesPath, modelTemplateFile)
-
-	// Security check
-	if err := utils.VerifyPath(modelTemplateFile, tc.templatesPath); err != nil {
-		return fmt.Errorf("template file outside path: %s", file)
-	}
-
-	// can either be a file in the system or a string with the template
-	if utils.ExistsInPath(tc.templatesPath, modelTemplateFile) {
-		d, err := os.ReadFile(file)
-		if err != nil {
-			return err
-		}
-		dat = string(d)
-	} else {
-		dat = templateName
-	}
-
-	tmpl, err := gonja.FromString(dat)
-	if err != nil {
-		return err
-	}
-	tc.jinjaTemplates[templateType][templateName] = tmpl
-
-	return nil
-}
-
-func (tc *templateCache) evaluateJinjaTemplate(templateType TemplateType, templateNameOrContent string, in map[string]interface{}) (string, error) {
-	tc.mu.Lock()
-	defer tc.mu.Unlock()
-
-	tc.initializeJinjaTemplateMapKey(templateType)
-	m, ok := tc.jinjaTemplates[templateType][templateNameOrContent]
-	if !ok {
-		// return "", fmt.Errorf("template not loaded: %s", templateName)
-		loadErr := tc.loadJinjaTemplateIfExists(templateType, templateNameOrContent)
-		if loadErr != nil {
-			return "", loadErr
-		}
-		m = tc.jinjaTemplates[templateType][templateNameOrContent] // ok is not important since we check m on the next line, and wealready checked
-	}
-	if m == nil {
-		return "", fmt.Errorf("failed loading a template for %s", templateNameOrContent)
-	}
-
-	var buf bytes.Buffer
-
-	data := exec.NewContext(in)
-
-	if err := m.Execute(&buf, data); err != nil {
-		return "", err
-	}
-	return buf.String(), nil
-}
-
 func (tc *templateCache) evaluateTemplate(templateType TemplateType, templateNameOrContent string, in interface{}) (string, error) {
 	tc.mu.Lock()
 	defer tc.mu.Unlock()
--- a/core/templates/evaluator.go
+++ b/core/templates/evaluator.go
@@ -86,10 +86,6 @@ func (e *Evaluator) EvaluateTemplateForPrompt(templateType TemplateType, config
 		return in.Input, nil
 	}

-	if config.TemplateConfig.JinjaTemplate {
-		return e.evaluateJinjaTemplateForPrompt(templateType, template, in)
-	}
-
 	return e.cache.evaluateTemplate(templateType, template, in)
 }

@@ -97,72 +93,7 @@ func (e *Evaluator) evaluateTemplateForChatMessage(templateName string, messageD
 	return e.cache.evaluateTemplate(ChatMessageTemplate, templateName, messageData)
 }

-func (e *Evaluator) templateJinjaChat(templateName string, messageData []ChatMessageTemplateData, funcs []functions.Function) (string, error) {
-
-	conversation := make(map[string]interface{})
-	messages := make([]map[string]interface{}, len(messageData))
-
-	// convert from ChatMessageTemplateData to what the jinja template expects
-
-	for _, message := range messageData {
-		// TODO: this seems to cover minimum text templates. Can be expanded to cover more complex interactions
-		var data []byte
-		data, _ = json.Marshal(message.FunctionCall)
-		messages = append(messages, map[string]interface{}{
-			"role":      message.RoleName,
-			"content":   message.Content,
-			"tool_call": string(data),
-		})
-	}
-
-	conversation["messages"] = messages
-
-	// if tools are detected, add these
-	if len(funcs) > 0 {
-		conversation["tools"] = funcs
-	}
-
-	return e.cache.evaluateJinjaTemplate(ChatMessageTemplate, templateName, conversation)
-}
-
-func (e *Evaluator) evaluateJinjaTemplateForPrompt(templateType TemplateType, templateName string, in PromptTemplateData) (string, error) {
-
-	conversation := make(map[string]interface{})
-
-	conversation["system_prompt"] = in.SystemPrompt
-	conversation["content"] = in.Input
-
-	return e.cache.evaluateJinjaTemplate(templateType, templateName, conversation)
-}
-
 func (e *Evaluator) TemplateMessages(input schema.OpenAIRequest, messages []schema.Message, config *config.ModelConfig, funcs []functions.Function, shouldUseFn bool) string {
-
-	if config.TemplateConfig.JinjaTemplate {
-		var messageData []ChatMessageTemplateData
-		for messageIndex, i := range messages {
-			fcall := i.FunctionCall
-			if len(i.ToolCalls) > 0 {
-				fcall = i.ToolCalls
-			}
-			messageData = append(messageData, ChatMessageTemplateData{
-				SystemPrompt: config.SystemPrompt,
-				Role:         config.Roles[i.Role],
-				RoleName:     i.Role,
-				Content:      i.StringContent,
-				FunctionCall: fcall,
-				FunctionName: i.Name,
-				LastMessage:  messageIndex == (len(messages) - 1),
-				Function:     config.Grammar != "" && (messageIndex == (len(messages) - 1)),
-				MessageIndex: messageIndex,
-			})
-		}
-
-		templatedInput, err := e.templateJinjaChat(config.TemplateConfig.ChatMessage, messageData, funcs)
-		if err == nil {
-			return templatedInput
-		}
-	}
-
 	var predInput string
 	suppressConfigSystemPrompt := false
 	mess := []string{}
--- a/core/templates/evaluator_test.go
+++ b/core/templates/evaluator_test.go
@@ -191,25 +191,6 @@ var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]in
 	},
 }

-var jinjaTest map[string]map[string]interface{} = map[string]map[string]interface{}{
-	"user": {
-		"expected": "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
-		"config": &config.ModelConfig{
-			TemplateConfig: config.TemplateConfig{
-				ChatMessage:   toolCallJinja,
-				JinjaTemplate: true,
-			},
-		},
-		"functions":   []functions.Function{},
-		"shouldUseFn": false,
-		"messages": []schema.Message{
-			{
-				Role:          "user",
-				StringContent: "A long time ago in a galaxy far, far away...",
-			},
-		},
-	},
-}
 var _ = Describe("Templates", func() {
 	Context("chat message ChatML", func() {
 		var evaluator *Evaluator
@@ -237,17 +218,4 @@ var _ = Describe("Templates", func() {
 			})
 		}
 	})
-	Context("chat message jinja", func() {
-		var evaluator *Evaluator
-		BeforeEach(func() {
-			evaluator = NewEvaluator("")
-		})
-		for key := range jinjaTest {
-			foo := jinjaTest[key]
-			It("renders correctly `"+key+"`", func() {
-				templated := evaluator.TemplateMessages(schema.OpenAIRequest{}, foo["messages"].([]schema.Message), foo["config"].(*config.ModelConfig), foo["functions"].([]functions.Function), foo["shouldUseFn"].(bool))
-				Expect(templated).To(Equal(foo["expected"]), templated)
-			})
-		}
-	})
 })
--- a/docs/content/docs/features/text-generation.md
+++ b/docs/content/docs/features/text-generation.md
@@ -128,16 +128,44 @@ Models can be also preloaded or downloaded on demand. To learn about model galle

 #### YAML configuration

-To use the `llama.cpp` backend, specify `llama` as the backend in the YAML file:
+To use the `llama.cpp` backend, specify `llama-cpp` as the backend in the YAML file:

 ```yaml
 name: llama
-backend: llama
+backend: llama-cpp
 parameters:
  # Relative to the models path
  model: file.gguf
 ```

+#### Backend Options
+
+The `llama.cpp` backend supports additional configuration options that can be specified in the `options` field of your model YAML configuration. These options allow fine-tuning of the backend behavior:
+
+| Option | Type | Description | Example |
+|--------|------|-------------|---------|
+| `use_jinja` or `jinja` | boolean | Enable Jinja2 template processing for chat templates. When enabled, the backend uses Jinja2-based chat templates from the model for formatting messages. | `use_jinja:true` |
+| `context_shift` | boolean | Enable context shifting, which allows the model to dynamically adjust context window usage. | `context_shift:true` |
+| `cache_ram` | integer | Set the maximum RAM cache size in MiB for KV cache. Use `-1` for unlimited (default). | `cache_ram:2048` |
+| `parallel` or `n_parallel` | integer | Enable parallel request processing. When set to a value greater than 1, enables continuous batching for handling multiple requests concurrently. | `parallel:4` |
+| `grpc_servers` or `rpc_servers` | string | Comma-separated list of gRPC server addresses for distributed inference. Allows distributing workload across multiple llama.cpp workers. | `grpc_servers:localhost:50051,localhost:50052` |
+
+**Example configuration with options:**
+
+```yaml
+name: llama-model
+backend: llama-cpp
+parameters:
+  model: model.gguf
+options:
+  - use_jinja:true
+  - context_shift:true
+  - cache_ram:4096
+  - parallel:2
+```
+
+**Note:** The `parallel` option can also be set via the `LLAMACPP_PARALLEL` environment variable, and `grpc_servers` can be set via the `LLAMACPP_GRPC_SERVERS` environment variable. Options specified in the YAML file take precedence over environment variables.
+
 #### Reference

 - [llama](https://github.com/ggerganov/llama.cpp)
--- a/docs/content/docs/getting-started/quickstart.md
+++ b/docs/content/docs/getting-started/quickstart.md
@@ -31,10 +31,14 @@ See [Installer]({{% relref "docs/advanced/installer" %}}) for all the supported

 ### macOS Download

+For macOS, a DMG is available:
+
 <a href="https://github.com/mudler/LocalAI/releases/latest/download/LocalAI.dmg">
  <img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
 </a>

+> Note: the DMGs are not signed by Apple and show as quarantined after install. See https://github.com/mudler/LocalAI/issues/6268 for a workaround; the fix is tracked here: https://github.com/mudler/LocalAI/issues/6244
+
 ### Run with docker

 {{% alert icon="💡" %}}
--- a/docs/content/docs/overview.md
+++ b/docs/content/docs/overview.md
@@ -51,19 +51,20 @@ LocalAI is more than just a single tool - it's a complete ecosystem:

 ## Getting Started

-The fastest way to get started is with our one-line installer:
-
-```bash
-curl https://localai.io/install.sh | sh
-```

 ### macOS Download

+You can use the DMG application for Mac:
+
 <a href="https://github.com/mudler/LocalAI/releases/latest/download/LocalAI.dmg">
  <img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
 </a>

-Or use Docker for a quick start:
+> Note: the DMGs are not signed by Apple and show as quarantined. See https://github.com/mudler/LocalAI/issues/6268 for a workaround; the fix is tracked here: https://github.com/mudler/LocalAI/issues/6244
+
+## Docker
+
+You can use Docker for a quick start:

 ```bash
 docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
@@ -71,6 +72,14 @@ docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu

 For more detailed installation options and configurations, see our [Getting Started guide](/basics/getting_started/).

+## One-liner
+
+The fastest way to get started is with our one-line installer (Linux):
+
+```bash
+curl https://localai.io/install.sh | sh
+```
+
 ## Key Features

 - **Text Generation**: Run various LLMs locally
--- a/docs/content/docs/reference/binaries.md
+++ b/docs/content/docs/reference/binaries.md
@@ -9,11 +9,15 @@ LocalAI binaries are available for both Linux and MacOS platforms and can be exe

 ### macOS Download

+You can download the DMG and install the application:
+
 <a href="https://github.com/mudler/LocalAI/releases/latest/download/LocalAI.dmg">
  <img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
 </a> 

-Use the following one-liner command in your terminal to download and run LocalAI on Linux or MacOS:
+> Note: the DMGs are not signed by Apple and show as quarantined. See https://github.com/mudler/LocalAI/issues/6268 for a workaround; the fix is tracked here: https://github.com/mudler/LocalAI/issues/6244
+
+Otherwise, use the following one-liner command in your terminal to download and run LocalAI on Linux or MacOS:

 ```bash
 curl -Lo local-ai "https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-$(uname -s)-$(uname -m)" && chmod +x local-ai && ./local-ai
--- a/docs/data/version.json
+++ b/docs/data/version.json
@@ -1,3 +1,3 @@
 {
-  "version": "v3.6.0"
+  "version": "v3.7.0"
 }
--- a/docs/go.mod
+++ b/docs/go.mod
@@ -1,5 +1,3 @@
 module github.com/McShelby/hugo-theme-relearn.git

 go 1.19
-
-require github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20200 // indirect
--- a/docs/go.sum
+++ b/docs/go.sum
@@ -1,4 +0,0 @@
-github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20200 h1:SmpwwN3DNzJWbV+IT8gaFu07ENUFpCvKou5BHYUKuVs=
-github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20200/go.mod h1:kx8MBj9T7SFR8ZClWvKZPmmUxBaltkoXvnWlZZcSnYA=
-github.com/gohugoio/hugo-mod-jslibs-dist/popperjs/v2 v2.21100.20000/go.mod h1:mFberT6ZtcchrsDtfvJM7aAH2bDKLdOnruUHl0hlapI=
-github.com/twbs/bootstrap v5.3.2+incompatible/go.mod h1:fZTSrkpSf0/HkL0IIJzvVspTt1r9zuf7XlZau8kpcY0=
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
--- a/gallery/qwen3.yaml
+++ b/gallery/qwen3.yaml
@@ -6,15 +6,20 @@ config_file: |
  backend: "llama-cpp"
  template:
    chat_message: |
-      <|im_start|>{{ .RoleName }}
-      {{ if .FunctionCall -}}
-      {{ else if eq .RoleName "tool" -}}
+      <|im_start|>{{if eq .RoleName "tool" }}user{{else}}{{ .RoleName }}{{end}}
+      {{ if eq .RoleName "tool" -}}
+      <tool_response>
      {{ end -}}
      {{ if .Content -}}
      {{.Content }}
      {{ end -}}
+      {{ if eq .RoleName "tool" -}}
+      </tool_response>
+      {{ end -}}
      {{ if .FunctionCall -}}
+      <tool_call>
      {{toJson .FunctionCall}}
+      </tool_call>
      {{ end -}}<|im_end|>
    function: |
      <|im_start|>system
--- a/go.mod
+++ b/go.mod
@@ -10,9 +10,9 @@ require (
 	github.com/Masterminds/sprig/v3 v3.3.0
 	github.com/alecthomas/kong v1.12.1
 	github.com/charmbracelet/glamour v0.10.0
-	github.com/containerd/containerd v1.7.28
+	github.com/containerd/containerd v1.7.29
 	github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2
-	github.com/ebitengine/purego v0.9.0
+	github.com/ebitengine/purego v0.9.1
 	github.com/fsnotify/fsnotify v1.9.0
 	github.com/go-audio/wav v1.1.0
 	github.com/go-skynet/go-llama.cpp v0.0.0-20240314183750-6a8041ef6b46
@@ -26,19 +26,18 @@ require (
 	github.com/gpustack/gguf-parser-go v0.22.1
 	github.com/hpcloud/tail v1.0.0
 	github.com/ipfs/go-log v1.0.5
-	github.com/jaypipes/ghw v0.19.1
+	github.com/jaypipes/ghw v0.20.0
 	github.com/joho/godotenv v1.5.1
 	github.com/klauspost/cpuid/v2 v2.3.0
 	github.com/libp2p/go-libp2p v0.43.0
 	github.com/lithammer/fuzzysearch v1.1.8
 	github.com/mholt/archiver/v3 v3.5.1
 	github.com/microcosm-cc/bluemonday v1.0.27
-	github.com/modelcontextprotocol/go-sdk v1.0.0
-	github.com/mudler/cogito v0.4.0
-	github.com/mudler/edgevpn v0.31.0
+	github.com/modelcontextprotocol/go-sdk v1.1.0
+	github.com/mudler/cogito v0.5.1
+	github.com/mudler/edgevpn v0.31.1
 	github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82
-	github.com/nikolalohinski/gonja/v2 v2.4.1
-	github.com/onsi/ginkgo/v2 v2.26.0
+	github.com/onsi/ginkgo/v2 v2.27.2
 	github.com/onsi/gomega v1.38.2
 	github.com/otiai10/copy v1.14.1
 	github.com/otiai10/openaigo v1.7.0
@@ -52,14 +51,15 @@ require (
 	github.com/streamer45/silero-vad-go v0.2.1
 	github.com/stretchr/testify v1.11.1
 	github.com/swaggo/swag v1.16.6
-	github.com/testcontainers/testcontainers-go v0.38.0
+	github.com/testcontainers/testcontainers-go v0.40.0
 	github.com/tmc/langchaingo v0.1.14
-	github.com/valyala/fasthttp v1.55.0
+	github.com/valyala/fasthttp v1.68.0
 	go.opentelemetry.io/otel v1.38.0
 	go.opentelemetry.io/otel/exporters/prometheus v0.60.0
 	go.opentelemetry.io/otel/metric v1.38.0
 	go.opentelemetry.io/otel/sdk/metric v1.38.0
 	google.golang.org/grpc v1.76.0
+	google.golang.org/protobuf v1.36.8
 	gopkg.in/yaml.v2 v2.4.0
 	gopkg.in/yaml.v3 v3.0.1
 	oras.land/oras-go/v2 v2.6.0
@@ -78,7 +78,6 @@ require (
 	github.com/containerd/platforms v0.2.1 // indirect
 	github.com/cpuguy83/dockercfg v0.3.2 // indirect
 	github.com/distribution/reference v0.6.0 // indirect
-	github.com/dustin/go-humanize v1.0.1 // indirect
 	github.com/fasthttp/websocket v1.5.8 // indirect
 	github.com/felixge/httpsnoop v1.0.4 // indirect
 	github.com/fredbi/uri v1.1.1 // indirect
@@ -135,7 +134,7 @@ require (
 	github.com/prometheus/otlptranslator v0.0.2 // indirect
 	github.com/rymdport/portal v0.4.2 // indirect
 	github.com/savsgio/gotils v0.0.0-20240303185622-093b76447511 // indirect
-	github.com/shirou/gopsutil/v4 v4.25.5 // indirect
+	github.com/shirou/gopsutil/v4 v4.25.6 // indirect
 	github.com/srwiley/oksvg v0.0.0-20221011165216-be6e8873101c // indirect
 	github.com/srwiley/rasterx v0.0.0-20220730225603-2ab79fcdd4ef // indirect
 	github.com/wlynxg/anet v0.0.5 // indirect
@@ -143,13 +142,13 @@ require (
 	github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
 	go.opentelemetry.io/auto/sdk v1.1.0 // indirect
 	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect
-	go.uber.org/automaxprocs v1.6.0 // indirect
 	go.uber.org/mock v0.5.2 // indirect
 	go.yaml.in/yaml/v2 v2.4.2 // indirect
 	go.yaml.in/yaml/v3 v3.0.4 // indirect
 	golang.org/x/image v0.25.0 // indirect
+	golang.org/x/oauth2 v0.30.0 // indirect
+	golang.org/x/telemetry v0.0.0-20250908211612-aef8a434d053 // indirect
 	golang.org/x/time v0.12.0 // indirect
-	google.golang.org/protobuf v1.36.8 // indirect
 )

 require (
@@ -160,7 +159,7 @@ require (
 	github.com/Microsoft/go-winio v0.6.2 // indirect
 	github.com/Microsoft/hcsshim v0.11.7 // indirect
 	github.com/alecthomas/chroma/v2 v2.14.0 // indirect
-	github.com/andybalholm/brotli v1.1.0 // indirect
+	github.com/andybalholm/brotli v1.2.0 // indirect
 	github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
 	github.com/aymerick/douceur v0.2.0 // indirect
 	github.com/benbjohnson/clock v1.3.5 // indirect
@@ -180,7 +179,7 @@ require (
 	github.com/dlclark/regexp2 v1.11.0 // indirect
 	github.com/docker/cli v27.0.3+incompatible // indirect
 	github.com/docker/distribution v2.8.2+incompatible // indirect
-	github.com/docker/docker v28.5.1+incompatible
+	github.com/docker/docker v28.5.2+incompatible
 	github.com/docker/docker-credential-helpers v0.7.0 // indirect
 	github.com/docker/go-connections v0.6.0
 	github.com/docker/go-units v0.5.0 // indirect
@@ -222,7 +221,7 @@ require (
 	github.com/jaypipes/pcidb v1.1.1 // indirect
 	github.com/jbenet/go-temp-err-catcher v0.1.0 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
-	github.com/klauspost/compress v1.18.0 // indirect
+	github.com/klauspost/compress v1.18.1 // indirect
 	github.com/klauspost/pgzip v1.2.5 // indirect
 	github.com/koron/go-ssdp v0.0.6 // indirect
 	github.com/libp2p/go-buffer-pool v0.1.0 // indirect
@@ -303,7 +302,6 @@ require (
 	github.com/tklauser/numcpus v0.10.0 // indirect
 	github.com/ulikunitz/xz v0.5.14 // indirect
 	github.com/valyala/bytebufferpool v1.0.0 // indirect
-	github.com/valyala/tcplisten v1.0.0 // indirect
 	github.com/vbatts/tar-split v0.11.3 // indirect
 	github.com/vishvananda/netlink v1.3.0 // indirect
 	github.com/vishvananda/netns v0.0.5 // indirect
@@ -319,15 +317,15 @@ require (
 	go.uber.org/fx v1.24.0 // indirect
 	go.uber.org/multierr v1.11.0 // indirect
 	go.uber.org/zap v1.27.0 // indirect
-	golang.org/x/crypto v0.41.0 // indirect
+	golang.org/x/crypto v0.43.0 // indirect
 	golang.org/x/exp v0.0.0-20250606033433-dcc06ee1d476 // indirect
-	golang.org/x/mod v0.27.0 // indirect
-	golang.org/x/net v0.43.0 // indirect
-	golang.org/x/sync v0.16.0 // indirect
+	golang.org/x/mod v0.28.0 // indirect
+	golang.org/x/net v0.46.0 // indirect
+	golang.org/x/sync v0.17.0 // indirect
 	golang.org/x/sys v0.37.0 // indirect
-	golang.org/x/term v0.34.0 // indirect
-	golang.org/x/text v0.28.0 // indirect
-	golang.org/x/tools v0.36.0 // indirect
+	golang.org/x/term v0.36.0 // indirect
+	golang.org/x/text v0.30.0 // indirect
+	golang.org/x/tools v0.37.0 // indirect
 	golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 // indirect
 	golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb // indirect
 	golang.zx2c4.com/wireguard/windows v0.5.3 // indirect
--- a/go.sum
+++ b/go.sum
@@ -23,8 +23,6 @@ github.com/BurntSushi/toml v1.5.0 h1:W5quZX/G/csjUnuI8SUYlsHs9M38FC7znL0lIO+DvMg
 github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
 github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
 github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
-github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ=
-github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE=
 github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
 github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
 github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
@@ -44,8 +42,8 @@ github.com/alecthomas/kong v1.12.1/go.mod h1:p2vqieVMeTAnaC83txKtXe8FLke2X07aruP
 github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
 github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
 github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
-github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=
-github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY=
+github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ=
+github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
 github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c=
 github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
 github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
@@ -89,8 +87,8 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk
 github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
 github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM=
 github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw=
-github.com/containerd/containerd v1.7.28 h1:Nsgm1AtcmEh4AHAJ4gGlNSaKgXiNccU270Dnf81FQ3c=
-github.com/containerd/containerd v1.7.28/go.mod h1:azUkWcOvHrWvaiUjSQH0fjzuHIwSPg1WL5PshGP4Szs=
+github.com/containerd/containerd v1.7.29 h1:90fWABQsaN9mJhGkoVnuzEY+o1XDPbg9BTC9QTAHnuE=
+github.com/containerd/containerd v1.7.29/go.mod h1:azUkWcOvHrWvaiUjSQH0fjzuHIwSPg1WL5PshGP4Szs=
 github.com/containerd/continuity v0.4.4 h1:/fNVfTJ7wIl/YPMHjf+5H32uFhl63JucB34PlCpMKII=
 github.com/containerd/continuity v0.4.4/go.mod h1:/lNJvtJKUQStBzpVQ1+rasXO1LAWtUQssk28EZvJ3nE=
 github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
@@ -135,8 +133,8 @@ github.com/docker/cli v27.0.3+incompatible h1:usGs0/BoBW8MWxGeEtqPMkzOY56jZ6kYlS
 github.com/docker/cli v27.0.3+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
 github.com/docker/distribution v2.8.2+incompatible h1:T3de5rq0dB1j30rp0sA2rER+m322EBzniBPB6ZIzuh8=
 github.com/docker/distribution v2.8.2+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
-github.com/docker/docker v28.5.1+incompatible h1:Bm8DchhSD2J6PsFzxC35TZo4TLGR2PdW/E69rU45NhM=
-github.com/docker/docker v28.5.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
+github.com/docker/docker v28.5.2+incompatible h1:DBX0Y0zAjZbSrm1uzOkdr1onVghKaftjlSWt4AFexzM=
+github.com/docker/docker v28.5.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
 github.com/docker/docker-credential-helpers v0.7.0 h1:xtCHsjxogADNZcdv1pKUHXryefjlVRqWqIhk/uXJp0A=
 github.com/docker/docker-credential-helpers v0.7.0/go.mod h1:rETQfLdHNT3foU5kuNkFR1R1V12OJRRO5lzt2D1b5X0=
 github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94=
@@ -147,10 +145,8 @@ github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L
 github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
 github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
-github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
-github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
-github.com/ebitengine/purego v0.9.0 h1:mh0zpKBIXDceC63hpvPuGLiJ8ZAa3DfrFTudmfi8A4k=
-github.com/ebitengine/purego v0.9.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
+github.com/ebitengine/purego v0.9.1 h1:a/k2f2HQU3Pi399RPW1MOaZyhKJL9w/xFpKAg4q1s0A=
+github.com/ebitengine/purego v0.9.1/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
 github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
 github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
 github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
@@ -186,8 +182,8 @@ github.com/gkampitakis/ciinfo v0.3.2 h1:JcuOPk8ZU7nZQjdUhctuhQofk7BGHuIy0c9Ez8BN
 github.com/gkampitakis/ciinfo v0.3.2/go.mod h1:1NIwaOcFChN4fa/B0hEBdAb6npDlFL8Bwx4dfRLRqAo=
 github.com/gkampitakis/go-diff v1.3.2 h1:Qyn0J9XJSDTgnsgHRdz9Zp24RaJeKMUHg2+PDZZdC4M=
 github.com/gkampitakis/go-diff v1.3.2/go.mod h1:LLgOrpqleQe26cte8s36HTWcTmMEur6OPYerdAAS9tk=
-github.com/gkampitakis/go-snaps v0.5.14 h1:3fAqdB6BCPKHDMHAKRwtPUwYexKtGrNuw8HX/T/4neo=
-github.com/gkampitakis/go-snaps v0.5.14/go.mod h1:HNpx/9GoKisdhw9AFOBT1N7DBs9DiHo/hGheFGBZ+mc=
+github.com/gkampitakis/go-snaps v0.5.15 h1:amyJrvM1D33cPHwVrjo9jQxX8g/7E2wYdZ+01KS3zGE=
+github.com/gkampitakis/go-snaps v0.5.15/go.mod h1:HNpx/9GoKisdhw9AFOBT1N7DBs9DiHo/hGheFGBZ+mc=
 github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0=
 github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
 github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
@@ -361,8 +357,8 @@ github.com/ipld/go-ipld-prime v0.21.0 h1:n4JmcpOlPDIxBcY037SVfpd1G+Sj1nKZah0m6QH
 github.com/ipld/go-ipld-prime v0.21.0/go.mod h1:3RLqy//ERg/y5oShXXdx5YIp50cFGOanyMctpPjsvxQ=
 github.com/jackpal/go-nat-pmp v1.0.2 h1:KzKSgb7qkJvOUTqYl9/Hg/me3pWgBmERKrTGD7BdWus=
 github.com/jackpal/go-nat-pmp v1.0.2/go.mod h1:QPH045xvCAeXUZOxsnwmrtiCoxIr9eob+4orBN1SBKc=
-github.com/jaypipes/ghw v0.19.1 h1:Lhybk6aadgEJqIxeS0h07UOL/EgMGIdxbAy6V8J7RgY=
-github.com/jaypipes/ghw v0.19.1/go.mod h1:GPrvwbtPoxYUenr74+nAnWbardIZq600vJDD5HnPsPE=
+github.com/jaypipes/ghw v0.20.0 h1:8efvHHtyrj0P4qVZ9KE43iW9tMThKoh6dEOo38f3a4w=
+github.com/jaypipes/ghw v0.20.0/go.mod h1:GPrvwbtPoxYUenr74+nAnWbardIZq600vJDD5HnPsPE=
 github.com/jaypipes/pcidb v1.1.1 h1:QmPhpsbmmnCwZmHeYAATxEaoRuiMAJusKYkUncMC0ro=
 github.com/jaypipes/pcidb v1.1.1/go.mod h1:x27LT2krrUgjf875KxQXKB0Ha/YXLdZRVmw6hH0G7g8=
 github.com/jbenet/go-temp-err-catcher v0.1.0 h1:zpb3ZH6wIE8Shj2sKS+khgRvf7T7RABoLk/+KKHggpk=
@@ -389,8 +385,8 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI
 github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
 github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
-github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
-github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
+github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
+github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
 github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y=
 github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
@@ -514,8 +510,8 @@ github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g
 github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28=
 github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ=
 github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc=
-github.com/modelcontextprotocol/go-sdk v1.0.0 h1:Z4MSjLi38bTgLrd/LjSmofqRqyBiVKRyQSJgw8q8V74=
-github.com/modelcontextprotocol/go-sdk v1.0.0/go.mod h1:nYtYQroQ2KQiM0/SbyEPUWQ6xs4B95gJjEalc9AQyOs=
+github.com/modelcontextprotocol/go-sdk v1.1.0 h1:Qjayg53dnKC4UZ+792W21e4BpwEZBzwgRW6LrjLWSwA=
+github.com/modelcontextprotocol/go-sdk v1.1.0/go.mod h1:6fM3LCm3yV7pAs8isnKLn07oKtB0MP9LHd3DfAcKw10=
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@@ -527,10 +523,10 @@ github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7P
 github.com/mr-tron/base58 v1.1.2/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
 github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o=
 github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
-github.com/mudler/cogito v0.4.0 h1:CkdzbQplQW6LDUM6mTqupGFiQVluy0nx7xbYgAfBVKs=
-github.com/mudler/cogito v0.4.0/go.mod h1:abMwl+CUjCp87IufA2quZdZt0bbLaHHN79o17HbUKxU=
-github.com/mudler/edgevpn v0.31.0 h1:CXwxQ2ZygzE7iKGl1J+vq9pL5PvsW2uc3qI/zgpNpp4=
-github.com/mudler/edgevpn v0.31.0/go.mod h1:DKgh9Wu/NM3UbZoPyheMXFvpu1dSLkXrqAOy3oKJN3I=
+github.com/mudler/cogito v0.5.1 h1:KK9F7pNJUopewiZRsKnDEGu49+22fPrxWYxpoWQWN7s=
+github.com/mudler/cogito v0.5.1/go.mod h1:2uhEElCTq8eXSsqJ1JF01oA5h9niXSELVKqCF1PqjEw=
+github.com/mudler/edgevpn v0.31.1 h1:7qegiDWd0kAg6ljhNHxqvp8hbo/6BbzSdbb7/2WZfiY=
+github.com/mudler/edgevpn v0.31.1/go.mod h1:ftV5B0nKFzm4R8vR80UYnCb2nf7lxCRgAALxUEEgCf8=
 github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc h1:RxwneJl1VgvikiX28EkpdAyL4yQVnJMrbquKospjHyA=
 github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
 github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82 h1:FVT07EI8njvsD4tC2Hw8Xhactp5AWhsQWD4oTeQuSAU=
@@ -571,16 +567,14 @@ github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6
 github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8=
 github.com/nicksnyder/go-i18n/v2 v2.5.1 h1:IxtPxYsR9Gp60cGXjfuR/llTqV8aYMsC472zD0D1vHk=
 github.com/nicksnyder/go-i18n/v2 v2.5.1/go.mod h1:DrhgsSDZxoAfvVrBVLXoxZn/pN5TXqaDbq7ju94viiQ=
-github.com/nikolalohinski/gonja/v2 v2.4.1 h1:eV/OB0FQ2v3LbQkcr3S+YJGsJV3AP3I83EvTaa5zwD0=
-github.com/nikolalohinski/gonja/v2 v2.4.1/go.mod h1:UIzXPVuOsr5h7dZ5DUbqk3/Z7oFA/NLGQGMjqT4L2aU=
 github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ=
 github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
 github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE=
 github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU=
 github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
 github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU=
-github.com/onsi/ginkgo/v2 v2.26.0 h1:1J4Wut1IlYZNEAWIV3ALrT9NfiaGW2cDCJQSFQMs/gE=
-github.com/onsi/ginkgo/v2 v2.26.0/go.mod h1:qhEywmzWTBUY88kfO0BRvX4py7scov9yR+Az2oavUzw=
+github.com/onsi/ginkgo/v2 v2.27.2 h1:LzwLj0b89qtIy6SSASkzlNvX6WktqurSHwkk2ipF/Ns=
+github.com/onsi/ginkgo/v2 v2.27.2/go.mod h1:ArE1D/XhNXBXCBkKOLkbsb2c81dQHCRcF5zwn/ykDRo=
 github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
 github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k=
 github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
@@ -662,8 +656,6 @@ github.com/polydawn/refmt v0.89.0 h1:ADJTApkvkeBZsN0tBTx8QjpD9JkmxbKp0cxfr9qszm4
 github.com/polydawn/refmt v0.89.0/go.mod h1:/zvteZs/GwLtCgZ4BL6CBsk9IKIlexP43ObX9AxTqTw=
 github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt9k/+g42oCprj/FisM4qX9L3sZB3upGN2ZU=
 github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
-github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
-github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
 github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
 github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
 github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
@@ -711,8 +703,8 @@ github.com/schollz/progressbar/v3 v3.18.0/go.mod h1:IsO3lpbaGuzh8zIMzgY3+J8l4C8G
 github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
 github.com/shirou/gopsutil/v3 v3.24.5 h1:i0t8kL+kQTvpAYToeuiVk3TgDeKOFioZO3Ztz/iZ9pI=
 github.com/shirou/gopsutil/v3 v3.24.5/go.mod h1:bsoOS1aStSs9ErQ1WWfxllSeS1K5D+U30r2NfcubMVk=
-github.com/shirou/gopsutil/v4 v4.25.5 h1:rtd9piuSMGeU8g1RMXjZs9y9luK5BwtnG7dZaQUJAsc=
-github.com/shirou/gopsutil/v4 v4.25.5/go.mod h1:PfybzyydfZcN+JMMjkF6Zb8Mq1A/VcogFFg7hj50W9c=
+github.com/shirou/gopsutil/v4 v4.25.6 h1:kLysI2JsKorfaFPcYmcJqbzROzsBWEOAtw6A7dIfqXs=
+github.com/shirou/gopsutil/v4 v4.25.6/go.mod h1:PfybzyydfZcN+JMMjkF6Zb8Mq1A/VcogFFg7hj50W9c=
 github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFtM=
 github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ=
 github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU=
@@ -787,8 +779,8 @@ github.com/swaggo/files/v2 v2.0.2/go.mod h1:TVqetIzZsO9OhHX1Am9sRf9LdrFZqoK49N37
 github.com/swaggo/swag v1.16.6 h1:qBNcx53ZaX+M5dxVyTrgQ0PJ/ACK+NzhwcbieTt+9yI=
 github.com/swaggo/swag v1.16.6/go.mod h1:ngP2etMK5a0P3QBizic5MEwpRmluJZPHjXcMoj4Xesg=
 github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA=
-github.com/testcontainers/testcontainers-go v0.38.0 h1:d7uEapLcv2P8AvH8ahLqDMMxda2W9gQN1nRbHS28HBw=
-github.com/testcontainers/testcontainers-go v0.38.0/go.mod h1:C52c9MoHpWO+C4aqmgSU+hxlR5jlEayWtgYrb8Pzz1w=
+github.com/testcontainers/testcontainers-go v0.40.0 h1:pSdJYLOVgLE8YdUY2FHQ1Fxu+aMnb6JfVz1mxk7OeMU=
+github.com/testcontainers/testcontainers-go v0.40.0/go.mod h1:FSXV5KQtX2HAMlm7U3APNyLkkap35zNLxukw9oBi/MY=
 github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
 github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
 github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
@@ -813,10 +805,8 @@ github.com/urfave/cli v1.22.10/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60Nt
 github.com/urfave/cli v1.22.12/go.mod h1:sSBEIC79qR6OvcmsD4U3KABeOTxDqQtdDnaFuUN30b8=
 github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
 github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
-github.com/valyala/fasthttp v1.55.0 h1:Zkefzgt6a7+bVKHnu/YaYSOPfNYNisSVBo/unVCf8k8=
-github.com/valyala/fasthttp v1.55.0/go.mod h1:NkY9JtkrpPKmgwV3HTaS2HWaJss9RSIsRVfcxxoHiOM=
-github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
-github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
+github.com/valyala/fasthttp v1.68.0 h1:v12Nx16iepr8r9ySOwqI+5RBJ/DqTxhOy1HrHoDFnok=
+github.com/valyala/fasthttp v1.68.0/go.mod h1:5EXiRfYQAoiO/khu4oU9VISC/eVY6JqmSpPJoHCKsz4=
 github.com/vbatts/tar-split v0.11.3 h1:hLFqsOLQ1SsppQNTMpkpPXClLDfC2A3Zgy9OUU+RVck=
 github.com/vbatts/tar-split v0.11.3/go.mod h1:9QlHN18E+fEH7RdG+QAJJcuya3rqT7eXSTY7wGrAokY=
 github.com/viant/assertly v0.4.8/go.mod h1:aGifi++jvCrUaklKEKT0BU95igDNaqkvz+49uaYMPRU=
@@ -837,6 +827,8 @@ github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofm
 github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
 github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
 github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
+github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
+github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
 github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4=
 github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4=
 github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
@@ -876,8 +868,6 @@ go.opentelemetry.io/proto/otlp v1.8.0 h1:fRAZQDcAFHySxpJ1TwlA1cJ4tvcrw7nXl9xWWC8
 go.opentelemetry.io/proto/otlp v1.8.0/go.mod h1:tIeYOeNBU4cvmPqpaji1P+KbB4Oloai8wN4rWzRrFF0=
 go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ=
 go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
-go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
-go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=
 go.uber.org/dig v1.19.0 h1:BACLhebsYdpQ7IROQ1AGPjrXcP5dF80U3gKoFzbaq/4=
 go.uber.org/dig v1.19.0/go.mod h1:Us0rSJiThwCv2GteUN0Q7OKvU7n5J4dxZ9JKUXozFdE=
 go.uber.org/fx v1.24.0 h1:wE8mruvpg2kiiL1Vqd0CC+tr0/24XIB10Iwp2lLWzkg=
@@ -913,8 +903,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
 golang.org/x/crypto v0.8.0/go.mod h1:mRqEX+O9/h5TFCrQhkgjo2yKi0yYA+9ecGkdQoHrywE=
 golang.org/x/crypto v0.12.0/go.mod h1:NF0Gs7EO5K4qLn+Ylc+fih8BSTeIjAP05siRnAh98yw=
 golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg=
-golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4=
-golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc=
+golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04=
+golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20250606033433-dcc06ee1d476 h1:bsqhLWFR6G6xiQcb+JoGqdKdRU6WzPWmK8E0jxTjzo4=
 golang.org/x/exp v0.0.0-20250606033433-dcc06ee1d476/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
@@ -932,8 +922,8 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
 golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
-golang.org/x/mod v0.27.0 h1:kb+q2PyFnEADO2IEF935ehFUXlWiNjJWtRNgBLSfbxQ=
-golang.org/x/mod v0.27.0/go.mod h1:rWI627Fq0DEoudcK+MBkNkCe0EetEaDSwJJkCcjpazc=
+golang.org/x/mod v0.28.0 h1:gQBtGhjxykdjY9YhZpSlZIsbnaE2+PgjfLWUQTnoZ1U=
+golang.org/x/mod v0.28.0/go.mod h1:yfB/L0NOf/kmEbXjzCPOx1iK1fRutOydrCMsqRhEBxI=
 golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -957,12 +947,14 @@ golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
 golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
 golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI=
 golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY=
-golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
-golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
+golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4=
+golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
+golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI=
+golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU=
 golang.org/x/perf v0.0.0-20180704124530-6e6d33e29852/go.mod h1:JLpeXjPJfIyPr5TlbXLkXWLhP8nz10XfvxElABhCtcw=
 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -974,8 +966,8 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ
 golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
-golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
+golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
+golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
 golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20181029174526-d69651ed3497/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -1009,6 +1001,8 @@ golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
 golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/telemetry v0.0.0-20250908211612-aef8a434d053 h1:dHQOQddU4YHS5gY33/6klKjq7Gp3WwMyOXGNp5nzRj8=
+golang.org/x/telemetry v0.0.0-20250908211612-aef8a434d053/go.mod h1:+nZKN+XVh4LCiA9DV3ywrzN4gumyCnKjau3NGb9SGoE=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
 golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
@@ -1016,8 +1010,8 @@ golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
 golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
 golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU=
 golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY=
-golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4=
-golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw=
+golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q=
+golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
@@ -1027,8 +1021,8 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
 golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
 golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
 golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
-golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
-golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
+golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
+golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
 golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE=
@@ -1050,8 +1044,8 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY
 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
 golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
 golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
-golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg=
-golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
+golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE=
+golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
--- a/pkg/functions/functions.go
+++ b/pkg/functions/functions.go
@@ -79,6 +79,12 @@ func (f Functions) ToJSONStructure(name, args string) JSONFunctionStructure {
 			Type:       "object",
 			Properties: property,
 		})
+		/*
+			js.AnyOf = append(js.OneOf, Item{
+				Type:       "object",
+				Properties: property,
+			})
+		*/
 	}
 	return js
 }
--- a/pkg/functions/parse.go
+++ b/pkg/functions/parse.go
@@ -13,99 +13,102 @@ import (
 	"github.com/rs/zerolog/log"
 )

+// @Description GrammarConfig contains configuration for grammar parsing
 type GrammarConfig struct {
 	// ParallelCalls enables the LLM to return multiple function calls in the same response
-	ParallelCalls bool `yaml:"parallel_calls"`
+	ParallelCalls bool `yaml:"parallel_calls,omitempty" json:"parallel_calls,omitempty"`

-	DisableParallelNewLines bool `yaml:"disable_parallel_new_lines"`
+	DisableParallelNewLines bool `yaml:"disable_parallel_new_lines,omitempty" json:"disable_parallel_new_lines,omitempty"`

 	// MixedMode enables the LLM to return strings and not only JSON objects
 	// This is useful to avoid constraining models to return only JSON, so they can also send messages back to the user
-	MixedMode bool `yaml:"mixed_mode"`
+	MixedMode bool `yaml:"mixed_mode,omitempty" json:"mixed_mode,omitempty"`

 	// NoMixedFreeString disables the mixed mode for free strings
 	// In this way if the LLM selects a free string, it won't be mixed necessarily with JSON objects.
 	// For example, if enabled, the LLM returns either a JSON object or a free string, but not a mix of both
 	// If disabled (default): the LLM can return a JSON object surrounded by free strings (e.g. `this is the JSON result: { "bar": "baz" } for your question`). This forces the LLM to return at least a JSON object, but it's not going to be strict
-	NoMixedFreeString bool `yaml:"no_mixed_free_string"`
+	NoMixedFreeString bool `yaml:"no_mixed_free_string,omitempty" json:"no_mixed_free_string,omitempty"`

 	// NoGrammar disables the grammar parsing and parses the responses directly from the LLM
-	NoGrammar bool `yaml:"disable"`
+	NoGrammar bool `yaml:"disable,omitempty" json:"disable,omitempty"`

 	// Prefix is the prefix to add to the grammar when it is generated
 	// This is useful when models prepend a tag before returning JSON
-	Prefix string `yaml:"prefix"`
+	Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"`

 	// ExpectStringsAfterJSON enables mixed string suffix
-	ExpectStringsAfterJSON bool `yaml:"expect_strings_after_json"`
+	ExpectStringsAfterJSON bool `yaml:"expect_strings_after_json,omitempty" json:"expect_strings_after_json,omitempty"`

 	// PropOrder selects what order to print properties
 	// for instance name,arguments will make print { "name": "foo", "arguments": { "bar": "baz" } }
 	// instead of { "arguments": { "bar": "baz" }, "name": "foo" }
-	PropOrder string `yaml:"properties_order"`
+	PropOrder string `yaml:"properties_order,omitempty" json:"properties_order,omitempty"`

 	// SchemaType can be configured to use a specific schema type to force the grammar
 	// available : json, llama3.1
-	SchemaType string `yaml:"schema_type"`
+	SchemaType string `yaml:"schema_type,omitempty" json:"schema_type,omitempty"`

-	GrammarTriggers []GrammarTrigger `yaml:"triggers"`
+	GrammarTriggers []GrammarTrigger `yaml:"triggers,omitempty" json:"triggers,omitempty"`
 }

+// @Description GrammarTrigger defines a trigger word for grammar parsing
 type GrammarTrigger struct {
 	// Word is the string that triggers the grammar
-	Word    string `yaml:"word"`
+	Word string `yaml:"word,omitempty" json:"word,omitempty"`
 }

-// FunctionsConfig is the configuration for the tool/function call.
+// @Description FunctionsConfig is the configuration for the tool/function call.
 // It includes setting to map the function name and arguments from the response
 // and, for instance, also if processing the requests with BNF grammars.
 type FunctionsConfig struct {
 	// DisableNoAction disables the "no action" tool
 	// By default we inject a tool that does nothing and is used to return an answer from the LLM
-	DisableNoAction bool `yaml:"disable_no_action"`
+	DisableNoAction bool `yaml:"disable_no_action,omitempty" json:"disable_no_action,omitempty"`

 	// Grammar is the configuration for the grammar
-	GrammarConfig GrammarConfig `yaml:"grammar"`
+	GrammarConfig GrammarConfig `yaml:"grammar,omitempty" json:"grammar,omitempty"`

 	// NoActionFunctionName is the name of the function that does nothing. It defaults to "answer"
-	NoActionFunctionName string `yaml:"no_action_function_name"`
+	NoActionFunctionName string `yaml:"no_action_function_name,omitempty" json:"no_action_function_name,omitempty"`

 	// NoActionDescriptionName is the name of the function that returns the description of the no action function
-	NoActionDescriptionName string `yaml:"no_action_description_name"`
+	NoActionDescriptionName string `yaml:"no_action_description_name,omitempty" json:"no_action_description_name,omitempty"`

 	// ResponseRegex is a named regex to extract the function name and arguments from the response
-	ResponseRegex []string `yaml:"response_regex"`
+	ResponseRegex []string `yaml:"response_regex,omitempty" json:"response_regex,omitempty"`

 	// JSONRegexMatch is a regex to extract the JSON object from the response
-	JSONRegexMatch []string `yaml:"json_regex_match"`
+	JSONRegexMatch []string `yaml:"json_regex_match,omitempty" json:"json_regex_match,omitempty"`

 	// ArgumentRegex is a named regex to extract the arguments from the response. Use ArgumentRegexKey and ArgumentRegexValue to set the names of the named regex for key and value of the arguments.
-	ArgumentRegex []string `yaml:"argument_regex"`
+	ArgumentRegex []string `yaml:"argument_regex,omitempty" json:"argument_regex,omitempty"`
 	// ArgumentRegexKey and ArgumentRegexValue are the capture group names used for key and value extraction. default: key and value
-	ArgumentRegexKey   string `yaml:"argument_regex_key_name"`   // default: key
-	ArgumentRegexValue string `yaml:"argument_regex_value_name"` // default: value
+	ArgumentRegexKey   string `yaml:"argument_regex_key_name,omitempty" json:"argument_regex_key_name,omitempty"`   // default: key
+	ArgumentRegexValue string `yaml:"argument_regex_value_name,omitempty" json:"argument_regex_value_name,omitempty"` // default: value

 	// ReplaceFunctionResults allows replacing strings in the results before parsing them
-	ReplaceFunctionResults []ReplaceResult `yaml:"replace_function_results"`
+	ReplaceFunctionResults []ReplaceResult `yaml:"replace_function_results,omitempty" json:"replace_function_results,omitempty"`

 	// ReplaceLLMResult allows replacing strings in the results before parsing them
-	ReplaceLLMResult []ReplaceResult `yaml:"replace_llm_results"`
+	ReplaceLLMResult []ReplaceResult `yaml:"replace_llm_results,omitempty" json:"replace_llm_results,omitempty"`

 	// CaptureLLMResult is a regex to extract a string from the LLM response
 	// that is used as return string when using tools.
 	// This is useful for e.g. if the LLM outputs a reasoning and we want to get the reasoning as a string back
-	CaptureLLMResult []string `yaml:"capture_llm_results"`
+	CaptureLLMResult []string `yaml:"capture_llm_results,omitempty" json:"capture_llm_results,omitempty"`

 	// FunctionName enable the LLM to return { "name": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }
 	// instead of { "function": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }.
 	// This might be useful for certain models trained with the function name as the first token.
-	FunctionNameKey      string `yaml:"function_name_key"`
-	FunctionArgumentsKey string `yaml:"function_arguments_key"`
+	FunctionNameKey      string `yaml:"function_name_key,omitempty" json:"function_name_key,omitempty"`
+	FunctionArgumentsKey string `yaml:"function_arguments_key,omitempty" json:"function_arguments_key,omitempty"`
 }

+// @Description ReplaceResult defines a key-value replacement for function results
 type ReplaceResult struct {
-	Key   string `yaml:"key"`
-	Value string `yaml:"value"`
+	Key   string `yaml:"key,omitempty" json:"key,omitempty"`
+	Value string `yaml:"value,omitempty" json:"value,omitempty"`
 }

 type FuncCallResults struct {
--- a/pkg/grpc/client.go
+++ b/pkg/grpc/client.go
@@ -178,11 +178,22 @@ func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f fun
 	}

 	for {
+		// Check if context is cancelled before receiving
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+		}
+
 		reply, err := stream.Recv()
 		if err == io.EOF {
 			break
 		}
 		if err != nil {
+			// Check if error is due to context cancellation
+			if ctx.Err() != nil {
+				return ctx.Err()
+			}
 			fmt.Println("Error", err)

 			return err
--- a/.github/gallery-agent/hfapi/client.go
+++ b/.github/gallery-agent/hfapi/client.go
@@ -51,6 +51,7 @@ type ModelFile struct {
 	Size     int64
 	SHA256   string
 	IsReadme bool
+	URL      string
 }

 // ModelDetails represents detailed information about a model
@@ -215,6 +216,7 @@ func (c *Client) GetModelDetails(repoID string) (*ModelDetails, error) {
 	}

 	// Process each file
+	baseURL := strings.TrimSuffix(c.baseURL, "/api/models")
 	for _, file := range files {
 		fileName := filepath.Base(file.Path)
 		isReadme := strings.Contains(strings.ToLower(fileName), "readme")
@@ -227,11 +229,16 @@ func (c *Client) GetModelDetails(repoID string) (*ModelDetails, error) {
 			sha256 = file.Oid
 		}

+		// Construct the full URL for the file
+		// Use /resolve/main/ for downloading files (handles LFS properly)
+		fileURL := fmt.Sprintf("%s/%s/resolve/main/%s", baseURL, repoID, file.Path)
+
 		modelFile := ModelFile{
 			Path:     file.Path,
 			Size:     file.Size,
 			SHA256:   sha256,
 			IsReadme: isReadme,
+			URL:      fileURL,
 		}

 		details.Files = append(details.Files, modelFile)
--- a/.github/gallery-agent/hfapi/client_test.go
+++ b/.github/gallery-agent/hfapi/client_test.go
@@ -1,6 +1,7 @@
 package hfapi_test

 import (
+	"fmt"
 	"net/http"
 	"net/http/httptest"
 	"strings"
@@ -8,7 +9,7 @@ import (
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"

-	"github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
+	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
 )

 var _ = Describe("HuggingFace API Client", func() {
@@ -270,6 +271,15 @@ var _ = Describe("HuggingFace API Client", func() {
 		})
 	})

+	Context("when getting file SHA on remote model", func() {
+		It("should get file SHA successfully", func() {
+			sha, err := client.GetFileSHA(
+				"mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF", "localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf")
+			Expect(err).ToNot(HaveOccurred())
+			Expect(sha).To(Equal("4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4"))
+		})
+	})
+
 	Context("when listing files", func() {
 		BeforeEach(func() {
 			mockFilesResponse := `[
@@ -329,23 +339,25 @@ var _ = Describe("HuggingFace API Client", func() {

 	Context("when getting file SHA", func() {
 		BeforeEach(func() {
-			mockFileInfoResponse := `{
-				"path": "model-Q4_K_M.gguf",
-				"size": 1000000,
-				"oid": "abc123",
-				"lfs": {
-					"oid": "sha256:def456",
+			mockFilesResponse := `[
+				{
+					"type": "file",
+					"path": "model-Q4_K_M.gguf",
 					"size": 1000000,
-					"pointer": "version https://git-lfs.github.com/spec/v1",
-					"sha256": "def456789"
+					"oid": "abc123",
+					"lfs": {
+						"oid": "def456789",
+						"size": 1000000,
+						"pointerSize": 135
+					}
 				}
-			}`
+			]`

 			server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-				if strings.Contains(r.URL.Path, "/paths-info") {
+				if strings.Contains(r.URL.Path, "/tree/main") {
 					w.Header().Set("Content-Type", "application/json")
 					w.WriteHeader(http.StatusOK)
-					w.Write([]byte(mockFileInfoResponse))
+					w.Write([]byte(mockFilesResponse))
 				} else {
 					w.WriteHeader(http.StatusNotFound)
 				}
@@ -363,18 +375,29 @@ var _ = Describe("HuggingFace API Client", func() {

 		It("should handle missing SHA gracefully", func() {
 			server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-				w.Header().Set("Content-Type", "application/json")
-				w.WriteHeader(http.StatusOK)
-				w.Write([]byte(`{"path": "file.txt", "size": 100}`))
+				if strings.Contains(r.URL.Path, "/tree/main") {
+					w.Header().Set("Content-Type", "application/json")
+					w.WriteHeader(http.StatusOK)
+					w.Write([]byte(`[
+						{
+							"type": "file",
+							"path": "file.txt",
+							"size": 100,
+							"oid": "file123"
+						}
+					]`))
+				} else {
+					w.WriteHeader(http.StatusNotFound)
+				}
 			}))

 			client.SetBaseURL(server.URL)

 			sha, err := client.GetFileSHA("test/model", "file.txt")

-			Expect(err).To(HaveOccurred())
-			Expect(err.Error()).To(ContainSubstring("no SHA256 found"))
-			Expect(sha).To(Equal(""))
+			Expect(err).ToNot(HaveOccurred())
+			// When the file entry has no "lfs" object, GetFileSHA should return the entry's OID
+			Expect(sha).To(Equal("file123"))
 		})
 	})

@@ -439,6 +462,13 @@ var _ = Describe("HuggingFace API Client", func() {
 			Expect(details.ReadmeFile).ToNot(BeNil())
 			Expect(details.ReadmeFile.Path).To(Equal("README.md"))
 			Expect(details.ReadmeFile.IsReadme).To(BeTrue())
+
+			// Verify URLs are set for all files
+			baseURL := strings.TrimSuffix(server.URL, "/api/models")
+			for _, file := range details.Files {
+				expectedURL := fmt.Sprintf("%s/test/model/resolve/main/%s", baseURL, file.Path)
+				Expect(file.URL).To(Equal(expectedURL))
+			}
 		})
 	})

--- a/.github/gallery-agent/hfapi/hfapi_suite_test.go
+++ b/.github/gallery-agent/hfapi/hfapi_suite_test.go
@@ -11,3 +11,5 @@ func TestHfapi(t *testing.T) {
 	RegisterFailHandler(Fail)
 	RunSpecs(t, "HuggingFace API Suite")
 }
+
+