feat: import models via URI (#7245)

* feat: initial hook to install elements directly

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* WIP: ui changes

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Move HF api client to pkg

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add simple importer for gguf files

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add opcache

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* wire importers to CLI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add omitempty to config fields

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add MLX importer

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Small refactors to start to use HF for discovery

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Common preferences

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add support to bare HF repos

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(importer/llama.cpp): add support for mmproj files

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* add mmproj quants to common preferences

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix VLM usage in tokenizer mode with llama.cpp

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2025-11-12 20:48:56 +01:00
committed by GitHub
parent 87d0020c10
commit 3728552e94
40 changed files with 1970 additions and 694 deletions

View File

@@ -7,8 +7,8 @@ import (
"slices"
"strings"
"github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
"github.com/mudler/cogito"
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
cogito "github.com/mudler/cogito"
"github.com/mudler/cogito/structures"
"github.com/sashabaranov/go-openai/jsonschema"

View File

@@ -1,38 +0,0 @@
module github.com/go-skynet/LocalAI/.github/gallery-agent
go 1.24.1
require (
github.com/mudler/cogito v0.3.0
github.com/onsi/ginkgo/v2 v2.25.3
github.com/onsi/gomega v1.38.2
github.com/sashabaranov/go-openai v1.41.2
github.com/tmc/langchaingo v0.1.13
)
require (
dario.cat/mergo v1.0.1 // indirect
github.com/Masterminds/goutils v1.1.1 // indirect
github.com/Masterminds/semver/v3 v3.4.0 // indirect
github.com/Masterminds/sprig/v3 v3.3.0 // indirect
github.com/go-logr/logr v1.4.3 // indirect
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/google/jsonschema-go v0.3.0 // indirect
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/huandu/xstrings v1.5.0 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/reflectwalk v1.0.2 // indirect
github.com/modelcontextprotocol/go-sdk v1.0.0 // indirect
github.com/shopspring/decimal v1.4.0 // indirect
github.com/spf13/cast v1.7.0 // indirect
github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
go.uber.org/automaxprocs v1.6.0 // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/crypto v0.41.0 // indirect
golang.org/x/net v0.43.0 // indirect
golang.org/x/sys v0.35.0 // indirect
golang.org/x/text v0.28.0 // indirect
golang.org/x/tools v0.36.0 // indirect
)

View File

@@ -1,168 +0,0 @@
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=
github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4=
github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM=
github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk=
github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A=
github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw=
github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA=
github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
github.com/docker/docker v28.2.2+incompatible h1:CjwRSksz8Yo4+RmQ339Dp/D2tGO5JxwYeqtMOEe0LDw=
github.com/docker/docker v28.2.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c=
github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/ebitengine/purego v0.8.4 h1:CF7LEKg5FFOsASUj0+QwaXf8Ht6TlFxg09+S9wz0omw=
github.com/ebitengine/purego v0.8.4/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/jsonschema-go v0.3.0 h1:6AH2TxVNtk3IlvkkhjrtbUc4S8AvO0Xii0DxIygDg+Q=
github.com/google/jsonschema-go v0.3.0/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE=
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8=
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
github.com/magiconair/properties v1.8.10 h1:s31yESBquKXCV9a/ScB3ESkOjUYYv+X0rg8SYxI99mE=
github.com/magiconair/properties v1.8.10/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
github.com/moby/go-archive v0.1.0 h1:Kk/5rdW/g+H8NHdJW2gsXyZ7UnzvJNOy6VKJqueWdcQ=
github.com/moby/go-archive v0.1.0/go.mod h1:G9B+YoujNohJmrIYFBpSd54GTUB4lt9S+xVQvsJyFuo=
github.com/moby/patternmatcher v0.6.0 h1:GmP9lR19aU5GqSSFko+5pRqHi+Ohk1O69aFiKkVGiPk=
github.com/moby/patternmatcher v0.6.0/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc=
github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU=
github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko=
github.com/moby/sys/user v0.4.0 h1:jhcMKit7SA80hivmFJcbB1vqmw//wU61Zdui2eQXuMs=
github.com/moby/sys/user v0.4.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs=
github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g=
github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28=
github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
github.com/modelcontextprotocol/go-sdk v1.0.0 h1:Z4MSjLi38bTgLrd/LjSmofqRqyBiVKRyQSJgw8q8V74=
github.com/modelcontextprotocol/go-sdk v1.0.0/go.mod h1:nYtYQroQ2KQiM0/SbyEPUWQ6xs4B95gJjEalc9AQyOs=
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
github.com/mudler/cogito v0.3.0 h1:NbVAO3bLkK5oGSY0xq87jlz8C9OIsLW55s+8Hfzeu9s=
github.com/mudler/cogito v0.3.0/go.mod h1:abMwl+CUjCp87IufA2quZdZt0bbLaHHN79o17HbUKxU=
github.com/onsi/ginkgo/v2 v2.25.3 h1:Ty8+Yi/ayDAGtk4XxmmfUy4GabvM+MegeB4cDLRi6nw=
github.com/onsi/ginkgo/v2 v2.25.3/go.mod h1:43uiyQC4Ed2tkOzLsEYm7hnrb7UJTWHYNsuy3bG/snE=
github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw=
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
github.com/sashabaranov/go-openai v1.41.2 h1:vfPRBZNMpnqu8ELsclWcAvF19lDNgh1t6TVfFFOPiSM=
github.com/sashabaranov/go-openai v1.41.2/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/shirou/gopsutil/v4 v4.25.5 h1:rtd9piuSMGeU8g1RMXjZs9y9luK5BwtnG7dZaQUJAsc=
github.com/shirou/gopsutil/v4 v4.25.5/go.mod h1:PfybzyydfZcN+JMMjkF6Zb8Mq1A/VcogFFg7hj50W9c=
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w=
github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/testcontainers/testcontainers-go v0.38.0 h1:d7uEapLcv2P8AvH8ahLqDMMxda2W9gQN1nRbHS28HBw=
github.com/testcontainers/testcontainers-go v0.38.0/go.mod h1:C52c9MoHpWO+C4aqmgSU+hxlR5jlEayWtgYrb8Pzz1w=
github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU=
github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk=
github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
github.com/tmc/langchaingo v0.1.13 h1:rcpMWBIi2y3B90XxfE4Ao8dhCQPVDMaNPnN5cGB1CaA=
github.com/tmc/langchaingo v0.1.13/go.mod h1:vpQ5NOIhpzxDfTZK9B6tf2GM/MoaHewPWM5KXXGh7hg=
github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4=
github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4=
github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0 h1:Xs2Ncz0gNihqu9iosIZ5SkBbWo5T8JhhLJFMQL1qmLI=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0/go.mod h1:vy+2G/6NvVMpwGX/NyLqcC41fxepnuKHk16E6IZUcJc=
go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8=
go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM=
go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA=
go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI=
go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE=
go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs=
go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4=
golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc=
golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg=
golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -9,7 +9,7 @@ import (
"strings"
"time"
"github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
)
// ProcessedModelFile represents a processed model file with additional metadata

View File

@@ -3,9 +3,9 @@ package main
import (
"fmt"
"github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
"github.com/sashabaranov/go-openai"
"github.com/tmc/langchaingo/jsonschema"
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
openai "github.com/sashabaranov/go-openai"
jsonschema "github.com/sashabaranov/go-openai/jsonschema"
)
// Get repository README from HF
@@ -13,7 +13,7 @@ type HFReadmeTool struct {
client *hfapi.Client
}
func (s *HFReadmeTool) Run(args map[string]any) (string, error) {
func (s *HFReadmeTool) Execute(args map[string]any) (string, error) {
q, ok := args["repository"].(string)
if !ok {
return "", fmt.Errorf("no query")

View File

@@ -39,11 +39,6 @@ jobs:
with:
go-version: '1.21'
- name: Build gallery agent
run: |
cd .github/gallery-agent
go mod download
go build -o gallery-agent .
- name: Run gallery agent
env:
@@ -56,9 +51,7 @@ jobs:
MAX_MODELS: ${{ github.event.inputs.max_models || '1' }}
run: |
export GALLERY_INDEX_PATH=$PWD/gallery/index.yaml
cd .github/gallery-agent
./gallery-agent
rm -rf gallery-agent
go run .github/gallery-agent
- name: Check for changes
id: check_changes

View File

@@ -590,17 +590,71 @@ public:
// Convert proto Messages to JSON format compatible with oaicompat_chat_params_parse
json body_json;
json messages_json = json::array();
// Find the last user message index to attach images/audio to
int last_user_msg_idx = -1;
for (int i = request->messages_size() - 1; i >= 0; i--) {
if (request->messages(i).role() == "user") {
last_user_msg_idx = i;
break;
}
}
for (int i = 0; i < request->messages_size(); i++) {
const auto& msg = request->messages(i);
json msg_json;
msg_json["role"] = msg.role();
bool is_last_user_msg = (i == last_user_msg_idx);
bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0);
// Handle content - can be string, null, or array
// For multimodal content, we'll embed images/audio from separate fields
if (!msg.content().empty()) {
msg_json["content"] = msg.content();
} else if (request->images_size() > 0 || request->audios_size() > 0) {
// If no content but has images/audio, create content array
// Try to parse content as JSON to see if it's already an array
json content_val;
try {
content_val = json::parse(msg.content());
} catch (const json::parse_error&) {
// Not JSON, treat as plain string
content_val = msg.content();
}
// If content is a string and this is the last user message with images/audio, combine them
if (content_val.is_string() && is_last_user_msg && has_images_or_audio) {
json content_array = json::array();
// Add text first
content_array.push_back({{"type", "text"}, {"text", content_val.get<std::string>()}});
// Add images
if (request->images_size() > 0) {
for (int j = 0; j < request->images_size(); j++) {
json image_chunk;
image_chunk["type"] = "image_url";
json image_url;
image_url["url"] = "data:image/jpeg;base64," + request->images(j);
image_chunk["image_url"] = image_url;
content_array.push_back(image_chunk);
}
}
// Add audios
if (request->audios_size() > 0) {
for (int j = 0; j < request->audios_size(); j++) {
json audio_chunk;
audio_chunk["type"] = "input_audio";
json input_audio;
input_audio["data"] = request->audios(j);
input_audio["format"] = "wav"; // default, could be made configurable
audio_chunk["input_audio"] = input_audio;
content_array.push_back(audio_chunk);
}
}
msg_json["content"] = content_array;
} else {
// Use content as-is (already array or not last user message)
msg_json["content"] = content_val;
}
} else if (is_last_user_msg && has_images_or_audio) {
// If no content but this is the last user message with images/audio, create content array
json content_array = json::array();
if (request->images_size() > 0) {
for (int j = 0; j < request->images_size(); j++) {
@@ -718,6 +772,9 @@ public:
// Create parser options with current chat_templates to ensure tmpls is not null
oaicompat_parser_options parser_opt = ctx_server.oai_parser_opt;
parser_opt.tmpls = ctx_server.chat_templates.get(); // Ensure tmpls is set to current chat_templates
// Update allow_image and allow_audio based on current mctx state
parser_opt.allow_image = ctx_server.mctx ? mtmd_support_vision(ctx_server.mctx) : false;
parser_opt.allow_audio = ctx_server.mctx ? mtmd_support_audio(ctx_server.mctx) : false;
json parsed_data = oaicompat_chat_params_parse(body_json, parser_opt, files);
// Extract the prompt from parsed data
@@ -758,7 +815,7 @@ public:
// If not using chat templates, extract files from image_data/audio_data fields
// (If using chat templates, files were already extracted by oaicompat_chat_params_parse)
//if (!request->usetokenizertemplate() || request->messages_size() == 0 || ctx_server.chat_templates == nullptr) {
if (!request->usetokenizertemplate() || request->messages_size() == 0 || ctx_server.chat_templates == nullptr) {
const auto &images_data = data.find("image_data");
if (images_data != data.end() && images_data->is_array())
{
@@ -778,7 +835,7 @@ public:
files.push_back(decoded_data);
}
}
// }
}
const bool has_mtmd = ctx_server.mctx != nullptr;
@@ -917,17 +974,71 @@ public:
// Convert proto Messages to JSON format compatible with oaicompat_chat_params_parse
json body_json;
json messages_json = json::array();
// Find the last user message index to attach images/audio to
int last_user_msg_idx = -1;
for (int i = request->messages_size() - 1; i >= 0; i--) {
if (request->messages(i).role() == "user") {
last_user_msg_idx = i;
break;
}
}
for (int i = 0; i < request->messages_size(); i++) {
const auto& msg = request->messages(i);
json msg_json;
msg_json["role"] = msg.role();
bool is_last_user_msg = (i == last_user_msg_idx);
bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0);
// Handle content - can be string, null, or array
// For multimodal content, we'll embed images/audio from separate fields
if (!msg.content().empty()) {
msg_json["content"] = msg.content();
} else if (request->images_size() > 0 || request->audios_size() > 0) {
// If no content but has images/audio, create content array
// Try to parse content as JSON to see if it's already an array
json content_val;
try {
content_val = json::parse(msg.content());
} catch (const json::parse_error&) {
// Not JSON, treat as plain string
content_val = msg.content();
}
// If content is a string and this is the last user message with images/audio, combine them
if (content_val.is_string() && is_last_user_msg && has_images_or_audio) {
json content_array = json::array();
// Add text first
content_array.push_back({{"type", "text"}, {"text", content_val.get<std::string>()}});
// Add images
if (request->images_size() > 0) {
for (int j = 0; j < request->images_size(); j++) {
json image_chunk;
image_chunk["type"] = "image_url";
json image_url;
image_url["url"] = "data:image/jpeg;base64," + request->images(j);
image_chunk["image_url"] = image_url;
content_array.push_back(image_chunk);
}
}
// Add audios
if (request->audios_size() > 0) {
for (int j = 0; j < request->audios_size(); j++) {
json audio_chunk;
audio_chunk["type"] = "input_audio";
json input_audio;
input_audio["data"] = request->audios(j);
input_audio["format"] = "wav"; // default, could be made configurable
audio_chunk["input_audio"] = input_audio;
content_array.push_back(audio_chunk);
}
}
msg_json["content"] = content_array;
} else {
// Use content as-is (already array or not last user message)
msg_json["content"] = content_val;
}
} else if (is_last_user_msg && has_images_or_audio) {
// If no content but this is the last user message with images/audio, create content array
json content_array = json::array();
if (request->images_size() > 0) {
for (int j = 0; j < request->images_size(); j++) {
@@ -1048,6 +1159,9 @@ public:
// Create parser options with current chat_templates to ensure tmpls is not null
oaicompat_parser_options parser_opt = ctx_server.oai_parser_opt;
parser_opt.tmpls = ctx_server.chat_templates.get(); // Ensure tmpls is set to current chat_templates
// Update allow_image and allow_audio based on current mctx state
parser_opt.allow_image = ctx_server.mctx ? mtmd_support_vision(ctx_server.mctx) : false;
parser_opt.allow_audio = ctx_server.mctx ? mtmd_support_audio(ctx_server.mctx) : false;
json parsed_data = oaicompat_chat_params_parse(body_json, parser_opt, files);
// Extract the prompt from parsed data
@@ -1088,7 +1202,7 @@ public:
// If not using chat templates, extract files from image_data/audio_data fields
// (If using chat templates, files were already extracted by oaicompat_chat_params_parse)
// if (!request->usetokenizertemplate() || request->messages_size() == 0 || ctx_server.chat_templates == nullptr) {
if (!request->usetokenizertemplate() || request->messages_size() == 0 || ctx_server.chat_templates == nullptr) {
const auto &images_data = data.find("image_data");
if (images_data != data.end() && images_data->is_array())
{
@@ -1110,7 +1224,7 @@ public:
files.push_back(decoded_data);
}
}
// }
}
// process files
const bool has_mtmd = ctx_server.mctx != nullptr;

View File

@@ -22,9 +22,15 @@ func New(opts ...config.AppOption) (*Application, error) {
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.SystemState.Model.ModelsPath)
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
if err := application.start(); err != nil {
return nil, err
}
caps, err := xsysinfo.CPUCapabilities()
if err == nil {
log.Debug().Msgf("CPU capabilities: %v", caps)
}
gpus, err := xsysinfo.GPUs()
if err == nil {
@@ -56,7 +62,7 @@ func New(opts ...config.AppOption) (*Application, error) {
}
}
if err := coreStartup.InstallModels(options.Galleries, options.BackendGalleries, options.SystemState, application.ModelLoader(), options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil {
if err := coreStartup.InstallModels(application.GalleryService(), options.Galleries, options.BackendGalleries, options.SystemState, application.ModelLoader(), options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil {
log.Error().Err(err).Msg("error installing models")
}
@@ -152,10 +158,6 @@ func New(opts ...config.AppOption) (*Application, error) {
// Watch the configuration directory
startWatcher(options)
if err := application.start(); err != nil {
return nil, err
}
log.Info().Msg("core/startup process completed!")
return application, nil
}

View File

@@ -1,12 +1,14 @@
package cli
import (
"context"
"encoding/json"
"errors"
"fmt"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/startup"
@@ -78,6 +80,12 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
return err
}
galleryService := services.NewGalleryService(&config.ApplicationConfig{}, model.NewModelLoader(systemState, true))
err = galleryService.Start(context.Background(), config.NewModelConfigLoader(mi.ModelsPath), systemState)
if err != nil {
return err
}
var galleries []config.Gallery
if err := json.Unmarshal([]byte(mi.Galleries), &galleries); err != nil {
log.Error().Err(err).Msg("unable to load galleries")
@@ -127,7 +135,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
}
modelLoader := model.NewModelLoader(systemState, true)
err = startup.InstallModels(galleries, backendGalleries, systemState, modelLoader, !mi.DisablePredownloadScan, mi.AutoloadBackendGalleries, progressCallback, modelName)
err = startup.InstallModels(galleryService, galleries, backendGalleries, systemState, modelLoader, !mi.DisablePredownloadScan, mi.AutoloadBackendGalleries, progressCallback, modelName)
if err != nil {
return err
}

View File

@@ -16,30 +16,31 @@ const (
RAND_SEED = -1
)
// @Description TTS configuration
type TTSConfig struct {
// Voice wav path or id
Voice string `yaml:"voice" json:"voice"`
Voice string `yaml:"voice,omitempty" json:"voice,omitempty"`
AudioPath string `yaml:"audio_path" json:"audio_path"`
AudioPath string `yaml:"audio_path,omitempty" json:"audio_path,omitempty"`
}
// ModelConfig represents a model configuration
// @Description ModelConfig represents a model configuration
type ModelConfig struct {
modelConfigFile string `yaml:"-" json:"-"`
schema.PredictionOptions `yaml:"parameters" json:"parameters"`
Name string `yaml:"name" json:"name"`
schema.PredictionOptions `yaml:"parameters,omitempty" json:"parameters,omitempty"`
Name string `yaml:"name,omitempty" json:"name,omitempty"`
F16 *bool `yaml:"f16" json:"f16"`
Threads *int `yaml:"threads" json:"threads"`
Debug *bool `yaml:"debug" json:"debug"`
Roles map[string]string `yaml:"roles" json:"roles"`
Embeddings *bool `yaml:"embeddings" json:"embeddings"`
Backend string `yaml:"backend" json:"backend"`
TemplateConfig TemplateConfig `yaml:"template" json:"template"`
KnownUsecaseStrings []string `yaml:"known_usecases" json:"known_usecases"`
F16 *bool `yaml:"f16,omitempty" json:"f16,omitempty"`
Threads *int `yaml:"threads,omitempty" json:"threads,omitempty"`
Debug *bool `yaml:"debug,omitempty" json:"debug,omitempty"`
Roles map[string]string `yaml:"roles,omitempty" json:"roles,omitempty"`
Embeddings *bool `yaml:"embeddings,omitempty" json:"embeddings,omitempty"`
Backend string `yaml:"backend,omitempty" json:"backend,omitempty"`
TemplateConfig TemplateConfig `yaml:"template,omitempty" json:"template,omitempty"`
KnownUsecaseStrings []string `yaml:"known_usecases,omitempty" json:"known_usecases,omitempty"`
KnownUsecases *ModelConfigUsecases `yaml:"-" json:"-"`
Pipeline Pipeline `yaml:"pipeline" json:"pipeline"`
Pipeline Pipeline `yaml:"pipeline,omitempty" json:"pipeline,omitempty"`
PromptStrings, InputStrings []string `yaml:"-" json:"-"`
InputToken [][]int `yaml:"-" json:"-"`
@@ -47,50 +48,52 @@ type ModelConfig struct {
ResponseFormat string `yaml:"-" json:"-"`
ResponseFormatMap map[string]interface{} `yaml:"-" json:"-"`
FunctionsConfig functions.FunctionsConfig `yaml:"function" json:"function"`
FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"`
FeatureFlag FeatureFlag `yaml:"feature_flags" json:"feature_flags"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
// LLM configs (GPT4ALL, Llama.cpp, ...)
LLMConfig `yaml:",inline" json:",inline"`
// Diffusers
Diffusers Diffusers `yaml:"diffusers" json:"diffusers"`
Step int `yaml:"step" json:"step"`
Diffusers Diffusers `yaml:"diffusers,omitempty" json:"diffusers,omitempty"`
Step int `yaml:"step,omitempty" json:"step,omitempty"`
// GRPC Options
GRPC GRPC `yaml:"grpc" json:"grpc"`
GRPC GRPC `yaml:"grpc,omitempty" json:"grpc,omitempty"`
// TTS specifics
TTSConfig `yaml:"tts" json:"tts"`
TTSConfig `yaml:"tts,omitempty" json:"tts,omitempty"`
// CUDA
// Explicitly enable CUDA or not (some backends might need it)
CUDA bool `yaml:"cuda" json:"cuda"`
CUDA bool `yaml:"cuda,omitempty" json:"cuda,omitempty"`
DownloadFiles []File `yaml:"download_files" json:"download_files"`
DownloadFiles []File `yaml:"download_files,omitempty" json:"download_files,omitempty"`
Description string `yaml:"description" json:"description"`
Usage string `yaml:"usage" json:"usage"`
Description string `yaml:"description,omitempty" json:"description,omitempty"`
Usage string `yaml:"usage,omitempty" json:"usage,omitempty"`
Options []string `yaml:"options" json:"options"`
Overrides []string `yaml:"overrides" json:"overrides"`
Options []string `yaml:"options,omitempty" json:"options,omitempty"`
Overrides []string `yaml:"overrides,omitempty" json:"overrides,omitempty"`
MCP MCPConfig `yaml:"mcp" json:"mcp"`
Agent AgentConfig `yaml:"agent" json:"agent"`
MCP MCPConfig `yaml:"mcp,omitempty" json:"mcp,omitempty"`
Agent AgentConfig `yaml:"agent,omitempty" json:"agent,omitempty"`
}
// @Description MCP configuration
type MCPConfig struct {
Servers string `yaml:"remote" json:"remote"`
Stdio string `yaml:"stdio" json:"stdio"`
Servers string `yaml:"remote,omitempty" json:"remote,omitempty"`
Stdio string `yaml:"stdio,omitempty" json:"stdio,omitempty"`
}
// @Description Agent configuration
type AgentConfig struct {
MaxAttempts int `yaml:"max_attempts" json:"max_attempts"`
MaxIterations int `yaml:"max_iterations" json:"max_iterations"`
EnableReasoning bool `yaml:"enable_reasoning" json:"enable_reasoning"`
EnablePlanning bool `yaml:"enable_planning" json:"enable_planning"`
EnableMCPPrompts bool `yaml:"enable_mcp_prompts" json:"enable_mcp_prompts"`
EnablePlanReEvaluator bool `yaml:"enable_plan_re_evaluator" json:"enable_plan_re_evaluator"`
MaxAttempts int `yaml:"max_attempts,omitempty" json:"max_attempts,omitempty"`
MaxIterations int `yaml:"max_iterations,omitempty" json:"max_iterations,omitempty"`
EnableReasoning bool `yaml:"enable_reasoning,omitempty" json:"enable_reasoning,omitempty"`
EnablePlanning bool `yaml:"enable_planning,omitempty" json:"enable_planning,omitempty"`
EnableMCPPrompts bool `yaml:"enable_mcp_prompts,omitempty" json:"enable_mcp_prompts,omitempty"`
EnablePlanReEvaluator bool `yaml:"enable_plan_re_evaluator,omitempty" json:"enable_plan_re_evaluator,omitempty"`
}
func (c *MCPConfig) MCPConfigFromYAML() (MCPGenericConfig[MCPRemoteServers], MCPGenericConfig[MCPSTDIOServers], error) {
@@ -107,35 +110,39 @@ func (c *MCPConfig) MCPConfigFromYAML() (MCPGenericConfig[MCPRemoteServers], MCP
return remote, stdio, nil
}
// @Description MCP generic configuration
type MCPGenericConfig[T any] struct {
Servers T `yaml:"mcpServers" json:"mcpServers"`
Servers T `yaml:"mcpServers,omitempty" json:"mcpServers,omitempty"`
}
type MCPRemoteServers map[string]MCPRemoteServer
type MCPSTDIOServers map[string]MCPSTDIOServer
// @Description MCP remote server configuration
type MCPRemoteServer struct {
URL string `json:"url"`
Token string `json:"token"`
URL string `json:"url,omitempty"`
Token string `json:"token,omitempty"`
}
// @Description MCP STDIO server configuration
type MCPSTDIOServer struct {
Args []string `json:"args"`
Env map[string]string `json:"env"`
Command string `json:"command"`
Args []string `json:"args,omitempty"`
Env map[string]string `json:"env,omitempty"`
Command string `json:"command,omitempty"`
}
// Pipeline defines other models to use for audio-to-audio
// @Description Pipeline defines other models to use for audio-to-audio
type Pipeline struct {
TTS string `yaml:"tts" json:"tts"`
LLM string `yaml:"llm" json:"llm"`
Transcription string `yaml:"transcription" json:"transcription"`
VAD string `yaml:"vad" json:"vad"`
TTS string `yaml:"tts,omitempty" json:"tts,omitempty"`
LLM string `yaml:"llm,omitempty" json:"llm,omitempty"`
Transcription string `yaml:"transcription,omitempty" json:"transcription,omitempty"`
VAD string `yaml:"vad,omitempty" json:"vad,omitempty"`
}
// @Description File configuration for model downloads
type File struct {
Filename string `yaml:"filename" json:"filename"`
SHA256 string `yaml:"sha256" json:"sha256"`
URI downloader.URI `yaml:"uri" json:"uri"`
Filename string `yaml:"filename,omitempty" json:"filename,omitempty"`
SHA256 string `yaml:"sha256,omitempty" json:"sha256,omitempty"`
URI downloader.URI `yaml:"uri,omitempty" json:"uri,omitempty"`
}
type FeatureFlag map[string]*bool
@@ -147,124 +154,125 @@ func (ff FeatureFlag) Enabled(s string) bool {
return false
}
// @Description GRPC configuration
type GRPC struct {
Attempts int `yaml:"attempts" json:"attempts"`
AttemptsSleepTime int `yaml:"attempts_sleep_time" json:"attempts_sleep_time"`
Attempts int `yaml:"attempts,omitempty" json:"attempts,omitempty"`
AttemptsSleepTime int `yaml:"attempts_sleep_time,omitempty" json:"attempts_sleep_time,omitempty"`
}
// @Description Diffusers configuration
type Diffusers struct {
CUDA bool `yaml:"cuda" json:"cuda"`
PipelineType string `yaml:"pipeline_type" json:"pipeline_type"`
SchedulerType string `yaml:"scheduler_type" json:"scheduler_type"`
EnableParameters string `yaml:"enable_parameters" json:"enable_parameters"` // A list of comma separated parameters to specify
IMG2IMG bool `yaml:"img2img" json:"img2img"` // Image to Image Diffuser
ClipSkip int `yaml:"clip_skip" json:"clip_skip"` // Skip every N frames
ClipModel string `yaml:"clip_model" json:"clip_model"` // Clip model to use
ClipSubFolder string `yaml:"clip_subfolder" json:"clip_subfolder"` // Subfolder to use for clip model
ControlNet string `yaml:"control_net" json:"control_net"`
CUDA bool `yaml:"cuda,omitempty" json:"cuda,omitempty"`
PipelineType string `yaml:"pipeline_type,omitempty" json:"pipeline_type,omitempty"`
SchedulerType string `yaml:"scheduler_type,omitempty" json:"scheduler_type,omitempty"`
EnableParameters string `yaml:"enable_parameters,omitempty" json:"enable_parameters,omitempty"` // A list of comma separated parameters to specify
IMG2IMG bool `yaml:"img2img,omitempty" json:"img2img,omitempty"` // Image to Image Diffuser
ClipSkip int `yaml:"clip_skip,omitempty" json:"clip_skip,omitempty"` // Skip every N frames
ClipModel string `yaml:"clip_model,omitempty" json:"clip_model,omitempty"` // Clip model to use
ClipSubFolder string `yaml:"clip_subfolder,omitempty" json:"clip_subfolder,omitempty"` // Subfolder to use for clip model
ControlNet string `yaml:"control_net,omitempty" json:"control_net,omitempty"`
}
// LLMConfig is a struct that holds the configuration that are
// generic for most of the LLM backends.
// @Description LLMConfig is a struct that holds the configuration that are generic for most of the LLM backends.
type LLMConfig struct {
SystemPrompt string `yaml:"system_prompt" json:"system_prompt"`
TensorSplit string `yaml:"tensor_split" json:"tensor_split"`
MainGPU string `yaml:"main_gpu" json:"main_gpu"`
RMSNormEps float32 `yaml:"rms_norm_eps" json:"rms_norm_eps"`
NGQA int32 `yaml:"ngqa" json:"ngqa"`
PromptCachePath string `yaml:"prompt_cache_path" json:"prompt_cache_path"`
PromptCacheAll bool `yaml:"prompt_cache_all" json:"prompt_cache_all"`
PromptCacheRO bool `yaml:"prompt_cache_ro" json:"prompt_cache_ro"`
MirostatETA *float64 `yaml:"mirostat_eta" json:"mirostat_eta"`
MirostatTAU *float64 `yaml:"mirostat_tau" json:"mirostat_tau"`
Mirostat *int `yaml:"mirostat" json:"mirostat"`
NGPULayers *int `yaml:"gpu_layers" json:"gpu_layers"`
MMap *bool `yaml:"mmap" json:"mmap"`
MMlock *bool `yaml:"mmlock" json:"mmlock"`
LowVRAM *bool `yaml:"low_vram" json:"low_vram"`
Reranking *bool `yaml:"reranking" json:"reranking"`
Grammar string `yaml:"grammar" json:"grammar"`
StopWords []string `yaml:"stopwords" json:"stopwords"`
Cutstrings []string `yaml:"cutstrings" json:"cutstrings"`
ExtractRegex []string `yaml:"extract_regex" json:"extract_regex"`
TrimSpace []string `yaml:"trimspace" json:"trimspace"`
TrimSuffix []string `yaml:"trimsuffix" json:"trimsuffix"`
SystemPrompt string `yaml:"system_prompt,omitempty" json:"system_prompt,omitempty"`
TensorSplit string `yaml:"tensor_split,omitempty" json:"tensor_split,omitempty"`
MainGPU string `yaml:"main_gpu,omitempty" json:"main_gpu,omitempty"`
RMSNormEps float32 `yaml:"rms_norm_eps,omitempty" json:"rms_norm_eps,omitempty"`
NGQA int32 `yaml:"ngqa,omitempty" json:"ngqa,omitempty"`
PromptCachePath string `yaml:"prompt_cache_path,omitempty" json:"prompt_cache_path,omitempty"`
PromptCacheAll bool `yaml:"prompt_cache_all,omitempty" json:"prompt_cache_all,omitempty"`
PromptCacheRO bool `yaml:"prompt_cache_ro,omitempty" json:"prompt_cache_ro,omitempty"`
MirostatETA *float64 `yaml:"mirostat_eta,omitempty" json:"mirostat_eta,omitempty"`
MirostatTAU *float64 `yaml:"mirostat_tau,omitempty" json:"mirostat_tau,omitempty"`
Mirostat *int `yaml:"mirostat,omitempty" json:"mirostat,omitempty"`
NGPULayers *int `yaml:"gpu_layers,omitempty" json:"gpu_layers,omitempty"`
MMap *bool `yaml:"mmap,omitempty" json:"mmap,omitempty"`
MMlock *bool `yaml:"mmlock,omitempty" json:"mmlock,omitempty"`
LowVRAM *bool `yaml:"low_vram,omitempty" json:"low_vram,omitempty"`
Reranking *bool `yaml:"reranking,omitempty" json:"reranking,omitempty"`
Grammar string `yaml:"grammar,omitempty" json:"grammar,omitempty"`
StopWords []string `yaml:"stopwords,omitempty" json:"stopwords,omitempty"`
Cutstrings []string `yaml:"cutstrings,omitempty" json:"cutstrings,omitempty"`
ExtractRegex []string `yaml:"extract_regex,omitempty" json:"extract_regex,omitempty"`
TrimSpace []string `yaml:"trimspace,omitempty" json:"trimspace,omitempty"`
TrimSuffix []string `yaml:"trimsuffix,omitempty" json:"trimsuffix,omitempty"`
ContextSize *int `yaml:"context_size" json:"context_size"`
NUMA bool `yaml:"numa" json:"numa"`
LoraAdapter string `yaml:"lora_adapter" json:"lora_adapter"`
LoraBase string `yaml:"lora_base" json:"lora_base"`
LoraAdapters []string `yaml:"lora_adapters" json:"lora_adapters"`
LoraScales []float32 `yaml:"lora_scales" json:"lora_scales"`
LoraScale float32 `yaml:"lora_scale" json:"lora_scale"`
NoMulMatQ bool `yaml:"no_mulmatq" json:"no_mulmatq"`
DraftModel string `yaml:"draft_model" json:"draft_model"`
NDraft int32 `yaml:"n_draft" json:"n_draft"`
Quantization string `yaml:"quantization" json:"quantization"`
LoadFormat string `yaml:"load_format" json:"load_format"`
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization" json:"gpu_memory_utilization"` // vLLM
TrustRemoteCode bool `yaml:"trust_remote_code" json:"trust_remote_code"` // vLLM
EnforceEager bool `yaml:"enforce_eager" json:"enforce_eager"` // vLLM
SwapSpace int `yaml:"swap_space" json:"swap_space"` // vLLM
MaxModelLen int `yaml:"max_model_len" json:"max_model_len"` // vLLM
TensorParallelSize int `yaml:"tensor_parallel_size" json:"tensor_parallel_size"` // vLLM
DisableLogStatus bool `yaml:"disable_log_stats" json:"disable_log_stats"` // vLLM
DType string `yaml:"dtype" json:"dtype"` // vLLM
LimitMMPerPrompt LimitMMPerPrompt `yaml:"limit_mm_per_prompt" json:"limit_mm_per_prompt"` // vLLM
MMProj string `yaml:"mmproj" json:"mmproj"`
ContextSize *int `yaml:"context_size,omitempty" json:"context_size,omitempty"`
NUMA bool `yaml:"numa,omitempty" json:"numa,omitempty"`
LoraAdapter string `yaml:"lora_adapter,omitempty" json:"lora_adapter,omitempty"`
LoraBase string `yaml:"lora_base,omitempty" json:"lora_base,omitempty"`
LoraAdapters []string `yaml:"lora_adapters,omitempty" json:"lora_adapters,omitempty"`
LoraScales []float32 `yaml:"lora_scales,omitempty" json:"lora_scales,omitempty"`
LoraScale float32 `yaml:"lora_scale,omitempty" json:"lora_scale,omitempty"`
NoMulMatQ bool `yaml:"no_mulmatq,omitempty" json:"no_mulmatq,omitempty"`
DraftModel string `yaml:"draft_model,omitempty" json:"draft_model,omitempty"`
NDraft int32 `yaml:"n_draft,omitempty" json:"n_draft,omitempty"`
Quantization string `yaml:"quantization,omitempty" json:"quantization,omitempty"`
LoadFormat string `yaml:"load_format,omitempty" json:"load_format,omitempty"`
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization,omitempty" json:"gpu_memory_utilization,omitempty"` // vLLM
TrustRemoteCode bool `yaml:"trust_remote_code,omitempty" json:"trust_remote_code,omitempty"` // vLLM
EnforceEager bool `yaml:"enforce_eager,omitempty" json:"enforce_eager,omitempty"` // vLLM
SwapSpace int `yaml:"swap_space,omitempty" json:"swap_space,omitempty"` // vLLM
MaxModelLen int `yaml:"max_model_len,omitempty" json:"max_model_len,omitempty"` // vLLM
TensorParallelSize int `yaml:"tensor_parallel_size,omitempty" json:"tensor_parallel_size,omitempty"` // vLLM
DisableLogStatus bool `yaml:"disable_log_stats,omitempty" json:"disable_log_stats,omitempty"` // vLLM
DType string `yaml:"dtype,omitempty" json:"dtype,omitempty"` // vLLM
LimitMMPerPrompt LimitMMPerPrompt `yaml:"limit_mm_per_prompt,omitempty" json:"limit_mm_per_prompt,omitempty"` // vLLM
MMProj string `yaml:"mmproj,omitempty" json:"mmproj,omitempty"`
FlashAttention *string `yaml:"flash_attention" json:"flash_attention"`
NoKVOffloading bool `yaml:"no_kv_offloading" json:"no_kv_offloading"`
CacheTypeK string `yaml:"cache_type_k" json:"cache_type_k"`
CacheTypeV string `yaml:"cache_type_v" json:"cache_type_v"`
FlashAttention *string `yaml:"flash_attention,omitempty" json:"flash_attention,omitempty"`
NoKVOffloading bool `yaml:"no_kv_offloading,omitempty" json:"no_kv_offloading,omitempty"`
CacheTypeK string `yaml:"cache_type_k,omitempty" json:"cache_type_k,omitempty"`
CacheTypeV string `yaml:"cache_type_v,omitempty" json:"cache_type_v,omitempty"`
RopeScaling string `yaml:"rope_scaling" json:"rope_scaling"`
ModelType string `yaml:"type" json:"type"`
RopeScaling string `yaml:"rope_scaling,omitempty" json:"rope_scaling,omitempty"`
ModelType string `yaml:"type,omitempty" json:"type,omitempty"`
YarnExtFactor float32 `yaml:"yarn_ext_factor" json:"yarn_ext_factor"`
YarnAttnFactor float32 `yaml:"yarn_attn_factor" json:"yarn_attn_factor"`
YarnBetaFast float32 `yaml:"yarn_beta_fast" json:"yarn_beta_fast"`
YarnBetaSlow float32 `yaml:"yarn_beta_slow" json:"yarn_beta_slow"`
YarnExtFactor float32 `yaml:"yarn_ext_factor,omitempty" json:"yarn_ext_factor,omitempty"`
YarnAttnFactor float32 `yaml:"yarn_attn_factor,omitempty" json:"yarn_attn_factor,omitempty"`
YarnBetaFast float32 `yaml:"yarn_beta_fast,omitempty" json:"yarn_beta_fast,omitempty"`
YarnBetaSlow float32 `yaml:"yarn_beta_slow,omitempty" json:"yarn_beta_slow,omitempty"`
CFGScale float32 `yaml:"cfg_scale" json:"cfg_scale"` // Classifier-Free Guidance Scale
CFGScale float32 `yaml:"cfg_scale,omitempty" json:"cfg_scale,omitempty"` // Classifier-Free Guidance Scale
}
// LimitMMPerPrompt is a struct that holds the configuration for the limit-mm-per-prompt config in vLLM
// @Description LimitMMPerPrompt is a struct that holds the configuration for the limit-mm-per-prompt config in vLLM
type LimitMMPerPrompt struct {
LimitImagePerPrompt int `yaml:"image" json:"image"`
LimitVideoPerPrompt int `yaml:"video" json:"video"`
LimitAudioPerPrompt int `yaml:"audio" json:"audio"`
LimitImagePerPrompt int `yaml:"image,omitempty" json:"image,omitempty"`
LimitVideoPerPrompt int `yaml:"video,omitempty" json:"video,omitempty"`
LimitAudioPerPrompt int `yaml:"audio,omitempty" json:"audio,omitempty"`
}
// TemplateConfig is a struct that holds the configuration of the templating system
// @Description TemplateConfig is a struct that holds the configuration of the templating system
type TemplateConfig struct {
// Chat is the template used in the chat completion endpoint
Chat string `yaml:"chat" json:"chat"`
Chat string `yaml:"chat,omitempty" json:"chat,omitempty"`
// ChatMessage is the template used for chat messages
ChatMessage string `yaml:"chat_message" json:"chat_message"`
ChatMessage string `yaml:"chat_message,omitempty" json:"chat_message,omitempty"`
// Completion is the template used for completion requests
Completion string `yaml:"completion" json:"completion"`
Completion string `yaml:"completion,omitempty" json:"completion,omitempty"`
// Edit is the template used for edit completion requests
Edit string `yaml:"edit" json:"edit"`
Edit string `yaml:"edit,omitempty" json:"edit,omitempty"`
// Functions is the template used when tools are present in the client requests
Functions string `yaml:"function" json:"function"`
Functions string `yaml:"function,omitempty" json:"function,omitempty"`
// UseTokenizerTemplate is a flag that indicates if the tokenizer template should be used.
// Note: this is mostly consumed for backends such as vllm and transformers
// that can use the tokenizers specified in the JSON config files of the models
UseTokenizerTemplate bool `yaml:"use_tokenizer_template" json:"use_tokenizer_template"`
UseTokenizerTemplate bool `yaml:"use_tokenizer_template,omitempty" json:"use_tokenizer_template,omitempty"`
// JoinChatMessagesByCharacter is a string that will be used to join chat messages together.
// It defaults to \n
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character" json:"join_chat_messages_by_character"`
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character,omitempty" json:"join_chat_messages_by_character,omitempty"`
Multimodal string `yaml:"multimodal" json:"multimodal"`
Multimodal string `yaml:"multimodal,omitempty" json:"multimodal,omitempty"`
ReplyPrefix string `yaml:"reply_prefix" json:"reply_prefix"`
ReplyPrefix string `yaml:"reply_prefix,omitempty" json:"reply_prefix,omitempty"`
}
func (c *ModelConfig) syncKnownUsecasesFromString() {

View File

@@ -0,0 +1,65 @@
package importers

import (
	"encoding/json"
	"errors"
	"strings"

	"github.com/mudler/LocalAI/core/gallery"
	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
	"github.com/rs/zerolog/log"
)
// DefaultImporters is the ordered list of importers probed by
// DiscoverModelConfig; the first one whose Match accepts the model wins.
var DefaultImporters = []Importer{
	&LlamaCPPImporter{},
	&MLXImporter{},
}
// Details bundles everything an Importer may inspect to decide whether it
// supports a model and to build its configuration.
type Details struct {
	// HuggingFace holds repository metadata when the URI resolved to a
	// HuggingFace repository; nil when the lookup failed or did not apply.
	HuggingFace *hfapi.ModelDetails
	// URI is the original, unmodified model URI supplied by the user.
	URI string
	// Preferences is a raw JSON object of user-supplied hints
	// (e.g. "name", "description", "backend", "quantizations").
	Preferences json.RawMessage
}
// Importer turns a model URI (plus optional HuggingFace metadata and user
// preferences) into an installable gallery model configuration.
type Importer interface {
	// Match reports whether this importer can handle the given details.
	Match(details Details) bool
	// Import builds the gallery model configuration for the given details.
	Import(details Details) (gallery.ModelConfig, error)
}
func DiscoverModelConfig(uri string, preferences json.RawMessage) (gallery.ModelConfig, error) {
var err error
var modelConfig gallery.ModelConfig
hf := hfapi.NewClient()
hfrepoID := strings.ReplaceAll(uri, "huggingface://", "")
hfrepoID = strings.ReplaceAll(hfrepoID, "hf://", "")
hfrepoID = strings.ReplaceAll(hfrepoID, "https://huggingface.co/", "")
hfDetails, err := hf.GetModelDetails(hfrepoID)
if err != nil {
// maybe not a HF repository
// TODO: maybe we can check if the URI is a valid HF repository
log.Debug().Str("uri", uri).Msg("Failed to get model details, maybe not a HF repository")
} else {
log.Debug().Str("uri", uri).Msg("Got model details")
log.Debug().Any("details", hfDetails).Msg("Model details")
}
details := Details{
HuggingFace: hfDetails,
URI: uri,
Preferences: preferences,
}
for _, importer := range DefaultImporters {
if importer.Match(details) {
modelConfig, err = importer.Import(details)
if err != nil {
continue
}
break
}
}
return modelConfig, err
}

View File

@@ -0,0 +1,13 @@
package importers_test
import (
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// TestImporters wires the Ginkgo importer specs into the standard
// `go test` runner.
func TestImporters(t *testing.T) {
	RegisterFailHandler(Fail)
	RunSpecs(t, "Importers test suite")
}

View File

@@ -0,0 +1,173 @@
package importers_test
import (
"encoding/json"
"fmt"
"github.com/mudler/LocalAI/core/gallery/importers"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// End-to-end specs for importers.DiscoverModelConfig.
//
// NOTE(review): several of these specs perform live network lookups
// (huggingface.co metadata, plus example.com URIs that are expected to
// fail resolution), so they need outbound connectivity and may be slow
// or flaky when run offline.
var _ = Describe("DiscoverModelConfig", func() {
	Context("With only a repository URI", func() {
		// A real GGUF repository: the importer should pick the single
		// quantized file and pin its URL and SHA256.
		It("should discover and import using LlamaCPPImporter", func() {
			uri := "https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"
			preferences := json.RawMessage(`{}`)
			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
			Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
			Expect(modelConfig.Name).To(Equal("LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
			Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"), fmt.Sprintf("Model config: %+v", modelConfig))
			Expect(len(modelConfig.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", modelConfig))
			Expect(modelConfig.Files[0].Filename).To(Equal("localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
			Expect(modelConfig.Files[0].URI).To(Equal("https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/resolve/main/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
			Expect(modelConfig.Files[0].SHA256).To(Equal("4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4"), fmt.Sprintf("Model config: %+v", modelConfig))
		})
	})
	Context("with .gguf URI", func() {
		// A direct .gguf link matches LlamaCPPImporter by file extension alone.
		It("should discover and import using LlamaCPPImporter", func() {
			uri := "https://example.com/my-model.gguf"
			preferences := json.RawMessage(`{}`)
			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
			Expect(err).ToNot(HaveOccurred())
			Expect(modelConfig.Name).To(Equal("my-model.gguf"))
			Expect(modelConfig.Description).To(Equal("Imported from https://example.com/my-model.gguf"))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
		})
		It("should use custom preferences when provided", func() {
			uri := "https://example.com/my-model.gguf"
			preferences := json.RawMessage(`{"name": "custom-name", "description": "Custom description"}`)
			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
			Expect(err).ToNot(HaveOccurred())
			Expect(modelConfig.Name).To(Equal("custom-name"))
			Expect(modelConfig.Description).To(Equal("Custom description"))
		})
	})
	Context("with mlx-community URI", func() {
		It("should discover and import using MLXImporter", func() {
			uri := "https://huggingface.co/mlx-community/test-model"
			preferences := json.RawMessage(`{}`)
			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
			Expect(err).ToNot(HaveOccurred())
			Expect(modelConfig.Name).To(Equal("test-model"))
			Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/mlx-community/test-model"))
			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx"))
		})
		It("should use custom preferences when provided", func() {
			uri := "https://huggingface.co/mlx-community/test-model"
			preferences := json.RawMessage(`{"name": "custom-mlx", "description": "Custom MLX description"}`)
			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
			Expect(err).ToNot(HaveOccurred())
			Expect(modelConfig.Name).To(Equal("custom-mlx"))
			Expect(modelConfig.Description).To(Equal("Custom MLX description"))
		})
	})
	Context("with backend preference", func() {
		// An explicit "backend" preference forces the matching importer even
		// when nothing else about the URI identifies the model format.
		It("should use llama-cpp backend when specified", func() {
			uri := "https://example.com/model"
			preferences := json.RawMessage(`{"backend": "llama-cpp"}`)
			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
			Expect(err).ToNot(HaveOccurred())
			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
		})
		It("should use mlx backend when specified", func() {
			uri := "https://example.com/model"
			preferences := json.RawMessage(`{"backend": "mlx"}`)
			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
			Expect(err).ToNot(HaveOccurred())
			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx"))
		})
		It("should use mlx-vlm backend when specified", func() {
			uri := "https://example.com/model"
			preferences := json.RawMessage(`{"backend": "mlx-vlm"}`)
			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
			Expect(err).ToNot(HaveOccurred())
			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx-vlm"))
		})
	})
	Context("with HuggingFace URI formats", func() {
		// All three spellings must normalize to the same repository ID.
		It("should handle huggingface:// prefix", func() {
			uri := "huggingface://mlx-community/test-model"
			preferences := json.RawMessage(`{}`)
			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
			Expect(err).ToNot(HaveOccurred())
			Expect(modelConfig.Name).To(Equal("test-model"))
		})
		It("should handle hf:// prefix", func() {
			uri := "hf://mlx-community/test-model"
			preferences := json.RawMessage(`{}`)
			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
			Expect(err).ToNot(HaveOccurred())
			Expect(modelConfig.Name).To(Equal("test-model"))
		})
		It("should handle https://huggingface.co/ prefix", func() {
			uri := "https://huggingface.co/mlx-community/test-model"
			preferences := json.RawMessage(`{}`)
			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
			Expect(err).ToNot(HaveOccurred())
			Expect(modelConfig.Name).To(Equal("test-model"))
		})
	})
	Context("with invalid or non-matching URI", func() {
		It("should return error when no importer matches", func() {
			uri := "https://example.com/unknown-model.bin"
			preferences := json.RawMessage(`{}`)
			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
			// When no importer matches, the function returns empty config and error
			// The exact behavior depends on implementation, but typically an error is returned
			Expect(modelConfig.Name).To(BeEmpty())
			Expect(err).To(HaveOccurred())
		})
	})
	Context("with invalid JSON preferences", func() {
		It("should return error when JSON is invalid even if URI matches", func() {
			uri := "https://example.com/model.gguf"
			preferences := json.RawMessage(`invalid json`)
			// Even though Match() returns true for .gguf extension,
			// Import() will fail when trying to unmarshal invalid JSON preferences
			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
			Expect(err).To(HaveOccurred())
			Expect(modelConfig.Name).To(BeEmpty())
		})
	})
})

View File

@@ -0,0 +1,185 @@
package importers
import (
"encoding/json"
"path/filepath"
"slices"
"strings"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions"
"go.yaml.in/yaml/v2"
)
// Compile-time check that LlamaCPPImporter satisfies the Importer interface.
var _ Importer = &LlamaCPPImporter{}

// LlamaCPPImporter imports GGUF-based models that run on the llama-cpp
// backend, either from a direct .gguf URL or from a HuggingFace repository
// containing .gguf files.
type LlamaCPPImporter struct{}
// Match reports whether details describe a model this importer can handle:
// an explicit "backend": "llama-cpp" preference, a URI ending in ".gguf",
// or a HuggingFace repository containing at least one .gguf file.
// Malformed preference JSON makes Match return false.
func (i *LlamaCPPImporter) Match(details Details) bool {
	// A nil RawMessage marshals to "null", which unmarshals into an empty
	// map, so absent preferences do not prevent matching.
	raw, err := details.Preferences.MarshalJSON()
	if err != nil {
		return false
	}

	prefs := map[string]any{}
	if err := json.Unmarshal(raw, &prefs); err != nil {
		// Malformed preference JSON: refuse to match rather than guess.
		return false
	}

	switch {
	case prefs["backend"] == "llama-cpp":
		// Explicit backend selection always wins.
		return true
	case strings.HasSuffix(details.URI, ".gguf"):
		// Direct link to a GGUF file.
		return true
	}

	// Fall back to inspecting the HuggingFace repository listing, if any,
	// for GGUF artifacts.
	if hf := details.HuggingFace; hf != nil {
		for _, f := range hf.Files {
			if strings.HasSuffix(f.Path, ".gguf") {
				return true
			}
		}
	}
	return false
}
// Import builds a gallery model configuration for a llama-cpp (GGUF) model.
//
// Preferences is parsed as a JSON object; recognized keys:
//   - "name", "description": override the defaults derived from the URI.
//   - "quantizations": comma-separated list overriding the default quant
//     preference list {q4_k_m, q4_0, q8_0, f16}.
//   - "mmproj_quantizations": same, for multimodal projector files
//     (default: fp16).
//
// A URI containing ".gguf" is treated as a direct single-file download.
// Otherwise, when HuggingFace metadata is available, repository files
// matching the preferred quants are selected, plus at most one mmproj file.
// The returned gallery config embeds the YAML-serialized backend model
// configuration in ConfigFile.
func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) {
	// RawMessage.MarshalJSON turns a nil Preferences into "null", which
	// unmarshals into an empty map, so absent preferences are accepted;
	// malformed JSON is a hard error.
	preferences, err := details.Preferences.MarshalJSON()
	if err != nil {
		return gallery.ModelConfig{}, err
	}
	preferencesMap := make(map[string]any)
	err = json.Unmarshal(preferences, &preferencesMap)
	if err != nil {
		return gallery.ModelConfig{}, err
	}
	// Name and description default to the URI when not overridden.
	name, ok := preferencesMap["name"].(string)
	if !ok {
		name = filepath.Base(details.URI)
	}
	description, ok := preferencesMap["description"].(string)
	if !ok {
		description = "Imported from " + details.URI
	}
	// Preferred quantizations for the main weights.
	preferedQuantizations, _ := preferencesMap["quantizations"].(string)
	quants := []string{"q4_k_m", "q4_0", "q8_0", "f16"}
	if preferedQuantizations != "" {
		quants = strings.Split(preferedQuantizations, ",")
	}
	// Preferred quantizations for the multimodal projector (mmproj) file.
	mmprojQuants, _ := preferencesMap["mmproj_quantizations"].(string)
	mmprojQuantsList := []string{"fp16"}
	if mmprojQuants != "" {
		mmprojQuantsList = strings.Split(mmprojQuants, ",")
	}
	// Backend-side model configuration; serialized to YAML below and
	// embedded in the returned gallery config.
	modelConfig := config.ModelConfig{
		Name:                name,
		Description:         description,
		KnownUsecaseStrings: []string{"chat"},
		Backend:             "llama-cpp",
		TemplateConfig: config.TemplateConfig{
			UseTokenizerTemplate: true,
		},
		FunctionsConfig: functions.FunctionsConfig{
			GrammarConfig: functions.GrammarConfig{
				NoGrammar: true,
			},
		},
	}
	cfg := gallery.ModelConfig{
		Name:        name,
		Description: description,
	}
	if strings.Contains(details.URI, ".gguf") {
		// Direct .gguf URI: single file download, no SHA256 available.
		cfg.Files = append(cfg.Files, gallery.File{
			URI:      details.URI,
			Filename: filepath.Base(details.URI),
		})
		modelConfig.PredictionOptions = schema.PredictionOptions{
			BasicModelRequest: schema.BasicModelRequest{
				Model: filepath.Base(details.URI),
			},
		}
	} else if details.HuggingFace != nil {
		lastMMProjFile := gallery.File{}
		foundPreferedQuant := false
		for _, file := range details.HuggingFace.Files {
			// Select repository files whose path mentions one of the
			// preferred quants (case-insensitive substring match).
			if slices.ContainsFunc(quants, func(quant string) bool {
				return strings.Contains(strings.ToLower(file.Path), strings.ToLower(quant))
			}) {
				cfg.Files = append(cfg.Files, gallery.File{
					URI:      file.URL,
					Filename: filepath.Base(file.Path),
					SHA256:   file.SHA256,
				})
			}
			// Track mmproj files; add the ones matching the preferred
			// mmproj quants, and remember the last one seen as a fallback.
			if strings.Contains(strings.ToLower(file.Path), "mmproj") {
				lastMMProjFile = gallery.File{
					URI:      file.URL,
					Filename: filepath.Base(file.Path),
					SHA256:   file.SHA256,
				}
				if slices.ContainsFunc(mmprojQuantsList, func(quant string) bool {
					return strings.Contains(strings.ToLower(file.Path), strings.ToLower(quant))
				}) {
					foundPreferedQuant = true
					cfg.Files = append(cfg.Files, lastMMProjFile)
				}
			}
		}
		if !foundPreferedQuant && lastMMProjFile.URI != "" {
			// No mmproj matched the preferred quants: fall back to the
			// last mmproj file encountered in the repository.
			cfg.Files = append(cfg.Files, lastMMProjFile)
			// NOTE(review): assigning the mmproj filename to Model here
			// looks suspicious — the "first non-mmproj file" loop below
			// overwrites it whenever a regular weights file was selected,
			// so this only sticks when ONLY mmproj files matched. Confirm
			// whether that is intended.
			modelConfig.PredictionOptions = schema.PredictionOptions{
				BasicModelRequest: schema.BasicModelRequest{
					Model: lastMMProjFile.Filename,
				},
			}
		}
		// Wire the first mmproj file (if any) into the backend config.
		for _, file := range cfg.Files {
			if !strings.Contains(strings.ToLower(file.Filename), "mmproj") {
				continue
			}
			modelConfig.MMProj = file.Filename
			break
		}
		// The first non-mmproj file becomes the model to load.
		for _, file := range cfg.Files {
			if strings.Contains(strings.ToLower(file.Filename), "mmproj") {
				continue
			}
			modelConfig.PredictionOptions = schema.PredictionOptions{
				BasicModelRequest: schema.BasicModelRequest{
					Model: file.Filename,
				},
			}
			break
		}
	}
	// Embed the YAML-serialized backend configuration in the gallery entry.
	data, err := yaml.Marshal(modelConfig)
	if err != nil {
		return gallery.ModelConfig{}, err
	}
	cfg.ConfigFile = string(data)
	return cfg, nil
}

View File

@@ -0,0 +1,131 @@
package importers_test
import (
"encoding/json"
"fmt"
"github.com/mudler/LocalAI/core/gallery/importers"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// LlamaCPPImporter spec: verifies the URI/preference matching rules and the
// gallery model config produced when importing GGUF models.
var _ = Describe("LlamaCPPImporter", func() {
	var imp *importers.LlamaCPPImporter

	BeforeEach(func() {
		imp = &importers.LlamaCPPImporter{}
	})

	Context("Match", func() {
		It("should match when URI ends with .gguf", func() {
			matched := imp.Match(importers.Details{
				URI: "https://example.com/model.gguf",
			})
			Expect(matched).To(BeTrue())
		})
		It("should match when backend preference is llama-cpp", func() {
			matched := imp.Match(importers.Details{
				URI:         "https://example.com/model",
				Preferences: json.RawMessage(`{"backend": "llama-cpp"}`),
			})
			Expect(matched).To(BeTrue())
		})
		It("should not match when URI does not end with .gguf and no backend preference", func() {
			matched := imp.Match(importers.Details{
				URI: "https://example.com/model.bin",
			})
			Expect(matched).To(BeFalse())
		})
		It("should not match when backend preference is different", func() {
			matched := imp.Match(importers.Details{
				URI:         "https://example.com/model",
				Preferences: json.RawMessage(`{"backend": "mlx"}`),
			})
			Expect(matched).To(BeFalse())
		})
		It("should return false when JSON preferences are invalid", func() {
			// Invalid JSON causes Match to return false early.
			matched := imp.Match(importers.Details{
				URI:         "https://example.com/model.gguf",
				Preferences: json.RawMessage(`invalid json`),
			})
			Expect(matched).To(BeFalse())
		})
	})

	Context("Import", func() {
		It("should import model config with default name and description", func() {
			mc, err := imp.Import(importers.Details{
				URI: "https://example.com/my-model.gguf",
			})
			Expect(err).ToNot(HaveOccurred())
			Expect(mc.Name).To(Equal("my-model.gguf"))
			Expect(mc.Description).To(Equal("Imported from https://example.com/my-model.gguf"))
			Expect(mc.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
			Expect(len(mc.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", mc))
			Expect(mc.Files[0].URI).To(Equal("https://example.com/my-model.gguf"), fmt.Sprintf("Model config: %+v", mc))
			Expect(mc.Files[0].Filename).To(Equal("my-model.gguf"), fmt.Sprintf("Model config: %+v", mc))
		})
		It("should import model config with custom name and description from preferences", func() {
			mc, err := imp.Import(importers.Details{
				URI:         "https://example.com/my-model.gguf",
				Preferences: json.RawMessage(`{"name": "custom-model", "description": "Custom description"}`),
			})
			Expect(err).ToNot(HaveOccurred())
			Expect(mc.Name).To(Equal("custom-model"))
			Expect(mc.Description).To(Equal("Custom description"))
			Expect(len(mc.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", mc))
			Expect(mc.Files[0].URI).To(Equal("https://example.com/my-model.gguf"), fmt.Sprintf("Model config: %+v", mc))
			Expect(mc.Files[0].Filename).To(Equal("my-model.gguf"), fmt.Sprintf("Model config: %+v", mc))
		})
		It("should handle invalid JSON preferences", func() {
			_, err := imp.Import(importers.Details{
				URI:         "https://example.com/my-model.gguf",
				Preferences: json.RawMessage(`invalid json`),
			})
			Expect(err).To(HaveOccurred())
		})
		It("should extract filename correctly from URI with path", func() {
			mc, err := imp.Import(importers.Details{
				URI: "https://example.com/path/to/model.gguf",
			})
			Expect(err).ToNot(HaveOccurred())
			Expect(len(mc.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", mc))
			Expect(mc.Files[0].URI).To(Equal("https://example.com/path/to/model.gguf"), fmt.Sprintf("Model config: %+v", mc))
			Expect(mc.Files[0].Filename).To(Equal("model.gguf"), fmt.Sprintf("Model config: %+v", mc))
		})
	})
})

View File

@@ -0,0 +1,94 @@
package importers
import (
"encoding/json"
"path/filepath"
"strings"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/schema"
"go.yaml.in/yaml/v2"
)
// Compile-time check that MLXImporter satisfies the Importer interface.
var _ Importer = &MLXImporter{}

// MLXImporter handles models targeting the MLX family of backends
// (mlx / mlx-vlm).
type MLXImporter struct{}

// Match reports whether this importer should handle the given model details.
// It matches when the preferences explicitly request the "mlx" or "mlx-vlm"
// backend, or when the URI points at a Hugging Face "mlx-community"
// repository. Malformed preference JSON never matches.
func (i *MLXImporter) Match(details Details) bool {
	preferences, err := details.Preferences.MarshalJSON()
	if err != nil {
		return false
	}
	preferencesMap := make(map[string]any)
	if err := json.Unmarshal(preferences, &preferencesMap); err != nil {
		return false
	}
	// Explicit backend preference wins. Note the parentheses: the original
	// condition `ok && b == "mlx" || b == "mlx-vlm"` relied on Go's
	// &&-over-|| precedence and only behaved correctly because b is ""
	// when the type assertion fails — make the intent explicit.
	if b, ok := preferencesMap["backend"].(string); ok && (b == "mlx" || b == "mlx-vlm") {
		return true
	}
	// All https://huggingface.co/mlx-community/* repositories are MLX models.
	return strings.Contains(details.URI, "mlx-community/")
}
// Import builds a gallery.ModelConfig for an MLX model. The JSON preferences
// may override the model name, description and backend; otherwise the name
// falls back to the last path segment of the URI, the description to a
// generic "Imported from <URI>" string, and the backend to "mlx". The
// rendered YAML config enables the tokenizer chat template and uses the URI
// itself as the model reference.
func (i *MLXImporter) Import(details Details) (gallery.ModelConfig, error) {
	raw, err := details.Preferences.MarshalJSON()
	if err != nil {
		return gallery.ModelConfig{}, err
	}
	prefs := map[string]any{}
	if err := json.Unmarshal(raw, &prefs); err != nil {
		return gallery.ModelConfig{}, err
	}

	// Defaults first, then apply any string overrides from the preferences.
	name := filepath.Base(details.URI)
	if v, ok := prefs["name"].(string); ok {
		name = v
	}
	description := "Imported from " + details.URI
	if v, ok := prefs["description"].(string); ok {
		description = v
	}
	backend := "mlx"
	if v, ok := prefs["backend"].(string); ok {
		backend = v
	}

	cfg := config.ModelConfig{
		Name:                name,
		Description:         description,
		KnownUsecaseStrings: []string{"chat"},
		Backend:             backend,
		PredictionOptions: schema.PredictionOptions{
			BasicModelRequest: schema.BasicModelRequest{
				Model: details.URI,
			},
		},
		TemplateConfig: config.TemplateConfig{
			UseTokenizerTemplate: true,
		},
	}
	data, err := yaml.Marshal(cfg)
	if err != nil {
		return gallery.ModelConfig{}, err
	}
	return gallery.ModelConfig{
		Name:        name,
		Description: description,
		ConfigFile:  string(data),
	}, nil
}

View File

@@ -0,0 +1,147 @@
package importers_test
import (
"encoding/json"
"github.com/mudler/LocalAI/core/gallery/importers"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// MLXImporter spec: covers the backend/URI matching rules and the YAML
// config emitted when importing MLX models.
var _ = Describe("MLXImporter", func() {
	var imp *importers.MLXImporter

	BeforeEach(func() {
		imp = &importers.MLXImporter{}
	})

	Context("Match", func() {
		It("should match when URI contains mlx-community/", func() {
			matched := imp.Match(importers.Details{
				URI: "https://huggingface.co/mlx-community/test-model",
			})
			Expect(matched).To(BeTrue())
		})
		It("should match when backend preference is mlx", func() {
			matched := imp.Match(importers.Details{
				URI:         "https://example.com/model",
				Preferences: json.RawMessage(`{"backend": "mlx"}`),
			})
			Expect(matched).To(BeTrue())
		})
		It("should match when backend preference is mlx-vlm", func() {
			matched := imp.Match(importers.Details{
				URI:         "https://example.com/model",
				Preferences: json.RawMessage(`{"backend": "mlx-vlm"}`),
			})
			Expect(matched).To(BeTrue())
		})
		It("should not match when URI does not contain mlx-community/ and no backend preference", func() {
			matched := imp.Match(importers.Details{
				URI: "https://huggingface.co/other-org/test-model",
			})
			Expect(matched).To(BeFalse())
		})
		It("should not match when backend preference is different", func() {
			matched := imp.Match(importers.Details{
				URI:         "https://example.com/model",
				Preferences: json.RawMessage(`{"backend": "llama-cpp"}`),
			})
			Expect(matched).To(BeFalse())
		})
		It("should return false when JSON preferences are invalid", func() {
			// Invalid JSON causes Match to return false early.
			matched := imp.Match(importers.Details{
				URI:         "https://huggingface.co/mlx-community/test-model",
				Preferences: json.RawMessage(`invalid json`),
			})
			Expect(matched).To(BeFalse())
		})
	})

	Context("Import", func() {
		It("should import model config with default name and description", func() {
			mc, err := imp.Import(importers.Details{
				URI: "https://huggingface.co/mlx-community/test-model",
			})
			Expect(err).ToNot(HaveOccurred())
			Expect(mc.Name).To(Equal("test-model"))
			Expect(mc.Description).To(Equal("Imported from https://huggingface.co/mlx-community/test-model"))
			Expect(mc.ConfigFile).To(ContainSubstring("backend: mlx"))
			Expect(mc.ConfigFile).To(ContainSubstring("model: https://huggingface.co/mlx-community/test-model"))
		})
		It("should import model config with custom name and description from preferences", func() {
			mc, err := imp.Import(importers.Details{
				URI:         "https://huggingface.co/mlx-community/test-model",
				Preferences: json.RawMessage(`{"name": "custom-mlx-model", "description": "Custom MLX description"}`),
			})
			Expect(err).ToNot(HaveOccurred())
			Expect(mc.Name).To(Equal("custom-mlx-model"))
			Expect(mc.Description).To(Equal("Custom MLX description"))
		})
		It("should use custom backend from preferences", func() {
			mc, err := imp.Import(importers.Details{
				URI:         "https://huggingface.co/mlx-community/test-model",
				Preferences: json.RawMessage(`{"backend": "mlx-vlm"}`),
			})
			Expect(err).ToNot(HaveOccurred())
			Expect(mc.ConfigFile).To(ContainSubstring("backend: mlx-vlm"))
		})
		It("should handle invalid JSON preferences", func() {
			_, err := imp.Import(importers.Details{
				URI:         "https://huggingface.co/mlx-community/test-model",
				Preferences: json.RawMessage(`invalid json`),
			})
			Expect(err).To(HaveOccurred())
		})
		It("should extract filename correctly from URI with path", func() {
			mc, err := imp.Import(importers.Details{
				URI: "https://huggingface.co/mlx-community/path/to/model",
			})
			Expect(err).ToNot(HaveOccurred())
			Expect(mc.Name).To(Equal("model"))
		})
	})
})

View File

@@ -200,11 +200,16 @@ func API(application *application.Application) (*fiber.App, error) {
requestExtractor := middleware.NewRequestExtractor(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
routes.RegisterElevenLabsRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
routes.RegisterLocalAIRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService())
// Create opcache for tracking UI operations (used by both UI and LocalAI routes)
var opcache *services.OpCache
if !application.ApplicationConfig().DisableWebUI {
opcache = services.NewOpCache(application.GalleryService())
}
routes.RegisterLocalAIRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache)
routes.RegisterOpenAIRoutes(router, requestExtractor, application)
if !application.ApplicationConfig().DisableWebUI {
// Create opcache for tracking UI operations
opcache := services.NewOpCache(application.GalleryService())
routes.RegisterUIAPIRoutes(router, application.ModelConfigLoader(), application.ApplicationConfig(), application.GalleryService(), opcache)
routes.RegisterUIRoutes(router, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService())
}

View File

@@ -76,7 +76,7 @@ func (mgs *BackendEndpointService) ApplyBackendEndpoint() func(c *fiber.Ctx) err
if err != nil {
return err
}
mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend]{
mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend, any]{
ID: uuid.String(),
GalleryElementName: input.ID,
Galleries: mgs.galleries,
@@ -95,7 +95,7 @@ func (mgs *BackendEndpointService) DeleteBackendEndpoint() func(c *fiber.Ctx) er
return func(c *fiber.Ctx) error {
backendName := c.Params("name")
mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend]{
mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend, any]{
Delete: true,
GalleryElementName: backendName,
Galleries: mgs.galleries,

View File

@@ -77,7 +77,7 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
if err != nil {
return err
}
mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{
mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
Req: input.GalleryModel,
ID: uuid.String(),
GalleryElementName: input.ID,
@@ -98,7 +98,7 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
return func(c *fiber.Ctx) error {
modelName := c.Params("name")
mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{
mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
Delete: true,
GalleryElementName: modelName,
}

View File

@@ -2,16 +2,72 @@ package localai
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/gallery/importers"
httpUtils "github.com/mudler/LocalAI/core/http/utils"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/utils"
"gopkg.in/yaml.v3"
)
// ImportModelURIEndpoint handles creating new model configurations from a URI.
// It discovers a model configuration for the requested URI (honouring any
// user-supplied preferences), queues an install operation on the gallery
// service, and responds with the job ID plus a status URL the client can
// poll for progress. When opcache is non-nil the operation is also
// registered there so the web UI can track it.
func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache) fiber.Handler {
	return func(c *fiber.Ctx) error {
		input := new(schema.ImportModelRequest)
		if err := c.BodyParser(input); err != nil {
			return err
		}
		modelConfig, err := importers.DiscoverModelConfig(input.URI, input.Preferences)
		if err != nil {
			return fmt.Errorf("failed to discover model config: %w", err)
		}
		// Named jobID (not "uuid") so the local does not shadow the uuid package.
		jobID, err := uuid.NewUUID()
		if err != nil {
			return err
		}
		// Determine gallery ID for tracking - use model name if available, otherwise use URI.
		galleryID := input.URI
		if modelConfig.Name != "" {
			galleryID = modelConfig.Name
		}
		// Register operation in opcache if available (for UI progress tracking).
		if opcache != nil {
			opcache.Set(galleryID, jobID.String())
		}
		// Hand the discovered config to the gallery service for asynchronous install.
		galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
			Req: gallery.GalleryModel{
				Overrides: map[string]interface{}{},
			},
			ID:                 jobID.String(),
			GalleryElementName: galleryID,
			GalleryElement:     &modelConfig,
			BackendGalleries:   appConfig.BackendGalleries,
		}
		return c.JSON(schema.GalleryResponse{
			ID:        jobID.String(),
			StatusURL: fmt.Sprintf("%smodels/jobs/%s", httpUtils.BaseURL(c), jobID.String()),
		})
	}
}
// ImportModelEndpoint handles creating new model configurations
func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) fiber.Handler {
return func(c *fiber.Ctx) error {

View File

@@ -18,7 +18,8 @@ func RegisterLocalAIRoutes(router *fiber.App,
cl *config.ModelConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
galleryService *services.GalleryService) {
galleryService *services.GalleryService,
opcache *services.OpCache) {
router.Get("/swagger/*", swagger.HandlerDefault) // default
@@ -57,6 +58,9 @@ func RegisterLocalAIRoutes(router *fiber.App,
// Custom model import endpoint
router.Post("/models/import", localai.ImportModelEndpoint(cl, appConfig))
// URI model import endpoint
router.Post("/models/import-uri", localai.ImportModelURIEndpoint(cl, appConfig, galleryService, opcache))
// Custom model edit endpoint
router.Post("/models/edit/:name", localai.EditModelEndpoint(cl, appConfig))

View File

@@ -248,7 +248,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
uid := id.String()
opcache.Set(galleryID, uid)
op := services.GalleryOp[gallery.GalleryModel]{
op := services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
ID: uid,
GalleryElementName: galleryID,
Galleries: appConfig.Galleries,
@@ -291,7 +291,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
opcache.Set(galleryID, uid)
op := services.GalleryOp[gallery.GalleryModel]{
op := services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
ID: uid,
Delete: true,
GalleryElementName: galleryName,
@@ -526,7 +526,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
uid := id.String()
opcache.Set(backendID, uid)
op := services.GalleryOp[gallery.GalleryBackend]{
op := services.GalleryOp[gallery.GalleryBackend, any]{
ID: uid,
GalleryElementName: backendID,
Galleries: appConfig.BackendGalleries,
@@ -568,7 +568,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
opcache.Set(backendID, uid)
op := services.GalleryOp[gallery.GalleryBackend]{
op := services.GalleryOp[gallery.GalleryBackend, any]{
ID: uid,
Delete: true,
GalleryElementName: backendName,

View File

@@ -3,9 +3,10 @@
{{template "views/partials/head" .}}
<body class="bg-[#101827] text-[#E5E7EB]">
<div class="flex flex-col min-h-screen">
<div class="flex flex-col min-h-screen" x-data="importModel()" x-init="init()">
{{template "views/partials/navbar" .}}
{{template "views/partials/inprogress" .}}
<div class="container mx-auto px-4 py-8 flex-grow">
<!-- Hero Header -->
@@ -24,19 +25,44 @@
{{if .ModelName}}Edit Model: {{.ModelName}}{{else}}Import New Model{{end}}
</span>
</h1>
<p class="text-lg text-gray-300 font-light">Configure your model settings using YAML</p>
<p class="text-lg text-gray-300 font-light" x-text="isAdvancedMode ? 'Configure your model settings using YAML' : 'Import a model from URI with preferences'"></p>
</div>
<div class="flex gap-3">
<button id="validateBtn" class="group relative inline-flex items-center bg-gradient-to-r from-blue-600 to-blue-700 hover:from-blue-700 hover:to-blue-800 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-blue-500/25">
<i class="fas fa-check mr-2 group-hover:animate-pulse"></i>
<span>Validate</span>
<div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
</button>
<button id="saveBtn" class="group relative inline-flex items-center bg-gradient-to-r from-green-600 to-emerald-600 hover:from-green-700 hover:to-emerald-700 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-green-500/25">
<i class="fas fa-save mr-2 group-hover:animate-pulse"></i>
<span>{{if .ModelName}}Update{{else}}Create{{end}}</span>
<div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
</button>
<!-- Mode Toggle (only show when not in edit mode) -->
<template x-if="!isEditMode">
<button @click="toggleMode()"
class="group relative inline-flex items-center bg-gradient-to-r from-gray-600 to-gray-700 hover:from-gray-700 hover:to-gray-800 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl">
<i class="fas group-hover:animate-pulse" :class="isAdvancedMode ? 'fa-magic mr-2' : 'fa-code mr-2'"></i>
<span x-text="isAdvancedMode ? 'Simple Mode' : 'Advanced Mode'"></span>
<div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
</button>
</template>
<!-- Advanced Mode Buttons -->
<template x-if="isAdvancedMode">
<div class="flex gap-3">
<button id="validateBtn" class="group relative inline-flex items-center bg-gradient-to-r from-blue-600 to-blue-700 hover:from-blue-700 hover:to-blue-800 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-blue-500/25">
<i class="fas fa-check mr-2 group-hover:animate-pulse"></i>
<span>Validate</span>
<div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
</button>
<button id="saveBtn" class="group relative inline-flex items-center bg-gradient-to-r from-green-600 to-emerald-600 hover:from-green-700 hover:to-emerald-700 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-green-500/25">
<i class="fas fa-save mr-2 group-hover:animate-pulse"></i>
<span>{{if .ModelName}}Update{{else}}Create{{end}}</span>
<div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
</button>
</div>
</template>
<!-- Simple Mode Button -->
<template x-if="!isAdvancedMode && !isEditMode">
<button @click="submitImport()"
:disabled="isSubmitting || !importUri.trim()"
:class="(isSubmitting || !importUri.trim()) ? 'opacity-50 cursor-not-allowed' : ''"
class="group relative inline-flex items-center bg-gradient-to-r from-green-600 to-emerald-600 hover:from-green-700 hover:to-emerald-700 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-green-500/25">
<i class="fas group-hover:animate-pulse" :class="isSubmitting ? 'fa-spinner fa-spin mr-2' : 'fa-upload mr-2'"></i>
<span x-text="isSubmitting ? 'Importing...' : 'Import Model'"></span>
<div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
</button>
</template>
</div>
</div>
</div>
@@ -45,8 +71,187 @@
<!-- Alert Messages -->
<div id="alertContainer" class="mb-6"></div>
<!-- YAML Editor Panel -->
<div class="relative bg-gradient-to-br from-gray-800/90 to-gray-900/90 border border-gray-700/50 rounded-2xl overflow-hidden shadow-xl backdrop-blur-sm h-[calc(100vh-250px)]">
<!-- Simple Import Mode -->
<div x-show="!isAdvancedMode && !isEditMode"
x-transition:enter="transition ease-out duration-300"
x-transition:enter-start="opacity-0 transform translate-y-4"
x-transition:enter-end="opacity-100 transform translate-y-0"
class="relative bg-gradient-to-br from-gray-800/90 to-gray-900/90 border border-gray-700/50 rounded-2xl overflow-hidden shadow-xl backdrop-blur-sm p-8">
<div class="absolute inset-0 rounded-2xl bg-gradient-to-br from-green-500/5 to-emerald-500/5"></div>
<div class="relative space-y-6">
<h2 class="text-2xl font-semibold text-white flex items-center gap-3 mb-6">
<div class="w-10 h-10 rounded-lg bg-green-500/20 flex items-center justify-center">
<i class="fas fa-link text-green-400"></i>
</div>
Import from URI
</h2>
<!-- URI Input -->
<div>
<label class="block text-sm font-medium text-gray-300 mb-2">
<i class="fas fa-link mr-2"></i>Model URI
</label>
<input
x-model="importUri"
type="text"
placeholder="https://example.com/model.gguf or file:///path/to/model.gguf"
class="w-full px-4 py-3 bg-gray-900/90 border border-gray-700/70 rounded-xl text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<p class="mt-2 text-xs text-gray-400">
Enter the URI or path to the model file you want to import
</p>
</div>
<!-- Preferences Section -->
<div>
<div class="flex items-center justify-between mb-4">
<label class="block text-sm font-medium text-gray-300">
<i class="fas fa-cog mr-2"></i>Preferences (Optional)
</label>
</div>
<!-- Common Preferences -->
<div class="space-y-4 mb-6 p-4 bg-gray-900/50 rounded-xl border border-gray-700/50">
<h3 class="text-sm font-semibold text-gray-300 mb-3 flex items-center">
<i class="fas fa-star mr-2 text-yellow-400"></i>Common Preferences
</h3>
<!-- Backend Selection -->
<div>
<label class="block text-sm font-medium text-gray-300 mb-2">
<i class="fas fa-server mr-2"></i>Backend
</label>
<select
x-model="commonPreferences.backend"
class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<option value="">Auto-detect (based on URI)</option>
<option value="llama-cpp">llama-cpp</option>
<option value="mlx">mlx</option>
<option value="mlx-vlm">mlx-vlm</option>
</select>
<p class="mt-1 text-xs text-gray-400">
Force a specific backend. Leave empty to auto-detect from URI.
</p>
</div>
<!-- Model Name -->
<div>
<label class="block text-sm font-medium text-gray-300 mb-2">
<i class="fas fa-tag mr-2"></i>Model Name
</label>
<input
x-model="commonPreferences.name"
type="text"
placeholder="Leave empty to use filename"
class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<p class="mt-1 text-xs text-gray-400">
Custom name for the model. If empty, the filename will be used.
</p>
</div>
<!-- Description -->
<div>
<label class="block text-sm font-medium text-gray-300 mb-2">
<i class="fas fa-align-left mr-2"></i>Description
</label>
<textarea
x-model="commonPreferences.description"
rows="3"
placeholder="Leave empty to use default description"
class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all resize-none"
:disabled="isSubmitting"></textarea>
<p class="mt-1 text-xs text-gray-400">
Custom description for the model. If empty, a default description will be generated.
</p>
</div>
<!-- Quantizations -->
<div>
<label class="block text-sm font-medium text-gray-300 mb-2">
<i class="fas fa-layer-group mr-2"></i>Quantizations
</label>
<input
x-model="commonPreferences.quantizations"
type="text"
placeholder="q4_k_m,q4_k_s,q3_k_m (comma-separated)"
class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<p class="mt-1 text-xs text-gray-400">
Preferred quantizations (comma-separated). Examples: q4_k_m, q4_k_s, q3_k_m, q2_k. Leave empty to use default (q4_k_m).
</p>
</div>
<!-- MMProj Quantizations -->
<div>
<label class="block text-sm font-medium text-gray-300 mb-2">
<i class="fas fa-image mr-2"></i>MMProj Quantizations
</label>
<input
x-model="commonPreferences.mmproj_quantizations"
type="text"
placeholder="fp16,fp32 (comma-separated)"
class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<p class="mt-1 text-xs text-gray-400">
Preferred MMProj quantizations (comma-separated). Examples: fp16, fp32. Leave empty to use default (fp16).
</p>
</div>
</div>
<!-- Custom Preferences -->
<div class="space-y-3">
<div class="flex items-center justify-between mb-3">
<label class="block text-sm font-medium text-gray-300">
<i class="fas fa-sliders-h mr-2"></i>Custom Preferences
</label>
<button @click="addPreference()"
:disabled="isSubmitting"
class="text-sm px-3 py-1.5 rounded-lg bg-green-600/20 hover:bg-green-600/30 text-green-300 border border-green-500/30 transition-all">
<i class="fas fa-plus mr-1"></i>Add Custom
</button>
</div>
<div class="space-y-3" x-show="preferences.length > 0">
<template x-for="(pref, index) in preferences" :key="index">
<div class="flex gap-3 items-center">
<input
x-model="pref.key"
type="text"
placeholder="Key"
class="flex-1 px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<span class="text-gray-400">:</span>
<input
x-model="pref.value"
type="text"
placeholder="Value"
class="flex-1 px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<button @click="removePreference(index)"
:disabled="isSubmitting"
class="px-3 py-2 rounded-lg bg-red-600/20 hover:bg-red-600/30 text-red-300 border border-red-500/30 transition-all">
<i class="fas fa-trash"></i>
</button>
</div>
</template>
</div>
<p class="mt-2 text-xs text-gray-400">
Add custom key-value pairs for advanced configuration
</p>
</div>
</div>
</div>
</div>
<!-- Advanced YAML Editor Panel -->
<div x-show="isAdvancedMode || isEditMode"
x-transition:enter="transition ease-out duration-300"
x-transition:enter-start="opacity-0 transform translate-y-4"
x-transition:enter-end="opacity-100 transform translate-y-0"
class="relative bg-gradient-to-br from-gray-800/90 to-gray-900/90 border border-gray-700/50 rounded-2xl overflow-hidden shadow-xl backdrop-blur-sm h-[calc(100vh-250px)]">
<div class="absolute inset-0 rounded-2xl bg-gradient-to-br from-fuchsia-500/5 to-purple-500/5"></div>
<div class="relative sticky top-0 bg-gray-800/95 border-b border-gray-700/50 p-6 flex items-center justify-between z-10 backdrop-blur-sm">
@@ -144,22 +349,22 @@
}
/* Enhanced YAML Syntax Highlighting */
.cm-keyword { color: #8b5cf6 !important; font-weight: 600 !important; } /* Purple for YAML keys */
.cm-string { color: #10b981 !important; } /* Emerald for strings */
.cm-number { color: #f59e0b !important; } /* Amber for numbers */
.cm-comment { color: #6b7280 !important; font-style: italic !important; } /* Gray for comments */
.cm-property { color: #ec4899 !important; } /* Pink for properties */
.cm-operator { color: #ef4444 !important; } /* Red for operators */
.cm-variable { color: #06b6d4 !important; } /* Cyan for variables */
.cm-tag { color: #8b5cf6 !important; font-weight: 600 !important; } /* Purple for tags */
.cm-attribute { color: #f59e0b !important; } /* Amber for attributes */
.cm-def { color: #ec4899 !important; font-weight: 600 !important; } /* Pink for definitions */
.cm-bracket { color: #d1d5db !important; } /* Light gray for brackets */
.cm-punctuation { color: #d1d5db !important; } /* Light gray for punctuation */
.cm-quote { color: #10b981 !important; } /* Emerald for quotes */
.cm-meta { color: #6b7280 !important; } /* Gray for meta */
.cm-builtin { color: #f472b6 !important; } /* Pink for builtins */
.cm-atom { color: #f59e0b !important; } /* Amber for atoms like true/false/null */
.cm-keyword { color: #8b5cf6 !important; font-weight: 600 !important; }
.cm-string { color: #10b981 !important; }
.cm-number { color: #f59e0b !important; }
.cm-comment { color: #6b7280 !important; font-style: italic !important; }
.cm-property { color: #ec4899 !important; }
.cm-operator { color: #ef4444 !important; }
.cm-variable { color: #06b6d4 !important; }
.cm-tag { color: #8b5cf6 !important; font-weight: 600 !important; }
.cm-attribute { color: #f59e0b !important; }
.cm-def { color: #ec4899 !important; font-weight: 600 !important; }
.cm-bracket { color: #d1d5db !important; }
.cm-punctuation { color: #d1d5db !important; }
.cm-quote { color: #10b981 !important; }
.cm-meta { color: #6b7280 !important; }
.cm-builtin { color: #f472b6 !important; }
.cm-atom { color: #f59e0b !important; }
/* Enhanced scrollbar styling */
.CodeMirror-scrollbar-filler, .CodeMirror-gutter-filler {
@@ -242,22 +447,221 @@
</style>
<script>
class ModelEditor {
constructor() {
this.modelName = '{{.ModelName}}';
this.isEditMode = !!this.modelName;
this.yamlEditor = null;
function importModel() {
return {
isAdvancedMode: false,
isEditMode: {{if .ModelName}}true{{else}}false{{end}},
importUri: '',
preferences: [],
commonPreferences: {
backend: '',
name: '',
description: '',
quantizations: '',
mmproj_quantizations: ''
},
isSubmitting: false,
currentJobId: null,
jobPollInterval: null,
yamlEditor: null,
modelEditor: null,
this.init();
}
init() {
this.initializeCodeMirror();
this.bindEvents();
}
getDefaultConfig() {
return `# Model Configuration
init() {
// If in edit mode, always show advanced mode
if (this.isEditMode) {
this.isAdvancedMode = true;
}
// Initialize YAML editor if in advanced mode
if (this.isAdvancedMode || this.isEditMode) {
this.$nextTick(() => {
this.initializeCodeMirror();
this.bindAdvancedEvents();
});
}
},
toggleMode() {
this.isAdvancedMode = !this.isAdvancedMode;
if (this.isAdvancedMode) {
this.$nextTick(() => {
this.initializeCodeMirror();
this.bindAdvancedEvents();
});
}
},
addPreference() {
this.preferences.push({ key: '', value: '' });
},
removePreference(index) {
this.preferences.splice(index, 1);
},
async submitImport() {
if (!this.importUri.trim()) {
this.showAlert('error', 'Please enter a model URI');
return;
}
this.isSubmitting = true;
try {
// Build preferences object starting with common preferences
const prefsObj = {};
// Add common preferences (only non-empty values)
if (this.commonPreferences.backend && this.commonPreferences.backend.trim()) {
prefsObj.backend = this.commonPreferences.backend.trim();
}
if (this.commonPreferences.name && this.commonPreferences.name.trim()) {
prefsObj.name = this.commonPreferences.name.trim();
}
if (this.commonPreferences.description && this.commonPreferences.description.trim()) {
prefsObj.description = this.commonPreferences.description.trim();
}
if (this.commonPreferences.quantizations && this.commonPreferences.quantizations.trim()) {
prefsObj.quantizations = this.commonPreferences.quantizations.trim();
}
if (this.commonPreferences.mmproj_quantizations && this.commonPreferences.mmproj_quantizations.trim()) {
prefsObj.mmproj_quantizations = this.commonPreferences.mmproj_quantizations.trim();
}
// Add custom preferences (can override common ones)
this.preferences.forEach(pref => {
if (pref.key && pref.value) {
prefsObj[pref.key.trim()] = pref.value.trim();
}
});
const requestBody = {
uri: this.importUri.trim(),
preferences: Object.keys(prefsObj).length > 0 ? prefsObj : null
};
const response = await fetch('/models/import-uri', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(requestBody)
});
if (!response.ok) {
const error = await response.json().catch(() => ({ error: 'Failed to start import' }));
throw new Error(error.error || 'Failed to start import');
}
const result = await response.json();
if (result.uuid) {
this.currentJobId = result.uuid;
this.showAlert('success', 'Import started! Tracking progress...');
this.startJobPolling();
} else if (result.ID) {
// Fallback for different response format
this.currentJobId = result.ID;
this.showAlert('success', 'Import started! Tracking progress...');
this.startJobPolling();
} else {
throw new Error('No job ID returned from server');
}
} catch (error) {
this.showAlert('error', 'Failed to start import: ' + error.message);
this.isSubmitting = false;
}
},
// Poll the import job once per second until it reports completion or an
// error, then refresh the page (on success) or surface the error.
startJobPolling() {
// Replace any previous poller so only one interval runs at a time.
if (this.jobPollInterval) {
clearInterval(this.jobPollInterval);
}
this.jobPollInterval = setInterval(async () => {
// Stop polling if the job id was cleared elsewhere.
if (!this.currentJobId) {
clearInterval(this.jobPollInterval);
return;
}
try {
const response = await fetch(`/models/jobs/${this.currentJobId}`);
if (!response.ok) {
// Transient fetch failure: keep the interval alive and retry.
return;
}
const jobData = await response.json();
if (jobData.completed) {
clearInterval(this.jobPollInterval);
this.isSubmitting = false;
this.currentJobId = null;
this.showAlert('success', 'Model imported successfully! Refreshing page...');
// Refresh the page after a short delay
setTimeout(() => {
window.location.reload();
}, 2000);
} else if (jobData.error) {
clearInterval(this.jobPollInterval);
this.isSubmitting = false;
this.currentJobId = null;
this.showAlert('error', 'Import failed: ' + jobData.error);
}
} catch (error) {
// Network hiccup: log and let the next tick retry.
console.error('Error polling job status:', error);
}
}, 1000);
},
// Create the CodeMirror YAML editor inside #yamlCodeMirror.
// Idempotent: repeated calls (e.g. toggling modes) reuse the first editor.
initializeCodeMirror() {
if (this.yamlEditor) {
return; // Already initialized
}
// Server-side Go template: when editing an existing model the rendered
// page embeds its YAML; otherwise fall back to the default skeleton.
const initialValue = {{if .ConfigYAML}}`{{.ConfigYAML}}`{{else}}this.getDefaultConfig(){{end}};
this.yamlEditor = CodeMirror(document.getElementById('yamlCodeMirror'), {
mode: 'yaml',
theme: 'default',
lineNumbers: true,
autoRefresh: true,
indentUnit: 2,
tabSize: 2,
indentWithTabs: false,
lineWrapping: true,
styleActiveLine: true,
matchBrackets: true,
autoCloseBrackets: true,
value: initialValue
});
},
bindAdvancedEvents() {
if (!this.yamlEditor) return;
// Button events
const saveBtn = document.getElementById('saveBtn');
const validateBtn = document.getElementById('validateBtn');
const formatYamlBtn = document.getElementById('formatYamlBtn');
const copyYamlBtn = document.getElementById('copyYamlBtn');
if (saveBtn) {
saveBtn.addEventListener('click', () => this.saveConfig());
}
if (validateBtn) {
validateBtn.addEventListener('click', () => this.validateConfig());
}
if (formatYamlBtn) {
formatYamlBtn.addEventListener('click', () => this.formatYaml());
}
if (copyYamlBtn) {
copyYamlBtn.addEventListener('click', () => this.copyYaml());
}
},
getDefaultConfig() {
return `# Model Configuration
name: my-model
backend: llama-cpp
parameters:
@@ -286,186 +690,150 @@ parameters:
# - chat
# - completion
`;
}
initializeCodeMirror() {
const initialValue = {{if .ConfigYAML}}`{{.ConfigYAML}}`{{else}}this.getDefaultConfig(){{end}};
},
this.yamlEditor = CodeMirror(document.getElementById('yamlCodeMirror'), {
mode: 'yaml',
theme: 'default',
lineNumbers: true,
autoRefresh: true,
indentUnit: 2,
tabSize: 2,
indentWithTabs: false,
lineWrapping: true,
styleActiveLine: true,
matchBrackets: true,
autoCloseBrackets: true,
value: initialValue
});
}
bindEvents() {
// Button events
document.getElementById('saveBtn').addEventListener('click', () => this.saveConfig());
document.getElementById('validateBtn').addEventListener('click', () => this.validateConfig());
document.getElementById('formatYamlBtn').addEventListener('click', () => this.formatYaml());
document.getElementById('copyYamlBtn').addEventListener('click', () => this.copyYaml());
}
validateConfig() {
try {
const yamlContent = this.yamlEditor.getValue();
const config = jsyaml.load(yamlContent);
if (!config || typeof config !== 'object') {
throw new Error('Invalid YAML structure');
}
if (!config.name) {
throw new Error('Model name is required');
}
if (!config.backend) {
throw new Error('Backend is required');
}
if (!config.parameters || !config.parameters.model) {
throw new Error('Model file/path is required in parameters.model');
}
this.showAlert('success', 'Configuration is valid!');
} catch (error) {
this.showAlert('error', 'Validation failed: ' + error.message);
}
}
async saveConfig() {
try {
// Validate before saving
const yamlContent = this.yamlEditor.getValue();
const config = jsyaml.load(yamlContent);
if (!config || typeof config !== 'object') {
throw new Error('Invalid YAML structure');
}
if (!config.name) {
throw new Error('Model name is required');
}
if (!config.backend) {
throw new Error('Backend is required');
}
if (!config.parameters || !config.parameters.model) {
throw new Error('Model file/path is required in parameters.model');
}
const endpoint = this.isEditMode ? `/models/edit/${this.modelName}` : '/models/import';
const response = await fetch(endpoint, {
method: 'POST',
headers: {
'Content-Type': 'application/x-yaml',
},
body: yamlContent
});
const result = await response.json();
if (result.success) {
this.showAlert('success', result.message || (this.isEditMode ? 'Model updated successfully!' : 'Model created successfully!'));
if (!this.isEditMode && config.name) {
setTimeout(() => {
window.location.href = `/models/edit/${config.name}`;
}, 2000);
validateConfig() {
try {
const yamlContent = this.yamlEditor.getValue();
const config = jsyaml.load(yamlContent);
if (!config || typeof config !== 'object') {
throw new Error('Invalid YAML structure');
}
} else {
this.showAlert('error', result.error || 'Failed to save configuration');
if (!config.name) {
throw new Error('Model name is required');
}
if (!config.backend) {
throw new Error('Backend is required');
}
if (!config.parameters || !config.parameters.model) {
throw new Error('Model file/path is required in parameters.model');
}
this.showAlert('success', 'Configuration is valid!');
} catch (error) {
this.showAlert('error', 'Validation failed: ' + error.message);
}
} catch (error) {
this.showAlert('error', 'Failed to save: ' + error.message);
}
}
},
async saveConfig() {
try {
// Validate before saving
const yamlContent = this.yamlEditor.getValue();
const config = jsyaml.load(yamlContent);
if (!config || typeof config !== 'object') {
throw new Error('Invalid YAML structure');
}
if (!config.name) {
throw new Error('Model name is required');
}
if (!config.backend) {
throw new Error('Backend is required');
}
if (!config.parameters || !config.parameters.model) {
throw new Error('Model file/path is required in parameters.model');
}
const endpoint = this.isEditMode ? `/models/edit/{{.ModelName}}` : '/models/import';
const response = await fetch(endpoint, {
method: 'POST',
headers: {
'Content-Type': 'application/x-yaml',
},
body: yamlContent
});
formatYaml() {
try {
const result = await response.json();
if (result.success) {
this.showAlert('success', result.message || (this.isEditMode ? 'Model updated successfully!' : 'Model created successfully!'));
if (!this.isEditMode && config.name) {
setTimeout(() => {
window.location.href = `/models/edit/${config.name}`;
}, 2000);
}
} else {
this.showAlert('error', result.error || 'Failed to save configuration');
}
} catch (error) {
this.showAlert('error', 'Failed to save: ' + error.message);
}
},
formatYaml() {
try {
const yamlContent = this.yamlEditor.getValue();
const parsed = jsyaml.load(yamlContent);
const formatted = jsyaml.dump(parsed, {
indent: 2,
lineWidth: 120,
noRefs: true,
sortKeys: false
});
this.yamlEditor.setValue(formatted);
this.showAlert('success', 'YAML formatted successfully');
} catch (error) {
this.showAlert('error', 'Failed to format YAML: ' + error.message);
}
},
copyYaml() {
const yamlContent = this.yamlEditor.getValue();
const parsed = jsyaml.load(yamlContent);
const formatted = jsyaml.dump(parsed, {
indent: 2,
lineWidth: 120,
noRefs: true,
sortKeys: false
navigator.clipboard.writeText(yamlContent).then(() => {
this.showAlert('success', 'YAML copied to clipboard');
}).catch(err => {
// Fallback for older browsers
const textArea = document.createElement('textarea');
textArea.value = yamlContent;
document.body.appendChild(textArea);
textArea.select();
document.execCommand('copy');
document.body.removeChild(textArea);
this.showAlert('success', 'YAML copied to clipboard');
});
this.yamlEditor.setValue(formatted);
this.showAlert('success', 'YAML formatted successfully');
} catch (error) {
this.showAlert('error', 'Failed to format YAML: ' + error.message);
}
}
// Copy the editor's YAML to the system clipboard.
// Prefers the async Clipboard API; falls back to a hidden textarea with
// document.execCommand('copy') when the API is unavailable or denied
// (execCommand is deprecated but still the standard fallback).
copyYaml() {
const yamlContent = this.yamlEditor.getValue();
navigator.clipboard.writeText(yamlContent).then(() => {
this.showAlert('success', 'YAML copied to clipboard');
}).catch(err => {
// Fallback for older browsers
const textArea = document.createElement('textarea');
textArea.value = yamlContent;
document.body.appendChild(textArea);
textArea.select();
document.execCommand('copy');
document.body.removeChild(textArea);
this.showAlert('success', 'YAML copied to clipboard');
});
}
showAlert(type, message) {
const container = document.getElementById('alertContainer');
const alertClasses = {
success: 'alert alert-success',
error: 'alert alert-error',
warning: 'alert alert-warning',
info: 'alert alert-info'
};
},
const alertIcons = {
success: 'fas fa-check-circle',
error: 'fas fa-exclamation-triangle',
warning: 'fas fa-exclamation-circle',
info: 'fas fa-info-circle'
};
container.innerHTML = `
<div class="${alertClasses[type]}">
<div class="flex items-center">
<i class="${alertIcons[type]} mr-3 text-lg"></i>
<span class="flex-1">${message}</span>
<button onclick="this.parentElement.parentElement.remove()" class="ml-4 text-current hover:opacity-70 transition-opacity">
<i class="fas fa-times"></i>
</button>
showAlert(type, message) {
const container = document.getElementById('alertContainer');
const alertClasses = {
success: 'alert alert-success',
error: 'alert alert-error',
warning: 'alert alert-warning',
info: 'alert alert-info'
};
const alertIcons = {
success: 'fas fa-check-circle',
error: 'fas fa-exclamation-triangle',
warning: 'fas fa-exclamation-circle',
info: 'fas fa-info-circle'
};
container.innerHTML = `
<div class="${alertClasses[type]}">
<div class="flex items-center">
<i class="${alertIcons[type]} mr-3 text-lg"></i>
<span class="flex-1">${message}</span>
<button onclick="this.parentElement.parentElement.remove()" class="ml-4 text-current hover:opacity-70 transition-opacity">
<i class="fas fa-times"></i>
</button>
</div>
</div>
</div>
`;
if (type === 'success' || type === 'info') {
setTimeout(() => {
const alert = container.querySelector('div');
if (alert) alert.remove();
}, 5000);
`;
if (type === 'success' || type === 'info') {
setTimeout(() => {
const alert = container.querySelector('div');
if (alert) alert.remove();
}, 5000);
}
}
}
clearAlert() {
document.getElementById('alertContainer').innerHTML = '';
}
}
// Initialize the editor when the page loads.
// The instance is kept in a page-global so inline handlers and the
// browser console can reach it.
let modelEditor;
document.addEventListener('DOMContentLoaded', () => {
modelEditor = new ModelEditor();
});
</script>
</body>

View File

@@ -120,8 +120,17 @@ function operationsStatus() {
throw new Error('Failed to fetch operations');
}
const data = await response.json();
const previousCount = this.operations.length;
this.operations = data.operations || [];
// If we had operations before and now we don't, refresh the page
if (previousCount > 0 && this.operations.length === 0) {
// Small delay to ensure the user sees the completion
setTimeout(() => {
window.location.reload();
}, 1000);
}
// Auto-collapse if there are many operations
if (this.operations.length > 5 && !this.collapsed) {
// Don't auto-collapse, let user control it

View File

@@ -73,7 +73,7 @@ func syncState(ctx context.Context, n *node.Node, app *application.Application)
continue
}
app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{
app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
ID: uuid.String(),
GalleryElementName: model,
Galleries: app.ApplicationConfig().Galleries,

View File

@@ -1,6 +1,7 @@
package schema
import (
"encoding/json"
"time"
gopsutil "github.com/shirou/gopsutil/v3/process"
@@ -157,3 +158,8 @@ type Detection struct {
Height float32 `json:"height"`
ClassName string `json:"class_name"`
}
type ImportModelRequest struct {
URI string `json:"uri"`
Preferences json.RawMessage `json:"preferences,omitempty"`
}

View File

@@ -1,50 +1,51 @@
package schema
// @Description PredictionOptions contains prediction parameters for model inference
type PredictionOptions struct {
// Also part of the OpenAI official spec
BasicModelRequest `yaml:",inline"`
// Also part of the OpenAI official spec
Language string `json:"language"`
Language string `json:"language,omitempty" yaml:"language,omitempty"`
// Only for audio transcription
Translate bool `json:"translate"`
Translate bool `json:"translate,omitempty" yaml:"translate,omitempty"`
// Also part of the OpenAI official spec. use it for returning multiple results
N int `json:"n"`
N int `json:"n,omitempty" yaml:"n,omitempty"`
// Common options between all the API calls, part of the OpenAI spec
TopP *float64 `json:"top_p" yaml:"top_p"`
TopK *int `json:"top_k" yaml:"top_k"`
Temperature *float64 `json:"temperature" yaml:"temperature"`
Maxtokens *int `json:"max_tokens" yaml:"max_tokens"`
Echo bool `json:"echo"`
TopP *float64 `json:"top_p,omitempty" yaml:"top_p,omitempty"`
TopK *int `json:"top_k,omitempty" yaml:"top_k,omitempty"`
Temperature *float64 `json:"temperature,omitempty" yaml:"temperature,omitempty"`
Maxtokens *int `json:"max_tokens,omitempty" yaml:"max_tokens,omitempty"`
Echo bool `json:"echo,omitempty" yaml:"echo,omitempty"`
// Custom parameters - not present in the OpenAI API
Batch int `json:"batch" yaml:"batch"`
IgnoreEOS bool `json:"ignore_eos" yaml:"ignore_eos"`
RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
Batch int `json:"batch,omitempty" yaml:"batch,omitempty"`
IgnoreEOS bool `json:"ignore_eos,omitempty" yaml:"ignore_eos,omitempty"`
RepeatPenalty float64 `json:"repeat_penalty,omitempty" yaml:"repeat_penalty,omitempty"`
RepeatLastN int `json:"repeat_last_n" yaml:"repeat_last_n"`
RepeatLastN int `json:"repeat_last_n,omitempty" yaml:"repeat_last_n,omitempty"`
Keep int `json:"n_keep" yaml:"n_keep"`
Keep int `json:"n_keep,omitempty" yaml:"n_keep,omitempty"`
FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"`
TFZ *float64 `json:"tfz" yaml:"tfz"`
FrequencyPenalty float64 `json:"frequency_penalty,omitempty" yaml:"frequency_penalty,omitempty"`
PresencePenalty float64 `json:"presence_penalty,omitempty" yaml:"presence_penalty,omitempty"`
TFZ *float64 `json:"tfz,omitempty" yaml:"tfz,omitempty"`
TypicalP *float64 `json:"typical_p" yaml:"typical_p"`
Seed *int `json:"seed" yaml:"seed"`
TypicalP *float64 `json:"typical_p,omitempty" yaml:"typical_p,omitempty"`
Seed *int `json:"seed,omitempty" yaml:"seed,omitempty"`
NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"`
RopeFreqBase float32 `json:"rope_freq_base" yaml:"rope_freq_base"`
RopeFreqScale float32 `json:"rope_freq_scale" yaml:"rope_freq_scale"`
NegativePromptScale float32 `json:"negative_prompt_scale" yaml:"negative_prompt_scale"`
NegativePrompt string `json:"negative_prompt,omitempty" yaml:"negative_prompt,omitempty"`
RopeFreqBase float32 `json:"rope_freq_base,omitempty" yaml:"rope_freq_base,omitempty"`
RopeFreqScale float32 `json:"rope_freq_scale,omitempty" yaml:"rope_freq_scale,omitempty"`
NegativePromptScale float32 `json:"negative_prompt_scale,omitempty" yaml:"negative_prompt_scale,omitempty"`
// Diffusers
ClipSkip int `json:"clip_skip" yaml:"clip_skip"`
ClipSkip int `json:"clip_skip,omitempty" yaml:"clip_skip,omitempty"`
// RWKV (?)
Tokenizer string `json:"tokenizer" yaml:"tokenizer"`
Tokenizer string `json:"tokenizer,omitempty" yaml:"tokenizer,omitempty"`
}

View File

@@ -5,8 +5,9 @@ type LocalAIRequest interface {
ModelName(*string) string
}
// @Description BasicModelRequest contains the basic model request fields
type BasicModelRequest struct {
Model string `json:"model" yaml:"model"`
Model string `json:"model,omitempty" yaml:"model,omitempty"`
// TODO: Should this also include the following fields from the OpenAI side of the world?
// If so, changes should be made to core/http/middleware/request.go to match

View File

@@ -8,7 +8,7 @@ import (
"github.com/rs/zerolog/log"
)
func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend], systemState *system.SystemState) error {
func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend, any], systemState *system.SystemState) error {
utils.ResetDownloadTimers()
g.UpdateStatus(op.ID, &GalleryOpStatus{Message: "processing", Progress: 0})

View File

@@ -14,8 +14,8 @@ import (
type GalleryService struct {
appConfig *config.ApplicationConfig
sync.Mutex
ModelGalleryChannel chan GalleryOp[gallery.GalleryModel]
BackendGalleryChannel chan GalleryOp[gallery.GalleryBackend]
ModelGalleryChannel chan GalleryOp[gallery.GalleryModel, gallery.ModelConfig]
BackendGalleryChannel chan GalleryOp[gallery.GalleryBackend, any]
modelLoader *model.ModelLoader
statuses map[string]*GalleryOpStatus
@@ -24,8 +24,8 @@ type GalleryService struct {
func NewGalleryService(appConfig *config.ApplicationConfig, ml *model.ModelLoader) *GalleryService {
return &GalleryService{
appConfig: appConfig,
ModelGalleryChannel: make(chan GalleryOp[gallery.GalleryModel]),
BackendGalleryChannel: make(chan GalleryOp[gallery.GalleryBackend]),
ModelGalleryChannel: make(chan GalleryOp[gallery.GalleryModel, gallery.ModelConfig]),
BackendGalleryChannel: make(chan GalleryOp[gallery.GalleryBackend, any]),
modelLoader: ml,
statuses: make(map[string]*GalleryOpStatus),
}

View File

@@ -9,10 +9,11 @@ import (
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/system"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v2"
)
func (g *GalleryService) modelHandler(op *GalleryOp[gallery.GalleryModel], cl *config.ModelConfigLoader, systemState *system.SystemState) error {
func (g *GalleryService) modelHandler(op *GalleryOp[gallery.GalleryModel, gallery.ModelConfig], cl *config.ModelConfigLoader, systemState *system.SystemState) error {
utils.ResetDownloadTimers()
g.UpdateStatus(op.ID, &GalleryOpStatus{Message: "processing", Progress: 0})
@@ -118,28 +119,32 @@ func ApplyGalleryFromString(systemState *system.SystemState, modelLoader *model.
// processModelOperation handles the installation or deletion of a model
func processModelOperation(
op *GalleryOp[gallery.GalleryModel],
op *GalleryOp[gallery.GalleryModel, gallery.ModelConfig],
systemState *system.SystemState,
modelLoader *model.ModelLoader,
enforcePredownloadScans bool,
automaticallyInstallBackend bool,
progressCallback func(string, string, string, float64),
) error {
// delete a model
if op.Delete {
switch {
case op.Delete:
return gallery.DeleteModelFromSystem(systemState, op.GalleryElementName)
}
// if the request contains a gallery name, we apply the gallery from the gallery list
if op.GalleryElementName != "" {
case op.GalleryElement != nil:
installedModel, err := gallery.InstallModel(
systemState, op.GalleryElement.Name,
op.GalleryElement,
op.Req.Overrides,
progressCallback, enforcePredownloadScans)
if automaticallyInstallBackend && installedModel.Backend != "" {
log.Debug().Msgf("Installing backend %q", installedModel.Backend)
if err := gallery.InstallBackendFromGallery(op.BackendGalleries, systemState, modelLoader, installedModel.Backend, progressCallback, false); err != nil {
return err
}
}
return err
case op.GalleryElementName != "":
return gallery.InstallModelFromGallery(op.Galleries, op.BackendGalleries, systemState, modelLoader, op.GalleryElementName, op.Req, progressCallback, enforcePredownloadScans, automaticallyInstallBackend)
// } else if op.ConfigURL != "" {
// err := startup.InstallModels(op.Galleries, modelPath, enforcePredownloadScans, progressCallback, op.ConfigURL)
// if err != nil {
// return err
// }
// return cl.Preload(modelPath)
} else {
default:
return installModelFromRemoteConfig(systemState, modelLoader, op.Req, progressCallback, enforcePredownloadScans, automaticallyInstallBackend, op.BackendGalleries)
}
}

View File

@@ -5,12 +5,16 @@ import (
"github.com/mudler/LocalAI/pkg/xsync"
)
type GalleryOp[T any] struct {
type GalleryOp[T any, E any] struct {
ID string
GalleryElementName string
Delete bool
Req T
Req T
// If specified, we install directly the gallery element
GalleryElement *E
Galleries []config.Gallery
BackendGalleries []config.Gallery
}

View File

@@ -1,15 +1,20 @@
package startup
import (
"encoding/json"
"errors"
"fmt"
"os"
"path"
"path/filepath"
"strings"
"time"
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/gallery/importers"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/system"
@@ -25,7 +30,7 @@ const (
// InstallModels will preload models from the given list of URLs and galleries
// It will download the model if it is not already present in the model path
// It will also try to resolve if the model is an embedded model YAML configuration
func InstallModels(galleries, backendGalleries []config.Gallery, systemState *system.SystemState, modelLoader *model.ModelLoader, enforceScan, autoloadBackendGalleries bool, downloadStatus func(string, string, string, float64), models ...string) error {
func InstallModels(galleryService *services.GalleryService, galleries, backendGalleries []config.Gallery, systemState *system.SystemState, modelLoader *model.ModelLoader, enforceScan, autoloadBackendGalleries bool, downloadStatus func(string, string, string, float64), models ...string) error {
// create an error that groups all errors
var err error
@@ -154,7 +159,50 @@ func InstallModels(galleries, backendGalleries []config.Gallery, systemState *sy
err = errors.Join(err, e)
} else if !found {
log.Warn().Msgf("[startup] failed resolving model '%s'", url)
err = errors.Join(err, fmt.Errorf("failed resolving model '%s'", url))
if galleryService == nil {
err = errors.Join(err, fmt.Errorf("cannot start autoimporter, not sure how to handle this uri"))
continue
}
// TODO: we should just use the discoverModelConfig here and default to this.
modelConfig, discoverErr := importers.DiscoverModelConfig(url, json.RawMessage{})
if discoverErr != nil {
err = errors.Join(discoverErr, fmt.Errorf("failed to discover model config: %w", err))
continue
}
uuid, uuidErr := uuid.NewUUID()
if uuidErr != nil {
err = errors.Join(uuidErr, fmt.Errorf("failed to generate UUID: %w", uuidErr))
continue
}
galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
Req: gallery.GalleryModel{
Overrides: map[string]interface{}{},
},
ID: uuid.String(),
GalleryElementName: modelConfig.Name,
GalleryElement: &modelConfig,
BackendGalleries: backendGalleries,
}
var status *services.GalleryOpStatus
// wait for op to finish
for {
status = galleryService.GetStatus(uuid.String())
if status != nil && status.Processed {
break
}
time.Sleep(1 * time.Second)
}
if status.Error != nil {
return status.Error
}
log.Info().Msgf("[startup] imported model '%s' from '%s'", modelConfig.Name, url)
}
}
}

View File

@@ -33,7 +33,7 @@ var _ = Describe("Preload test", func() {
url := "https://raw.githubusercontent.com/mudler/LocalAI-examples/main/configurations/phi-2.yaml"
fileName := fmt.Sprintf("%s.yaml", "phi-2")
InstallModels([]config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
InstallModels(nil, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
resultFile := filepath.Join(tmpdir, fileName)
@@ -46,7 +46,7 @@ var _ = Describe("Preload test", func() {
url := "huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
fileName := fmt.Sprintf("%s.gguf", "tinyllama-1.1b-chat-v0.3.Q2_K")
err := InstallModels([]config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
err := InstallModels(nil, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
Expect(err).ToNot(HaveOccurred())
resultFile := filepath.Join(tmpdir, fileName)

2
go.mod
View File

@@ -59,6 +59,7 @@ require (
go.opentelemetry.io/otel/metric v1.38.0
go.opentelemetry.io/otel/sdk/metric v1.38.0
google.golang.org/grpc v1.76.0
google.golang.org/protobuf v1.36.8
gopkg.in/yaml.v2 v2.4.0
gopkg.in/yaml.v3 v3.0.1
oras.land/oras-go/v2 v2.6.0
@@ -148,7 +149,6 @@ require (
golang.org/x/oauth2 v0.30.0 // indirect
golang.org/x/telemetry v0.0.0-20250908211612-aef8a434d053 // indirect
golang.org/x/time v0.12.0 // indirect
google.golang.org/protobuf v1.36.8 // indirect
)
require (

View File

@@ -13,99 +13,102 @@ import (
"github.com/rs/zerolog/log"
)
// @Description GrammarConfig contains configuration for grammar parsing
type GrammarConfig struct {
// ParallelCalls enables the LLM to return multiple function calls in the same response
ParallelCalls bool `yaml:"parallel_calls"`
ParallelCalls bool `yaml:"parallel_calls,omitempty" json:"parallel_calls,omitempty"`
DisableParallelNewLines bool `yaml:"disable_parallel_new_lines"`
DisableParallelNewLines bool `yaml:"disable_parallel_new_lines,omitempty" json:"disable_parallel_new_lines,omitempty"`
// MixedMode enables the LLM to return strings and not only JSON objects
// This is useful for models to not constraining returning only JSON and also messages back to the user
MixedMode bool `yaml:"mixed_mode"`
MixedMode bool `yaml:"mixed_mode,omitempty" json:"mixed_mode,omitempty"`
// NoMixedFreeString disables the mixed mode for free strings
// In this way if the LLM selects a free string, it won't be mixed necessarily with JSON objects.
// For example, if enabled the LLM or returns a JSON object or a free string, but not a mix of both
// If disabled(default): the LLM can return a JSON object surrounded by free strings (e.g. `this is the JSON result: { "bar": "baz" } for your question`). This forces the LLM to return at least a JSON object, but its not going to be strict
NoMixedFreeString bool `yaml:"no_mixed_free_string"`
NoMixedFreeString bool `yaml:"no_mixed_free_string,omitempty" json:"no_mixed_free_string,omitempty"`
// NoGrammar disables the grammar parsing and parses the responses directly from the LLM
NoGrammar bool `yaml:"disable"`
NoGrammar bool `yaml:"disable,omitempty" json:"disable,omitempty"`
// Prefix is the suffix to append to the grammar when being generated
// This is useful when models prepend a tag before returning JSON
Prefix string `yaml:"prefix"`
Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"`
// ExpectStringsAfterJSON enables mixed string suffix
ExpectStringsAfterJSON bool `yaml:"expect_strings_after_json"`
ExpectStringsAfterJSON bool `yaml:"expect_strings_after_json,omitempty" json:"expect_strings_after_json,omitempty"`
// PropOrder selects what order to print properties
// for instance name,arguments will make print { "name": "foo", "arguments": { "bar": "baz" } }
// instead of { "arguments": { "bar": "baz" }, "name": "foo" }
PropOrder string `yaml:"properties_order"`
PropOrder string `yaml:"properties_order,omitempty" json:"properties_order,omitempty"`
// SchemaType can be configured to use a specific schema type to force the grammar
// available : json, llama3.1
SchemaType string `yaml:"schema_type"`
SchemaType string `yaml:"schema_type,omitempty" json:"schema_type,omitempty"`
GrammarTriggers []GrammarTrigger `yaml:"triggers"`
GrammarTriggers []GrammarTrigger `yaml:"triggers,omitempty" json:"triggers,omitempty"`
}
// @Description GrammarTrigger defines a trigger word for grammar parsing
type GrammarTrigger struct {
// Trigger is the string that triggers the grammar
Word string `yaml:"word"`
Word string `yaml:"word,omitempty" json:"word,omitempty"`
}
// FunctionsConfig is the configuration for the tool/function call.
// @Description FunctionsConfig is the configuration for the tool/function call.
// It includes setting to map the function name and arguments from the response
// and, for instance, also if processing the requests with BNF grammars.
type FunctionsConfig struct {
// DisableNoAction disables the "no action" tool
// By default we inject a tool that does nothing and is used to return an answer from the LLM
DisableNoAction bool `yaml:"disable_no_action"`
DisableNoAction bool `yaml:"disable_no_action,omitempty" json:"disable_no_action,omitempty"`
// Grammar is the configuration for the grammar
GrammarConfig GrammarConfig `yaml:"grammar"`
GrammarConfig GrammarConfig `yaml:"grammar,omitempty" json:"grammar,omitempty"`
// NoActionFunctionName is the name of the function that does nothing. It defaults to "answer"
NoActionFunctionName string `yaml:"no_action_function_name"`
NoActionFunctionName string `yaml:"no_action_function_name,omitempty" json:"no_action_function_name,omitempty"`
// NoActionDescriptionName is the name of the function that returns the description of the no action function
NoActionDescriptionName string `yaml:"no_action_description_name"`
NoActionDescriptionName string `yaml:"no_action_description_name,omitempty" json:"no_action_description_name,omitempty"`
// ResponseRegex is a named regex to extract the function name and arguments from the response
ResponseRegex []string `yaml:"response_regex"`
ResponseRegex []string `yaml:"response_regex,omitempty" json:"response_regex,omitempty"`
// JSONRegexMatch is a regex to extract the JSON object from the response
JSONRegexMatch []string `yaml:"json_regex_match"`
JSONRegexMatch []string `yaml:"json_regex_match,omitempty" json:"json_regex_match,omitempty"`
// ArgumentRegex is a named regex to extract the arguments from the response. Use ArgumentRegexKey and ArgumentRegexValue to set the names of the named regex for key and value of the arguments.
ArgumentRegex []string `yaml:"argument_regex"`
ArgumentRegex []string `yaml:"argument_regex,omitempty" json:"argument_regex,omitempty"`
// ArgumentRegex named regex names for key and value extractions. default: key and value
ArgumentRegexKey string `yaml:"argument_regex_key_name"` // default: key
ArgumentRegexValue string `yaml:"argument_regex_value_name"` // default: value
ArgumentRegexKey string `yaml:"argument_regex_key_name,omitempty" json:"argument_regex_key_name,omitempty"` // default: key
ArgumentRegexValue string `yaml:"argument_regex_value_name,omitempty" json:"argument_regex_value_name,omitempty"` // default: value
// ReplaceFunctionResults allow to replace strings in the results before parsing them
ReplaceFunctionResults []ReplaceResult `yaml:"replace_function_results"`
ReplaceFunctionResults []ReplaceResult `yaml:"replace_function_results,omitempty" json:"replace_function_results,omitempty"`
// ReplaceLLMResult allow to replace strings in the results before parsing them
ReplaceLLMResult []ReplaceResult `yaml:"replace_llm_results"`
ReplaceLLMResult []ReplaceResult `yaml:"replace_llm_results,omitempty" json:"replace_llm_results,omitempty"`
// CaptureLLMResult is a regex to extract a string from the LLM response
// that is used as return string when using tools.
// This is useful for e.g. if the LLM outputs a reasoning and we want to get the reasoning as a string back
CaptureLLMResult []string `yaml:"capture_llm_results"`
CaptureLLMResult []string `yaml:"capture_llm_results,omitempty" json:"capture_llm_results,omitempty"`
// FunctionName enable the LLM to return { "name": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }
// instead of { "function": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }.
// This might be useful for certain models trained with the function name as the first token.
FunctionNameKey string `yaml:"function_name_key"`
FunctionArgumentsKey string `yaml:"function_arguments_key"`
FunctionNameKey string `yaml:"function_name_key,omitempty" json:"function_name_key,omitempty"`
FunctionArgumentsKey string `yaml:"function_arguments_key,omitempty" json:"function_arguments_key,omitempty"`
}
// @Description ReplaceResult defines a key-value replacement for function results
type ReplaceResult struct {
Key string `yaml:"key"`
Value string `yaml:"value"`
Key string `yaml:"key,omitempty" json:"key,omitempty"`
Value string `yaml:"value,omitempty" json:"value,omitempty"`
}
type FuncCallResults struct {

View File

@@ -51,6 +51,7 @@ type ModelFile struct {
Size int64
SHA256 string
IsReadme bool
URL string
}
// ModelDetails represents detailed information about a model
@@ -215,6 +216,7 @@ func (c *Client) GetModelDetails(repoID string) (*ModelDetails, error) {
}
// Process each file
baseURL := strings.TrimSuffix(c.baseURL, "/api/models")
for _, file := range files {
fileName := filepath.Base(file.Path)
isReadme := strings.Contains(strings.ToLower(fileName), "readme")
@@ -227,11 +229,16 @@ func (c *Client) GetModelDetails(repoID string) (*ModelDetails, error) {
sha256 = file.Oid
}
// Construct the full URL for the file
// Use /resolve/main/ for downloading files (handles LFS properly)
fileURL := fmt.Sprintf("%s/%s/resolve/main/%s", baseURL, repoID, file.Path)
modelFile := ModelFile{
Path: file.Path,
Size: file.Size,
SHA256: sha256,
IsReadme: isReadme,
URL: fileURL,
}
details.Files = append(details.Files, modelFile)

View File

@@ -1,6 +1,7 @@
package hfapi_test
import (
"fmt"
"net/http"
"net/http/httptest"
"strings"
@@ -8,7 +9,7 @@ import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
)
var _ = Describe("HuggingFace API Client", func() {
@@ -270,6 +271,15 @@ var _ = Describe("HuggingFace API Client", func() {
})
})
Context("when getting file SHA on remote model", func() {
It("should get file SHA successfully", func() {
sha, err := client.GetFileSHA(
"mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF", "localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf")
Expect(err).ToNot(HaveOccurred())
Expect(sha).To(Equal("4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4"))
})
})
Context("when listing files", func() {
BeforeEach(func() {
mockFilesResponse := `[
@@ -329,23 +339,25 @@ var _ = Describe("HuggingFace API Client", func() {
Context("when getting file SHA", func() {
BeforeEach(func() {
mockFileInfoResponse := `{
"path": "model-Q4_K_M.gguf",
"size": 1000000,
"oid": "abc123",
"lfs": {
"oid": "sha256:def456",
mockFilesResponse := `[
{
"type": "file",
"path": "model-Q4_K_M.gguf",
"size": 1000000,
"pointer": "version https://git-lfs.github.com/spec/v1",
"sha256": "def456789"
"oid": "abc123",
"lfs": {
"oid": "def456789",
"size": 1000000,
"pointerSize": 135
}
}
}`
]`
server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if strings.Contains(r.URL.Path, "/paths-info") {
if strings.Contains(r.URL.Path, "/tree/main") {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
w.Write([]byte(mockFileInfoResponse))
w.Write([]byte(mockFilesResponse))
} else {
w.WriteHeader(http.StatusNotFound)
}
@@ -363,18 +375,29 @@ var _ = Describe("HuggingFace API Client", func() {
It("should handle missing SHA gracefully", func() {
server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
w.Write([]byte(`{"path": "file.txt", "size": 100}`))
if strings.Contains(r.URL.Path, "/tree/main") {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
w.Write([]byte(`[
{
"type": "file",
"path": "file.txt",
"size": 100,
"oid": "file123"
}
]`))
} else {
w.WriteHeader(http.StatusNotFound)
}
}))
client.SetBaseURL(server.URL)
sha, err := client.GetFileSHA("test/model", "file.txt")
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("no SHA256 found"))
Expect(sha).To(Equal(""))
Expect(err).ToNot(HaveOccurred())
// When there's no LFS, it should return the OID
Expect(sha).To(Equal("file123"))
})
})
@@ -439,6 +462,13 @@ var _ = Describe("HuggingFace API Client", func() {
Expect(details.ReadmeFile).ToNot(BeNil())
Expect(details.ReadmeFile.Path).To(Equal("README.md"))
Expect(details.ReadmeFile.IsReadme).To(BeTrue())
// Verify URLs are set for all files
baseURL := strings.TrimSuffix(server.URL, "/api/models")
for _, file := range details.Files {
expectedURL := fmt.Sprintf("%s/test/model/resolve/main/%s", baseURL, file.Path)
Expect(file.URL).To(Equal(expectedURL))
}
})
})

View File

@@ -11,3 +11,5 @@ func TestHfapi(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "HuggingFace API Suite")
}