Compare commits


1093 Commits

Author SHA1 Message Date
LocalAI [bot]
cd2b0c0e7c chore: ⬆️ Update ggml-org/llama.cpp to 72babea5dea56c8a8e8420ccf731b12a5cf37854 (#5743)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-27 23:46:27 +02:00
LocalAI [bot]
73d80c43a8 chore: ⬆️ Update ggml-org/whisper.cpp to c88ffbf9baeaae8c2cc0a4f496618314bb2ee9e0 (#5742)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-27 23:45:57 +02:00
LocalAI [bot]
665562b850 docs: ⬆️ update docs version mudler/LocalAI (#5741)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-27 22:23:43 +02:00
Ettore Di Giacinto
7a78e4f482 fix(backends gallery): meta packages do not have URIs (#5740)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-27 22:23:14 +02:00
Ettore Di Giacinto
6f41a6f934 fix(backends gallery): correctly identify gpu vendor (#5739)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-27 22:22:58 +02:00
Ettore Di Giacinto
bb54f2da2b feat(gallery): automatically install missing backends along models (#5736)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-27 18:25:44 +02:00
Ettore Di Giacinto
e1cc7ee107 fix(ci): enable tag-latest to auto (#5738)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-27 18:17:01 +02:00
Ettore Di Giacinto
cfc9dfa3d5 fix(ci): better handling of latest images for backends (#5735)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-27 10:32:58 +02:00
LocalAI [bot]
6a650e68cb chore: ⬆️ Update ggml-org/whisper.cpp to 32cf4e2aba799aff069011f37ca025401433cf9f (#5733)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-26 22:09:26 +02:00
LocalAI [bot]
5e1373877a chore: ⬆️ Update ggml-org/llama.cpp to 8846aace4934ad29651ea61b8c7e3f6b0556e3d2 (#5734)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-26 22:09:03 +02:00
Ettore Di Giacinto
b5b0ab26e7 fix(ci): remove non-existent input from build matrix
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-26 21:42:27 +02:00
Ettore Di Giacinto
9725bb4bbd chore(model gallery): add gemma-3n-e4b-it (#5731)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-26 19:36:50 +02:00
Ettore Di Giacinto
33b4275bbc chore(model gallery): add gemma-3n-e2b-it (#5730)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-26 19:35:49 +02:00
Ettore Di Giacinto
6644af10c6 feat: ⚠️ reduce images size and stop bundling sources (#5721)
feat: reduce images size and stop bundling sources

Do not copy sources anymore, and reduce the packages in the base images by
no longer using builder images.

If a rebuild is needed, just build the container image from scratch by
following the docs. We will gradually migrate all backends to the
gallery to keep the core small.

This PR is a breaking change: it also sets the base folders to /models
and /backends instead of /build/models and /build/backends.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-26 18:41:38 +02:00
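For anyone upgrading across this breaking change, the practical impact is on volume mounts: paths that previously targeted /build/models and /build/backends now need to target /models and /backends. A minimal sketch, assuming a typical docker run invocation (the image tag is illustrative):

```
# Before this change, models were mounted under /build:
# docker run -p 8080:8080 -v $PWD/models:/build/models localai/localai:latest

# After this change, mount directly at /models and /backends:
docker run -p 8080:8080 \
  -v $PWD/models:/models \
  -v $PWD/backends:/backends \
  localai/localai:latest
```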
Ettore Di Giacinto
7c4a2e9b85 chore(ci): ⚠️ fix latest tag by using docker meta action (#5722)
chore(ci): fix latest tag by using docker meta action

Also unify tagging names

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-26 18:40:25 +02:00
Ettore Di Giacinto
bcccee3909 fix(backends gallery): delete dangling dirs if installation failed (#5729)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-26 17:38:03 +02:00
Ettore Di Giacinto
c6f50ddd0c Revert "chore: ⬆️ Update leejet/stable-diffusion.cpp to 10c6501bd05a697e014f1bee3a84e5664290c489" (#5727)
Revert "chore: ⬆️ Update leejet/stable-diffusion.cpp to `10c6501bd05a…"

This reverts commit 30600dd5cb.
2025-06-26 13:25:25 +02:00
LocalAI [bot]
6613373b1b chore: ⬆️ Update ggml-org/whisper.cpp to 4daf7050ca2bf17f5166f45ac6da651c4e33f293 (#5725)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-26 13:00:20 +02:00
LocalAI [bot]
1659b3f795 chore: ⬆️ Update ggml-org/llama.cpp to 2bf9d539dd158345e3a3b096e16474af535265b4 (#5724)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-26 12:59:57 +02:00
LocalAI [bot]
30600dd5cb chore: ⬆️ Update leejet/stable-diffusion.cpp to 10c6501bd05a697e014f1bee3a84e5664290c489 (#4925)
⬆️ Update leejet/stable-diffusion.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-25 22:45:15 +00:00
Ettore Di Giacinto
179fcf5541 chore(model gallery): add menlo_jan-nano-128k (#5723)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-25 12:14:11 +02:00
LocalAI [bot]
9cb75086bb chore: ⬆️ Update ggml-org/whisper.cpp to 0083335ba0e9d6becbe0958903b0a27fc2ebaeed (#5718)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-25 09:47:33 +02:00
LocalAI [bot]
594bb462ab chore: ⬆️ Update ggml-org/llama.cpp to 73e53dc834c0a2336cd104473af6897197b96277 (#5719)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-24 22:47:48 +00:00
Ettore Di Giacinto
aa730a7b96 chore(model gallery): add delta-vector_austral-24b-winton (#5717)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-24 18:37:28 +02:00
Ettore Di Giacinto
0a454c527a chore(model gallery): add astrosage-70b (#5716)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-24 18:34:37 +02:00
Ettore Di Giacinto
cf86bcb984 chore(model gallery): add skywork_skywork-swe-32b (#5715)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-24 18:29:36 +02:00
Ettore Di Giacinto
a6d9988e84 feat(backend gallery): add meta packages (#5696)
* feat(backend gallery): add meta packages

So we can have meta packages such as "vllm" that automatically install
the corresponding package depending on the GPU currently detected in the
system.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat: use a metadata file

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-24 17:08:27 +02:00
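As an illustration of how a meta package such as "vllm" might be requested, here is a hedged sketch that assumes the backend gallery exposes an install endpoint analogous to the model gallery's /models/apply; the exact path and payload format may differ:

```
# Hypothetical call: endpoint and id format are assumed by analogy with /models/apply
curl http://localhost:8080/backends/apply \
  -H "Content-Type: application/json" \
  -d '{"id": "localai@vllm"}'
# The meta package would then resolve to the concrete vllm build
# matching the GPU vendor detected on the host
```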
Ettore Di Giacinto
f3a114342e chore(model gallery): add mistralai_mistral-small-3.2-24b-instruct-2506 (#5714)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-24 13:59:14 +02:00
LocalAI [bot]
0d275ccc03 chore: ⬆️ Update ggml-org/llama.cpp to ce82bd0117bd3598300b3a089d13d401b90279c7 (#5712)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-24 08:37:32 +02:00
LocalAI [bot]
58dba3f01c chore: ⬆️ Update ggml-org/whisper.cpp to a422176937c5bb20eb58d969995765f90d3c1a9b (#5713)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-23 22:45:31 +00:00
kilavvy
b68d6e8088 Docs: Fix typos (#5709)
* Update GPU-acceleration.md

Signed-off-by: kilavvy <140459108+kilavvy@users.noreply.github.com>

* Update image-generation.md

Signed-off-by: kilavvy <140459108+kilavvy@users.noreply.github.com>

---------

Signed-off-by: kilavvy <140459108+kilavvy@users.noreply.github.com>
2025-06-23 18:15:06 +02:00
LocalAI [bot]
2352cec7e6 chore: ⬆️ Update ggml-org/llama.cpp to 238005c2dc67426cf678baa2d54c881701693288 (#5710)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-22 22:43:41 +00:00
Ettore Di Giacinto
de72ae79b5 chore(model gallery): add ds-r1-qwen3-8b-arliai-rpr-v4-small-iq-imatrix (#5708)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-22 09:05:55 +02:00
Ettore Di Giacinto
884c07d5f9 chore(model gallery): add allura-org_q3-8b-kintsugi (#5707)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-22 09:04:49 +02:00
Ettore Di Giacinto
cca7cbef1e chore(model gallery): add qwen3-the-xiaolong-omega-directive-22b-uncensored-abliterated-i1 (#5706)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-22 09:01:08 +02:00
Ettore Di Giacinto
32cd0d03d4 chore(model gallery): add menlo_jan-nano (#5705)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-22 08:57:33 +02:00
Ettore Di Giacinto
ee4d9e83d0 Update stalebot.yml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-22 08:51:13 +02:00
LocalAI [bot]
5547e08a30 chore: ⬆️ Update ggml-org/llama.cpp to aa0ef5c578eef4c2adc7be1282f21bab5f3e8d26 (#5703)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-21 23:54:53 +02:00
LocalAI [bot]
ca7385c303 chore: ⬆️ Update ggml-org/whisper.cpp to e6c10cf3d5d60dc647eb6cd5e73d3c347149f746 (#5702)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-21 23:54:28 +02:00
Ettore Di Giacinto
28759e79d3 chore(model gallery): add qwen3-the-josiefied-omega-directive-22b-uncensored-abliterated-i1 (#5704)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-21 23:54:05 +02:00
Ettore Di Giacinto
40249b6b84 Update stalebot.yml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-21 22:38:23 +02:00
Ettore Di Giacinto
e09e47bada chore(ci): add stale bot (#5700)
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-21 20:12:08 +02:00
Ettore Di Giacinto
3796558aeb Update quickstart.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-21 20:11:57 +02:00
LocalAI [bot]
cca4f010f8 chore: ⬆️ Update ggml-org/llama.cpp to 06cbedfca1587473df9b537f1dd4d6bfa2e3de13 (#5697)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-20 22:44:39 +00:00
Ettore Di Giacinto
be3ff482d0 chore(ci): try to optimize disk space when tagging latest (#5695)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-20 15:54:14 +02:00
LocalAI [bot]
af255cd0be chore: ⬆️ Update ggml-org/llama.cpp to 8f71d0f3e86ccbba059350058af8758cafed73e6 (#5692)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-20 15:53:55 +02:00
LocalAI [bot]
8000228d1b chore: ⬆️ Update ggml-org/whisper.cpp to 3e65f518ddf840b13b74794158aa95a2c8aa30cc (#5691)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-20 15:53:38 +02:00
Ettore Di Giacinto
79abe0ad77 Drop latest references to extras images 2025-06-20 15:51:16 +02:00
Ettore Di Giacinto
8131d11d1f Update quickstart.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-19 22:42:38 +02:00
LocalAI [bot]
beb01c91f3 docs: ⬆️ update docs version mudler/LocalAI (#5690)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-19 22:13:16 +02:00
Ettore Di Giacinto
1ccd64ff6a chore: drop extras references from docs
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-19 22:04:28 +02:00
Ettore Di Giacinto
fc7681c68c Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-19 21:46:09 +02:00
Ettore Di Giacinto
49d026a229 Update backends.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-19 19:47:09 +02:00
leopardracer
f9b968e19d Fix Typos and Improve Clarity in GPU Acceleration Documentation (#5688)
Update GPU-acceleration.md

Signed-off-by: leopardracer <136604165+leopardracer@users.noreply.github.com>
2025-06-19 15:41:13 +02:00
LocalAI [bot]
022d4a5ecb chore: ⬆️ Update ggml-org/whisper.cpp to ecb8f3c2b4e282d5ef416516bcbfb92821f06bf6 (#5686)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-19 08:23:42 +02:00
LocalAI [bot]
0e917eb01d chore: ⬆️ Update ggml-org/llama.cpp to 8d947136546773f6410756f37fcc5d3e65b8135d (#5685)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-19 08:23:23 +02:00
Ettore Di Giacinto
efde0eaf83 feat(backend gallery): display download progress (#5687)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-18 23:49:44 +02:00
Maxim Evtush
add8fc35a2 Fix Typos in Documentation and Python Comments (#5658)
* Update istftnet.py

Signed-off-by: Maxim Evtush <154841002+maximevtush@users.noreply.github.com>

* Update GPU-acceleration.md

Signed-off-by: Maxim Evtush <154841002+maximevtush@users.noreply.github.com>

---------

Signed-off-by: Maxim Evtush <154841002+maximevtush@users.noreply.github.com>
2025-06-18 22:11:13 +02:00
Ettore Di Giacinto
9bcf4c56f1 fix(backends gallery): propagate p2p settings to correctly draw menu (#5684)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-18 22:06:12 +02:00
Ettore Di Giacinto
3fcfaec7c8 chore(ci): move also other jobs to public runner (#5683)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-18 22:00:12 +02:00
Ettore Di Giacinto
a463d40a3e chore(ci): try to use public runners also for release builds (#5681)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-18 21:51:54 +02:00
Ettore Di Giacinto
1e1f0ee321 chore(backends): move bark-cpp to the backend gallery (#5682)
chore(bark-cpp): move out of the binary

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-18 19:48:50 +02:00
Ettore Di Giacinto
80b3139fa0 Update landing.yaml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-18 19:48:17 +02:00
LocalAI [bot]
5173d37acb chore: ⬆️ Update ggml-org/llama.cpp to 860a9e4eeff3eb2e7bd1cc38f65787cc6c8177af (#5678)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-18 10:01:46 +02:00
LocalAI [bot]
470e48a900 chore: ⬆️ Update ggml-org/whisper.cpp to f3ff80ea8da044e5b8833e7ba54ee174504c518d (#5677)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-18 10:01:08 +02:00
Ettore Di Giacinto
b706dddc93 chore(ci): switch to public runners for base images (#5680)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-17 22:38:50 +02:00
Ettore Di Giacinto
867db3f888 chore(docs): add backend url
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-17 22:35:21 +02:00
Ettore Di Giacinto
b79aa31398 chore: move backends docs
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-17 22:26:40 +02:00
Ettore Di Giacinto
fb9a09d49c chore(backend gallery): add description for remaining backends (#5679)
* chore(backend gallery): add description for remaining backends

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(backend gallery): add linter

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-17 22:21:44 +02:00
Ettore Di Giacinto
0a78f0ad2d chore(backend gallery): re-order and add description for vLLM (#5676)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-17 17:31:53 +02:00
Ettore Di Giacinto
d68660bd5a chore(deps): bump llama.cpp to 'e434e69183fd9e1031f4445002083178c331a28b' (#5665)
chore(deps): bump llama.cpp to 'e434e69183fd9e1031f4445002083178c331a28b'

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-17 17:00:10 +02:00
LocalAI [bot]
30ceee2dec chore: ⬆️ Update ggml-org/whisper.cpp to 2a4d6db7d90899aff3d58d70996916968e4e0d27 (#5661)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-17 09:21:05 +02:00
dependabot[bot]
18c38335fc chore(deps): bump securego/gosec from 2.22.4 to 2.22.5 (#5663)
Bumps [securego/gosec](https://github.com/securego/gosec) from 2.22.4 to 2.22.5.
- [Release notes](https://github.com/securego/gosec/releases)
- [Changelog](https://github.com/securego/gosec/blob/master/.goreleaser.yml)
- [Commits](https://github.com/securego/gosec/compare/v2.22.4...v2.22.5)

---
updated-dependencies:
- dependency-name: securego/gosec
  dependency-version: 2.22.5
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-06-16 23:12:27 +00:00
Ettore Di Giacinto
89040ff6f7 fix: add python symlink, use absolute python env path when running backends (#5664)
* fix: add python symlink, use absolute python env path when running backends

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(ci): do not push images when building PRs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-16 23:00:53 +02:00
Ettore Di Giacinto
de343700fd Don't run python_backend workflow on PR
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-16 11:06:56 +02:00
Ettore Di Giacinto
87d18ad951 chore: Add python3 to images (#5660)
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-16 11:05:44 +02:00
Ettore Di Giacinto
912c8eff04 chore(ci): use public runner for extra backends (#5657)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-16 08:21:18 +02:00
LocalAI [bot]
481f30bde8 chore: ⬆️ Update ggml-org/llama.cpp to 30e5b01de2a0bcddc7c063c8ef0802703a958417 (#5659)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-15 23:03:40 +00:00
Ettore Di Giacinto
236ac30252 chore(ci): do not specify image-type anymore
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-15 17:28:40 +02:00
Ettore Di Giacinto
6f761e62e4 update README
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-15 16:06:43 +02:00
FT
1f29b5f38e Fix Typos and Improve Documentation Clarity (#5648)
* Update p2p.go

Signed-off-by: FT <140458077+zeevick10@users.noreply.github.com>

* Update GPU-acceleration.md

Signed-off-by: FT <140458077+zeevick10@users.noreply.github.com>

---------

Signed-off-by: FT <140458077+zeevick10@users.noreply.github.com>
2025-06-15 16:04:44 +02:00
LocalAI [bot]
33d702c5e0 chore: ⬆️ Update ggml-org/llama.cpp to 3cb203c89f60483e349f841684173446ed23c28f (#5644)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-15 16:03:13 +02:00
Ettore Di Giacinto
95ff236127 ci: do not fire python_backend on PRs
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-15 16:02:30 +02:00
Ettore Di Giacinto
2d64269763 feat: Add backend gallery (#5607)
* feat: Add backend gallery

This PR adds support for managing backends similarly to models. A backend
gallery is now available, which can be used to install and remove
extra backends.
The backend gallery can be configured similarly to a model gallery, and
API calls allow installing and removing backends at runtime, as well as
during the startup phase of LocalAI.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add backends docs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* wip: Backend Dockerfile for python backends

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat: drop extras images, build python backends separately

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixup on all backends

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* test CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Tweaks

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Drop old backends leftovers

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixup CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Move dockerfile upper

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix proto

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Feature dropped for consistency - we prefer model galleries

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add missing packages in the build image

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* exllama is only available on cublas

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* pin torch on chatterbox

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixups to index

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Debug CI

* Install accelerator deps

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add target arch

* Add cuda minor version

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Use self-hosted runners

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* ci: use quay for test images

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixups for vllm and chatterbox

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Small fixups on CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chatterbox is only available for nvidia

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Simplify CI builds

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Adapt test, use qwen3

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(model gallery): add jina-reranker-v1-tiny-en-gguf

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(gguf-parser): recover from potential panics that can happen while reading ggufs with gguf-parser

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Use reranker from llama.cpp in AIO images

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Limit concurrent jobs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-15 14:56:52 +02:00
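A rough sketch of what configuring the backend gallery at startup could look like, assuming it is set up analogously to model galleries; the LOCALAI_BACKEND_GALLERIES variable name is assumed by analogy with the model gallery's GALLERIES setting, and the index URL is a placeholder:

```
# Assumed configuration, mirroring how model galleries are declared
docker run -p 8080:8080 \
  -e LOCALAI_BACKEND_GALLERIES='[{"name":"localai","url":"<backend-gallery-index-url>"}]' \
  localai/localai:latest
```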
LocalAI [bot]
a7a6020328 chore: ⬆️ Update ggml-org/whisper.cpp to 705db0f728310c32bc96f4e355e2b18076932f75 (#5643)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-15 08:39:00 +02:00
Ettore Di Giacinto
40618164b2 chore: improve tests (#5646)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-14 10:07:05 +02:00
fuder.eth
eb8c29f90a Minor Documentation Updates: Clarified Comments in Python and Go Files (#5641)
* Update ui.go

Signed-off-by: fuder.eth <139509124+vtjl10@users.noreply.github.com>

* Update backend.py

Signed-off-by: fuder.eth <139509124+vtjl10@users.noreply.github.com>

---------

Signed-off-by: fuder.eth <139509124+vtjl10@users.noreply.github.com>
2025-06-13 19:55:25 +02:00
Gavin Mogan
63116a2c6a docs: Update docs metadata headers so when mentioned on Slack it doesn't say Hugo (#5642)
Update docs metadata headers so when mentioned on Slack it doesn't say Hugo

Signed-off-by: Gavin Mogan <github@gavinmogan.com>
2025-06-13 19:54:57 +02:00
LocalAI [bot]
311c2cf539 chore: ⬆️ Update ggml-org/llama.cpp to ed52f3668e633423054a4eab61bb7efee47025ab (#5636)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-12 23:33:33 +02:00
Ettore Di Giacinto
a6fcbd991d chore(model gallery): add yanfei-v2-qwen3-32b (#5639)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-12 22:24:13 +02:00
kilavvy
2e1dc8deef Fix Typos in Comments and Error Messages (#5637)
* Update initializers.go

Signed-off-by: kilavvy <140459108+kilavvy@users.noreply.github.com>

* Update base.go

Signed-off-by: kilavvy <140459108+kilavvy@users.noreply.github.com>

---------

Signed-off-by: kilavvy <140459108+kilavvy@users.noreply.github.com>
2025-06-12 18:34:32 +02:00
LocalAI [bot]
282e017b22 chore: ⬆️ Update ggml-org/whisper.cpp to ebbc874e85b518f963a87612f6d79f5c71a55e84 (#5635)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-11 23:47:00 +02:00
Ettore Di Giacinto
f86cb8be2d chore(model gallery): add qwen3-embedding-0.6b (#5634)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-11 11:40:41 +02:00
Ettore Di Giacinto
5c56ec4f87 chore(model gallery): add qwen3-embedding-8b (#5633)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-11 11:38:44 +02:00
Ettore Di Giacinto
dd2845a034 chore(model gallery): add qwen3-embedding-4b (#5632)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-11 11:31:43 +02:00
Ettore Di Giacinto
2e7db014b6 chore(model gallery): add openbuddy_openbuddy-r1-0528-distill-qwen3-32b-preview0-qat (#5631)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-11 11:27:30 +02:00
Ettore Di Giacinto
6faeee1d92 chore(model gallery): add baai_robobrain2.0-7b (#5630)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-11 11:17:32 +02:00
Ettore Di Giacinto
31d73eb934 chore(model gallery): add mistralai_magistral-small-2506 (#5629)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-11 11:11:44 +02:00
Ettore Di Giacinto
60863b9e52 chore(model gallery): add sophosympatheia_strawberrylemonade-l3-70b-v1.0 (#5628)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-11 11:08:17 +02:00
Ettore Di Giacinto
a9fc71e2f3 chore(model gallery): add kwaipilot_kwaicoder-autothink-preview (#5627)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-11 11:06:38 +02:00
leopardracer
ce9a9a30e0 Improve Comments and Documentation for MixedMode and ParseJSON Functions (#5626)
Update parse.go

Signed-off-by: leopardracer <136604165+leopardracer@users.noreply.github.com>
2025-06-11 09:46:53 +02:00
LocalAI [bot]
2693a21da5 chore: ⬆️ Update ggml-org/whisper.cpp to 2679bec6e09231c6fd59715fcba3eebc9e2f6076 (#5625)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-11 09:35:28 +02:00
LocalAI [bot]
d460eab18e chore: ⬆️ Update ggml-org/llama.cpp to 3678b838bb71eaccbaeb479ff38c2e12bfd2f960 (#5620)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-11 09:00:39 +02:00
LocalAI [bot]
c61e5fe266 chore: ⬆️ Update ggml-org/whisper.cpp to d78f08142381c1460604713e2f2ddf3331c7d816 (#5619)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-10 17:29:58 +02:00
Ettore Di Giacinto
88e570b5de fix(deps): pin grpcio (#5621)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-10 14:21:51 +02:00
Ettore Di Giacinto
6efa97ce0b chore(model gallery): add qwen2.5-omni-3b (#5606)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-09 10:54:42 +02:00
LocalAI [bot]
41cde5468a chore: ⬆️ Update ggml-org/llama.cpp to 247e5c6e447707bb4539bdf1913d206088a8fc69 (#5605)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-09 00:11:46 +02:00
Richard Palethorpe
d650647db9 fix(realtime): Use updated model on session update (#5604)
Signed-off-by: Richard Palethorpe <io@richiejp.com>
2025-06-09 00:11:05 +02:00
LocalAI [bot]
5bc7ef37a2 chore: ⬆️ Update ggml-org/llama.cpp to 5787b5da57e54dba760c2deeac1edf892e8fc450 (#5601)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-08 08:44:24 +02:00
Ettore Di Giacinto
e0a52807c8 chore(model gallery): add akhil-theerthala_kuvera-8b-v0.1.0 (#5600)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-07 08:59:20 +02:00
LocalAI [bot]
1a95a19f87 chore: ⬆️ Update ggml-org/llama.cpp to 745aa5319b9930068aff5e87cf5e9eef7227339b (#5598)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-07 08:59:05 +02:00
LocalAI [bot]
bcfc08e5bf chore: ⬆️ Update ggml-org/whisper.cpp to b175baa665bc35f97a2ca774174f07dfffb84e19 (#5597)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-07 08:57:52 +02:00
Ettore Di Giacinto
4d282ca963 chore(model gallery): add nbeerbower_qwen3-gutenberg-encore-14b (#5596)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-06 10:20:48 +02:00
Ettore Di Giacinto
525f49b69d chore(model gallery): add open-thoughts_openthinker3-7b (#5595)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-06 10:14:00 +02:00
LocalAI [bot]
786aa1de05 chore: ⬆️ Update ggml-org/llama.cpp to 1caae7fc6c77551cb1066515e0f414713eebb367 (#5593)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-06 00:10:02 +02:00
Ettore Di Giacinto
ea82deb16b chore(model gallery): add ultravox-v0_5-llama-3_1-8b (#5592)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-05 19:23:51 +02:00
Ettore Di Giacinto
b0891309ba chore(model gallery): add ultravox-v0_5-llama-3_2-1b (#5591)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-05 19:22:01 +02:00
Ettore Di Giacinto
b034cff149 feat: improve RAM estimation by using values from summary (#5525)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-05 19:16:26 +02:00
Ettore Di Giacinto
432f34f001 chore(model gallery): add goekdeniz-guelmez_josiefied-qwen3-14b-abliterated-v3 (#5590)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-05 19:16:04 +02:00
Gavin Mogan
cbd61dccd4 fix(install.sh): vulkan docker tag (#5589)
vulkan docker tag is not prefixed with gpu

```
regctl tag ls localai/localai | grep 2.29 | grep vulkan
v2.29.0-vulkan
```

Signed-off-by: Gavin Mogan <github@gavinmogan.com>
2025-06-05 08:12:16 +02:00
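Based on the regctl output above, the published Vulkan image tag carries no gpu- prefix, so pulling it directly looks like:

```
docker pull localai/localai:v2.29.0-vulkan
```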
LocalAI [bot]
0de0817d71 chore: ⬆️ Update ggml-org/whisper.cpp to 799eacdde40b3c562cfce1508da1354b90567f8f (#5586)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-05 08:11:38 +02:00
LocalAI [bot]
bf57d6e5ac chore: ⬆️ Update ggml-org/llama.cpp to 0d3984424f2973c49c4bcabe4cc0153b4f90c601 (#5585)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-05 08:11:12 +02:00
Ettore Di Giacinto
0b9603e010 chore(model gallery): add deepseek-ai_deepseek-r1-0528-qwen3-8b (#5580)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-04 15:28:45 +02:00
Ettore Di Giacinto
8d925217f6 chore(model gallery): add e-n-v-y_legion-v2.1-llama-70b-elarablated-v0.8-hf (#5579)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-04 11:12:37 +02:00
Ettore Di Giacinto
669a1ccae6 chore(model gallery): add nvidia_nemotron-research-reasoning-qwen-1.5b (#5578)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-04 11:07:10 +02:00
Ettore Di Giacinto
7a7d36ad63 chore(model gallery): add arcee-ai_homunculus (#5577)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-04 10:02:15 +02:00
Ettore Di Giacinto
8b889955b4 chore(deps): bump pytorch to 2.7 in vllm (#5576)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-04 08:56:45 +02:00
dependabot[bot]
a226555949 chore(deps): bump GrantBirki/git-diff-action from 2.8.0 to 2.8.1 (#5564)
Bumps [GrantBirki/git-diff-action](https://github.com/grantbirki/git-diff-action) from 2.8.0 to 2.8.1.
- [Release notes](https://github.com/grantbirki/git-diff-action/releases)
- [Commits](https://github.com/grantbirki/git-diff-action/compare/v2.8.0...v2.8.1)

---
updated-dependencies:
- dependency-name: GrantBirki/git-diff-action
  dependency-version: 2.8.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-06-04 08:41:47 +02:00
LocalAI [bot]
f38f17865a chore: ⬆️ Update ggml-org/whisper.cpp to 82f461eaa4e6a1ba29fc0dbdaa415a9934ee8a1d (#5575)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-04 08:41:26 +02:00
LocalAI [bot]
03f380701b chore: ⬆️ Update ggml-org/llama.cpp to 7e00e60ef86645a01fda738fef85b74afa016a34 (#5574)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-04 08:26:36 +02:00
Ettore Di Giacinto
65e2866c97 fix(chatterbox): install only with cuda 12 (#5573)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-03 14:57:47 +02:00
Ettore Di Giacinto
cd3cd899ad chore(deps): bump llama.cpp to '363757628848a27a435bbf22ff9476e9aeda5f40' (#5571)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-03 12:19:16 +02:00
LocalAI [bot]
c2ae3100e7 chore: ⬆️ Update ggml-org/whisper.cpp to e05af2457b7b4134ee626dc044294a19b096e62f (#5569)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-03 11:29:18 +02:00
Ettore Di Giacinto
ec0868e691 chore(deps): bump grpcio from 1.72.0 to 1.72.1 (#5570)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-03 09:59:43 +02:00
Ettore Di Giacinto
489c289916 Revert "fix(ci): try to add different mirrors to avoid 403 issues" (#5555)
Revert "fix(ci): try to add different mirrors to avoid 403 issues (#5554)"

This reverts commit 7c9f011d91.
2025-06-02 08:46:29 +02:00
LocalAI [bot]
ac5fb50bcc chore: ⬆️ Update ggml-org/whisper.cpp to 7fd6fa809749078aa00edf945e959c898f2bd1af (#5556)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-06-02 08:45:47 +02:00
Ettore Di Giacinto
7c9f011d91 fix(ci): try to add different mirrors to avoid 403 issues (#5554)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-01 08:48:53 +02:00
Ettore Di Giacinto
80f7f17843 chore(deps): bump llama.cpp to 'e562eece7cb476276bfc4cbb18deb7c0369b2233' (#5552)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-31 12:46:32 +02:00
LocalAI [bot]
f0c41d6405 chore: ⬆️ Update ggml-org/whisper.cpp to 98dfe8dc264b7d0d1daccfff9a9c043bcc2ece4b (#5542)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-31 08:51:15 +02:00
Ettore Di Giacinto
8472321a81 feat(ui): display thinking tags appropriately (#5540)
* fix(streaming): stream complete runes

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(ui): display thinking tags separately

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-05-31 08:50:46 +02:00
Ettore Di Giacinto
3bac4724ac fix(streaming): stream complete runes (#5539)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-31 08:48:05 +02:00
Ettore Di Giacinto
59db154cbc feat(ui): allow to upload PDF and text files, also add support to multiple input files (#5538)
* Support file inputs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat: support multiple files

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* show preview of files

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-31 08:47:48 +02:00
Ettore Di Giacinto
1cc4525f15 fix: adapt test to error changes
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-30 17:43:59 +02:00
Ettore Di Giacinto
45c58752e5 feat(ui): add audio upload button in chat view (#5526)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-30 16:47:31 +02:00
Ettore Di Giacinto
d5c9c717b5 feat(chatterbox): add new backend (#5524)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-30 10:52:55 +02:00
Ettore Di Giacinto
dd7fa6b9f7 chore(deps): bump llama.cpp to 'e83ba3e460651b20a594e9f2f0f0bffb998d3ce1' (#5527)
chore(deps): bump llama.cpp to 'e83ba3e460651b20a594e9f2f0f0bffb998d3ce1'

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-30 10:29:01 +02:00
LocalAI [bot]
039c318607 chore: ⬆️ Update ggml-org/whisper.cpp to e5e900dd00747f747143ad30a697c8f21ddcd59e (#5522)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-30 08:34:52 +02:00
Ettore Di Giacinto
0870bf5af6 fix(input): correctly handle the case where a string list is passed as input (#5521)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-29 22:06:42 +02:00
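The string-list input case corresponds to the OpenAI-style payload that accepts an array of strings instead of a single one. A minimal sketch against the embeddings endpoint, assuming OpenAI compatibility (the model name is taken from the gallery additions elsewhere in this log and is only illustrative):

```
curl http://localhost:8080/v1/embeddings \
  -H "Content-Type: application/json" \
  -d '{"model": "qwen3-embedding-0.6b", "input": ["first text", "second text"]}'
```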
Ettore Di Giacinto
6073b9944e chore(model gallery): add moondream2-20250414 (#5518)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-29 10:47:11 +02:00
LocalAI [bot]
ef0e0f3777 chore: ⬆️ Update ggml-org/whisper.cpp to 1f5fdbecb411a61b8576242e5170c5ecef24b05a (#5515)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-29 09:45:23 +02:00
LocalAI [bot]
b7de9e0aa0 chore: ⬆️ Update ggml-org/llama.cpp to d98f2a35fcf4a8d3e660ad48cd19e2a1f3d5b2ef (#5514)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-29 09:44:36 +02:00
Ettore Di Giacinto
39292407a1 chore(model gallery): add pku-ds-lab_fairyr1-32b (#5517)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-29 09:43:45 +02:00
Ettore Di Giacinto
f257bf8d14 chore(model gallery): add pku-ds-lab_fairyr1-14b-preview (#5516)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-29 09:37:08 +02:00
Ettore Di Giacinto
8ca2fb5ef1 chore(model gallery): add qwen2.5-omni-7b (#5513)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-28 18:15:09 +02:00
LocalAI [bot]
3a790fed13 chore: ⬆️ Update ggml-org/whisper.cpp to 0ed00d9d30e8c984936ff9ed9a4fcd475d6d82e5 (#5510)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-28 09:00:22 +02:00
LocalAI [bot]
a334f28a07 chore: ⬆️ Update ggml-org/llama.cpp to a3c30846e410c91c11d7bf80978795a03bb03dee (#5509)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-28 01:39:38 +00:00
Ettore Di Giacinto
dc6663d121 fix(template): we do not always have .Name (#5508)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-27 18:44:24 +02:00
LocalAI [bot]
103caf9823 chore: ⬆️ Update ggml-org/llama.cpp to a26c4cc11ec7c6574e3691e90ecdbd67deeea35b (#5500)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-27 17:13:55 +02:00
Ettore Di Giacinto
4226d2d837 Update index.yaml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-05-27 10:24:37 +02:00
Ettore Di Giacinto
7434256fc9 chore(model gallery): add ms-24b-mullein-v0 (#5506)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-27 10:14:52 +02:00
Ettore Di Giacinto
86a0563ae1 chore(model gallery): add llama3-24b-mullein-v1 (#5505)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-27 10:13:40 +02:00
Ettore Di Giacinto
c68951cbfe chore(model gallery): add mrm8488_qwen3-14b-ft-limo (#5504)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-27 10:04:16 +02:00
Ettore Di Giacinto
8408084120 chore(model gallery): add luckyrp-24b (#5503)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-27 10:02:25 +02:00
Ettore Di Giacinto
0f2f4c7e23 chore(model gallery): add allura-org_q3-30b-a3b-designant (#5502)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-27 09:59:56 +02:00
Ettore Di Giacinto
5ffad3b004 chore(deps): remove pin on transformers (#5501)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-27 09:24:27 +02:00
Ettore Di Giacinto
e5ccd97b8c Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-05-26 20:00:31 +02:00
LocalAI [bot]
a3b08d46ec chore: ⬆️ Update ggml-org/whisper.cpp to ea9f206f18d86c4eb357db9fdc52e4d9dc24435e (#5464)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-05-26 19:56:44 +02:00
Ettore Di Giacinto
090f5065fc chore(deps): bump llama.cpp to 'fef693dc6b959a8e8ba11558fbeaad0b264dd457' (#5467)
Also try to use a smaller model for integration tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-26 17:19:46 +02:00
Ettore Di Giacinto
88de2ea01a feat(llama.cpp): add support for audio input (#5466)
* feat(llama.cpp): add support for audio input

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Adapt tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-26 16:06:03 +02:00
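A hedged sketch of sending audio to the chat endpoint, assuming LocalAI follows the OpenAI input_audio content-part shape; the model name is one of the omni models added elsewhere in this log:

```
# base64 -w0 assumes GNU coreutils; the request shape is assumed, not confirmed here
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "qwen2.5-omni-7b",
    "messages": [{
      "role": "user",
      "content": [
        {"type": "text", "text": "What is said in this clip?"},
        {"type": "input_audio", "input_audio": {"data": "'"$(base64 -w0 clip.wav)"'", "format": "wav"}}
      ]
    }]
  }'
```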
Ettore Di Giacinto
9650d490d4 chore(model gallery): add nvidia_acereason-nemotron-14b (#5463)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-26 10:08:10 +02:00
Ettore Di Giacinto
4de1c83764 chore(model gallery): add allura-org_q3-30b-a3b-pentiment (#5462)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-26 09:46:44 +02:00
Ettore Di Giacinto
e5978dc714 chore(model gallery): add medgemma-27b-text-it (#5461)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-26 09:44:13 +02:00
Ettore Di Giacinto
f784986e19 chore(model gallery): add medgemma-4b-it (#5460)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-26 09:41:09 +02:00
Richard Palethorpe
bf6426aef2 feat: Realtime API support reboot (#5392)
* feat(realtime): Initial Realtime API implementation

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore: go mod tidy

Signed-off-by: Richard Palethorpe <io@richiejp.com>

* feat: Implement transcription only mode for realtime API

Reduce the scope of the realtime API for the initial release and make
transcription-only mode functional.

Signed-off-by: Richard Palethorpe <io@richiejp.com>

* chore(build): Build backends on a separate layer to speed up core only changes

Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Richard Palethorpe <io@richiejp.com>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-25 22:25:05 +02:00
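For the transcription-only mode described above, a client would open a websocket session; a minimal sketch using websocat, assuming an OpenAI-style realtime endpoint (the path and query parameters are assumptions, not confirmed by this log):

```
# Hypothetical endpoint layout, assumed from OpenAI's realtime API shape
websocat "ws://localhost:8080/v1/realtime?model=whisper-1"
# Session updates (e.g. switching the model) would then be sent as JSON events over the socket
```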
LocalAI [bot]
4a91950848 chore: ⬆️ Update ggml-org/llama.cpp to d13d0f6135803822ec1cd7e3efb49360b88a1bdf (#5448)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-24 08:50:41 +02:00
LocalAI [bot]
4614ea1685 chore: ⬆️ Update ggml-org/whisper.cpp to 13d92d08ae26031545921243256aaaf0ee057943 (#5449)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-23 23:44:06 +00:00
Ettore Di Giacinto
f0bf59d1d9 chore(model gallery): add vulpecula-4b (#5445)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-23 09:51:21 +02:00
Ettore Di Giacinto
83dd678959 chore(model gallery): add whiterabbitneo_whiterabbitneo-v3-7b (#5444)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-23 09:46:28 +02:00
Ettore Di Giacinto
9d6c9f874a chore(model gallery): add arliai_qwq-32b-arliai-rpr-v4 (#5443)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-23 09:34:33 +02:00
LocalAI [bot]
c62f2bb336 chore: ⬆️ Update ggml-org/llama.cpp to 8a1d206f1d2b4e45918b589f3165b4be232f7ba8 (#5440)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-23 09:22:38 +02:00
LocalAI [bot]
38aeca6f9c chore: ⬆️ Update ggml-org/whisper.cpp to 78b31ca7824500e429ba026c1a9b48e0b41c50cb (#5439)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-23 06:22:41 +00:00
Ettore Di Giacinto
3b0cf52f6a feat(llama.cpp): add reranking (#5396)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-22 21:49:30 +02:00
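Reranking is presumably exposed through a Jina-style rerank endpoint; a request sketch assuming that shape (the endpoint path is an assumption, and the model name echoes the tiny Jina reranker added in the backend gallery commit above):

```
# Assumes a Jina-compatible rerank API; adjust path and model to your deployment
curl http://localhost:8080/v1/rerank \
  -H "Content-Type: application/json" \
  -d '{
    "model": "jina-reranker-v1-tiny-en",
    "query": "Which backend handles audio?",
    "documents": ["whisper.cpp transcribes audio", "stable-diffusion.cpp generates images"]
  }'
```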
LocalAI [bot]
bac3022044 chore: ⬆️ Update ggml-org/whisper.cpp to bd1cb0c8e3a04baa411dc12c1325b6a9f12ee7f4 (#5424)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-05-22 21:49:06 +02:00
LocalAI [bot]
cd41701524 chore: ⬆️ Update ggml-org/llama.cpp to 8e186ef0e764c7a620e402d1f76ebad60bf31c49 (#5423)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-05-22 21:48:51 +02:00
Ettore Di Giacinto
6a382a1afe fix(transformers): try to pin to working release (#5426)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-22 12:50:51 +02:00
Ettore Di Giacinto
8dcab2f9c7 chore(scripts): allow to specify quants (#5430)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-22 11:53:30 +02:00
Ettore Di Giacinto
1d1d5627f0 chore(model gallery): add delta-vector_archaeo-12b-v2 (#5429)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-22 11:38:48 +02:00
Ettore Di Giacinto
233b3369ad chore(model gallery): add mistralai_devstral-small-2505 (#5428)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-22 11:37:17 +02:00
Ettore Di Giacinto
c587ac0aef chore(model gallery): add nvidia_llama-3.1-nemotron-nano-4b-v1.1 (#5427)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-22 11:33:33 +02:00
David Thole
38c5d16b57 feat(docs): updating the documentation on fine tuning and advanced guide. (#5420)
Updating the documentation on fine-tuning and the advanced guide. This mirrors how modern versions of llama.cpp operate
2025-05-21 19:11:00 +02:00
LocalAI [bot]
ef6fc052eb chore: ⬆️ Update ggml-org/llama.cpp to b7a17463ec190aeee7b9077c606c910fb4688b84 (#5399)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-21 09:06:09 +02:00
LocalAI [bot]
7ff35c08ac chore: ⬆️ Update ggml-org/whisper.cpp to 62dc8f7d7b72ca8e75c57cd6a100712c631fa5d5 (#5398)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-21 09:00:42 +02:00
LocalAI [bot]
43f75ee7f3 chore(model-gallery): ⬆️ update checksum (#5422)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-21 03:52:39 +00:00
Ettore Di Giacinto
82811a9630 fix(transformers): pin protobuf (#5421)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 20:28:31 +02:00
Ettore Di Giacinto
04a3d8e5ac feat(ui): add error page to display errors (#5418)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 12:17:27 +02:00
Ettore Di Giacinto
9af09b3f8c chore(model gallery): fixup
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 12:17:21 +02:00
Ettore Di Giacinto
0d590a4044 chore(model gallery): add smolvlm2-256m-video-instruct (#5417)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 12:03:02 +02:00
Ettore Di Giacinto
e0a54de4f5 chore(model gallery): add smolvlm2-500m-video-instruct (#5416)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 11:42:30 +02:00
Ettore Di Giacinto
6bc2ae5467 chore(model gallery): add smolvlm2-2.2b-instruct (#5415)
chore(model gallery): add smolvlm-instruct

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 11:36:22 +02:00
Ettore Di Giacinto
8caaf49f5d chore(model gallery): add smolvlm-instruct (#5414)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 11:35:01 +02:00
Ettore Di Giacinto
1db51044bb chore(model gallery): add smolvlm-500m-instruct (#5413)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 11:25:32 +02:00
Ettore Di Giacinto
ec21b58008 chore(model gallery): add smolvlm-256m-instruct (#5412)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 11:15:09 +02:00
Ettore Di Giacinto
996259b529 chore(model gallery): add facebook_kernelllm (#5411)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 10:31:09 +02:00
Ettore Di Giacinto
f2942cc0e1 chore(model gallery): add thedrummer_valkyrie-49b-v1 (#5410)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 10:28:27 +02:00
Ettore Di Giacinto
f8fbfd4fa3 chore(model gallery): add a-m-team_am-thinking-v1 (#5395)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-19 17:31:38 +02:00
Ettore Di Giacinto
41e239c67e chore(model gallery): add soob3123_grayline-qwen3-8b (#5394)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-19 17:02:43 +02:00
Ettore Di Giacinto
587827e779 chore(model gallery): add soob3123_grayline-qwen3-14b (#5393)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-19 15:59:07 +02:00
LocalAI [bot]
456b4982ef chore: ⬆️ Update ggml-org/llama.cpp to 6a2bc8bfb7cd502e5ebc72e36c97a6f848c21c2c (#5390)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-19 01:25:22 +00:00
Ettore Di Giacinto
159388cce8 chore: memoize detected GPUs (#5385)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-18 08:55:44 +02:00
LocalAI [bot]
cfc73c7773 chore: ⬆️ Update ggml-org/llama.cpp to e3a7cf6c5bf6a0a24217f88607b06e4405a2b5d9 (#5384)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-18 01:21:13 +00:00
Ettore Di Giacinto
6d5bde860b feat(llama.cpp): upgrade and use libmtmd (#5379)
* WIP

* wip

* wip

* Make it compile

* Update json.hpp

* this shouldn't be private for now

* Add logs

* Reset auto detected template

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Re-enable grammars

* This seems to be broken - 360a9c98e1 (diff-a18a8e64e12a01167d8e98fc)[…]cccf0d4eed09d76d879L2998-L3207

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Placeholder

* Simplify image loading

* use completion type

* disable streaming

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* correctly return timings

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Remove some debug logging

* Adapt tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Keep header

* embedding: do not use oai type

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Sync from server.cpp

* Use utils and json directly from llama.cpp

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Sync with upstream

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: copy json.hpp from the correct location

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: add httplib

* sync llama.cpp

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Embeddings: set OAICOMPAT_TYPE_EMBEDDING

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat: sync with server.cpp by including it

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* make it darwin-compatible

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-17 16:02:53 +02:00
LocalAI [bot]
6ef383033b chore: ⬆️ Update ggml-org/whisper.cpp to d1f114da61b1ae1e70b03104fad42c9dd666feeb (#5381)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-17 00:35:17 +00:00
Richard Palethorpe
cd494089d9 fix(flux): Set CFG=1 so that prompts are followed (#5378)
The recommendation with Flux is to set CFG to 1 as shown in the
stablediffusion-cpp README.

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2025-05-16 17:53:54 +02:00
LocalAI [bot]
3033845f94 chore: ⬆️ Update ggml-org/whisper.cpp to 20a20decd94badfd519a07ea91f0bba8b8fc4dea (#5374)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-16 12:46:16 +02:00
omahs
0f365ac204 fix: typos (#5376)
Signed-off-by: omahs <73983677+omahs@users.noreply.github.com>
2025-05-16 12:45:48 +02:00
Ettore Di Giacinto
525cf198be chore(model gallery): add primeintellect_intellect-2 (#5373)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-15 10:53:52 +02:00
Ettore Di Giacinto
658c2a4f55 chore(model gallery): add thedrummer_rivermind-lux-12b-v1 (#5372)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-15 10:51:55 +02:00
Ettore Di Giacinto
c987de090d chore(model gallery): add thedrummer_snowpiercer-15b-v1 (#5371)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-15 10:04:44 +02:00
Ettore Di Giacinto
04365843e6 chore(model gallery): add skywork_skywork-or1-7b (#5370)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-15 10:02:07 +02:00
Ettore Di Giacinto
1dc5781679 chore(model gallery): add skywork_skywork-or1-32b (#5369)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-15 09:58:51 +02:00
LocalAI [bot]
30704292de chore: ⬆️ Update ggml-org/whisper.cpp to f389d7e3e56bbbfec49fd333551927a0fcbb7213 (#5367)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-15 00:34:16 +00:00
Ettore Di Giacinto
e52c66c76e chore(docs/install.sh): image changes (#5354)
chore(docs): image changes

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-14 19:28:30 +02:00
LocalAI [bot]
cb28aef93b chore: ⬆️ Update ggml-org/whisper.cpp to f89056057511a1657af90bb28ef3f21e5b1f33cd (#5364)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-14 09:24:16 +02:00
LocalAI [bot]
029f97c2a2 docs: ⬆️ update docs version mudler/LocalAI (#5363)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-14 01:54:34 +00:00
Ettore Di Giacinto
3be71be696 fix(ci): tag latest against cpu-only image (#5362)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-13 22:00:41 +02:00
LocalAI [bot]
6adb019f8f chore: ⬆️ Update ggml-org/llama.cpp to de4c07f93783a1a96456a44dc16b9db538ee1618 (#5358)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-13 22:00:19 +02:00
LocalAI [bot]
fcaa0a2f01 chore: ⬆️ Update ggml-org/whisper.cpp to e41bc5c61ae66af6be2bd7011769bb821a83e8ae (#5357)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-13 21:59:50 +02:00
dependabot[bot]
fd17a3312c chore(deps): bump securego/gosec from 2.22.3 to 2.22.4 (#5356)
Bumps [securego/gosec](https://github.com/securego/gosec) from 2.22.3 to 2.22.4.
- [Release notes](https://github.com/securego/gosec/releases)
- [Changelog](https://github.com/securego/gosec/blob/master/.goreleaser.yml)
- [Commits](https://github.com/securego/gosec/compare/v2.22.3...v2.22.4)

---
updated-dependencies:
- dependency-name: securego/gosec
  dependency-version: 2.22.4
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-05-12 22:01:43 +02:00
dependabot[bot]
12d0fe610b chore(deps): bump dependabot/fetch-metadata from 2.3.0 to 2.4.0 (#5355)
Bumps [dependabot/fetch-metadata](https://github.com/dependabot/fetch-metadata) from 2.3.0 to 2.4.0.
- [Release notes](https://github.com/dependabot/fetch-metadata/releases)
- [Commits](https://github.com/dependabot/fetch-metadata/compare/v2.3.0...v2.4.0)

---
updated-dependencies:
- dependency-name: dependabot/fetch-metadata
  dependency-version: 2.4.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-05-12 22:01:19 +02:00
Ettore Di Giacinto
11c67d16b8 chore(ci): strip 'core' in the image suffix, identify python-based images with 'extras' (#5353)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-12 09:36:59 +02:00
LocalAI [bot]
63f7c86c4d chore: ⬆️ Update ggml-org/llama.cpp to 9a390c4829cd3058d26a2e2c09d16e3fd12bf1b1 (#5351)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-12 09:24:54 +02:00
LocalAI [bot]
ac89bf77bf chore: ⬆️ Update ggml-org/whisper.cpp to 2e310b841e0b4e7cf00890b53411dd9f8578f243 (#4785)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-12 01:30:35 +00:00
Ettore Di Giacinto
0395cc02fb chore(model gallery): add qwen_qwen2.5-vl-72b-instruct (#5349)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-11 09:46:32 +02:00
Ettore Di Giacinto
616972fca0 chore(model gallery): add qwen_qwen2.5-vl-7b-instruct (#5348)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-11 09:44:58 +02:00
Ettore Di Giacinto
942fbff62d chore(model gallery): add gryphe_pantheon-proto-rp-1.8-30b-a3b (#5347)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-11 09:39:28 +02:00
LocalAI [bot]
2612a0c910 chore: ⬆️ Update ggml-org/llama.cpp to 15e6125a397f6086c1dfdf7584acdb7c730313dc (#5345)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-11 09:21:46 +02:00
LocalAI [bot]
2dcb6d7247 chore(model-gallery): ⬆️ update checksum (#5346)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-10 22:24:04 +02:00
Ettore Di Giacinto
6978eec69f feat(whisper.cpp): gpu support (#5344)
* fix(whisper.cpp): gpu support

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Try to fix apple tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-10 22:02:40 +02:00
LocalAI [bot]
2fcfe54466 chore: ⬆️ Update ggml-org/llama.cpp to 33eff4024084d1f0c8441b79f7208a52fad79858 (#5343)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-10 10:07:39 +02:00
Ettore Di Giacinto
4e7506a3be fix(whisper): add vulkan flag
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-05-10 08:46:21 +02:00
Ettore Di Giacinto
2a46217f90 Update Makefile
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-05-09 23:17:18 +02:00
Ettore Di Giacinto
31ff9dbd52 chore(Makefile): small cleanups, disable openmp on whisper
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-09 22:37:18 +02:00
Ettore Di Giacinto
9483abef03 fix(whisper/sycl): disable
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-09 22:36:09 +02:00
Ettore Di Giacinto
ce3e8b3e31 fix(whisper/sycl): use icx when running go build
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-09 21:48:09 +02:00
Ettore Di Giacinto
f3bb84c9a7 feat(whisper): link vulkan, hipblas and sycl
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-09 19:25:26 +02:00
Ettore Di Giacinto
ecb1297582 fix: specify icx and icpx only on whisper.cpp
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-09 10:58:30 +02:00
Ettore Di Giacinto
73fc702b3c fix: this is not needed
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-09 10:28:53 +02:00
Ettore Di Giacinto
e3af62ae1a feat: Add sycl support for whisper.cpp (#5341)
2025-05-09 09:31:02 +02:00
Ettore Di Giacinto
dc21604741 chore(deps): bump whisper.cpp (#5338)
* chore(deps): bump whisper.cpp

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* add libggml-metal

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixups macOS arm64

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* adjust cublas for whisper.cpp

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-09 08:17:45 +02:00
LocalAI [bot]
5433f1a70e chore: ⬆️ Update ggml-org/llama.cpp to f05a6d71a0f3dbf0730b56a1abbad41c0f42e63d (#5340)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-08 23:13:28 +00:00
Ettore Di Giacinto
d5e032bdcd chore(model gallery): add gemma-3-12b-fornaxv.2-qat-cot (#5337)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-08 12:07:25 +02:00
Ettore Di Giacinto
de786f6586 chore(model gallery): add symiotic-14b-i1 (#5336)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-08 12:03:35 +02:00
Ettore Di Giacinto
8b9bc4aa6e chore(model gallery): add qwen3-14b-uncensored (#5335)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-08 11:59:26 +02:00
Ettore Di Giacinto
e6cea7d28e chore(model gallery): add cognition-ai_kevin-32b (#5334)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-08 11:57:12 +02:00
Ettore Di Giacinto
7d7d56f2ce chore(model gallery): add servicenow-ai_apriel-nemotron-15b-thinker (#5333)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-08 11:55:35 +02:00
Ettore Di Giacinto
1caae91ab6 chore(model gallery): add qwen3-4b-esper3-i1 (#5332)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-08 11:52:02 +02:00
LocalAI [bot]
e90f2cb0ca chore: ⬆️ Update ggml-org/llama.cpp to 814f795e063c257f33b921eab4073484238a151a (#5331)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-08 09:25:13 +02:00
Ettore Di Giacinto
5a4291fadd docs: update README badges
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-05-07 22:20:06 +02:00
Ettore Di Giacinto
91ef58ee5a chore(model gallery): add qwen3-14b-griffon-i1 (#5330)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-07 11:07:38 +02:00
LocalAI [bot]
a86e8c78f1 chore: ⬆️ Update ggml-org/llama.cpp to 91a86a6f354aa73a7aab7bc3d283be410fdc93a5 (#5329)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-06 23:39:10 +00:00
Ettore Di Giacinto
adb24214c6 chore(deps): bump llama.cpp to b34c859146630dff136943abc9852ca173a7c9d6 (#5323)
chore(deps): bump llama.cpp to 'b34c859146630dff136943abc9852ca173a7c9d6'

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-06 11:21:25 +02:00
Ettore Di Giacinto
f03a0430aa chore(model gallery): add claria-14b (#5326)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-06 10:48:03 +02:00
Ettore Di Giacinto
73bc12abc0 chore(model gallery): add goekdeniz-guelmez_josiefied-qwen3-8b-abliterated-v1 (#5325)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-06 10:38:20 +02:00
Ettore Di Giacinto
7fa437bbcc chore(model gallery): add huihui-ai_qwen3-14b-abliterated (#5324)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-06 10:35:55 +02:00
LocalAI [bot]
4a27c99928 chore(model-gallery): ⬆️ update checksum (#5321)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-06 10:01:28 +02:00
Ettore Di Giacinto
6ce94834b6 fix(hipblas): do not build all cpu-specific flags (#5322)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-06 10:00:50 +02:00
dependabot[bot]
84a26458dc chore(deps): bump mxschmitt/action-tmate from 3.21 to 3.22 (#5319)
Bumps [mxschmitt/action-tmate](https://github.com/mxschmitt/action-tmate) from 3.21 to 3.22.
- [Release notes](https://github.com/mxschmitt/action-tmate/releases)
- [Changelog](https://github.com/mxschmitt/action-tmate/blob/master/RELEASE.md)
- [Commits](https://github.com/mxschmitt/action-tmate/compare/v3.21...v3.22)

---
updated-dependencies:
- dependency-name: mxschmitt/action-tmate
  dependency-version: '3.22'
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-05-05 22:17:59 +00:00
Ettore Di Giacinto
7aa377b6a9 fix(arm64): do not build with CPU instructions which are not available (#5318)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-05 17:30:00 +02:00
Ettore Di Giacinto
64e66dda4a chore(model gallery): add allura-org_remnant-qwen3-8b (#5317)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-05 11:09:07 +02:00
LocalAI [bot]
a085f61fdc chore: ⬆️ Update ggml-org/llama.cpp to 9fdfcdaeddd1ef57c6d041b89cd8fb7048a0f028 (#5316)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-04 23:00:25 +00:00
Ettore Di Giacinto
21bdfe5fa4 fix: use rice when embedding large binaries (#5309)
* fix(embed): use go-rice for large backend assets

Golang embed FS has a hard limit that we might exceed when providing
many binary alternatives.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* simplify golang deps

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(tests): switch to testcontainers and print logs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(tests): do not build a test binary

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* small fixup

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-04 16:42:42 +02:00
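The embed limit mentioned in the commit above is the motivation for go.rice: `embed.FS` bakes assets into the binary at compile time, while a rice box can be appended to the executable after building (`rice append`). A minimal sketch, with hypothetical box and asset names:

```go
package main

import (
	"fmt"

	rice "github.com/GeertJohan/go.rice"
)

func main() {
	// During development FindBox falls back to the on-disk folder;
	// after `rice append --exec <binary>` the same call reads data
	// appended to the executable, sidestepping compile-time limits.
	box, err := rice.FindBox("backend-assets") // hypothetical box name
	if err != nil {
		panic(err)
	}
	data, err := box.Bytes("llama-cpp") // hypothetical asset name
	if err != nil {
		panic(err)
	}
	fmt.Printf("loaded %d bytes\n", len(data))
}
```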
Ettore Di Giacinto
7ebd7b2454 chore(model gallery): add rei-v3-kto-12b (#5313)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-04 09:41:35 +02:00
Ettore Di Giacinto
6984749ea1 chore(model gallery): add kalomaze_qwen3-16b-a3b (#5312)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-04 09:39:38 +02:00
Ettore Di Giacinto
c0a206bc7a chore(model gallery): add qwen3-30b-a1.5b-high-speed (#5311)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-04 09:38:01 +02:00
LocalAI [bot]
01bbb31fb3 chore: ⬆️ Update ggml-org/llama.cpp to 36667c8edcded08063ed51c7d57e9e086bbfc903 (#5300)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-04 09:23:01 +02:00
Ettore Di Giacinto
72111c597d fix(gpu): do not assume the GPU being returned has node and mem (#5310)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-03 19:00:24 +02:00
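A sketch of the defensive access implied by the fix above, using hypothetical types: detection libraries can return GPUs whose topology node or memory fields are nil, so the code should degrade to zero rather than dereference them.

```go
package main

import "fmt"

// Hypothetical shapes; the real detection library's types differ.
type Memory struct{ TotalBytes int64 }
type Node struct{ Memory *Memory }
type GPU struct{ Node *Node }

// vramOf reports the GPU's memory, or 0 when node/mem are missing,
// instead of panicking on a nil dereference.
func vramOf(g *GPU) int64 {
	if g == nil || g.Node == nil || g.Node.Memory == nil {
		return 0
	}
	return g.Node.Memory.TotalBytes
}

func main() {
	fmt.Println(vramOf(&GPU{}))                                      // 0, no panic
	fmt.Println(vramOf(&GPU{Node: &Node{Memory: &Memory{8 << 30}}})) // 8589934592
}
```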
Ettore Di Giacinto
b2f9fc870b chore(defaults): enlarge defaults, drop gpu layers, which are inferred (#5308)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-03 18:44:51 +02:00
Ettore Di Giacinto
1fc6d469ac chore(deps): bump llama.cpp to '1d36b3670b285e69e58b9d687c770a2a0a192194' (#5307)
chore(deps): bump llama.cpp to '1d36b3670b285e69e58b9d687c770a2a0a192194'

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-03 18:44:40 +02:00
Ettore Di Giacinto
05848b2027 chore(model gallery): add smoothie-qwen3-8b (#5306)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-03 10:35:20 +02:00
Ettore Di Giacinto
1da0644aa3 chore(model gallery): add qwen-3-32b-medical-reasoning-i1 (#5305)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-03 10:24:07 +02:00
Ettore Di Giacinto
c087cd1377 chore(model gallery): add amoral-qwen3-14b (#5304)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-03 10:21:48 +02:00
Ettore Di Giacinto
c621412f6a chore(model gallery): add comet_12b_v.5-i1 (#5303)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-03 10:20:03 +02:00
Ettore Di Giacinto
5a8b1892cd chore(model gallery): add genericrpv3-4b (#5302)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-03 10:18:31 +02:00
Ettore Di Giacinto
5b20426863 chore(model gallery): add planetoid_27b_v.2 (#5301)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-03 10:14:33 +02:00
Ettore Di Giacinto
5c6cd50ed6 feat(llama.cpp): estimate vram usage (#5299)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-02 17:40:26 +02:00
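A common first-order estimate for a feature like this sums the on-disk weight size with the KV-cache footprint; the sketch below assumes an f16 cache and is not necessarily the commit's exact method.

```go
package main

import "fmt"

// estimateVRAM: loaded weights occupy roughly their on-disk size, and
// an f16 KV cache holds 2 tensors (K and V) per layer, each with
// nCtx*nEmbd elements at 2 bytes apiece.
func estimateVRAM(weightBytes int64, nLayers, nCtx, nEmbd int) int64 {
	kvCache := int64(2*nLayers*nCtx*nEmbd) * 2
	return weightBytes + kvCache
}

func main() {
	// e.g. a ~4.1GB quantized model with 32 layers, 4096 context and
	// 4096 embedding width: about 2GB of KV cache on top of weights.
	fmt.Println(estimateVRAM(4_100_000_000, 32, 4096, 4096))
}
```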
Ettore Di Giacinto
bace6516f1 chore(model gallery): add webthinker-qwq-32b-i1 (#5298)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-02 09:57:49 +02:00
Ettore Di Giacinto
3baadf6f27 chore(model gallery): add shuttleai_shuttle-3.5 (#5297)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-02 09:48:11 +02:00
Ettore Di Giacinto
8804c701b8 chore(model gallery): add microsoft_phi-4-reasoning (#5296)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-02 09:46:20 +02:00
Ettore Di Giacinto
7b3ceb19bb chore(model gallery): add microsoft_phi-4-reasoning-plus (#5295)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-02 09:43:38 +02:00
Ettore Di Giacinto
e7f3effea1 chore(model gallery): add furina-8b (#5294)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-02 09:39:22 +02:00
Ettore Di Giacinto
61694a2ffb chore(model gallery): add josiefied-qwen3-8b-abliterated-v1 (#5293)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-02 09:36:35 +02:00
LocalAI [bot]
573a3f104c chore: ⬆️ Update ggml-org/llama.cpp to d7a14c42a1883a34a6553cbfe30da1e1b84dfd6a (#5292)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-02 09:21:38 +02:00
Ettore Di Giacinto
0e8af53a5b chore: update quickstart
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-01 22:36:33 +02:00
Ettore Di Giacinto
960ffa808c chore(model gallery): add microsoft_phi-4-mini-reasoning (#5288)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-01 10:17:58 +02:00
Ettore Di Giacinto
92719568e5 chore(model gallery): add fast-math-qwen3-14b (#5287)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-01 10:14:51 +02:00
Ettore Di Giacinto
163939af71 chore(model gallery): add qwen3-8b-jailbroken (#5286)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-01 10:13:01 +02:00
Ettore Di Giacinto
399f1241dc chore(model gallery): add qwen3-30b-a3b-abliterated (#5285)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-01 10:07:42 +02:00
LocalAI [bot]
58c9ade2e8 chore: ⬆️ Update ggml-org/llama.cpp to 3e168bede4d27b35656ab8026015b87659ecbec2 (#5284)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-05-01 10:01:39 +02:00
Ettore Di Giacinto
6e1c93d84f fix(ci): comment out vllm tests
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-05-01 10:01:22 +02:00
Wyatt Neal
4076ea0494 fix: vllm missing logprobs (#5279)
* working to address missing items

referencing #3436 and #2930 - if I could test it, this might show that the
output from the vllm backend is processed and returned to the user

Signed-off-by: Wyatt Neal <wyatt.neal+git@gmail.com>

* adding in vllm tests to test-extras

Signed-off-by: Wyatt Neal <wyatt.neal+git@gmail.com>

* adding in tests to pipeline for execution

Signed-off-by: Wyatt Neal <wyatt.neal+git@gmail.com>

* removing todo block, test via pipeline

Signed-off-by: Wyatt Neal <wyatt.neal+git@gmail.com>

---------

Signed-off-by: Wyatt Neal <wyatt.neal+git@gmail.com>
2025-04-30 12:55:07 +00:00
Ettore Di Giacinto
26cbf77c0d chore(model gallery): add mlabonne_qwen3-4b-abliterated (#5283)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-30 11:09:58 +02:00
Ettore Di Giacinto
640790d628 chore(model gallery): add mlabonne_qwen3-8b-abliterated (#5282)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-30 11:08:26 +02:00
Ettore Di Giacinto
4132adea2f chore(model gallery): add mlabonne_qwen3-14b-abliterated (#5281)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-30 11:04:49 +02:00
LocalAI [bot]
2b2d907a3a chore: ⬆️ Update ggml-org/llama.cpp to e2e1ddb93a01ce282e304431b37e60b3cddb6114 (#5278)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-29 21:46:08 +00:00
Ettore Di Giacinto
6e8f4f584b fix(diffusers): consider options only in the form of key/value (#5277)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-29 17:08:55 +02:00
Richard Palethorpe
662cfc2b48 fix(aio): Fix copy-paste error in download files for gpt-4 model (#5276)
Signed-off-by: Richard Palethorpe <io@richiejp.com>
2025-04-29 17:08:16 +02:00
Ettore Di Giacinto
a25d355d66 chore(model gallery): add qwen3-0.6b (#5275)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-29 10:10:16 +02:00
Ettore Di Giacinto
6d1cfdbefc chore(model gallery): add qwen3-1.7b (#5274)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-29 10:06:03 +02:00
Ettore Di Giacinto
5ecc478968 chore(model gallery): add qwen3-4b (#5273)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-29 10:01:22 +02:00
Ettore Di Giacinto
aef5c4291b chore(model gallery): add qwen3-8b (#5272)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-29 09:59:17 +02:00
Ettore Di Giacinto
c059f912b9 chore(model gallery): add qwen3-14b (#5271)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-29 09:56:50 +02:00
LocalAI [bot]
bc1e059259 chore: ⬆️ Update ggml-org/llama.cpp to 5f5e39e1ba5dbea814e41f2a15e035d749a520bc (#5267)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-29 09:49:42 +02:00
LocalAI [bot]
38dc07793a chore(model-gallery): ⬆️ update checksum (#5268)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-29 09:49:23 +02:00
Ettore Di Giacinto
da6ef0967d chore(model gallery): add qwen3-32b (#5270)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-29 09:48:28 +02:00
Ettore Di Giacinto
7a011e60bd chore(model gallery): add qwen3-30b-a3b (#5269)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-29 09:44:44 +02:00
dependabot[bot]
e13dd5b09f chore(deps): bump appleboy/scp-action from 0.1.7 to 1.0.0 (#5265)
Bumps [appleboy/scp-action](https://github.com/appleboy/scp-action) from 0.1.7 to 1.0.0.
- [Release notes](https://github.com/appleboy/scp-action/releases)
- [Changelog](https://github.com/appleboy/scp-action/blob/master/.goreleaser.yaml)
- [Commits](https://github.com/appleboy/scp-action/compare/v0.1.7...v1.0.0)

---
updated-dependencies:
- dependency-name: appleboy/scp-action
  dependency-version: 1.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-04-28 22:36:30 +00:00
Ettore Di Giacinto
86ee303bd6 chore(model gallery): add nvidia_openmath-nemotron-14b-kaggle (#5264)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-28 19:52:36 +02:00
Ettore Di Giacinto
978ee96fd3 chore(model gallery): add nvidia_openmath-nemotron-14b (#5263)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-28 19:43:49 +02:00
Ettore Di Giacinto
3ad5691db6 chore(model gallery): add nvidia_openmath-nemotron-7b (#5262)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-28 19:41:59 +02:00
Ettore Di Giacinto
0027681090 chore(model gallery): add nvidia_openmath-nemotron-1.5b (#5261)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-28 19:40:09 +02:00
Ettore Di Giacinto
8cba990edc chore(model gallery): add nvidia_openmath-nemotron-32b (#5260)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-28 19:36:57 +02:00
Simon Redman
88857696d4 fix(CUDA): Add note for how to run CUDA with SELinux (#5259)
* Add note to help run nvidia containers with SELinux

* Use correct CUDA container references as noted in the dockerhub overview

* Clean trailing whitespaces
2025-04-28 09:00:52 +02:00
LocalAI [bot]
23f347e687 chore: ⬆️ Update ggml-org/llama.cpp to ced44be34290fab450f8344efa047d8a08e723b4 (#5258)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-27 21:59:35 +00:00
Mohit Gaur
b6e3dc5f02 docs: update docs for DisableWebUI flag (#5256)
Signed-off-by: Mohit Gaur <56885276+Mohit-Gaur@users.noreply.github.com>
2025-04-27 16:02:02 +02:00
Alessandro Pirastru
69667521e2 fix(install/gpu): Fix Docker not being able to leverage the GPU on systems that have SELinux enforced (#5252)
* Update installation script for improved compatibility and clarity

- Renamed VERSION to LOCALAI_VERSION to avoid conflicts with system variables.
- Enhanced NVIDIA and CUDA repository installation for DNF5 compatibility.
- Adjusted default Fedora version handling for CUDA installation.
- Updated Docker image tag handling to use LOCALAI_VERSION consistently.
- Improved logging messages for repository and LocalAI binary downloads.
- Added a temporary bypass for nvidia-smi installation on Fedora Cloud Edition.

* feat: Add SELinux configuration for NVIDIA GPU support in containers

- Introduced `enable_selinux_container_booleans` function to handle SELinux configuration changes for GPU access.
- Included user confirmation prompt to enable SELinux `container_use_devices` boolean due to security implications.
- Added NVIDIA Container Runtime to Docker runtimes and restarted Docker to ensure proper GPU support.
- Applied SELinux adjustments conditionally for Fedora, RHEL, CentOS, Rocky, and openSUSE distributions.

Signed-off-by: Alessandro Pirastru <alessandro.pirastru.94@gmail.com>

* fix: Correct SELinux boolean parsing and add loop break

- Fixed incorrect parsing of `container_use_devices` boolean by changing the awk field from `$2` to `$3` to retrieve the correct value.
- Added a `break` statement after enabling the SELinux boolean to prevent unnecessary loop iterations after user prompt.

Signed-off-by: Alessandro Pirastru <alessandro.pirastru.94@gmail.com>

* fix: typo in install.sh

Signed-off-by: Alessandro Pirastru <57262788+Bloodis94@users.noreply.github.com>

---------

Signed-off-by: Alessandro Pirastru <alessandro.pirastru.94@gmail.com>
Signed-off-by: Alessandro Pirastru <57262788+Bloodis94@users.noreply.github.com>
2025-04-27 16:01:29 +02:00
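The `$2` vs `$3` detail above comes from the shape of `getsebool` output ("container_use_devices --> off"): the value is the third whitespace-separated field. A small Go illustration of the same parse:

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// Typical `getsebool container_use_devices` output: the boolean's
	// value is field 3, which is what the awk fix switched to.
	out := "container_use_devices --> off"
	fields := strings.Fields(out)
	if len(fields) == 3 {
		fmt.Println("value:", fields[2]) // "off"
	}
}
```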
LocalAI [bot]
2a92effc5d chore: ⬆️ Update ggml-org/llama.cpp to 77d5e9a76a7b4a8a7c5bf9cf6ebef91860123cba (#5254)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-27 09:21:02 +02:00
Simon Redman
a65e012aa2 docs(Vulkan): Add GPU docker documentation for Vulkan (#5255)
Add GPU docker documentation for Vulkan
2025-04-27 09:20:26 +02:00
Ettore Di Giacinto
8e9b41d05f chore(ci): build only images with ffmpeg included, simplify tags (#5251)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-27 08:23:25 +02:00
LocalAI [bot]
078da5c2f0 feat(swagger): update swagger (#5253)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-26 22:40:35 +00:00
Ettore Di Giacinto
c5af5d139c Update index.yaml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-04-26 18:42:22 +02:00
Ettore Di Giacinto
2c9279a542 feat(video-gen): add endpoint for video generation (#5247)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-26 18:05:01 +02:00
Ettore Di Giacinto
a67d22f5f2 chore(model gallery): add mmproj to gemma3 models (now working)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-26 17:31:40 +02:00
Ettore Di Giacinto
dc7c51dcc7 chore(model gallery): use the correct filename for gemma-3-27b-it-qat
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-26 17:27:50 +02:00
Ettore Di Giacinto
98df65c7aa chore(model gallery): add l3.3-genetic-lemonade-sunset-70b (#5250)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-26 17:19:20 +02:00
Ettore Di Giacinto
1559b6b522 chore(model gallery): add l3.3-geneticlemonade-unleashed-v2-70b (#5249)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-26 17:17:18 +02:00
Alessandro Pirastru
a0244e3fb4 feat(install): added complete process for installing nvidia drivers on fedora without pulling X11 (#5246)
* Update installation script for improved compatibility and clarity

- Renamed VERSION to LOCALAI_VERSION to avoid conflicts with system variables.
- Enhanced NVIDIA and CUDA repository installation for DNF5 compatibility.
- Adjusted default Fedora version handling for CUDA installation.
- Updated Docker image tag handling to use LOCALAI_VERSION consistently.
- Improved logging messages for repository and LocalAI binary downloads.
- Added a temporary bypass for nvidia-smi installation on Fedora Cloud Edition.

* Enhance log functions with ANSI color formatting

- Added ANSI escape codes for improved log styling: light blue for info, orange for warnings, and red for errors.
- Updated all log functions (`info`, `warn`, `fatal`) to include bold and colored output.

Signed-off-by: Alessandro Pirastru <alessandro.pirastru.94@gmail.com>

* feat: Enhance log functions with ANSI color formatting

- Added ANSI escape codes for improved log styling: light blue for info, orange for warnings, and red for errors.
- Updated all log functions (`info`, `warn`, `fatal`) to include bold and colored output.

Signed-off-by: Alessandro Pirastru <alessandro.pirastru.94@gmail.com>

* chore: ⬆️ Update ggml-org/llama.cpp to `ecda2ec4b347031a9b8a89ee2efc664ce63f599c` (#5238)

⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* fix(stablediffusion-ggml): Build with DSD CUDA, HIP and Metal flags (#5236)

Signed-off-by: Richard Palethorpe <io@richiejp.com>

* feat(install): enhance script with choice functions and logs

- Added custom `choice_info`, `choice_warn`, and `choice_fatal` functions for interactive input logging.
- Adjusted Docker volume creation message for better clarity.
- Included NVIDIA driver check log for improved feedback to users.
- Added consistent logging before starting LocalAI Docker containers across configurations.

Signed-off-by: Alessandro Pirastru <alessandro.pirastru.94@gmail.com>

* feat(install): add Fedora NVIDIA driver installation option

- Introduced a new function to install NVIDIA kernel drivers on Fedora using akmod packages.
- Added user prompt to choose between installing drivers automatically or exiting for manual setup.
- Integrated the new function into the existing Fedora-specific CUDA toolkit installation workflow.

Signed-off-by: Alessandro Pirastru <alessandro.pirastru.94@gmail.com>

* fix(install): correct repository ID for DNF5 configuration

- Update repository ID from 'nome-repo' to 'nvidia-cuda' for DNF5.
- Ensures the correct repository name matches expected configuration.
- Fix prevents potential misconfiguration during installation process.

Signed-off-by: Alessandro Pirastru <alessandro.pirastru.94@gmail.com>

* feat(install): enhance NVIDIA driver handling on Fedora

- fixed `install_cuda_driver_yum` function call in `install_fedora_nvidia_kernel_drivers`
- Added `cuda-toolkit` for Fedora installations, as recommended by RPM Fusion.
- Adjusted driver repository commands for compatibility with DNF5.
- Improved URL and version handling for package manager installations.

Signed-off-by: Alessandro Pirastru <alessandro.pirastru.94@gmail.com>

* Refactor NVIDIA driver installation process in install.sh

- Removed redundant empty lines for cleaner formatting.
- Standardized URL formatting by removing unnecessary quotes around URLs.
- Reverted logic by removing Fedora-specific exclusions for cuda-toolkit and using `cuda-drivers` universally.
- Refined repository addition for `dnf` by explicitly setting `id` and `name` parameters for clarity and accuracy.
- Fixed minor formatting inconsistencies in parameter passing.

Signed-off-by: Alessandro Pirastru <alessandro.pirastru.94@gmail.com>

* feat: Update NVIDIA module installation warning in install script

- Clarified that Akmod installation may inhibit the reboot command.
- Added a cautionary note to the warning to inform users of potential risks.

Signed-off-by: Alessandro Pirastru <alessandro.pirastru.94@gmail.com>

* Update NVIDIA driver installation warning message

- Clarify prerequisites by noting the need for rpmfusion free/nonfree repos.
- Improve formatting of the warning box for better readability.
- Inform users that the script will install missing repos if necessary.

Signed-off-by: Alessandro Pirastru <alessandro.pirastru.94@gmail.com>

---------

Signed-off-by: Alessandro Pirastru <alessandro.pirastru.94@gmail.com>
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Signed-off-by: Richard Palethorpe <io@richiejp.com>
Co-authored-by: LocalAI [bot] <139863280+localai-bot@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Richard Palethorpe <io@richiejp.com>
2025-04-26 09:44:40 +02:00
LocalAI [bot]
d66396201a chore: ⬆️ Update ggml-org/llama.cpp to 295354ea6848a77bdee204ee1c971d9b92ffcca9 (#5245)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-26 00:05:16 +02:00
Ettore Di Giacinto
9628860c0e feat(llama.cpp/clip): inject gpu options if we detect GPUs (#5243)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-26 00:04:47 +02:00
Ettore Di Giacinto
cae9bf1308 chore(deps): bump grpcio to 1.72.0 (#5244)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-25 21:32:37 +02:00
Ettore Di Giacinto
5bb5da0760 fix(ci): add clang (#5242)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-25 16:20:05 +02:00
Ettore Di Giacinto
867973a850 chore(model gallery): add soob3123_veritas-12b (#5241)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-25 09:20:01 +02:00
LocalAI [bot]
701cd6b6d5 chore: ⬆️ Update ggml-org/llama.cpp to 226251ed56b85190e18a1cca963c45b888f4953c (#5240)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-25 08:42:22 +02:00
Richard Palethorpe
7f61d397d5 fix(stablediffusion-ggml): Build with DSD CUDA, HIP and Metal flags (#5236)
Signed-off-by: Richard Palethorpe <io@richiejp.com>
2025-04-24 10:27:17 +02:00
Alessandro Pirastru
1ae0b896fa fix: installation script compatibility with Fedora 41 and later, and unclear errors on headless Fedora (#5239)
Update installation script for improved compatibility and clarity

- Renamed VERSION to LOCALAI_VERSION to avoid conflicts with system variables.
- Enhanced NVIDIA and CUDA repository installation for DNF5 compatibility.
- Adjusted default Fedora version handling for CUDA installation.
- Updated Docker image tag handling to use LOCALAI_VERSION consistently.
- Improved logging messages for repository and LocalAI binary downloads.
- Added a temporary bypass for nvidia-smi installation on Fedora Cloud Edition.
2025-04-24 09:34:25 +02:00
LocalAI [bot]
3937407cb3 chore: ⬆️ Update ggml-org/llama.cpp to ecda2ec4b347031a9b8a89ee2efc664ce63f599c (#5238)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-24 09:32:08 +02:00
LocalAI [bot]
0e34ae4f3f chore: ⬆️ Update ggml-org/llama.cpp to 658987cfc9d752dca7758987390d5fb1a7a0a54a (#5234)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-23 09:13:49 +02:00
dependabot[bot]
a38b99ecb6 chore(deps): bump mxschmitt/action-tmate from 3.19 to 3.21 (#5231)
Bumps [mxschmitt/action-tmate](https://github.com/mxschmitt/action-tmate) from 3.19 to 3.21.
- [Release notes](https://github.com/mxschmitt/action-tmate/releases)
- [Changelog](https://github.com/mxschmitt/action-tmate/blob/master/RELEASE.md)
- [Commits](https://github.com/mxschmitt/action-tmate/compare/v3.19...v3.21)

---
updated-dependencies:
- dependency-name: mxschmitt/action-tmate
  dependency-version: '3.21'
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-04-22 10:27:10 +02:00
LocalAI [bot]
a4a4358182 chore: ⬆️ Update ggml-org/llama.cpp to 1d735c0b4fa0551c51c2f4ac888dd9a01f447985 (#5233)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-22 10:25:54 +02:00
Ettore Di Giacinto
4bc39c2db3 fix: typo on README link
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-04-21 22:13:14 +02:00
Ettore Di Giacinto
cc3df759f8 chore(docs): improve installer.sh docs (#5232)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-21 22:11:43 +02:00
LocalAI [bot]
378161060c chore: ⬆️ Update ggml-org/llama.cpp to 6602304814e679cc8c162bb760a034aceb4f8965 (#5228)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-20 21:44:33 +00:00
Ettore Di Giacinto
f2f788fe60 chore(model gallery): add starrysky-12b-i1 (#5224)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-20 10:26:30 +02:00
Ettore Di Giacinto
9fa8ed6b1e chore(model gallery): add amoral-gemma3-1b-v2 (#5223)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-20 10:23:24 +02:00
Ettore Di Giacinto
7fc37c5e29 chore(model gallery): add llama_3.3_70b_darkhorse-i1 (#5222)
chore(model gallery): add llama_3.3_70b_darkhorse-i1

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-20 10:20:58 +02:00
Ettore Di Giacinto
4bc4b1e8bc chore(model gallery): update gemma3 qat models
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-20 10:11:12 +02:00
LocalAI [bot]
e495b89f18 chore: ⬆️ Update ggml-org/llama.cpp to 00137157fca3d17b90380762b4d7cc158d385bd3 (#5218)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-19 23:50:35 +00:00
LocalAI [bot]
ba09eaea1b feat(swagger): update swagger (#5217)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-19 22:06:30 +02:00
Ettore Di Giacinto
61cc76c455 chore(autogptq): drop archived backend (#5214)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-19 15:52:29 +02:00
Ettore Di Giacinto
8abecb4a18 chore: bump grpc limits to 50MB (#5212)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-19 08:53:24 +02:00
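gRPC caps incoming messages at 4MB by default, which large payloads (audio, images, tensors) can exceed; both the server options and the client call options have to be raised. A grpc-go sketch with a hypothetical address:

```go
package main

import (
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

const maxMsgSize = 50 * 1024 * 1024 // 50MB, matching the commit above

func main() {
	// Server side: lift the default 4MB receive limit and set a
	// matching send limit.
	srv := grpc.NewServer(
		grpc.MaxRecvMsgSize(maxMsgSize),
		grpc.MaxSendMsgSize(maxMsgSize),
	)
	defer srv.Stop()

	// Client side: per-call limits must be raised as well.
	conn, err := grpc.Dial("localhost:50051", // hypothetical address
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithDefaultCallOptions(
			grpc.MaxCallRecvMsgSize(maxMsgSize),
			grpc.MaxCallSendMsgSize(maxMsgSize),
		),
	)
	if err != nil {
		panic(err)
	}
	defer conn.Close()
}
```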
LocalAI [bot]
8b3f76d8e6 chore: ⬆️ Update ggml-org/llama.cpp to 6408210082cc0a61b992b487be7e2ff2efbb9e36 (#5211)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-18 21:45:48 +00:00
Ettore Di Giacinto
4e0497f1a6 chore(model gallery): add pictor-1338-qwenp-1.5b (#5208)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-18 10:47:23 +02:00
Ettore Di Giacinto
ba88c9f451 chore(ci): use gemma-3-12b-it for model notifications (twitter)
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-04-18 10:38:36 +02:00
Ettore Di Giacinto
a598285825 chore(model gallery): add google-gemma-3-27b-it-qat-q4_0-small (#5207)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-18 10:35:48 +02:00
Ettore Di Giacinto
cb7a172897 chore(ci): use gemma-3-12b-it for model notifications
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-04-18 10:20:33 +02:00
Ettore Di Giacinto
771be28dfb ci: use gemma3 for notifications of releases
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-04-18 10:19:52 +02:00
Ettore Di Giacinto
7d6b3eb42d chore(model gallery): add readyart_amoral-fallen-omega-gemma3-12b (#5206)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-18 10:17:39 +02:00
Ettore Di Giacinto
0bb33fab55 chore(model gallery): add ibm-granite_granite-3.3-2b-instruct (#5205)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-18 10:15:05 +02:00
Ettore Di Giacinto
e3bf7f77f7 chore(model gallery): add ibm-granite_granite-3.3-8b-instruct (#5204)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-18 09:59:17 +02:00
LocalAI [bot]
bd1707d339 chore: ⬆️ Update ggml-org/llama.cpp to 2f74c354c0f752ed9aabf7d3a350e6edebd7e744 (#5203)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-17 21:52:12 +00:00
Ettore Di Giacinto
0474804541 fix(ci): remove duplicate entry
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-17 19:51:21 +02:00
Ettore Di Giacinto
72693b3917 feat(install.sh): allow uninstalling with --uninstall (#5202)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-17 16:32:23 +02:00
Florian Bachmann
a03b70010f fix(talk): Talk interface sends content-type headers to chatgpt (#5200)
Talk interface sends content-type headers to chatgpt

Signed-off-by: baflo <834350+baflo@users.noreply.github.com>
2025-04-17 15:02:11 +02:00
Ettore Di Giacinto
e3717e5c1a chore(model gallery): add qwen2.5-14b-instruct-1m (#5201)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-17 10:42:22 +02:00
Ettore Di Giacinto
c8f6858218 chore(ci): add latest images for core (#5198)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-17 10:00:18 +02:00
Ettore Di Giacinto
06d7cc43ae chore(model gallery): add dreamgen_lucid-v1-nemo (#5196)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-17 09:10:09 +02:00
Ettore Di Giacinto
f2147cb850 chore(model gallery): add thedrummer_rivermind-12b-v1 (#5195)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-17 09:02:54 +02:00
Ettore Di Giacinto
75bb9f4c28 chore(model gallery): add menlo_rezero-v0.1-llama-3.2-3b-it-grpo-250404 (#5194)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-17 09:00:11 +02:00
LocalAI [bot]
a2ef4b1e07 chore: ⬆️ Update ggml-org/llama.cpp to 015022bb53387baa8b23817ac03743705c7d472b (#5192)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-04-17 08:04:37 +02:00
LocalAI [bot]
161c9fe2db docs: ⬆️ update docs version mudler/LocalAI (#5191)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-16 22:13:49 +02:00
Ettore Di Giacinto
7547463f81 Update quickstart.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-04-16 08:48:55 +02:00
Gianluca Boiano
32e4dfd47b chore(model gallery): add suno-ai bark-cpp model (#5187)
Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
2025-04-16 08:22:46 +02:00
Gianluca Boiano
f67e5dec68 fix: bark-cpp: assign FLAG_TTS to bark-cpp backend (#5186)
Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
2025-04-16 08:21:30 +02:00
LocalAI [bot]
297d54acea chore: ⬆️ Update ggml-org/llama.cpp to 80f19b41869728eeb6a26569957b92a773a2b2c6 (#5183)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-15 22:50:32 +00:00
Ettore Di Giacinto
56f44d448c chore(docs): decrease logo size, minor enhancements
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-15 22:00:51 +02:00
Richard Palethorpe
0f0fafacd9 fix(stablediffusion): Avoid overwriting SYCL specific flags from outer make call (#5181)
Signed-off-by: Richard Palethorpe <io@richiejp.com>
2025-04-15 19:31:25 +02:00
Ettore Di Giacinto
4f239bac89 feat: rebrand - LocalAGI and LocalRecall join the LocalAI stack family (#5159)
* wip

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* docs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Update lotusdocs and hugo

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* rephrasing

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Latest fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Adjust readme section

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-15 17:51:24 +02:00
Ettore Di Giacinto
04d74ac648 chore(model gallery): add m1-32b (#5182)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-15 17:17:17 +02:00
Richard Palethorpe
18c3dc33ee fix(stablediffusion): Pass ROCM LD CGO flags through to recursive make (#5179)
Signed-off-by: Richard Palethorpe <io@richiejp.com>
2025-04-15 09:27:29 +02:00
LocalAI [bot]
508cfa7369 chore: ⬆️ Update ggml-org/llama.cpp to d6d2c2ab8c8865784ba9fef37f2b2de3f2134d33 (#5178)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-14 23:10:16 +02:00
Ettore Di Giacinto
1f94cddbae chore(model gallery): add nvidia_llama-3.1-8b-ultralong-4m-instruct (#5177)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-14 12:30:55 +02:00
Ettore Di Giacinto
21ae7b4cd4 chore(model gallery): add nvidia_llama-3.1-8b-ultralong-1m-instruct (#5176)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-14 12:28:09 +02:00
Ettore Di Giacinto
bef22ab547 chore(model gallery): add skywork_skywork-or1-32b-preview (#5175)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-14 12:25:43 +02:00
Ettore Di Giacinto
eb04e8cdcf chore(model gallery): add skywork_skywork-or1-math-7b (#5174)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-14 12:23:44 +02:00
Ettore Di Giacinto
17e533a086 chore(model gallery): add skywork_skywork-or1-7b-preview (#5173)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-14 12:20:20 +02:00
qwerty108109
4fc68409ff Update README.md (#5172)
Modified the README.md to separate the different docker run commands, making them easier to copy into the terminal.

Signed-off-by: qwerty108109 <97707491+qwerty108109@users.noreply.github.com>
2025-04-14 10:48:10 +02:00
Richard Palethorpe
e587044449 fix(stablediffusion): Avoid GGML commit which causes CUDA compile error (#5170)
Signed-off-by: Richard Palethorpe <io@richiejp.com>
2025-04-14 09:29:09 +02:00
LocalAI [bot]
1f09db5161 chore: ⬆️ Update ggml-org/llama.cpp to 71e90e8813f90097701e62f7fce137d96ddf41e2 (#5171)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-13 21:46:07 +00:00
Ettore Di Giacinto
05b744f086 chore(model gallery): add daichi-12b (#5169)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-13 15:53:11 +02:00
Ettore Di Giacinto
89ca4bc02d chore(model gallery): add hamanasu-magnum-4b-i1 (#5168)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-13 14:37:59 +02:00
Ettore Di Giacinto
e626aa48a4 chore(model gallery): add hamanasu-adventure-4b-i1 (#5167)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-13 14:35:57 +02:00
Ettore Di Giacinto
752b5e0339 chore(model gallery): add mag-picaro-72b (#5166)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-13 14:34:14 +02:00
Ettore Di Giacinto
637d72d6e3 chore(model gallery): add lightthinker-qwen (#5165)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-13 14:31:05 +02:00
LocalAI [bot]
f3bfec580a chore: ⬆️ Update ggml-org/llama.cpp to bc091a4dc585af25c438c8473285a8cfec5c7695 (#5158)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-13 08:23:41 +00:00
Ettore Di Giacinto
165c1ddff3 chore(model gallery): add tesslate_gradience-t1-3b-preview (#5160)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-12 10:37:40 +02:00
Ettore Di Giacinto
fb83238e9e chore(model gallery): add zyphra_zr1-1.5b (#5157)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-11 10:06:05 +02:00
Ettore Di Giacinto
700bfa41c7 chore(model gallery): add agentica-org_deepcoder-1.5b-preview (#5156)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-11 10:03:59 +02:00
LocalAI [bot]
25bdc350df chore: ⬆️ Update ggml-org/llama.cpp to 64eda5deb9859e87a020e56bab5d2f9ca956f1de (#5155)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-10 21:44:55 +00:00
Richard Palethorpe
1b899e1a68 feat(stablediffusion): Enable SYCL (#5144)
* feat(sycl): Enable SYCL for stable diffusion

This is a pain because we compile with CGO, but SD is compiled with
CMake. I don't think we can easily use CMake to set the necessary
linker flags, and I could not find pkg-config calls that would fully
set them, so some of the flags are set manually.

See https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl-link-line-advisor.html
for reference. I also resorted to searching the shared object files in
MKLROOT/lib for the symbols.

Signed-off-by: Richard Palethorpe <io@richiejp.com>

* fix(ci): Don't set nproc on cmake

Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2025-04-10 15:20:53 +02:00
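Because the final link is driven by the Go toolchain rather than CMake, flags like these end up declared through cgo directives. A sketch only: `-fsycl` matches the commit, while the MKL library names are illustrative stand-ins for whatever the link-line advisor actually prescribes.

```go
// Package stablediffusion: hypothetical cgo stanza showing where
// manually-determined SYCL/oneMKL flags would live. Building it
// requires an Intel oneAPI environment.
package stablediffusion

/*
#cgo CXXFLAGS: -fsycl
#cgo LDFLAGS: -fsycl -lmkl_core -lmkl_sequential -lmkl_intel_ilp64
*/
import "C"
```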
Ettore Di Giacinto
3bf13f8c69 chore(model gallery): add soob3123_amoral-cogito-v1-preview-qwen-14b (#5154)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-10 10:07:56 +02:00
Ettore Di Giacinto
7a00729374 chore(model gallery): add trappu_magnum-picaro-0.7-v2-12b (#5153)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-10 10:03:42 +02:00
Ettore Di Giacinto
d484028532 feat(diffusers): add support for Lumina2Text2ImgPipeline (#4806)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-10 09:55:51 +02:00
LocalAI [bot]
0eb7fc2c41 chore: ⬆️ Update ggml-org/llama.cpp to d3bd7193ba66c15963fd1c59448f22019a8caf6e (#5152)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-09 22:01:25 +00:00
Ettore Di Giacinto
a69e30e0c9 chore(model gallery): add agentica-org_deepcoder-14b-preview (#5151)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-09 16:55:47 +02:00
Ettore Di Giacinto
9c018e6bff chore(model gallery): add deepcogito_cogito-v1-preview-llama-70b (#5150)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-09 16:54:59 +02:00
Ettore Di Giacinto
281e818047 chore(model gallery): add deepcogito_cogito-v1-preview-llama-70b (#5150)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-09 16:53:28 +02:00
Ettore Di Giacinto
270f0e2157 chore(model gallery): add deepcogito_cogito-v1-preview-qwen-32b (#5149)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-09 16:48:15 +02:00
Ettore Di Giacinto
673e59e76c chore(model gallery): add deepcogito_cogito-v1-preview-llama-3b (#5148)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-09 16:42:53 +02:00
LocalAI [bot]
5a8a2adb44 chore: ⬆️ Update ggml-org/llama.cpp to b32efad2bc42460637c3a364c9554ea8217b3d7f (#5146)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-04-09 15:39:04 +02:00
Ettore Di Giacinto
a7317d23bf chore(model gallery): add deepcogito_cogito-v1-preview-llama-8b (#5147)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-09 10:02:09 +02:00
Ettore Di Giacinto
2bab9b5fe2 fix: fix gallery name for cogito-v1-preview-qwen-14B
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-04-08 22:15:32 +02:00
Ettore Di Giacinto
081be3ba7d chore(model gallery): add cogito-v1-preview-qwen-14b (#5145)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-08 22:04:14 +02:00
Ettore Di Giacinto
25e6f21322 chore(deps): bump llama.cpp to 4ccea213bc629c4eef7b520f7f6c59ce9bbdaca0 (#5143)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-08 11:26:06 +02:00
Ettore Di Giacinto
b4df1c9cf3 fix(gemma): improve prompt for tool calls (#5142)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-08 10:12:42 +02:00
Ettore Di Giacinto
4fbd6609f2 chore(model gallery): add meta-llama_llama-4-scout-17b-16e-instruct (#5141)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-08 10:12:28 +02:00
Ettore Di Giacinto
7387932f89 chore(model gallery): add mensa-beta-14b-instruct-i1 (#5140)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-08 10:01:24 +02:00
Ettore Di Giacinto
59c37e67b2 chore(model gallery): add eurydice-24b-v2-i1 (#5139)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-08 09:56:29 +02:00
Ettore Di Giacinto
c09d227647 chore(model gallery): add watt-ai_watt-tool-70b (#5138)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-08 09:42:49 +02:00
Ettore Di Giacinto
547d322b28 chore(model gallery): add arliai_qwq-32b-arliai-rpr-v (#5137)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-08 09:40:26 +02:00
dependabot[bot]
a6f0bb410f chore(deps): bump securego/gosec from 2.22.0 to 2.22.3 (#5134)
Bumps [securego/gosec](https://github.com/securego/gosec) from 2.22.0 to 2.22.3.
- [Release notes](https://github.com/securego/gosec/releases)
- [Changelog](https://github.com/securego/gosec/blob/master/.goreleaser.yml)
- [Commits](https://github.com/securego/gosec/compare/v2.22.0...v2.22.3)

---
updated-dependencies:
- dependency-name: securego/gosec
  dependency-version: 2.22.3
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-04-07 21:09:45 +00:00
Ettore Di Giacinto
710f624ecd fix(webui): improve model display, do not block view (#5133)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-07 18:03:25 +02:00
LocalAI [bot]
5018452be7 chore: ⬆️ Update ggml-org/llama.cpp to 916c83bfe7f8b08ada609c3b8e583cf5301e594b (#5130)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-06 21:51:51 +00:00
Ettore Di Giacinto
ece239966f chore: ⬆️ Update ggml-org/llama.cpp to 6bf28f0111ff9f21b3c1b1eace20c590281e7ba6 (#5127)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-06 14:01:51 +02:00
Ettore Di Giacinto
3b8bc7e64c chore(model gallery): add open-thoughts_openthinker2-7b (#5129)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-06 10:53:22 +02:00
Ettore Di Giacinto
fc73b2b430 chore(model gallery): add open-thoughts_openthinker2-32b (#5128)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-06 10:48:21 +02:00
Ettore Di Giacinto
901dba6063 chore(model gallery): add gemma-3-27b-it-qat (#5124)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-05 08:46:49 +02:00
LocalAI [bot]
b88a7a4550 chore: ⬆️ Update ggml-org/llama.cpp to 3e1d29348b5d77269f6931500dd1c1a729d429c8 (#5123)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-04 21:49:53 +00:00
Ettore Di Giacinto
106e40845f chore(model gallery): add katanemo_arch-function-chat-3b (#5122) 2025-04-04 10:45:44 +02:00
Ettore Di Giacinto
0064bec8f5 chore(model gallery): add katanemo_arch-function-chat-1.5b (#5121) 2025-04-04 10:31:44 +02:00
Ettore Di Giacinto
9e6dbb0b5a chore(model gallery): add katanemo_arch-function-chat-7b (#5120)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-04 10:29:47 +02:00
Ettore Di Giacinto
d26e61388b chore(model gallery): add tesslate_synthia-s1-27b (#5119)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-04 10:27:52 +02:00
Ettore Di Giacinto
31a7084c75 chore(model gallery): add gemma-3-4b-it-qat (#5118)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-04 10:23:56 +02:00
Ettore Di Giacinto
128612a6fc chore(model gallery): add gemma-3-12b-it-qat (#5117)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-04 10:21:45 +02:00
LocalAI [bot]
6af3f46bc3 chore: ⬆️ Update ggml-org/llama.cpp to c262beddf29f3f3be5bbbf167b56029a19876956 (#5116)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-03 22:59:49 +00:00
Richard Palethorpe
d2cf8ef070 fix(sycl): kernel not found error by forcing -fsycl (#5115)
* chore(sycl): Update oneapi to 2025:1

Signed-off-by: Richard Palethorpe <io@richiejp.com>

* fix(sycl): Pass -fsycl flag as workaround

-fsycl should be set by llama.cpp's cmake file, but something goes wrong
and it doesn't appear to get added

Signed-off-by: Richard Palethorpe <io@richiejp.com>

* fix(build): Speed up llama build by using all CPUs

Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2025-04-03 16:22:59 +02:00
Ettore Di Giacinto
259ad3cfe6 chore(model gallery): add all-hands_openhands-lm-1.5b-v0.1 (#5114)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-03 10:25:46 +02:00
Ettore Di Giacinto
18b320d577 chore(deps): bump llama.cpp to f01bd02376f919b05ee635f438311be8dfc91d7c (#5110)
chore(deps): bump llama.cpp to 'f01bd02376f919b05ee635f438311be8dfc91d7c'

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-03 10:23:14 +02:00
Ettore Di Giacinto
89e151f035 chore(model gallery): add all-hands_openhands-lm-7b-v0.1 (#5113)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-03 10:20:20 +02:00
Ettore Di Giacinto
22060f6410 chore(model gallery): add burtenshaw_gemmacoder3-12b (#5112)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-03 10:17:57 +02:00
Ettore Di Giacinto
7ee3288460 chore(model gallery): add all-hands_openhands-lm-32b-v0.1 (#5111)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-03 10:15:57 +02:00
LocalAI [bot]
cbbc954a8c chore: ⬆️ Update ggml-org/llama.cpp to f423981ac806bf031d83784bcb47d2721bc70f97 (#5108)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-02 09:22:53 +02:00
Ettore Di Giacinto
2c425e9c69 feat(loader): enhance single active backend by treating as singleton (#5107)
feat(loader): enhance single active backend by treating as singleton

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-01 20:58:11 +02:00
LocalAI [bot]
c59975ab05 chore: ⬆️ Update ggml-org/llama.cpp to c80a7759dab10657b9b6c3e87eef988a133b9b6a (#5105)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-04-01 00:01:34 +02:00
Ettore Di Giacinto
05f7004487 fix: race during stop of active backends (#5106)
* chore: drop double call to stop all backends, refactors

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: take the lock when cycling over the models to delete

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-01 00:01:10 +02:00
Ettore Di Giacinto
2f9203cd2a chore: drop remoteLibraryURL from kong vars (#5103)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-31 22:48:17 +02:00
LocalAI [bot]
f09b33f2ef docs: ⬆️ update docs version mudler/LocalAI (#5104)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-31 22:48:03 +02:00
Ettore Di Giacinto
65470b0ab1 Update README 2025-03-31 21:51:09 +02:00
Ettore Di Giacinto
9a23fe662b Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-03-31 19:35:34 +02:00
LocalAI [bot]
6d7ac09e96 chore: ⬆️ Update ggml-org/llama.cpp to 4663bd353c61c1136cd8a97b9908755e4ab30cec (#5100)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-30 21:59:30 +00:00
Ettore Di Giacinto
c2a39e3639 fix(llama.cpp): properly handle sigterm (#5099)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-30 18:08:29 +02:00
Ettore Di Giacinto
ae625a4d00 chore(model gallery): add hammer2.0-7b (#5098)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-30 09:50:21 +02:00
Ettore Di Giacinto
7f3a029596 chore(model gallery): add forgotten-abomination-70b-v5.0 (#5097)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-30 09:48:24 +02:00
Ettore Di Giacinto
b34cf00819 chore(model gallery): add galactic-qwen-14b-exp1 (#5096)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-30 09:46:51 +02:00
LocalAI [bot]
d4a10b4300 chore: ⬆️ Update ggml-org/llama.cpp to 0bb2919335d00ff0bc79d5015da95c422de51f03 (#5095)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-29 21:40:45 +00:00
Ettore Di Giacinto
9c74d74f7b feat(gguf): guess default context size from file (#5089)
feat(gguf): guess default context size from file

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-29 14:42:14 +01:00
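With this change, a model definition can omit `context_size` and LocalAI derives a sensible default from the GGUF metadata of the model file. A minimal sketch of a model YAML relying on that behavior (model name and file are hypothetical):

```yaml
name: my-model
parameters:
  model: my-model.Q4_K_M.gguf
# context_size is deliberately omitted: per the commit above, LocalAI
# now guesses a default from the GGUF file's metadata.
```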
Ettore Di Giacinto
679ee7bea4 chore(model gallery): add chaoticneutrals_very_berry_qwen2_7b (#5093)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-29 12:34:49 +01:00
Ettore Di Giacinto
77d7dc62c4 chore(model gallery): add tesslate_tessa-t1-3b (#5092)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-29 12:15:28 +01:00
Ettore Di Giacinto
699519d1fe chore(model gallery): add tesslate_tessa-t1-7b (#5091)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-29 12:12:01 +01:00
Ettore Di Giacinto
8faf39d34e chore(model gallery): add tesslate_tessa-t1-14b (#5090)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-29 11:58:39 +01:00
Ettore Di Giacinto
5d261a6fcd chore(model gallery): add tesslate_tessa-t1-32b (#5088)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-29 11:53:47 +01:00
Ettore Di Giacinto
22d5727089 chore(model gallery): add tarek07_legion-v2.1-llama-70b (#5087) 2025-03-29 11:27:06 +01:00
LocalAI [bot]
c965197d6f chore: ⬆️ Update ggml-org/llama.cpp to b4ae50810e4304d052e630784c14bde7e79e4132 (#5085)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-28 21:37:18 +00:00
Ettore Di Giacinto
994a6c4939 chore(model gallery): fallen-safeword-70b-r1-v4.1 (#5084) 2025-03-28 15:20:38 +01:00
Ettore Di Giacinto
f926d2a72b chore(model gallery): thoughtless-fallen-abomination-70b-r1-v4.1-i1 (#5083) 2025-03-28 15:11:54 +01:00
Ettore Di Giacinto
ddeb9ed93e chore(model gallery): qwen2.5-14b-instruct-1m-unalign-i1 (#5082) 2025-03-28 15:08:33 +01:00
Ettore Di Giacinto
c7e99c7b59 chore(model gallery): gemma-3-starshine-12b-i1 (#5081) 2025-03-28 14:50:39 +01:00
Ettore Di Giacinto
6fabc92e56 chore(model gallery): add soob3123_amoral-gemma3-12b-v2 (#5080)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-28 14:45:02 +01:00
LocalAI [bot]
4645b3c919 chore: ⬆️ Update ggml-org/llama.cpp to 5dec47dcd411fdf815a3708fd6194e2b13d19006 (#5079)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-27 23:32:33 +00:00
Dave
134fe2705c fix: ensure git-lfs is present (#5078)
devcontainer clean builds had an issue with git-lfs -- should this be installed for _all_ images for safety?

Signed-off-by: Dave Lee <dave@gray101.com>
2025-03-27 22:23:28 +01:00
LocalAI [bot]
3cca32ba7e chore: ⬆️ Update ggml-org/llama.cpp to b3298fa47a2d56ae892127ea038942ab1cada190 (#5077)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-27 10:47:07 +01:00
Ettore Di Giacinto
c069e61b26 chore(model gallery): add textsynth-8b-i1 (#5076)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-26 14:40:19 +01:00
Ettore Di Giacinto
7fa159e164 chore(model gallery): add blacksheep-24b-i1 (#5075)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-26 14:37:30 +01:00
Ettore Di Giacinto
5f92025617 chore(model gallery): add gemma-3-glitter-12b-i1 (#5074)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-26 10:19:00 +01:00
LocalAI [bot]
333e1bc732 chore: ⬆️ Update ggml-org/llama.cpp to ef19c71769681a0b3dde6bc90911728376e5d236 (#5073)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-26 09:51:20 +01:00
Ettore Di Giacinto
e90b97c144 chore(model gallery): add alamios_mistral-small-3.1-draft-0.5b (#5071)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-25 10:10:45 +01:00
Ettore Di Giacinto
747eeb1d46 chore(model gallery): add helpingai_helpingai3-raw (#5070)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-25 10:09:00 +01:00
Ettore Di Giacinto
5d2c53abc0 chore(model gallery): add jdineen_llama-3.1-8b-think (#5069)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-25 10:06:24 +01:00
LocalAI [bot]
0b1e721242 chore: ⬆️ Update ggml-org/llama.cpp to c95fa362b3587d1822558f7e28414521075f254f (#5068)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-24 21:37:16 +00:00
Ettore Di Giacinto
8c76a9ce99 chore(model gallery): add dusk_rainbow (#5066)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-24 09:49:32 +01:00
Ettore Di Giacinto
338321af5b chore(model gallery): add eximius_persona_5b (#5065)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-24 09:30:20 +01:00
Ettore Di Giacinto
2774a92484 chore(model gallery): add impish_llama_3b (#5064)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-24 09:27:04 +01:00
LocalAI [bot]
1a6bfb41a1 chore: ⬆️ Update ggml-org/llama.cpp to 77f9c6bbe55fccd9ea567794024cb80943947901 (#5062)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-23 21:37:14 +00:00
Ettore Di Giacinto
314981eaf8 chore(model gallery): add fiendish_llama_3b (#5061)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-23 10:00:19 +01:00
Ettore Di Giacinto
d7266c633d chore(model gallery): add sicariussicariistuff_x-ray_alpha (#5060)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-23 09:56:35 +01:00
Ettore Di Giacinto
eb4d5f2b95 chore(model gallery): add mawdistical_mawdistic-nightlife-24b (#5059)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-23 09:52:50 +01:00
Ettore Di Giacinto
c63b449ad6 chore(model gallery): add huihui-ai_gemma-3-1b-it-abliterated (#5058)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-23 09:35:05 +01:00
Ettore Di Giacinto
dd4a778c2c chore(model gallery): add thedrummer_fallen-gemma3-27b-v1 (#5057)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-23 09:32:58 +01:00
Ettore Di Giacinto
a0896d21d6 chore(model gallery): add thedrummer_fallen-gemma3-12b-v1 (#5056)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-23 09:31:37 +01:00
Ettore Di Giacinto
0e697f951a chore(model gallery): add thedrummer_fallen-gemma3-4b-v1 (#5055)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-23 09:30:17 +01:00
Ettore Di Giacinto
fa4bb9082d chore(model gallery): add knoveleng_open-rs3 (#5054)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-23 09:27:27 +01:00
LocalAI [bot]
8ff7b15441 chore: ⬆️ Update ggml-org/llama.cpp to ba932dfb50cc694645b1a148c72f8c06ee080b17 (#5053)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-22 22:18:55 +00:00
LocalAI [bot]
dd45f85a20 chore: ⬆️ Update ggml-org/llama.cpp to 4375415b4abf94fb36a5fd15f233ac0ee23c0bd1 (#5052)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-21 21:36:25 +00:00
Ettore Di Giacinto
decdd9e522 chore(model gallery): add luvgpt_phi3-uncensored-chat (#5051)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-21 09:11:07 +01:00
Ettore Di Giacinto
31a21d4a2c chore(model gallery): add sao10k_llama-3.3-70b-vulpecula-r1 (#5050)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-21 09:08:55 +01:00
Ettore Di Giacinto
2c129843a7 chore(model gallery): add qwen-writerdemo-7b-s500-i1 (#5049)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-21 09:03:39 +01:00
LocalAI [bot]
ce71a0bcfb chore: ⬆️ Update ggml-org/llama.cpp to e04643063b3d240b8c0fdba98677dff6ba346784 (#5047)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-20 21:34:51 +00:00
Ettore Di Giacinto
0a32c38317 chore(model gallery): add basic function template for gemma
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-20 09:32:21 +01:00
Ettore Di Giacinto
36f596f260 chore(model gallery): add soob3123_amoral-gemma3-4b (#5046)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-20 09:30:04 +01:00
Ettore Di Giacinto
953552545b chore(model gallery): add samsungsailmontreal_bytecraft (#5045)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-20 09:27:33 +01:00
Ettore Di Giacinto
835e55b1de chore(model gallery): add rootxhacker_apollo-v3-32b (#5044)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-20 09:20:42 +01:00
Ettore Di Giacinto
dcd2921eaa chore(model gallery): add gemma-3-4b-it-uncensored-dbl-x-i1 (#5043)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-20 09:17:20 +01:00
LocalAI [bot]
5e6459fd18 chore: ⬆️ Update ggml-org/llama.cpp to 568013d0cd3d5add37c376b3d5e959809b711fc7 (#5042)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-19 21:47:18 +00:00
Ettore Di Giacinto
50ddb3eb59 chore(model gallery): add nvidia_llama-3_3-nemotron-super-49b-v1 (#5041)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-19 09:37:27 +01:00
Ettore Di Giacinto
5eebfee4b5 chore(model gallery): add gryphe_pantheon-rp-1.8-24b-small-3.1 (#5040)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-19 09:32:47 +01:00
Ettore Di Giacinto
567919ea90 chore(model gallery): add mistralai_mistral-small-3.1-24b-instruct-2503 (#5039)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-19 09:29:23 +01:00
LocalAI [bot]
27a3997530 chore(model-gallery): ⬆️ update checksum (#5036)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-19 09:18:40 +01:00
LocalAI [bot]
192ba2c657 chore: ⬆️ Update ggml-org/llama.cpp to d84635b1b085d54d6a21924e6171688d6e3dfb46 (#5035)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-18 22:23:39 +00:00
Ettore Di Giacinto
92abac9ca8 chore(model gallery): add soob3123_amoral-gemma3-12b (#5034)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-18 09:38:05 +01:00
Ettore Di Giacinto
04ebbbd73a chore(model gallery): add mlabonne_gemma-3-4b-it-abliterated (#5033)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-18 09:36:14 +01:00
Ettore Di Giacinto
55305e0d95 chore(model gallery): add mlabonne_gemma-3-12b-it-abliterated (#5032)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-18 09:32:41 +01:00
Ettore Di Giacinto
67623639e4 chore(model gallery): add mlabonne_gemma-3-27b-it-abliterated (#5031)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-18 09:30:25 +01:00
LocalAI [bot]
cc76def342 chore: ⬆️ Update ggml-org/llama.cpp to b1b132efcba216c873715c483809730bb253f4a1 (#5029)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-17 21:43:15 +00:00
Ettore Di Giacinto
4967fa5928 chore(model gallery): disable gemma3 mmproj
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-17 12:34:21 +01:00
Ettore Di Giacinto
2b98e4ec56 chore(model gallery): update gemma3 URLs
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-17 12:22:35 +01:00
Ettore Di Giacinto
fa1d058ee2 chore(model gallery): add mmproj files for gemma3 models (#5028)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-17 12:11:46 +01:00
Ettore Di Giacinto
a49a588bfa chore(model gallery): add readyart_forgotten-safeword-70b-3.6 (#5027)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-17 11:50:34 +01:00
LocalAI [bot]
ca7dda61c6 chore: ⬆️ Update ggml-org/llama.cpp to 8ba95dca2065c0073698afdfcda4c8a8f08bf0d9 (#5026)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-16 21:42:17 +00:00
Ettore Di Giacinto
ffedddd76d chore(model gallery): add beaverai_mn-2407-dsk-qwqify-v0.1-12b (#5024)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-16 09:33:19 +01:00
Ettore Di Giacinto
766c76ae8e chore(model gallery): add pocketdoc_dans-sakurakaze-v1.0.0-12b (#5023)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-16 09:29:48 +01:00
LocalAI [bot]
3096ff33e9 chore: ⬆️ Update ggml-org/llama.cpp to f4c3dd5daa3a79f713813cf1aabdc5886071061d (#5022)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-15 21:43:48 +00:00
Ettore Di Giacinto
90a7451da4 chore(model gallery): add allura-org_bigger-body-70b (#5021)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-15 14:43:51 +01:00
LocalAI [bot]
529a4b9ee8 chore: ⬆️ Update ggml-org/llama.cpp to 9f2250ba722738ec0e6ab684636268a79160c854 (#5019)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-14 21:45:54 +00:00
Ettore Di Giacinto
0567e104eb chore(model gallery): add eurollm-9b-instruct (#5017)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-14 09:25:44 +01:00
Ettore Di Giacinto
ecbeacd022 chore(model gallery): add prithivmlmods_viper-coder-32b-elite13 (#5016)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-14 09:20:27 +01:00
Ettore Di Giacinto
2772960e41 chore(model gallery): add nousresearch_deephermes-3-llama-3-3b-preview (#5015)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-14 09:16:17 +01:00
Ettore Di Giacinto
1b694191e2 chore(model gallery): add nousresearch_deephermes-3-mistral-24b-preview (#5014)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-14 09:13:27 +01:00
Ettore Di Giacinto
69578a5f8f chore(model gallery): add models/qgallouedec_gemma-3-27b-it-codeforces-sft (#5013)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-14 09:11:13 +01:00
LocalAI [bot]
7d96cfe72b chore: ⬆️ Update ggml-org/llama.cpp to 84d547554123a62e9ac77107cb20e4f6cc503af4 (#5011)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-13 22:30:17 +00:00
Ettore Di Giacinto
423514a5a5 fix(clip): do not imply GPU offload by default (#5010)
* fix(clip): do not imply GPUs by default

Until a better solution is found upstream, be conservative and default
to CPU.

https://github.com/ggml-org/llama.cpp/pull/12322
https://github.com/ggml-org/llama.cpp/pull/12322#issuecomment-2720970695

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* allow overriding gpu via backend options

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-13 15:14:11 +01:00
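The second part of this fix lets users opt back into GPU offload for clip through the model's backend options. A minimal sketch of a model YAML, assuming the option key is literally `gpu` (the exact key is an assumption, not confirmed by the commit message):

```yaml
name: my-multimodal-model
backend: llama-cpp
parameters:
  model: my-model.gguf
options:
# hypothetical option key: force clip/mmproj offload to the GPU
- gpu
```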
Ettore Di Giacinto
12568c7d6d chore(model gallery): add gemma-3-1b-it (#5009)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-13 09:48:40 +01:00
Ettore Di Giacinto
8d16a0a536 chore(model gallery): add gemma-3-4b-it (#5008)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-13 09:47:01 +01:00
Ettore Di Giacinto
87ca801f00 chore(model gallery): add gemma-3-12b-it (#5007)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-13 09:44:49 +01:00
Ettore Di Giacinto
e4ecbb6c30 chore(model gallery): add gemma-3-27b-it (#5003)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-13 08:28:28 +01:00
LocalAI [bot]
b1a67de2b9 chore: ⬆️ Update ggml-org/llama.cpp to f08f4b3187b691bb08a8884ed39ebaa94e956707 (#5006)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-13 01:01:30 +00:00
LocalAI [bot]
71a23910fe chore: ⬆️ Update ggml-org/llama.cpp to 80a02aa8588ef167d616f76f1781b104c245ace0 (#5004)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-12 16:26:09 +00:00
LocalAI [bot]
0ede31f9cf chore: ⬆️ Update ggml-org/llama.cpp to 10f2e81809bbb69ecfe64fc8b4686285f84b0c07 (#4996)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-12 14:13:04 +00:00
Ettore Di Giacinto
9f5dcf2d1e feat(aio): update AIO image defaults (#5002)
* feat(aio): update AIO image defaults

cpu:
 - text-to-text: llama3.1
 - embeddings: granite-embeddings
 - vision: moondream2

gpu/intel:
 - text-to-text: localai-functioncall-qwen2.5-7b-v0.5
 - embeddings: granite-embeddings
 - vision: minicpm

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(aio): use minicpm as moondream2 stopped working

https://github.com/ggml-org/llama.cpp/pull/12322#issuecomment-2717483759

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-12 12:55:06 +01:00
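For reference, the AIO images ship with those defaults preconfigured, so a single container serves text, embeddings, and vision out of the box. A minimal docker-compose sketch for the CPU image (the tag follows the project's published AIO naming and is an assumption here):

```yaml
services:
  localai:
    # assumed published tag for the CPU all-in-one image
    image: localai/localai:latest-aio-cpu
    ports:
    - "8080:8080"
```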
Ettore Di Giacinto
e878556e98 chore(model gallery): add trashpanda-org_qwq-32b-snowdrop-v0 (#5000)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-12 08:26:09 +01:00
Ettore Di Giacinto
b096928172 chore(model gallery): add open-r1_olympiccoder-7b (#4999)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-12 08:24:35 +01:00
Ettore Di Giacinto
db7442ae67 chore(model gallery): add open-r1_olympiccoder-32b (#4998)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-12 08:23:01 +01:00
Ettore Di Giacinto
b6cd430e08 chore(model gallery): add thedrummer_gemmasutra-small-4b-v1 (#4997)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-12 08:19:51 +01:00
LocalAI [bot]
478e50cda2 chore: ⬆️ Update ggml-org/llama.cpp to 2c9f833d17bb5b8ea89dec663b072b5420fc5438 (#4991)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-03-11 11:19:03 +00:00
Ettore Di Giacinto
1db2b9943c chore(deps): Bump grpcio to 1.71.0 (#4993)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-11 09:44:21 +01:00
Ettore Di Giacinto
ac41aa8b67 chore(model gallery): add openpipe_deductive-reasoning-qwen-32b (#4995)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-11 09:44:07 +01:00
Ettore Di Giacinto
156a98e2e7 chore(model gallery): add openpipe_deductive-reasoning-qwen-14b (#4994)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-11 09:40:38 +01:00
dependabot[bot]
d88ec1209e chore(deps): Bump docs/themes/hugo-theme-relearn from 4a4b60e to 9a020e7 (#4988)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `4a4b60e` to `9a020e7`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](4a4b60ef04...9a020e7ead)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-03-11 09:39:04 +01:00
dependabot[bot]
fde8dbfc80 chore(deps): Bump appleboy/ssh-action from 1.2.1 to 1.2.2 (#4978)
Bumps [appleboy/ssh-action](https://github.com/appleboy/ssh-action) from 1.2.1 to 1.2.2.
- [Release notes](https://github.com/appleboy/ssh-action/releases)
- [Changelog](https://github.com/appleboy/ssh-action/blob/master/.goreleaser.yaml)
- [Commits](https://github.com/appleboy/ssh-action/compare/v1.2.1...v1.2.2)

---
updated-dependencies:
- dependency-name: appleboy/ssh-action
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-03-11 08:29:31 +01:00
Ettore Di Giacinto
879dc73eba Revert "chore(deps): Bump intel-extension-for-pytorch from 2.3.110+xpu to 2.6.10+xpu in /backend/python/diffusers" (#4992)
Revert "chore(deps): Bump intel-extension-for-pytorch from 2.3.110+xpu to 2.6…"

This reverts commit 1dfc52de16.
2025-03-11 08:29:05 +01:00
dependabot[bot]
1dfc52de16 chore(deps): Bump intel-extension-for-pytorch from 2.3.110+xpu to 2.6.10+xpu in /backend/python/diffusers (#4973)
chore(deps): Bump intel-extension-for-pytorch

Bumps intel-extension-for-pytorch from 2.3.110+xpu to 2.6.10+xpu.

---
updated-dependencies:
- dependency-name: intel-extension-for-pytorch
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-03-10 21:14:43 +00:00
Ettore Di Giacinto
1331129485 fix(routes): do not gate generated artifacts via key (#4971)
fix(routes): do not gate generated images via key

We generate unique URIs for images.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-10 15:58:25 +01:00
Ettore Di Giacinto
1cd98062e5 chore(model gallery): add hyperllama3.1-v2-i1 (#4970)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-10 10:52:36 +01:00
Ettore Di Giacinto
9791d9b77a chore(model gallery): add opencrystal-l3-15b-v2.1-i1 (#4969)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-10 10:50:02 +01:00
Ettore Di Giacinto
8956452a45 chore(model gallery): add llmevollama-3.1-8b-v0.1-i1 (#4968)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-10 10:46:49 +01:00
LocalAI [bot]
f3659fa49c chore: ⬆️ Update ggml-org/llama.cpp to 1e2f78a00450593e2dfa458796fcdd9987300dfc (#4966)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-09 21:44:52 +00:00
Ettore Di Giacinto
585f2be793 chore(model gallery): add tower-babel_babel-9b-chat (#4964)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-09 12:09:56 +01:00
LocalAI [bot]
d13f160222 chore: ⬆️ Update ggml-org/llama.cpp to 0fd7ca7a210bd4abc995cd728491043491dbdef7 (#4963)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-08 21:41:26 +00:00
Ettore Di Giacinto
db5495b9d7 chore(model gallery): add goppa-ai_goppa-logillama (#4962)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-08 11:37:40 +01:00
Ettore Di Giacinto
3def1ae232 chore(model gallery): add huihui-ai_qwq-32b-abliterated (#4961)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-08 11:34:37 +01:00
Ettore Di Giacinto
c6ebead8e5 chore(model gallery): add steelskull_l3.3-electra-r1-70b (#4960)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-08 11:23:42 +01:00
LocalAI [bot]
cff4a950e0 chore: ⬆️ Update ggml-org/llama.cpp to 7ab364390f92b0b8d83f69821a536b424838f3f8 (#4959)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-07 22:54:28 +00:00
Ettore Di Giacinto
e4fa894153 fix(llama.cpp): correctly handle embeddings in batches (#4957)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-07 19:29:52 +01:00
Ettore Di Giacinto
69caccfa82 chore(model gallery): add granite embeddings models (#4956)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-06 23:17:40 +01:00
Ettore Di Giacinto
ab50c13160 chore(model gallery): add nomic-embed-text-v1.5 (#4955)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-06 23:10:39 +01:00
LocalAI [bot]
56d4e82b14 chore: ⬆️ Update ggml-org/llama.cpp to 3d652bfddfba09022525067e672c3c145c074649 (#4954)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-06 21:54:14 +00:00
Ettore Di Giacinto
09b5bd48bc chore(model gallery): add rombo-org_rombo-llm-v3.1-qwq-32b (#4953)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-06 10:42:45 +01:00
Ettore Di Giacinto
957dcfb6a9 chore(model gallery): add qwen_qwq-32b (#4952)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-06 10:28:03 +01:00
Ettore Di Giacinto
67f7bffd18 chore(deps): update llama.cpp and sync with upstream changes (#4950)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-06 00:40:58 +01:00
Ettore Di Giacinto
de81b42b49 feat(ui): remove api key handling and small ui adjustments (#4948)
* chore(ui): drop set api key button

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(ui): show in-progress installs in model view

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(ui): improve text to image view

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-05 19:37:36 +01:00
Ettore Di Giacinto
06eb7e9fa7 chore(model gallery): add llama-3.3-magicalgirl-2.5-i1 (#4946)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-05 09:35:48 +01:00
Ettore Di Giacinto
45bc1ac566 chore(model gallery): add lolzinventor_meta-llama-3.1-8b-survivev3 (#4945)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-05 09:33:41 +01:00
Ettore Di Giacinto
02aafeff75 chore(model gallery): add llama-3.1-8b-instruct-uncensored-delmat-i1 (#4944)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-05 09:31:09 +01:00
Ettore Di Giacinto
6b46c52789 feat(ui): complete design overhaul (#4942)
This PR entirely changes the UI look and feel. It updates all
sections and also makes the UI mobile-ready.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-05 08:27:03 +01:00
LocalAI [bot]
d732e261a4 chore: ⬆️ Update ggml-org/llama.cpp to 5bbe6a9fe9a8796a9389c85accec89dbc4d91e39 (#4943)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-04 21:46:40 +00:00
Ettore Di Giacinto
807c574e91 chore(model gallery): add azura-qwen2.5-32b-i1 (#4941)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-04 10:33:15 +01:00
Ettore Di Giacinto
bb171a39b3 chore(model gallery): add llama-3.3-magicalgirl-2 (#4940)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-04 10:07:38 +01:00
Ettore Di Giacinto
941a4fc50e chore(model gallery): add boomer_qwen_72b-i1 (#4939)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-04 10:01:23 +01:00
Ettore Di Giacinto
afe65bd7bf chore(model gallery): add l3.3-geneticlemonade-unleashed-70b-i1 (#4938)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-04 09:56:31 +01:00
Ettore Di Giacinto
6f9762049c chore(model gallery): update qihoo360_tinyr1-32b-preview (#4937)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-04 09:53:23 +01:00
LocalAI [bot]
122970d70d chore: ⬆️ Update ggml-org/llama.cpp to dfd6b2c0be191b3abe2fd9c1b25deff01c6249d8 (#4936)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-03 21:50:31 +00:00
dependabot[bot]
8664b1c7a2 chore(deps): Bump docs/themes/hugo-theme-relearn from 02bba0f to 4a4b60e (#4934)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `02bba0f` to `4a4b60e`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](02bba0f199...4a4b60ef04)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-03-03 19:56:41 +00:00
Ettore Di Giacinto
c92166f38a chore(model gallery): add steelskull_l3.3-mokume-gane-r1-70b-v1.1 (#4933)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-03 09:22:22 +01:00
LocalAI [bot]
d616058b12 chore: ⬆️ Update ggml-org/llama.cpp to 14dec0c2f29ae56917907dbf2eed6b19438d0a0e (#4932)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-02 22:27:01 +00:00
Ettore Di Giacinto
a7b4001b75 feat: allow specifying a reply prefix (#4931)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-02 16:07:32 +01:00
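A minimal sketch of how the reply prefix might be configured in a model YAML; the `reply_prefix` field name is inferred from the feature name and should be treated as an assumption:

```yaml
name: my-model
parameters:
  model: my-model.gguf
# hypothetical field: text prepended to every generated reply
reply_prefix: "<think>"
```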
Ettore Di Giacinto
ff85f01459 chore(model gallery): add thedrummer_fallen-llama-3.3-r1-70b-v1 (#4930)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-02 10:29:28 +01:00
Ettore Di Giacinto
695f81a08b chore(model gallery): add qihoo360_tinyr1-32b-preview (#4929)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-02 10:24:17 +01:00
Ettore Di Giacinto
326be287da chore(model gallery): add ibm-granite_granite-3.2-2b-instruct (#4928)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-02 10:22:35 +01:00
Ettore Di Giacinto
0404d98190 chore(model gallery): add ibm-granite_granite-3.2-8b-instruct (#4927)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-02 10:19:27 +01:00
LocalAI [bot]
0a8ec1eb22 chore: ⬆️ Update ggml-org/llama.cpp to 1782cdfed60952f9ff333fc2ab5245f2be702453 (#4926)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-02 10:02:49 +01:00
Ettore Di Giacinto
d860932dcd fix(chatml): add endoftext stopword
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-01 21:16:10 +01:00
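The fix adds the `<|endoftext|>` token as a stopword for ChatML-style templates. For a manually configured model the equivalent looks roughly like this (a sketch; `stopwords` is the standard model-config field, and the tokens follow the ChatML convention):

```yaml
name: my-chatml-model
parameters:
  model: my-model.gguf
stopwords:
# stop generation at the ChatML end-of-turn and end-of-text markers
- "<|im_end|>"
- "<|endoftext|>"
```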
Ettore Di Giacinto
1cb137bd2d fix(deephermes): correct typo
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-03-01 17:07:12 +01:00
Ettore Di Giacinto
3c279e5568 chore(model gallery): add allenai_olmocr-7b-0225-preview (#4924)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-01 09:10:04 +01:00
Ettore Di Giacinto
fb55e3df57 chore(model gallery): add ozone-research_0x-lite (#4923)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-01 09:07:01 +01:00
Ettore Di Giacinto
de46fb6e2e chore(model gallery): add ozone-research_chirp-01 (#4922)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-01 09:05:03 +01:00
Ettore Di Giacinto
d7a0e3c5ea chore(model gallery): add microsoft_phi-4-mini-instruct (#4921)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-01 08:58:01 +01:00
LocalAI [bot]
0533ea817d chore: ⬆️ Update ggml-org/llama.cpp to 06c2b1561d8b882bc018554591f8c35eb04ad30e (#4920)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-28 22:18:14 +00:00
Ettore Di Giacinto
755e4fb5f4 feat(ui): improvements to index and models page (#4918)
- mobile-friendly index
- adjust color palette
- improve search experience

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-28 19:23:32 +01:00
LocalAI [bot]
e4fdde158f chore: ⬆️ Update ggml-org/llama.cpp to b95c8af37ccf169b0a3216b7ed691af0534e5091 (#4916)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-28 00:00:39 +00:00
Ettore Di Giacinto
6d0712fa6d fix(ui): not all models come from the gallery (#4915)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-27 19:12:41 +01:00
Ettore Di Giacinto
bbbb28e3ca fix(models): unify use-case identification (#4914)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-27 15:51:12 +01:00
Ettore Di Giacinto
3bf2e9d065 fix(ui): not all models have an Icon (#4913)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-27 10:52:19 +01:00
Ettore Di Giacinto
1461fd8777 chore(model gallery): add locutusque_thespis-llama-3.1-8b (#4912)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-27 10:02:44 +01:00
LocalAI [bot]
054860539a chore: ⬆️ Update ggml-org/llama.cpp to a800ae46da2ed7dac236aa6bf2b595da6b6294b5 (#4911)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-26 22:43:49 +00:00
Ettore Di Giacinto
c87870b18e feat(ui): improve chat interface (#4910)
* feat(ui): show more information in the chat view, with minor adjustments to the model gallery

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(ui): UI improvements

Visual improvements and bugfixes including:
- disable pagination during search
- fix scrolling on new message

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-26 18:27:18 +01:00
Ettore Di Giacinto
5ad2be9c45 feat(ui): small improvements to chat interface (#4907)
- Change chat colors
- Improve layout on small windows

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-26 11:10:40 +01:00
LocalAI [bot]
61a24746a1 chore: ⬆️ Update ggml-org/llama.cpp to d7cfe1ffe0f435d0048a6058d529daf76e072d9c (#4908)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-25 21:58:37 +00:00
Ettore Di Giacinto
d557eb9361 chore(model gallery): add latitudegames_wayfarer-large-70b-llama-3.3 (#4903)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-25 10:21:54 +01:00
Ettore Di Giacinto
a9a1a361a9 chore(model gallery): add perplexity-ai_r1-1776-distill-llama-70b (#4902)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-25 09:59:21 +01:00
Ettore Di Giacinto
12d070af80 chore(model gallery): add sicariussicariistuff_phi-line_14b (#4901)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-25 09:56:44 +01:00
LocalAI [bot]
8d40557bc8 chore: ⬆️ Update ggml-org/llama.cpp to 7a2c913e66353362d7f28d612fd3c9d51a831eda (#4899)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-25 09:51:02 +01:00
dependabot[bot]
5a5f3a899a chore(deps): Bump docs/themes/hugo-theme-relearn from 66bc366 to 02bba0f (#4898)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `66bc366` to `02bba0f`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](66bc366c47...02bba0f199)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-02-25 09:50:46 +01:00
dependabot[bot]
a2d1f133c8 chore(deps): Bump appleboy/ssh-action from 1.2.0 to 1.2.1 (#4896)
Bumps [appleboy/ssh-action](https://github.com/appleboy/ssh-action) from 1.2.0 to 1.2.1.
- [Release notes](https://github.com/appleboy/ssh-action/releases)
- [Changelog](https://github.com/appleboy/ssh-action/blob/master/.goreleaser.yaml)
- [Commits](https://github.com/appleboy/ssh-action/compare/v1.2.0...v1.2.1)

---
updated-dependencies:
- dependency-name: appleboy/ssh-action
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-02-24 21:01:39 +00:00
LocalAI [bot]
0ae6420c31 chore: ⬆️ Update ggml-org/llama.cpp to 7ad0779f5de84a68143b2c00ab5dc94a948925d3 (#4890)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-24 11:26:09 +01:00
Ettore Di Giacinto
3a3e05cf18 chore(model gallery): add flux.1dev-abliteratedv2 (#4895)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-24 10:11:32 +01:00
Ettore Di Giacinto
6a20388e25 chore(model gallery): add nohobby_l3.3-prikol-70b-extra (#4894)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-24 09:43:50 +01:00
Ettore Di Giacinto
06c836a937 chore(model gallery): add steelskull_l3.3-san-mai-r1-70b (#4893)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-24 09:41:06 +01:00
Ettore Di Giacinto
049a13fe78 chore(model gallery): add steelskull_l3.3-cu-mai-r1-70b (#4892)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-24 09:39:12 +01:00
Ettore Di Giacinto
30bf6c962f chore(stable-diffusion-ggml): update and adapt upstream changes (#4889)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-23 08:36:41 +01:00
LocalAI [bot]
a72b3a23c3 chore: ⬆️ Update ggml-org/llama.cpp to a28e0d5eb18c18e6a4598286158f427269b1444e (#4887)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-23 08:36:25 +01:00
Ettore Di Giacinto
e9971b168a feat(ui): paginate model gallery (#4886)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-22 21:38:00 +01:00
Ettore Di Giacinto
5b59b5e0c1 chore(model gallery): add steelskull_l3.3-mokume-gane-r1-70b (#4885)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-22 18:58:06 +01:00
Ettore Di Giacinto
8cfd712428 chore(model gallery): add arcee-ai_arcee-maestro-7b-preview (#4884)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-22 11:32:25 +01:00
Ettore Di Giacinto
21f7faa80d chore(model gallery): add ozone-ai_reverb-7b (#4883)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-22 11:28:27 +01:00
Ettore Di Giacinto
a6a0121118 chore(model gallery): add rombo-org_rombo-llm-v3.0-qwen-72b (#4882)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-22 11:19:04 +01:00
LocalAI [bot]
ba66aa33c5 chore: ⬆️ Update ggml-org/llama.cpp to 51f311e057723b7454d0ebe20f545a1a2c4db6b2 (#4881)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-21 21:51:02 +00:00
Ettore Di Giacinto
8fc024a770 chore(model gallery): add pocketdoc_dans-personalityengine-v1.2.0-24b (#4880)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-21 10:00:23 +01:00
Ettore Di Giacinto
52aa9d08aa chore(model gallery): add l3.1-8b-rp-ink (#4879)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-21 09:56:57 +01:00
Ettore Di Giacinto
4c9379c39e chore(model gallery): add smirki_uigen-t1.1-qwen-7b (#4878)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-21 09:54:42 +01:00
Ettore Di Giacinto
0ff2c39364 chore(model gallery): add smirki_uigen-t1.1-qwen-14b (#4877)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-21 09:52:20 +01:00
LocalAI [bot]
1af7e5dc49 chore: ⬆️ Update ggml-org/llama.cpp to c392e5094deaf2d1a7c18683214f007fad3fe42b (#4876)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-20 22:03:52 +00:00
Ettore Di Giacinto
af3bb64e42 fix(coqui): pin transformers (#4875)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-20 16:16:54 +01:00
Ettore Di Giacinto
77281f836e chore(model gallery): add internlm_oreal-7b (#4874)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-20 15:57:21 +01:00
Ettore Di Giacinto
550275811d chore(model gallery): add internlm_oreal-deepseek-r1-distill-qwen-7b (#4873)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-20 15:55:13 +01:00
Ettore Di Giacinto
c27ce6c54d chore(model gallery): add internlm_oreal-32b (#4872)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-20 15:52:28 +01:00
Ettore Di Giacinto
ac4991b069 chore(docs): update sponsor logo
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-20 15:31:41 +01:00
Ettore Di Giacinto
25bee71bb8 feat(ui): also filter tts and image models (#4871)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-20 15:02:18 +01:00
LocalAI [bot]
b993780a3b chore: ⬆️ Update ggml-org/llama.cpp to d04e7163c85a847bc61d58c22f2c503596db7aa8 (#4870)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-20 09:42:57 +01:00
Ettore Di Giacinto
ea0c9f1168 feat(ui): show only text models in the chat interface (#4869)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-19 17:34:30 +01:00
Ettore Di Giacinto
08311f275a chore(model gallery): add sentientagi_dobby-unhinged-llama-3.3-70b (#4868)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-19 10:36:36 +01:00
Ettore Di Giacinto
4de0f2f737 chore(model gallery): add open-r1_openr1-qwen-7b (#4867)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-19 10:04:01 +01:00
Ettore Di Giacinto
42ae807c41 chore(model gallery): add pygmalionai_pygmalion-3-12b (#4866)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-19 10:02:35 +01:00
LocalAI [bot]
94593ba4c3 chore: ⬆️ Update ggml-org/llama.cpp to 63e489c025d61c7ca5ec06c5d10f36e2b76aaa1d (#4865)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-19 09:19:28 +01:00
Brandon Beiler
6a6e1a0ea9 feat(vllm): Additional vLLM config options (Disable logging, dtype, and Per-Prompt media limits) (#4855)
* Adding the following vLLM config options: disable_log_status, dtype, limit_mm_per_prompt

Signed-off-by: TheDropZone <brandonbeiler@gmail.com>

* using " marks in the config.yaml file

Signed-off-by: TheDropZone <brandonbeiler@gmail.com>

* adding in a missing colon

Signed-off-by: TheDropZone <brandonbeiler@gmail.com>

---------

Signed-off-by: TheDropZone <brandonbeiler@gmail.com>
2025-02-18 19:27:58 +01:00
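A minimal sketch of a vLLM model YAML using the three new options; the field names mirror the commit message, and their exact placement in the config is an assumption:

```yaml
name: my-vllm-model
backend: vllm
parameters:
  # hypothetical Hugging Face model id
  model: Qwen/Qwen2.5-7B-Instruct
disable_log_status: true   # silence vLLM's periodic status logging
dtype: bfloat16            # computation dtype passed to vLLM
limit_mm_per_prompt:       # per-prompt media limits
  image: 1
```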
Ettore Di Giacinto
5b19af99ff feat(ui): detect model usage and display link (#4864)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 19:27:07 +01:00
Ettore Di Giacinto
28fb8e607a chore(model gallery): add nbeerbower_dumpling-qwen2.5-72b (#4862)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 12:44:59 +01:00
Ettore Di Giacinto
bb85b6ef00 feat: improve ui models list in the index (#4863)
* feat(ui): improve index

- Redirect to the chat view when clicking on a model

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Display a chat icon near the model

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 12:44:44 +01:00
Ettore Di Giacinto
b9b5a635ca chore(model gallery): add nbeerbower_dumpling-qwen2.5-32b-v2 (#4861)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 11:53:23 +01:00
Ettore Di Giacinto
131ea5b627 chore(model gallery): add nbeerbower_dumpling-qwen2.5-14b (#4860)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 11:51:29 +01:00
Ettore Di Giacinto
fac70e9642 chore(model gallery): add allenai_llama-3.1-tulu-3.1-8b (#4859)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 11:49:26 +01:00
Ettore Di Giacinto
7e76ea40fb chore(model gallery): add kubeguru-llama3.2-3b-v0.1 (#4858)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 11:47:00 +01:00
LocalAI [bot]
de09ae42ef chore: ⬆️ Update ggml-org/llama.cpp to 73e2ed3ce3492d3ed70193dd09ae8aa44779651d (#4854)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-18 09:11:07 +01:00
Ettore Di Giacinto
6424f0666d chore(deps): Bump edgevpn to v0.30.1 (#4840)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-17 16:51:22 +01:00
Ettore Di Giacinto
f3ae94ca70 chore: update Image generation docs and examples (#4841)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-17 16:51:06 +01:00
LocalAI [bot]
09c9f67a02 chore: ⬆️ Update ggml-org/llama.cpp to 2eea03d86a2d132c8245468c26290ce07a27a8e8 (#4839)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-17 10:55:30 +01:00
Ettore Di Giacinto
c264ca542d fix(ci): update repository for llama.cpp
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-02-17 09:33:34 +01:00
Bas Hulsken
bbf30d416d fix: change initialization order of llama-cpp-avx512 to go before avx2 variant (#4837)
changed the initialization order of the avx512 version of llama.cpp, which is now tried before the avx2 variant

Signed-off-by: Bas Hulsken <bhulsken@hotmail.com>
2025-02-17 09:32:21 +01:00
Ettore Di Giacinto
27617a1b06 chore(model gallery): add ozone-ai_0x-lite (#4835)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-16 09:23:26 +01:00
Ettore Di Giacinto
e84081769e chore(ci): cleanup before pulling images again 2025-02-16 09:20:22 +01:00
LocalAI [bot]
20119fc580 docs: ⬆️ update docs version mudler/LocalAI (#4834)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-15 22:45:11 +00:00
Ettore Di Giacinto
09941c0bfb chore(docs): update license year
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-15 18:17:15 +01:00
Ettore Di Giacinto
cabe0f4993 chore(model gallery): add davidbrowne17_llamathink-8b-instruct (#4833)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-15 17:31:46 +01:00
Ettore Di Giacinto
1977c7f190 chore(model gallery): add pygmalionai_eleusis-12b (#4832)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-15 17:21:30 +01:00
Ettore Di Giacinto
061e7c4eae chore(model gallery): add rombo-org_rombo-llm-v3.0-qwen-32b (#4830)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-15 10:58:27 +01:00
LocalAI [bot]
5313e660f6 chore: ⬆️ Update ggerganov/llama.cpp to 300907b2110cc17b4337334dc397e05de2d8f5e0 (#4829)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-14 21:51:49 +00:00
Ettore Di Giacinto
9e32fda304 fix(llama.cpp): improve context shift handling (#4820)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-14 14:55:03 +01:00
Ettore Di Giacinto
83202cae54 chore(model gallery): add nousresearch_deephermes-3-llama-3-8b-preview (#4828)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-14 12:25:00 +01:00
Ettore Di Giacinto
d96addfa9d chore(model gallery): add open-thoughts_openthinker-32b (#4827)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-14 12:03:46 +01:00
Ettore Di Giacinto
a715fe588d chore(model gallery): add sicariussicariistuff_phi-lthy4 (#4826)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-14 11:57:14 +01:00
LocalAI [bot]
2ac4a86bb4 chore: ⬆️ Update ggerganov/llama.cpp to 8a8c4ceb6050bd9392609114ca56ae6d26f5b8f5 (#4825)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-13 21:49:57 +00:00
Ettore Di Giacinto
8670d480a6 chore(model gallery): add nvidia_aceinstruct-72b (#4822)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-13 09:58:34 +01:00
Ettore Di Giacinto
af0b4ff237 chore(ci): update labels
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-02-13 09:58:19 +01:00
Ettore Di Giacinto
e694764065 chore(model gallery): add nvidia_aceinstruct-7b (#4821)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-13 09:44:53 +01:00
Ettore Di Giacinto
f3c27e0381 chore(model gallery): add nvidia_aceinstruct-1.5b (#4819)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-13 09:33:40 +01:00
LocalAI [bot]
bf44319d0d chore: ⬆️ Update ggerganov/llama.cpp to 0fb77f821f6e70ad8b8247a97d1022f0fef78991 (#4814)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-12 22:41:53 +00:00
Ettore Di Giacinto
5b133a640b chore(model gallery): add theskullery_l3.3-exp-unnamed-model-70b-v0.5 (#4813)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-12 11:05:51 +01:00
Ettore Di Giacinto
0030a3fe75 chore(model gallery): add simplescaling_s1.1-32b (#4812)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-12 11:03:05 +01:00
Ettore Di Giacinto
0a748b009e chore(ci): avoid cache hits until the ci gRPC job is fixed
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-12 09:11:40 +01:00
LocalAI [bot]
257e951def chore: ⬆️ Update ggerganov/llama.cpp to 90e4dba461b07e635fd1daf3b491c978c7dd0013 (#4810)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-12 00:13:28 +01:00
LocalAI [bot]
fbd82a2dd0 feat(swagger): update swagger (#4809)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-11 21:54:40 +00:00
Ettore Di Giacinto
5db321dad2 chore(ci): do not always regenerate the cache
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-11 16:36:00 +01:00
Ettore Di Giacinto
f5638a6354 feat(diffusers): allow to override image gen options (#4807)
Use the options field in the model to override kwargs if needed.

This allows specifying, from the model YAML config:

```yaml

options:
- foo:bar

```

Each option will then be passed directly when calling the diffusers pipeline, e.g.:

```python
pipe(
  foo="bar",
)
```

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-11 10:16:32 +01:00
Ettore Di Giacinto
5f64cc6328 Revert "chore(deps): Bump docs/themes/lotusdocs from f5785a2 to 975da91" (#4808)
Revert "chore(deps): Bump docs/themes/lotusdocs from `f5785a2` to `975da91` (…"

This reverts commit e57b750ca3.
2025-02-11 10:05:57 +01:00
Ettore Di Giacinto
28b10e8804 chore(swagger): update (#4805)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-11 09:51:01 +01:00
Ettore Di Giacinto
3277f5095d chore(model gallery): add agentica-org_deepscaler-1.5b-preview (#4804)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-11 09:47:19 +01:00
Ettore Di Giacinto
fe3ced2919 chore(ci): try again to bump parallelism in grpc jobs
As we moved these out to self-hosted

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-11 09:31:00 +01:00
LocalAI [bot]
45e37a07bb chore: ⬆️ Update ggerganov/llama.cpp to 19b392d58dc08c366d0b29bd3b9c6991fa4e1662 (#4803)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-11 09:17:02 +01:00
dependabot[bot]
e57b750ca3 chore(deps): Bump docs/themes/lotusdocs from f5785a2 to 975da91 (#4801)
Bumps [docs/themes/lotusdocs](https://github.com/colinwilson/lotusdocs) from `f5785a2` to `975da91`.
- [Release notes](https://github.com/colinwilson/lotusdocs/releases)
- [Commits](f5785a2399...975da91e83)

---
updated-dependencies:
- dependency-name: docs/themes/lotusdocs
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-02-10 22:27:14 +00:00
Ettore Di Giacinto
49df492268 chore(ci): run grpc build on self-hosted
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-10 19:44:50 +01:00
Ettore Di Giacinto
516cd660f1 chore(grpcio): reduce parallelism (#4799)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-10 18:56:13 +01:00
Ettore Di Giacinto
8fd3ace9a1 chore(grpcio): bump to 1.70 (#4798)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-10 18:38:53 +01:00
Ettore Di Giacinto
099469cb05 chore(tests): decrease parallelism for gRPC builds (#4797)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-10 12:59:59 +01:00
Ettore Di Giacinto
6be8c0c618 chore(model gallery): add localai-functioncall-qwen2.5-7b-v0.5 (#4796)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-10 12:07:35 +01:00
Dave
3cddf24747 feat: Centralized Request Processing middleware (#3847)
* squash past, centralize request middleware PR

Signed-off-by: Dave Lee <dave@gray101.com>

* migrate bruno request files to examples repo

Signed-off-by: Dave Lee <dave@gray101.com>

* fix

Signed-off-by: Dave Lee <dave@gray101.com>

* Update tests/e2e-aio/e2e_test.go

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

---------

Signed-off-by: Dave Lee <dave@gray101.com>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-02-10 12:06:16 +01:00
Ettore Di Giacinto
c330360785 chore(model gallery): add ilsp_llama-krikri-8b-instruct (#4795)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-10 09:54:54 +01:00
LocalAI [bot]
8cd51570e5 chore: ⬆️ Update ggerganov/llama.cpp to 19d3c8293b1f61acbe2dab1d49a17950fd788a4a (#4793)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-09 22:12:01 +00:00
Ettore Di Giacinto
0e7aa5cd15 chore(model gallery): add subtleone_qwen2.5-32b-erudite-writer (#4792)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-09 10:59:46 +01:00
Ettore Di Giacinto
e06a5f49de chore(model gallery): add huihui-ai_deepseek-r1-distill-llama-70b-abliterated (#4790)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-09 10:53:48 +01:00
Dave
fb2f847507 chore: migrate bruno request files to examples repo (#4788)
migrate bruno request files to examples repo

Signed-off-by: Dave Lee <dave@gray101.com>
2025-02-09 10:52:28 +01:00
LocalAI [bot]
e01acc88c9 chore: ⬆️ Update ggerganov/llama.cpp to e6e658319952f7ad269dc11275b9edddc721fc6d (#4787)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-08 21:57:40 +00:00
LocalAI [bot]
7a5912908a chore: ⬆️ Update ggerganov/llama.cpp to d2fe216fb2fb7ca8627618c9ea3a2e7886325780 (#4780)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-08 09:44:34 +01:00
Ettore Di Giacinto
4b1b942a7f chore(model gallery): add sicariussicariistuff_redemption_wind_24b (#4781)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-08 09:04:18 +01:00
Ettore Di Giacinto
230fe0098f chore(model gallery): add cognitivecomputations_dolphin3.0-mistral-24b (#4779)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-07 13:33:24 +01:00
Ettore Di Giacinto
cc163429dc chore(model gallery): add cognitivecomputations_dolphin3.0-r1-mistral-24b (#4778)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-07 13:31:49 +01:00
Ettore Di Giacinto
f670e0a91c chore(model gallery): add nohobby_l3.3-prikol-70b-v0.5 (#4777)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-07 13:29:53 +01:00
LocalAI [bot]
731674eee7 chore: ⬆️ Update ggerganov/llama.cpp to 8a59053f63fffc24e730cd3ea067760abfe4a919 (#4776)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-06 22:02:00 +00:00
Ettore Di Giacinto
cc1f6f913f fix(llama.cpp): disable mirostat as default (#2911)
Even though it can increase output quality, it has shown performance drawbacks noticeable enough to confuse users about the speed of LocalAI (see also https://github.com/mudler/LocalAI/issues/2780).

This changeset disables Mirostat by default (it can still be enabled manually).

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Dave <dave@gray101.com>
2025-02-06 19:39:59 +01:00
Ettore Di Giacinto
7f90ff7aec chore(llama-ggml): drop deprecated backend (#4775)
The GGML format is now dead: the next version of LocalAI already brings many breaking compatibility changes, so we take the occasion to also drop ggml (pre-gguf) support.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-06 18:36:23 +01:00
Ettore Di Giacinto
8d45670e41 fix(openai): consistently return stop reason (#4771)
We were not returning a stop reason when no tool was actually called (even if tools were specified).

Fixes: https://github.com/mudler/LocalAI/issues/4716

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-06 12:41:08 +01:00
Ettore Di Giacinto
e4b8ddb6a1 chore(model gallery): add black-ink-guild_pernicious_prophecy_70b (#4774)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-06 12:03:59 +01:00
Ettore Di Giacinto
a801561f81 chore(model gallery): add tiger-lab_qwen2.5-32b-instruct-cft (#4773)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-06 12:01:56 +01:00
Ettore Di Giacinto
16ced07102 chore(model gallery): add arliai_llama-3.3-70b-arliai-rpmax-v1.4 (#4772)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-06 11:59:14 +01:00
LocalAI [bot]
d35595372d chore: ⬆️ Update ggerganov/llama.cpp to d774ab3acc4fee41fbed6dbfc192b57d5f79f34b (#4770)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-06 09:02:51 +01:00
LocalAI [bot]
81be192279 chore: ⬆️ Update leejet/stable-diffusion.cpp to d46ed5e184b97c2018dc2e8105925bdb8775e02c (#4769)
⬆️ Update leejet/stable-diffusion.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-05 23:49:15 +00:00
Ettore Di Giacinto
28a1310890 chore(docs): enhance visibility
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 19:50:32 +01:00
Ettore Di Giacinto
2a702e9ca4 chore(docs): small updates
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 19:49:11 +01:00
Ettore Di Giacinto
3ecaea1b6e chore(docs): update sponsors in the website
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 19:41:55 +01:00
Ettore Di Giacinto
7daf5ac3e3 fix(gallery): do not return overrides and additional config (#4768)
When hitting /models/available we are interested in the model description, name, and a small amount of metadata. Configuration and overrides are internals that are required only for installation.

This also fixes a current bug where hitting /models/available fails if one of the gallery items has overrides with parameters defined.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 18:37:09 +01:00
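To illustrate the idea behind this fix, a minimal Go sketch; the type and field names are hypothetical, not LocalAI's actual gallery types. The point is that the listing type simply has no overrides field, so internals can never leak into the /models/available response:

```go
package main

import (
	"encoding/json"
	"fmt"
)

type galleryItem struct {
	Name        string         `json:"name"`
	Description string         `json:"description"`
	Overrides   map[string]any `json:"-"` // internal, used only at install time
}

// listEntry is what a listing endpoint would return per item.
type listEntry struct {
	Name        string `json:"name"`
	Description string `json:"description"`
}

func main() {
	item := galleryItem{
		Name:        "phi-4",
		Description: "example model",
		Overrides:   map[string]any{"context_size": 4096},
	}
	out, _ := json.Marshal(listEntry{Name: item.Name, Description: item.Description})
	fmt.Println(string(out)) // {"name":"phi-4","description":"example model"}
}
```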
Ettore Di Giacinto
7bc80c17f8 chore(model gallery): add LocalAI-functioncall-llama3.2-3b-v0.5 (#4766)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 10:19:31 +01:00
Ettore Di Giacinto
1996ceb293 chore(model gallery): add krutrim-ai-labs_krutrim-2-instruct (#4765)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 10:17:05 +01:00
Ettore Di Giacinto
0bc3dc43da chore(model gallery): add rubenroy_gilgamesh-72b (#4764)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 10:13:21 +01:00
Ettore Di Giacinto
3324c4e6cb chore(model gallery): add agi-0_art-skynet-3b (#4763)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 10:09:33 +01:00
LocalAI [bot]
7329db4e78 chore: ⬆️ Update ggerganov/llama.cpp to 3ec9fd4b77b6aca03a3c2bf678eae3f9517d6904 (#4762)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-04 21:48:49 +00:00
Ettore Di Giacinto
464686aee6 chore(model gallery): add suayptalha_maestro-10b (#4760)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-04 09:51:54 +01:00
Ettore Di Giacinto
bfa3d4ccff chore(model gallery): add nohobby_l3.3-prikol-70b-v0.4 (#4759)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-04 09:50:18 +01:00
Ettore Di Giacinto
6a91288c8c chore(model gallery): add fblgit_miniclaus-qw1.5b-unamgs-grpo (#4758)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-04 09:45:52 +01:00
dependabot[bot]
96cb407ee0 chore(deps): Bump docs/themes/hugo-theme-relearn from 5bcb9fe to 66bc366 (#4750)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `5bcb9fe` to `66bc366`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](5bcb9fe5e6...66bc366c47)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-02-04 08:57:19 +01:00
dependabot[bot]
5a19094d3a chore(deps): Bump sentence-transformers from 3.4.0 to 3.4.1 in /backend/python/transformers (#4748)
chore(deps): Bump sentence-transformers in /backend/python/transformers

Bumps [sentence-transformers](https://github.com/UKPLab/sentence-transformers) from 3.4.0 to 3.4.1.
- [Release notes](https://github.com/UKPLab/sentence-transformers/releases)
- [Commits](https://github.com/UKPLab/sentence-transformers/compare/v3.4.0...v3.4.1)

---
updated-dependencies:
- dependency-name: sentence-transformers
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-02-04 08:56:51 +01:00
LocalAI [bot]
e3b943ffcb chore: ⬆️ Update ggerganov/llama.cpp to 5598f475be3e31430fbe17ebb85654ec90dc201e (#4757)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-04 08:56:11 +01:00
dependabot[bot]
df30d6a482 chore(deps): Bump GrantBirki/git-diff-action from 2.7.0 to 2.8.0 (#4746)
Bumps [GrantBirki/git-diff-action](https://github.com/grantbirki/git-diff-action) from 2.7.0 to 2.8.0.
- [Release notes](https://github.com/grantbirki/git-diff-action/releases)
- [Commits](https://github.com/grantbirki/git-diff-action/compare/v2.7.0...v2.8.0)

---
updated-dependencies:
- dependency-name: GrantBirki/git-diff-action
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-02-03 22:21:40 +00:00
Ettore Di Giacinto
c3c27b7e3d chore(model gallery): small fixups to llama3.2-fcall template
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-03 17:58:57 +01:00
Ettore Di Giacinto
431716d4d6 fix(gallery): remove box token to llama3.2-fcall
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-02-03 16:10:33 +01:00
Ettore Di Giacinto
d290fd159f chore(model gallery): add LocalAI-functioncall-llama3.2-1b-v0.4 (#4740)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-03 15:55:49 +01:00
Ettore Di Giacinto
051faaf771 chore(model gallery): add uncensoredai_uncensoredlm-deepseek-r1-distill-qwen-14b (#4739)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-03 10:46:47 +01:00
Ettore Di Giacinto
41a2dfb0d9 chore(model gallery): add thedrummer_gemmasutra-pro-27b-v1.1 (#4738)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-03 10:37:24 +01:00
Ettore Di Giacinto
ed0094c3d0 chore(model gallery): add steelskull_l3.3-damascus-r1 (#4737)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-03 10:30:07 +01:00
LocalAI [bot]
52fadeded1 feat(swagger): update swagger (#4735)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-03 10:16:42 +01:00
LocalAI [bot]
a37fa8d9c4 chore: ⬆️ Update ggerganov/llama.cpp to 90f9b88afb6447d3929843a2aa98c0f11074762d (#4736)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-02 22:18:30 +00:00
Shraddha
03974a4dd4 feat: tokenization with llama.cpp (#4724)
feat: tokenization

Signed-off-by: shraddhazpy <shraddha@shraddhafive.in>
2025-02-02 17:39:43 +00:00
Ettore Di Giacinto
1d6afbd65d feat(llama.cpp): Add support to grammar triggers (#4733)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-02 13:25:03 +01:00
LocalAI [bot]
d79f02ea09 chore: ⬆️ Update ggerganov/llama.cpp to 53debe6f3c9cca87e9520a83ee8c14d88977afa4 (#4732)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-01 21:45:26 +00:00
Ettore Di Giacinto
ba2f426e3e chore(model gallery): add fuseo1-deekseekr1-qwq-skyt1-32b-preview (#4731)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-01 10:12:15 +01:00
LocalAI [bot]
732042e5c6 chore: ⬆️ Update ggerganov/llama.cpp to aa6fb1321333fae8853d0cdc26bcb5d438e650a1 (#4728)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-31 22:31:00 +00:00
Ettore Di Giacinto
f1763aabf2 chore(model gallery): add taid-llm-1.5b (#4727)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-31 14:53:39 +01:00
Ettore Di Giacinto
e0d90b173b chore(model gallery): add tinyswallow-1.5b-instruct (#4726)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-31 14:49:02 +01:00
Ettore Di Giacinto
ff07612bfa chore(model gallery): add mistral-small-24b-instruct-2501 (#4725)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-31 14:45:42 +01:00
LocalAI [bot]
7badaf78a0 chore: ⬆️ Update ggerganov/llama.cpp to 8b576b6c55bc4e6be898b47522f0ef402b93ef62 (#4722)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-01-31 11:31:46 +00:00
Ettore Di Giacinto
af41436f1b fix(tests): pin to branch for config used in tests (#4721)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-31 09:57:58 +01:00
LocalAI [bot]
cd5489ce47 chore(model-gallery): ⬆️ update checksum (#4723)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-31 08:51:32 +01:00
Ettore Di Giacinto
60ec2cf751 chore(model gallery): add openthinker-7b (#4720)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-30 16:44:44 +01:00
Ettore Di Giacinto
244f4b564f chore(model gallery): add selene-1-mini-llama-3.1-8b (#4719)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-30 16:42:48 +01:00
Ettore Di Giacinto
f1d6d65417 chore(model gallery): add virtuoso-lite (#4718)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-30 16:38:35 +01:00
Ettore Di Giacinto
72e52c4f6a chore: drop embedded models (#4715)
Since the remote gallery was introduced, embedded models are completely superseded by it. In order to keep the code clean and remove redundant parts, let's simplify the usage.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-30 00:03:01 +01:00
LocalAI [bot]
1656e1a88e chore: ⬆️ Update ggerganov/llama.cpp to eb7cf15a808d4d7a71eef89cc6a9b96fe82989dc (#4717)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-29 21:45:38 +00:00
Ettore Di Giacinto
7f62b418a4 chore(docs): add documentation for l4t images
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-29 15:16:07 +01:00
Maximilian Kenfenheuer
1f4e66d638 chore(model gallery): add specific message templates for llama3.2 based models (#4707)
* chore(model gallery): add specific message templates for llama3.2 based models

Signed-off-by: Maximilian Kenfenheuer <maximilian.kenfenheuer@ksol.it>

* fix: yaml lint in llama3.2-quantized.yaml

Signed-off-by: Maximilian Kenfenheuer <maximilian.kenfenheuer@ksol.it>

* fix: yaml lint in llama3.2-quantized.yaml

Signed-off-by: Maximilian Kenfenheuer <maximilian.kenfenheuer@ksol.it>

---------

Signed-off-by: Maximilian Kenfenheuer <maximilian.kenfenheuer@ksol.it>
2025-01-29 10:19:48 +01:00
Maximilian Kenfenheuer
a37b2c765c docs: update advanced-usage.md to reflect changes in #4700 (#4709)
Signed-off-by: Maximilian Kenfenheuer <maximilian.kenfenheuer@ksol.it>
2025-01-28 22:58:35 +01:00
Maximilian Kenfenheuer
b4b67e00bd refactor: function argument parsing using named regex (#4708)
Signed-off-by: Maximilian Kenfenheuer <maximilian.kenfenheuer@ksol.it>
2025-01-28 22:58:02 +01:00
LocalAI [bot]
91e1ff5a95 chore: ⬆️ Update ggerganov/llama.cpp to cae9fb4361138b937464524eed907328731b81f6 (#4711)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-28 21:45:14 +00:00
dependabot[bot]
d9204ea3b5 chore(deps): Bump dependabot/fetch-metadata from 2.2.0 to 2.3.0 (#4701)
Bumps [dependabot/fetch-metadata](https://github.com/dependabot/fetch-metadata) from 2.2.0 to 2.3.0.
- [Release notes](https://github.com/dependabot/fetch-metadata/releases)
- [Commits](https://github.com/dependabot/fetch-metadata/compare/v2.2.0...v2.3.0)

---
updated-dependencies:
- dependency-name: dependabot/fetch-metadata
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-01-28 11:50:09 +01:00
LocalAI [bot]
3d0fbcb4f7 chore: ⬆️ Update ggerganov/llama.cpp to a4417ddda98fd0558fb4d802253e68a933704b59 (#4705)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-28 09:13:43 +01:00
dependabot[bot]
03f3df9a82 chore(deps): Bump docs/themes/hugo-theme-relearn from 8dad5ee to 5bcb9fe (#4704)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `8dad5ee` to `5bcb9fe`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](8dad5ee419...5bcb9fe5e6)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-01-28 09:13:00 +01:00
dependabot[bot]
fff35d5528 chore(deps): Bump sentence-transformers from 3.3.1 to 3.4.0 in /backend/python/transformers (#4702)
chore(deps): Bump sentence-transformers in /backend/python/transformers

Bumps [sentence-transformers](https://github.com/UKPLab/sentence-transformers) from 3.3.1 to 3.4.0.
- [Release notes](https://github.com/UKPLab/sentence-transformers/releases)
- [Commits](https://github.com/UKPLab/sentence-transformers/compare/v3.3.1...v3.4.0)

---
updated-dependencies:
- dependency-name: sentence-transformers
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-01-27 21:09:50 +00:00
Maximilian Kenfenheuer
539e94db73 feat: function argument parsing using named regex (#4700)
Signed-off-by: Maximilian Kenfenheuer <maximilian.kenfenheuer@ksol.it>
2025-01-27 15:53:05 +00:00
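A minimal Go sketch of argument parsing with named capture groups, as used by this feature; the pattern and group names here are illustrative, not the exact regex from #4700:

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// named groups (?P<name>...) let the parser refer to captures by name
	re := regexp.MustCompile(`(?P<name>\w+)\s*=\s*"(?P<value>[^"]*)"`)
	input := `city = "Rome", unit = "celsius"`

	args := map[string]string{}
	for _, m := range re.FindAllStringSubmatch(input, -1) {
		// map each named group to its captured text
		groups := map[string]string{}
		for i, n := range re.SubexpNames() {
			if n != "" {
				groups[n] = m[i]
			}
		}
		args[groups["name"]] = groups["value"]
	}
	fmt.Println(args) // map[city:Rome unit:celsius]
}
```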
Ettore Di Giacinto
0f4f62cf3c chore(model gallery): add fuseo1-deepseekr1-qwq-32b-preview (#4699)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-27 09:51:06 +01:00
Ettore Di Giacinto
e7cffd7afa chore(model gallery): add fuseo1-deepseekr1-qwen2.5-instruct-32b-preview (#4698)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-27 09:31:47 +01:00
Ettore Di Giacinto
26d790a2b6 chore(model gallery): add fuseo1-deepseekr1-qwen2.5-coder-32b-preview-v0.1 (#4697)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-27 09:28:29 +01:00
Ettore Di Giacinto
5cf838c08d chore(model gallery): add confucius-o1-14b (#4696)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-27 09:26:00 +01:00
LocalAI [bot]
4db8f5cbce chore: ⬆️ Update ggerganov/llama.cpp to 178a7eb952d211b8d4232d5e50ae1b64519172a9 (#4694)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-26 21:44:54 +00:00
Ettore Di Giacinto
3b6b37a81b chore(model gallery): add deepseek-r1-qwen-2.5-32b-ablated (#4693)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-26 10:06:06 +01:00
Ettore Di Giacinto
8f5aa2d9de chore(model gallery): add dumpling-qwen2.5-32b (#4692)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-26 10:03:46 +01:00
Ettore Di Giacinto
a6bc8aa7c7 chore(model gallery): add l3.3-nevoria-r1-70b (#4691)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-26 10:01:37 +01:00
LocalAI [bot]
4ab107bc1a chore: ⬆️ Update ggerganov/llama.cpp to 26771a1491f3a4c3d5b99c4c267b81aca9a7dfa0 (#4690)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-25 21:44:14 +00:00
Ettore Di Giacinto
4c3710a531 chore(model gallery): add chuluun-qwen2.5-72b-v0.08 (#4689)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-25 11:07:31 +01:00
Ettore Di Giacinto
901b06284a chore(model gallery): add art-v0-3b (#4688)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-25 11:06:05 +01:00
Ettore Di Giacinto
8eef5a2c5e chore(model gallery): add lamarck-14b-v0.7 (#4687)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-25 11:04:12 +01:00
Gianluca Boiano
e9cace137b chore(model gallery): update deepseek-r1 prompt template (#4686)
Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
2025-01-25 09:04:38 +01:00
LocalAI [bot]
9409c99738 chore: ⬆️ Update ggerganov/llama.cpp to c5d9effb49649db80a52caf5c0626de6f342f526 (#4685)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-24 21:45:54 +00:00
Ettore Di Giacinto
4d44ebc2f2 chore(deps): bump grpcio to 1.70.0 (#4682)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-24 10:18:22 +01:00
Gianluca Boiano
9a1182fa01 chore(model gallery): add flux.1, stablediffusion and whisper icons (#4680)
Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
2025-01-24 08:29:02 +01:00
Gianluca Boiano
66e9ef3f33 chore(model gallery): add DeepSeek R1 14b, 32b and 70b (#4679)
Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
2025-01-24 08:28:44 +01:00
Ettore Di Giacinto
8282414583 chore(downloader): support hf.co and hf:// URIs (#4677)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-24 08:27:22 +01:00
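For illustration, a Go sketch of translating an hf:// URI into a downloadable URL; the exact mapping LocalAI uses is assumed here to be the standard Hugging Face "resolve" URL layout (owner/repo/file on the main branch):

```go
package main

import (
	"fmt"
	"strings"
)

// hfToHTTP converts hf://owner/repo/path-to-file into a huggingface.co URL.
func hfToHTTP(uri string) (string, bool) {
	rest, ok := strings.CutPrefix(uri, "hf://")
	if !ok {
		return "", false
	}
	parts := strings.SplitN(rest, "/", 3) // owner, repo, file path
	if len(parts) != 3 {
		return "", false
	}
	return fmt.Sprintf("https://huggingface.co/%s/%s/resolve/main/%s",
		parts[0], parts[1], parts[2]), true
}

func main() {
	u, _ := hfToHTTP("hf://TheBloke/some-repo/model.Q4_K_M.gguf")
	fmt.Println(u)
}
```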
Gianluca Boiano
d1d7ce83d4 chore(model gallery): add MiniCPM-o-2.6-7.6b (#4676)
Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
2025-01-24 08:27:02 +01:00
Ettore Di Giacinto
5177837ab0 chore: detect and enable avx512 builds (#4675)
chore(avx512): add support

Fixes https://github.com/mudler/LocalAI/issues/4662

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-24 08:26:44 +01:00
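A minimal sketch of CPU capability detection with golang.org/x/sys/cpu; how LocalAI actually maps these flags to build variants is assumed here for illustration:

```go
package main

import (
	"fmt"

	"golang.org/x/sys/cpu"
)

func main() {
	// pick the most capable variant the CPU supports
	switch {
	case cpu.X86.HasAVX512F:
		fmt.Println("selecting avx512 build")
	case cpu.X86.HasAVX2:
		fmt.Println("selecting avx2 build")
	case cpu.X86.HasAVX:
		fmt.Println("selecting avx build")
	default:
		fmt.Println("selecting fallback build")
	}
}
```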
Ettore Di Giacinto
f9e368b7c4 chore(refactor): group cpu cap detection (#4674)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-23 16:35:44 +01:00
Ettore Di Giacinto
eef80b9880 chore(ci): cleanup tests
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-23 10:02:57 +01:00
Ettore Di Giacinto
073eaec729 chore(openvoice): drop backend (#4673)
The project (MeloTTS) has been quiet for a long time; newer backends are much more performant and offer better quality overall.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-23 10:00:36 +01:00
Ettore Di Giacinto
318225f631 chore(parler-tts): drop backend (#4672)
At this point we support more extensive backends that are SOTA and also support voice cloning and many other features. This backend is superseded and poses a significant maintenance burden: https://github.com/mudler/LocalAI/issues/3941 is still open because its deps pin old versions of grpc.

Closes https://github.com/mudler/LocalAI/issues/3941

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-23 09:46:16 +01:00
Ettore Di Giacinto
89429a439b feat(transformers): add support to Mamba (#4669)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-23 09:30:47 +01:00
LocalAI [bot]
200fe358f0 chore: ⬆️ Update ggerganov/llama.cpp to 6152129d05870cb38162c422c6ba80434e021e9f (#4668)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-23 08:06:43 +01:00
Ettore Di Giacinto
e426ab7c23 feat(faster-whisper): add backend (#4666)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-23 08:06:18 +01:00
LocalAI [bot]
715071b68d feat(swagger): update swagger (#4667)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-22 21:51:38 +01:00
Peter Cover
a05737c7e4 chore: fix some function names in comment (#4665)
Signed-off-by: petercover <raowanxiang@outlook.com>
2025-01-22 19:35:53 +01:00
Richard Palethorpe
e8eb0b2c50 fix(stores): Stores fixes and testing (#4663)
* fix(stores): Actually check a vector is a unit vector/normalized

Instead of just summing the components to see if they equal 1.0, take the actual magnitude/p-norm of the vector and check that it is approximately 1.0.

Note that this shouldn't change the order of results except in edge cases, if I am too lax with the precision of the equality comparison. However, it should improve performance for normalized vectors, which were being misclassified.

Signed-off-by: Richard Palethorpe <io@richiejp.com>

* fix(stores): Add tests for known results and triangle inequality

This adds some more tests to check the cosine similarity function has
some expected mathematical properties.

Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2025-01-22 19:35:05 +01:00
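A Go sketch of the normalization check described above: compare the L2 norm (magnitude) against 1.0 within a tolerance instead of summing components. The tolerance value and function name are illustrative:

```go
package main

import (
	"fmt"
	"math"
)

const eps = 1e-4 // tolerance for the approximate equality; value is illustrative

// isNormalized reports whether v is (approximately) a unit vector.
func isNormalized(v []float32) bool {
	var sumSq float64
	for _, x := range v {
		sumSq += float64(x) * float64(x)
	}
	return math.Abs(math.Sqrt(sumSq)-1.0) < eps
}

func main() {
	fmt.Println(isNormalized([]float32{0.6, 0.8})) // true: magnitude is 1.0
	fmt.Println(isNormalized([]float32{0.5, 0.5})) // false, though the components sum to 1.0
}
```

The second case shows why summing components misclassifies: {0.5, 0.5} sums to 1.0 but has magnitude ≈0.707.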
Ettore Di Giacinto
e15d29aba2 chore(stablediffusion-ncn): drop in favor of ggml implementation (#4652)
* chore(stablediffusion-ncn): drop in favor of ggml implementation

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(ci): drop stablediffusion build

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(tests): add

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(tests): try to fixup current tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Try to fix tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Tests improvements

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(tests): use quality to specify step

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(tests): switch to sd-1.5

also increase prep time for downloading models

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-22 19:34:16 +01:00
Ettore Di Giacinto
10675ac28e Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-01-22 18:07:30 +01:00
Ettore Di Giacinto
0ec25b8b07 chore(model gallery): add sd-1.5-ggml and sd-3.5-medium-ggml (#4664)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-22 16:37:20 +01:00
LocalAI [bot]
e81ceff681 chore: ⬆️ Update ggerganov/llama.cpp to 6171c9d25820ccf676b243c172868819d882848f (#4661)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-21 22:04:29 +00:00
Ettore Di Giacinto
6831719e1e chore(model gallery): add deepseek-r1-distill-qwen-7b (#4660)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-21 15:09:36 +01:00
Gianluca Boiano
b264a91b3f chore(model gallery): add Deepseek-R1-Distill models (#4646)
* chore(model gallery): add Deepseek-R1-Distill-Llama-8b

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>

* chore(model gallery): add Deepseek-R1-Distill-Qwen-1.5b

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>

---------

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
2025-01-21 10:37:05 +01:00
LocalAI [bot]
1a08948e63 chore: ⬆️ Update ggerganov/llama.cpp to aea8ddd5165d525a449e2fc3839db77a71f4a318 (#4657)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-21 08:37:13 +01:00
dependabot[bot]
14a1e02f44 chore(deps): Bump docs/themes/hugo-theme-relearn from 80e448e to 8dad5ee (#4656)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `80e448e` to `8dad5ee`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](80e448e5bd...8dad5ee419)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-01-20 23:33:40 +00:00
Ettore Di Giacinto
2f09aa1b85 chore(model gallery): add sd-3.5-large-ggml (#4647)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-20 19:04:23 +01:00
Gianluca Boiano
a396040886 chore(model gallery): remove dead icons and update LLAVA and DeepSeek ones (#4645)
* chore(model gallery): update icons and add LLAVA ones

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>

* chore(model gallery): fix all complains related to yamllint

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>

---------

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
2025-01-20 16:13:19 +01:00
Ettore Di Giacinto
aeb1dca52e chore(model gallery): add l3.3-prikol-70b-v0.2 (#4643)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-20 11:03:35 +01:00
Ettore Di Giacinto
83a8d90c52 chore(model gallery): add l3.3-70b-magnum-v4-se (#4642)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-20 10:50:29 +01:00
Ettore Di Giacinto
adebd557ce chore(model gallery): add wayfarer-12b (#4641)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-20 10:45:10 +01:00
Gianluca Boiano
0c0e015b38 chore(model gallery): update icons and add missing ones (#4639)
* chore(model gallery): uniform github URLs for icons

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>

* chore(model gallery): add icons to phi models

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>

* chore(model gallery): add icons to QwenLM models

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>

* chore(model gallery): update icon for Arcee org

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>

* chore(model gallery): update icon for Meta org

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>

* chore(model gallery): update icon url for OpenCoder org

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>

* chore(model gallery): add icon for RWKV org

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>

* chore(model gallery): add icon for IBM-granite org

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>

* chore(model gallery): add icon for OpenBMB org

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>

* chore(model gallery): add icon for KatanemoLabs org

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>

* chore(model gallery): update icon for Meta-Llama-3.1-8B-Instruct-abliterated

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>

* chore(model gallery): update icon for hermes-3-llama-3.1-8b-lorablated

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>

* chore(model gallery): add icon for Google org

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>

---------

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-01-20 10:40:46 +01:00
Gianluca Boiano
390bb3f58b fix(model gallery): minicpm-v-2.6 is based on qwen2 (#4638)
Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
2025-01-20 10:35:05 +01:00
Gianluca Boiano
30739d94a4 chore(model gallery): add InternLM3-8b-Q4_K_M (#4637)
chore(model gallery): add InternLM3-8b-Q4_K_M

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
2025-01-20 10:34:19 +01:00
LocalAI [bot]
83e2dd5dff chore: ⬆️ Update ggerganov/llama.cpp to 92bc493917d43b83e592349e138b54c90b1c3ea7 (#4640)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-19 22:34:32 +00:00
Ettore Di Giacinto
f496d0113b chore(deps): pin numba
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-19 09:07:56 +01:00
LocalAI [bot]
a752183fb5 chore: ⬆️ Update ggerganov/llama.cpp to a1649cc13f89946322358f92ea268ae1b7b5096c (#4635)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-19 08:38:33 +01:00
LocalAI [bot]
296b97925f chore: ⬆️ Update leejet/stable-diffusion.cpp to 5eb15ef4d022bef4a391de4f5f6556e81fbb5024 (#4636)
⬆️ Update leejet/stable-diffusion.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-18 22:21:27 +00:00
Gianluca Boiano
d0cc3047dc chore(model gallery): add MiniCPM-V-2.6-8b-q4_K_M (#4633)
Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
2025-01-18 18:36:05 +01:00
Gianluca Boiano
032a33de49 chore: remove deprecated tinydream backend (#4631)
Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
2025-01-18 18:35:30 +01:00
Ettore Di Giacinto
1e9bf19c8d feat(transformers): merge sentencetransformers backend (#4624)
* merge sentencetransformers

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add alias to silently redirect sentencetransformers to transformers

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add alias also for transformers-musicgen

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Drop from makefile

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Move tests from sentencetransformers

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Remove sentencetransformers

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Remove tests from CI (part of transformers)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Do not always try to load the tokenizer

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Adapt tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix typo

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Tiny adjustments

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-18 18:30:30 +01:00
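A tiny Go sketch of the silent redirect described above: alias names resolve to the merged backend before dispatch. The alias pairs follow this PR; the lookup function itself is illustrative:

```go
package main

import "fmt"

// aliases silently redirected to the merged transformers backend
var backendAliases = map[string]string{
	"sentencetransformers":  "transformers",
	"transformers-musicgen": "transformers",
}

func resolveBackend(name string) string {
	if target, ok := backendAliases[name]; ok {
		return target
	}
	return name
}

func main() {
	fmt.Println(resolveBackend("sentencetransformers")) // transformers
}
```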
Gianluca Boiano
4bd8434ae0 fix(docs): add missing -core suffix to sycl images (#4630)
Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
2025-01-18 15:47:49 +01:00
Ettore Di Giacinto
958f6eb722 chore(llama.cpp): update dependency (#4628)
Update to '3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6' and adapt to upstream changes

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-18 11:55:13 +01:00
mintyleaf
96306a39a0 chore(docs): extra-Usage and Machine-Tag docs (#4627)
Rename LocalAI-Extra-Usage -> Extra-Usage, add MACHINE_TAG as cli flag option, add docs about extra-usage and machine-tag

Signed-off-by: mintyleaf <mintyleafdev@gmail.com>
2025-01-18 08:58:38 +01:00
LocalAI [bot]
895cd7c76a feat(swagger): update swagger (#4625)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-18 08:57:49 +01:00
dependabot[bot]
cbdbe59f16 chore(deps): Bump scipy from 1.14.0 to 1.15.1 in /backend/python/transformers (#4621)
chore(deps): Bump scipy in /backend/python/transformers

Bumps [scipy](https://github.com/scipy/scipy) from 1.14.0 to 1.15.1.
- [Release notes](https://github.com/scipy/scipy/releases)
- [Commits](https://github.com/scipy/scipy/compare/v1.14.0...v1.15.1)

---
updated-dependencies:
- dependency-name: scipy
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-01-17 22:14:11 +00:00
Ettore Di Giacinto
ee7904f170 feat(transformers): add support to OuteTTS (#4622)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-17 19:33:25 +01:00
Ettore Di Giacinto
a761e01944 chore: alias transformers-musicgen to transformers (#4623)
chore: alias transformers-musicgen to transformers

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-17 18:16:17 +01:00
mintyleaf
96f8ec0402 feat: add machine tag and inference timings (#4577)
* Add machine tag option, add extraUsage option, grpc-server -> proto -> endpoint extraUsage data is broken for now

Signed-off-by: mintyleaf <mintyleafdev@gmail.com>

* remove redundant timing fields, fix non-working timings output

Signed-off-by: mintyleaf <mintyleafdev@gmail.com>

* use middleware for Machine-Tag only if tag is specified

Signed-off-by: mintyleaf <mintyleafdev@gmail.com>

---------

Signed-off-by: mintyleaf <mintyleafdev@gmail.com>
2025-01-17 17:05:58 +01:00
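A minimal Go sketch of the conditional tagging middleware described above: the response header is added only when a machine tag is configured. Written here with net/http for self-containment; the actual LocalAI wiring is assumed:

```go
package main

import "net/http"

// machineTag wraps next so every response carries the configured tag.
func machineTag(tag string, next http.Handler) http.Handler {
	if tag == "" {
		return next // no tag configured: skip the middleware entirely
	}
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Machine-Tag", tag)
		next.ServeHTTP(w, r)
	})
}

func main() {
	mux := http.NewServeMux()
	mux.HandleFunc("/v1/models", func(w http.ResponseWriter, r *http.Request) {})
	_ = http.ListenAndServe(":8080", machineTag("worker-01", mux))
}
```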
Ettore Di Giacinto
8027fdf1c7 feat(transformers): merge musicgen functionalities to a single backend (#4620)
* feat(transformers): merge musicgen functionalities to a single backend

So we optimize space

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* specify type in tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Some adaptations for the MusicgenForConditionalGeneration type

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-17 17:01:16 +01:00
Ettore Di Giacinto
212c8e1a6d Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-01-17 15:11:10 +01:00
LocalAI [bot]
78533d7230 chore: ⬆️ Update ggerganov/llama.cpp to 4dbc8b9cb71876e005724f4e8f73a3544646bcf5 (#4618)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-17 10:25:04 +01:00
Ettore Di Giacinto
b5eeb5c5ab ci(arm64): run in parallel
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-17 10:24:15 +01:00
Ettore Di Giacinto
b147ad0596 ci: try to build for arm64
Try to use the free arm64 runners from Github:
https://github.blog/changelog/2025-01-16-linux-arm64-hosted-runners-now-available-for-free-in-public-repositories-public-preview/

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-17 10:14:26 +01:00
Ettore Di Giacinto
7d0ac1ea3f chore(vall-e-x): Drop backend (#4619)
There are many new SOTA architectures that replace vall-e-x nowadays.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-17 09:35:10 +01:00
Ettore Di Giacinto
d08d97bebf chore(model gallery): fix typo
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-01-16 22:26:55 +01:00
Ettore Di Giacinto
acb2eb23c8 feat(tts): Add Kokoro backend (#4616)
* feat(kokoro): Add new TTS backend

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add kokoro to images

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Support combined voices

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Ignore pt and onnx

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add plbert and istfnet

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-16 22:23:09 +01:00
Ettore Di Giacinto
de4aa9fb1d chore(model gallery): add vikhr-qwen-2.5-1.5b-instruct (#4615)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-16 10:09:25 +01:00
Ettore Di Giacinto
560ba6f25e chore(model gallery): add drt-o1-14b (#4614)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-16 10:04:44 +01:00
Ettore Di Giacinto
8131ddd878 chore(model gallery): add uwu-7b-instruct (#4613)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-16 09:58:14 +01:00
LocalAI [bot]
26c3deb673 chore: ⬆️ Update ggerganov/llama.cpp to adc5dd92e8aea98f5e7ac84f6e1bc15de35130b5 (#4612)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-16 00:08:52 +00:00
Ettore Di Giacinto
6d20497d45 chore(model gallery): add lb-reranker-0.5b-v1.0 (#4611)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-15 15:54:12 +01:00
Ettore Di Giacinto
482c6b8be4 chore(model gallery): add l3.3-ms-nevoria-70b (#4610)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-15 15:51:50 +01:00
Ettore Di Giacinto
5bba5edf45 chore(model gallery): add qwerus-7b (#4609)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-15 15:46:45 +01:00
Ettore Di Giacinto
792b866727 Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-01-15 15:46:27 +01:00
LocalAI [bot]
f053f7bde2 chore: ⬆️ Update ggerganov/llama.cpp to b4d92a59a20eea400d8dd30844a339b76210daa0 (#4606)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-14 22:16:33 +00:00
Ettore Di Giacinto
d7dee3a5ec feat(diffusers): add support for Sana pipelines (#4603)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-14 11:13:16 +01:00
Ettore Di Giacinto
b8d74e52b1 chore(model gallery): add steiner-32b-preview (#4602)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-14 09:41:30 +01:00
Ettore Di Giacinto
62abe0d2c9 chore(model gallery): add qwen2.5-72b-rp-ink (#4601)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-14 09:33:19 +01:00
Ettore Di Giacinto
5414c294c4 chore(model gallery): add negative-anubis-70b-v1 (#4600)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-14 09:29:25 +01:00
Ettore Di Giacinto
1b3e89c89c chore(model gallery): add LocalAI-functioncall-phi-4-v0.3 (#4599)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-14 09:27:18 +01:00
Ettore Di Giacinto
69c6e5b192 chore(stablediffusion-ggml): disable sycl optimizations (#4598)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-14 09:17:55 +01:00
LocalAI [bot]
0c02512f15 chore: ⬆️ Update ggerganov/llama.cpp to 504af20ee4eae72080a56d59d744f6774f7901ce (#4597)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-14 09:07:20 +01:00
dependabot[bot]
b0ead0bf12 chore(deps): Bump securego/gosec from 2.21.4 to 2.22.0 (#4594)
Bumps [securego/gosec](https://github.com/securego/gosec) from 2.21.4 to 2.22.0.
- [Release notes](https://github.com/securego/gosec/releases)
- [Changelog](https://github.com/securego/gosec/blob/master/.goreleaser.yml)
- [Commits](https://github.com/securego/gosec/compare/v2.21.4...v2.22.0)

---
updated-dependencies:
- dependency-name: securego/gosec
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-01-13 21:17:11 +00:00
Ettore Di Giacinto
ab5adf40af chore(deps): bump llama.cpp to '924518e2e5726e81f3aeb2518fb85963a500e… (#4592)
chore(deps): bump llama.cpp to '924518e2e5726e81f3aeb2518fb85963a500e93a'

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-13 17:33:06 +01:00
Ettore Di Giacinto
8d82afb595 fix(stablediffusion-ggml): enable oneapi before build (#4593)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-13 10:11:48 +01:00
Ettore Di Giacinto
aea71dd2c6 fix(stablediffusion-ggml): correctly enable sycl (#4591)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-12 22:07:01 +01:00
Ettore Di Giacinto
9fdb44323d chore(model gallery): add LocalAI-functioncall-phi-4-v0.2 (#4589)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-12 18:50:41 +01:00
Ettore Di Giacinto
6a299c04a7 feat(stablediffusion-ggml): respect build type (#4581)
* feat(stablediffusion-ggml): respect build type

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* combine libraries

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-12 18:33:51 +01:00
Ettore Di Giacinto
9ce71fe427 fix(gallery): correct UL typo
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-12 11:50:40 +01:00
Ettore Di Giacinto
e8de7b52da chore(model gallery): add LocalAI-functioncall-phi-4-v0.1 (#4588)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-12 11:26:42 +01:00
Ettore Di Giacinto
1780ccadbc chore(model gallery): add finemath-llama-3b (#4587)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-12 10:40:26 +01:00
Ettore Di Giacinto
f8cffd05e5 chore(model gallery): add negative_llama_70b (#4586)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-12 10:36:01 +01:00
Ettore Di Giacinto
b898cd49b5 chore(model gallery): add sky-t1-32b-preview (#4585)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-12 10:33:29 +01:00
LocalAI [bot]
7cd33d10c9 chore: ⬆️ Update ggerganov/llama.cpp to c05e8c9934f94fde49bc1bc9dc51eed282605150 (#4579)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-11 23:25:09 +01:00
Ettore Di Giacinto
cd480dbe5c chore(model gallery): add rombos-qwen2.5-writer-32b (#4584)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-11 23:24:55 +01:00
Ettore Di Giacinto
cb8bf79ada chore(model gallery): add qwq-32b-preview-ideawhiz-v1 (#4583)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-11 22:45:37 +01:00
Ettore Di Giacinto
b206eab80f chore(model gallery): add nightwing3-10b-v0.1 (#4582)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-11 22:41:30 +01:00
LocalAI [bot]
80dc23fab9 chore(model-gallery): ⬆️ update checksum (#4580)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-11 22:23:10 +01:00
LocalAI [bot]
844c0c422d docs: ⬆️ update docs version mudler/LocalAI (#4578)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-11 22:10:45 +01:00
LocalAI [bot]
07655c0c2e chore: ⬆️ Update ggerganov/llama.cpp to ba8a1f9c5b675459c55a83e3f97f10df3a66c788 (#4575)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-10 21:45:11 +00:00
Ettore Di Giacinto
bebfd19b45 chore(model gallery): add phi-3.5-moe-instruct (#4574)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-10 16:59:34 +01:00
Ettore Di Giacinto
6e34430d99 chore(model gallery): add chuluun-qwen2.5-72b-v0.01 (#4573)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-10 16:27:22 +01:00
Ettore Di Giacinto
0d08aaa29b chore(model gallery): add gwq-9b-preview2 (#4572)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-10 16:26:20 +01:00
Ettore Di Giacinto
66f9c06e7d docs: update README with langchain integration
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-01-10 09:20:33 +01:00
LocalAI [bot]
775adf871f chore: ⬆️ Update ggerganov/llama.cpp to 1204f9727005974587d6fc1dcd4d4f0ead87c856 (#4570)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-09 21:46:19 +00:00
Ettore Di Giacinto
a0fc19a3d6 chore(model gallery): add 70b-l3.3-cirrus-x1 (#4569)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-09 16:28:55 +01:00
Ettore Di Giacinto
7bd18662a7 chore(model gallery): add huatuogpt-o1-7b-v0.1 (#4568)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-09 16:01:57 +01:00
Ettore Di Giacinto
95b0739906 chore(model gallery): add minithinky-v2-1b-llama-3.2 (#4567)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-09 15:58:38 +01:00
Ettore Di Giacinto
cad7e9a1cd chore(model gallery): add 14b-qwen2.5-freya-x1 (#4566)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-09 15:55:56 +01:00
Ettore Di Giacinto
4426efab05 chore(deps): bump edgevpn to v0.29.0 (#4564)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-09 09:33:22 +01:00
Saarthak Verma
6765b17acd feat(dowloader): resume partial downloads (#4537)
* feat(resume downloads): add basic tests

Signed-off-by: Saarthak Verma <saarthakverma739@gmail.com>

* test(resume downloads): implement file download tc

Signed-off-by: Saarthak Verma <saarthakverma739@gmail.com>

* test(resume downloads): add resume partial download test

Signed-off-by: Saarthak Verma <saarthakverma739@gmail.com>

* feat(resume downloads): implement resumable downloads for interrupted transfers

- Adds support for resuming partially downloaded files
- Uses HTTP Range header to continue from last byte position
- Maintains download progress across interruptions
- Preserves partial downloads with .partial extension
- Validates SHA256 checksum after completion

Signed-off-by: Saarthak Verma <saarthakverma739@gmail.com>

* fix(resume downloads): incorrect download percent on front end

Signed-off-by: Saarthak Verma <saarthakverma739@gmail.com>

* feat(resume download): add range header check tc

Signed-off-by: Saarthak Verma <saarthakverma739@gmail.com>

* feat(resume download): implement range header check

Signed-off-by: Saarthak Verma <saarthakverma739@gmail.com>

---------

Signed-off-by: Saarthak Verma <saarthakverma739@gmail.com>
2025-01-09 09:22:52 +01:00
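A Go sketch of the resume logic described above (see the list of behaviors in the commit): stat the .partial file and ask the server to continue from that byte offset via the Range header. Error handling and the SHA256 validation are elided; names are illustrative:

```go
package main

import (
	"fmt"
	"io"
	"net/http"
	"os"
)

func resumeDownload(url, dest string) error {
	partial := dest + ".partial"
	var offset int64
	if fi, err := os.Stat(partial); err == nil {
		offset = fi.Size() // bytes already downloaded
	}

	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return err
	}
	if offset > 0 {
		req.Header.Set("Range", fmt.Sprintf("bytes=%d-", offset))
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// 206 means the server honoured the Range header; anything else restarts.
	flags := os.O_CREATE | os.O_WRONLY
	if resp.StatusCode == http.StatusPartialContent {
		flags |= os.O_APPEND
	} else {
		flags |= os.O_TRUNC
	}
	f, err := os.OpenFile(partial, flags, 0o644)
	if err != nil {
		return err
	}
	defer f.Close()
	if _, err := io.Copy(f, resp.Body); err != nil {
		return err // the .partial file is kept for the next attempt
	}
	// after a SHA256 check (omitted here), promote the partial file
	return os.Rename(partial, dest)
}

func main() {
	_ = resumeDownload("https://example.com/model.gguf", "model.gguf")
}
```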
Ettore Di Giacinto
ae1340d59b chore: update labeler.yml to include go files (#4565)
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-01-09 09:20:51 +01:00
Ettore Di Giacinto
fc52f179fe chore(model gallery): add phi-4 (#4562)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-08 23:26:25 +01:00
LocalAI [bot]
4f43a9a162 chore: ⬆️ Update ggerganov/llama.cpp to 8d59d911711b8f1ba9ec57c4b192ccd2628af033 (#4561)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-08 21:43:56 +00:00
Ettore Di Giacinto
20edd44463 chore(model gallery): add dolphin3.0-qwen2.5-3b (#4560)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-08 11:57:25 +01:00
Ettore Di Giacinto
1a4f9d8453 chore(model gallery): add dolphin3.0-qwen2.5-1.5b (#4559)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-08 11:55:44 +01:00
Ettore Di Giacinto
f2dd33b8f4 chore(model gallery): add dolphin3.0-qwen2.5-0.5b (#4558)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-08 11:53:46 +01:00
LocalAI [bot]
25e988868c chore: ⬆️ Update ggerganov/llama.cpp to 53ff6b9b9fb25ed0ec0a213e05534fe7c3d0040f (#4556)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-08 11:51:14 +01:00
Ettore Di Giacinto
ab344e4f47 docs: update compatibility-table.md (#4557)
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-01-07 21:20:44 +01:00
Ettore Di Giacinto
fac7893dd6 chore(model gallery): add dolphin3.0-llama3.2-3b (#4555)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-07 20:27:29 +01:00
Ettore Di Giacinto
9be338cfe4 chore(model gallery): add dolphin3.0-llama3.2-1b (#4554)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-07 20:24:21 +01:00
Ettore Di Giacinto
b4d4f96919 chore(model gallery): add dolphin3.0-llama3.1-8b (#4553)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-07 20:23:35 +01:00
Max Goltzsche
8cc2d01caa feat(ui): path prefix support via HTTP header (#4497)
Makes the web app honour the `X-Forwarded-Prefix` HTTP request header, which a reverse-proxy may send to inform the app that its public routes are served under a path prefix.
For instance, this allows serving the webapp via a reverse-proxy/ingress controller under a path prefix/sub-path such as `/localai/`, while the regular LocalAI routes/paths remain usable without a prefix when connecting directly to the LocalAI server.

Changes:
* Add new `StripPathPrefix` middleware to strip the path prefix (provided with the `X-Forwarded-Prefix` HTTP request header) from the request path prior to matching the HTTP route.
* Add a `BaseURL` utility function to build the base URL, honouring the `X-Forwarded-Prefix` HTTP request header.
* Generate the derived base URL into the HTML (`head.html` template) as `<base/>` tag.
* Make all webapp-internal URLs (within HTML+JS) relative in order to make the browser resolve them against the `<base/>` URL specified within each HTML page's header.
* Make font URLs within the CSS files relative to the CSS file.
* Generate redirect location URLs using the new `BaseURL` function.
* Use the new `BaseURL` function to generate absolute URLs within gallery JSON responses.

Closes #3095

TL;DR:
The header-based approach lets the path-prefix configuration live entirely at the reverse-proxy/ingress, instead of having to keep the prefix aligned between LocalAI, the reverse-proxy, and potentially other internal LocalAI clients; a minimal middleware sketch follows this entry.
The gofiber swagger handler already supports path prefixes this way, see e2d9e9916d/swagger.go (L79)

Signed-off-by: Max Goltzsche <max.goltzsche@gmail.com>
2025-01-07 17:18:21 +01:00
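A hypothetical sketch of the `StripPathPrefix` middleware and `BaseURL` helper described above, written against Fiber (the framework LocalAI's HTTP layer uses). The function bodies are illustrative assumptions; in particular, the real `BaseURL` builds a complete absolute URL, while this sketch returns only the path portion:

```go
// Minimal sketch (assumed logic) of stripping X-Forwarded-Prefix before route
// matching and deriving a base URL for the <base/> tag and redirects.
package main

import (
	"strings"

	"github.com/gofiber/fiber/v2"
)

// StripPathPrefix removes the prefix announced via X-Forwarded-Prefix from the
// request path prior to route matching, so routes are declared without it.
func StripPathPrefix() fiber.Handler {
	return func(c *fiber.Ctx) error {
		if prefix := c.Get("X-Forwarded-Prefix"); prefix != "" {
			prefix = "/" + strings.Trim(prefix, "/")
			if path := c.Path(); strings.HasPrefix(path, prefix) {
				stripped := strings.TrimPrefix(path, prefix)
				if stripped == "" {
					stripped = "/"
				}
				c.Path(stripped)
			}
		}
		return c.Next()
	}
}

// BaseURL returns the public base path, honouring the forwarded prefix
// (simplified: the real helper would build a complete absolute URL).
func BaseURL(c *fiber.Ctx) string {
	if prefix := c.Get("X-Forwarded-Prefix"); prefix != "" {
		return "/" + strings.Trim(prefix, "/") + "/"
	}
	return "/"
}

func main() {
	app := fiber.New()
	app.Use(StripPathPrefix())
	app.Get("/models", func(c *fiber.Ctx) error {
		// Emitted into the head.html template as <base href="..."/> in the real app.
		return c.SendString("base: " + BaseURL(c))
	})
	_ = app.Listen(":8080")
}
```

With this in place, a reverse-proxy only needs to set `X-Forwarded-Prefix: /localai` on forwarded requests; LocalAI itself needs no prefix configuration.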
LocalAI [bot]
bf37eebecb chore: ⬆️ Update ggerganov/llama.cpp to ecebbd292d741ac084cf248146b2cfb17002aa1d (#4552)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-07 10:14:07 +01:00
dependabot[bot]
3f0850b58b chore(deps): Bump docs/themes/hugo-theme-relearn from d25f856 to 80e448e (#4549)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `d25f856` to `80e448e`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](d25f856477...80e448e5bd)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-01-06 20:20:34 +00:00
Ettore Di Giacinto
2ffa89b8b9 chore(model gallery): add 14b-qwen2.5-kunou-v1 (#4547)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-06 10:43:09 +01:00
Ettore Di Giacinto
d43adc0205 chore(model gallery): add triangulum-10b (#4546)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-06 10:42:45 +01:00
Ettore Di Giacinto
78b34505ab chore(model gallery): add 32b-qwen2.5-kunou-v1 (#4545)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-06 10:08:35 +01:00
LocalAI [bot]
e55a1bed59 chore: ⬆️ Update ggerganov/llama.cpp to b56f079e28fda692f11a8b59200ceb815b05d419 (#4544)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-05 21:43:06 +00:00
Ettore Di Giacinto
0d7550ad54 chore(deps): bump grpcio to 1.69.0 (#4543)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-05 15:01:49 +01:00
Ettore Di Giacinto
b5992255ac chore(model gallery): add qwentile2.5-32b-instruct (#4541)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-05 09:22:00 +01:00
Ettore Di Giacinto
e845cc0401 chore(model gallery): add llama-deepsync-3b (#4540)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-05 09:19:05 +01:00
Ettore Di Giacinto
a10033e8a4 chore(model gallery): add experimental-lwd-mirau-rp-14b-iq-imatrix (#4539)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-05 09:12:12 +01:00
LocalAI [bot]
6c6d840e6b chore: ⬆️ Update ggerganov/llama.cpp to 9394bbd484f802ce80d2858033583af3ef700d25 (#4536)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-04 21:43:08 +00:00
Ettore Di Giacinto
a8b3b3d6f4 chore(model gallery): add llama3.1-8b-prm-deepseek-data (#4535)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-04 09:48:34 +01:00
Ettore Di Giacinto
ec66f7e3b1 chore(model gallery): add codepy-deepthink-3b (#4534)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-04 09:45:07 +01:00
Ettore Di Giacinto
05841c2435 chore(model gallery): add drt-o1-7b (#4533)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-04 09:44:14 +01:00
Ettore Di Giacinto
c553d73748 chore(deps): bump llama.cpp to 4b0c638b9 (#4532)
deps(llama.cpp): bump to 4b0c638b9

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-04 09:40:08 +01:00
Ettore Di Giacinto
1006e8a2ed ci: disable arm jobs
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-01-03 21:58:04 +01:00
Ettore Di Giacinto
9bcfda171b ci: lower concurrent jobs
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-01-03 20:48:23 +01:00
Ettore Di Giacinto
baee4f7bd5 ci: split jobs
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-01-03 19:23:05 +01:00
Ettore Di Giacinto
286dc32fe0 ci(arm64): try building on self-hosted
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-01-03 19:18:18 +01:00
Ettore Di Giacinto
36e4c0fcf0 chore(model gallery): add nera_noctis-12b (#4530)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-03 09:10:03 +01:00
LocalAI [bot]
3c21c8789a chore: ⬆️ Update ggerganov/llama.cpp to 2f0ee84b9b02d2a98742308026f060ebdc2423f1 (#4528)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-02 21:43:37 +00:00
Ettore Di Giacinto
d9facbcee9 chore(model gallery): add l3.1-purosani-2-8b (#4527)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-02 10:46:11 +01:00
Ettore Di Giacinto
930280ecac chore(model gallery): add sainemo-remix (#4526)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-02 10:46:01 +01:00
Ettore Di Giacinto
3415e6ae74 chore(model gallery): add qwenwify2.5-32b-v4.5 (#4525)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-02 10:45:52 +01:00
Ettore Di Giacinto
f1082f3c6d chore(model gallery): add violet_twilight-v0.2 (#4524)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-01 14:41:48 +01:00
Ettore Di Giacinto
f345f7a795 chore(model gallery): add captain-eris-diogenes_twilight-v0.420-12b (#4523)
chore(model gallery): add captain-eris-diogenes_twilight-v0.420-12b-arm-imatrix

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-01 13:33:39 +01:00
Ettore Di Giacinto
1a2a7a57b3 chore(model gallery): add mn-12b-mag-mell-r1-iq-arm-imatrix (#4522)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-01 13:27:13 +01:00
Ettore Di Giacinto
ae80a2bd24 chore(model gallery): add smallthinker-3b-preview (#4521)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-01 13:26:48 +01:00
LocalAI [bot]
c30ecdd535 chore: ⬆️ Update ggerganov/llama.cpp to 0827b2c1da299805288abbd556d869318f2b121e (#4520)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-31 21:43:29 +00:00
Ettore Di Giacinto
f16c7cef92 chore(model gallery): add q2.5-veltha-14b-0.5 (#4519)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-31 11:23:29 +01:00
Ettore Di Giacinto
e1dd78bcea chore(model gallery): add huatuogpt-o1-8b (#4518)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-31 11:23:07 +01:00
dependabot[bot]
25acb0cbbc chore(deps): Bump docs/themes/hugo-theme-relearn from ec88e24 to d25f856 (#4515)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `ec88e24` to `d25f856`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](ec88e24f46...d25f856477)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-12-31 11:18:51 +01:00
LocalAI [bot]
7674c80bb6 chore: ⬆️ Update ggerganov/llama.cpp to 716bd6dec3e044e5c325386b5b0483392b24cefe (#4516)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-31 11:18:33 +01:00
Ettore Di Giacinto
e044970a5b chore(model gallery): add qwen2.5-32b-rp-ink (#4517)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-31 11:17:29 +01:00
LocalAI [bot]
639526d207 chore: ⬆️ Update leejet/stable-diffusion.cpp to dcf91f9e0f2cbf9da472ee2a556751ed4bab2d2a (#4509)
⬆️ Update leejet/stable-diffusion.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-30 21:44:53 +00:00
dependabot[bot]
998ff9fa22 chore(deps): Bump gradio from 3.48.0 to 5.9.1 in /backend/python/openvoice (#4514)
chore(deps): Bump gradio in /backend/python/openvoice

Bumps [gradio](https://github.com/gradio-app/gradio) from 3.48.0 to 5.9.1.
- [Release notes](https://github.com/gradio-app/gradio/releases)
- [Changelog](https://github.com/gradio-app/gradio/blob/main/CHANGELOG.md)
- [Commits](https://github.com/gradio-app/gradio/compare/gradio@3.48.0...gradio@5.9.1)

---
updated-dependencies:
- dependency-name: gradio
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-12-30 20:05:45 +00:00
LocalAI [bot]
7122c7472e chore: ⬆️ Update ggerganov/llama.cpp to a813badbbdf0d38705f249df7a0c99af5cdee678 (#4512)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-29 21:42:38 +00:00
LocalAI [bot]
671381267a chore: ⬆️ Update ggerganov/llama.cpp to f865ea149d71ef883e3780fced8a20a1464eccf4 (#4510)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-28 21:43:05 +00:00
Ettore Di Giacinto
d1762e098e chore(model gallery): add miscii-14b-1225 (#4508)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-28 10:56:59 +01:00
Ettore Di Giacinto
270d33504b chore(model gallery): add miscii-14b-1028 (#4507)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-28 10:54:47 +01:00
Ettore Di Giacinto
9b0983d027 chore(model gallery): add control-nanuq-8b (#4506)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-28 10:49:53 +01:00
Ettore Di Giacinto
afd0af987d Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-12-27 15:17:02 +01:00
Ettore Di Giacinto
58524d40c9 Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-12-27 15:13:06 +01:00
Ettore Di Giacinto
2a7222c6aa chore(model gallery): add falcon3-7b-instruct-abliterated (#4504)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-27 11:29:34 +01:00
Ettore Di Giacinto
0093985e7c chore(model gallery): add falcon3-10b-instruct-abliterated (#4503)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-27 11:24:13 +01:00
Ettore Di Giacinto
7f51e2dddf chore(model gallery): add falcon3-3b-instruct-abliterated (#4502)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-27 11:20:10 +01:00
Ettore Di Giacinto
f3bbdef77d chore(model gallery): add falcon3-1b-instruct-abliterated (#4501)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-27 11:15:56 +01:00
LocalAI [bot]
9cbf168dc0 chore: ⬆️ Update ggerganov/llama.cpp to d79d8f39b4da6deca4aea8bf130c6034c482b320 (#4500)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-26 21:44:36 +00:00
Ettore Di Giacinto
9572f0577b chore(model gallery): add teleut-7b-rp (#4499)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-26 10:52:57 +01:00
Ettore Di Giacinto
1a14c7d45a chore(model gallery): add qvq-72b-preview (#4498)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-26 10:47:54 +01:00
LocalAI [bot]
5c29e0cd4d chore: ⬆️ Update ggerganov/llama.cpp to 9ba399dfa7f115effc63d48e6860a94c9faa31b2 (#4496)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-25 21:43:06 +00:00
Ettore Di Giacinto
1a74af1492 chore(model gallery): add llama-3.1-8b-open-sft (#4495)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-25 11:47:33 +01:00
Ettore Di Giacinto
8f6332ab23 chore(model gallery): add dans-personalityengine-v1.1.0-12b (#4494)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-25 11:47:22 +01:00
Ettore Di Giacinto
816ae7a53a chore(model gallery): add fastllama-3.2-1b-instruct (#4493)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-25 11:47:11 +01:00
LocalAI [bot]
1d630e4185 chore(model-gallery): ⬆️ update checksum (#4492)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-24 23:04:50 +01:00
LocalAI [bot]
bc8dd3ad14 chore: ⬆️ Update ggerganov/llama.cpp to 2cd43f4900ba0e34124fdcbf02a7f9df25a10a3d (#4491)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-24 21:44:11 +00:00
Ettore Di Giacinto
b969053701 chore(gallery): re-order
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-12-24 11:10:56 +01:00
Ettore Di Giacinto
60bf7c9dd7 chore(model gallery): add rombos-llm-70b-llama-3.3 (#4490)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-24 11:04:31 +01:00
Ettore Di Giacinto
d65c10cee7 chore(model gallery): add tqwendo-36b (#4489)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-24 11:04:19 +01:00
Ettore Di Giacinto
6c71698299 chore(model gallery): add l3.3-ms-evalebis-70b (#4488)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-24 10:59:36 +01:00
LocalAI [bot]
c7c275c7c8 chore(model-gallery): ⬆️ update checksum (#4487)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-24 10:56:02 +01:00
LocalAI [bot]
d0adbee75d chore: ⬆️ Update ggerganov/llama.cpp to 32d6ee6385b3fc908b283f509b845f757a6e7206 (#4486)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-24 10:55:30 +01:00
dependabot[bot]
159a7f6df2 chore(deps): Bump docs/themes/hugo-theme-relearn from bd1f3d3 to ec88e24 (#4460)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `bd1f3d3` to `ec88e24`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](bd1f3d3432...ec88e24f46)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-12-23 22:15:38 +00:00
Ettore Di Giacinto
0eb2911aad chore(llava): update clip.patch (#4453)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-23 19:11:31 +01:00
Ettore Di Giacinto
cab9f88ca4 chore(docs): add nvidia l4t instructions (#4454)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-23 18:59:33 +01:00
Ettore Di Giacinto
a3b675b09e Delete .cirrus.yml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-12-23 18:31:50 +01:00
Ettore Di Giacinto
6477913e8f chore(ci): increase task timeout
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-12-23 16:43:32 +01:00
Ettore Di Giacinto
138cd97ce7 chore(ci): try to add CirrusCI to build arm64 images natively
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-23 15:37:57 +01:00
Ettore Di Giacinto
4dd9ac39b0 chore(ci): comment arm64 job until we find a native CI runner (#4452)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-23 12:34:39 +01:00
LocalAI [bot]
23499ddc8a chore: ⬆️ Update ggerganov/llama.cpp to ebdee9478ca7ba65497b9b96f7457698c6ee5115 (#4451)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-22 22:56:41 +00:00
Ettore Di Giacinto
8864156300 chore(nvidia-l4t): add l4t arm64 images (#4449)
chore(nvidia-l4t): add nvidia-l4t arm64 images

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-22 21:29:33 +01:00
Ettore Di Giacinto
478014ca18 feat(Dockerfile): allow to skip driver installation (#4447)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-22 21:28:38 +01:00
Ettore Di Giacinto
d45477b003 chore(model gallery): add llama-3.3-70b-instruct-ablated (#4448)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-22 08:38:47 +01:00
Ettore Di Giacinto
396fb88e33 chore(model gallery): add anubis-70b-v1 (#4446)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-22 08:33:35 +01:00
LocalAI [bot]
a429ec1b3f chore: ⬆️ Update ggerganov/llama.cpp to 5cd85b5e008de2ec398d6596e240187d627561e3 (#4445)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-21 21:44:44 +00:00
Ettore Di Giacinto
5b5fb9c22a chore(model gallery): add orca_mini_v8_1_70b (#4444)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-21 10:39:58 +01:00
LocalAI [bot]
801a87c3a6 chore: ⬆️ Update ggerganov/llama.cpp to eb5c3dc64bd967f2e23c87d9dec195f45468de60 (#4442)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-20 21:48:03 +00:00
Ettore Di Giacinto
badbd212f7 chore(model gallery): add tq2.5-14b-neon-v1 (#4441)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-20 16:11:16 +01:00
Ettore Di Giacinto
c4bbecc4d6 chore(model gallery): add tq2.5-14b-aletheia-v1 (#4440)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-20 16:08:23 +01:00
Ettore Di Giacinto
8a08e9ec67 fix(openvoice): pin numpy before installing torch (#4439)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-20 10:34:23 +01:00
LocalAI [bot]
61e486dbf5 chore: ⬆️ Update ggerganov/llama.cpp to d408bb9268a988c5a60a5746d3a6430386e7604d (#4437)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-12-19 23:03:47 +00:00
Ettore Di Giacinto
f2f387e1dd fix(openvoice): do not pin numpy (#4438)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-19 21:30:43 +01:00
Ettore Di Giacinto
3be9a08fc9 fix(deps): pin openvoice pytorch/torchaudio (#4436)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-19 18:24:33 +01:00
Ettore Di Giacinto
b325807c60 fix(intel): pin torch and intel-extensions (#4435)
* fix(intel): pin torch version

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(intel): pin intel packages version

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-19 15:39:32 +01:00
jtwolfe
ae9855a39e chore(docs): patch p2p detail in env and docs (#4434)
* Update distributed_inferencing.md

Signed-off-by: jtwolfe <jamie.t.wolfe@gmail.com>

* Update .env

Signed-off-by: jtwolfe <jamie.t.wolfe@gmail.com>

* Update distributed_inferencing.md

whoops

Signed-off-by: jtwolfe <jamie.t.wolfe@gmail.com>

---------

Signed-off-by: jtwolfe <jamie.t.wolfe@gmail.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-12-19 15:19:31 +01:00
LocalAI [bot]
9ac62b589f chore: ⬆️ Update ggerganov/llama.cpp to cd920d0ac38ec243605a5a57c50941140a193f9e (#4433)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-19 12:15:30 +00:00
Ettore Di Giacinto
d12660a286 chore(model gallery): add llama-chat-summary-3.2-3b (#4432)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-19 09:56:19 +01:00
LocalAI [bot]
3d3bd2d10f chore: ⬆️ Update ggerganov/llama.cpp to 0bf2d10c5514ff61b99897a4a5054f846e384e1e (#4429)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-19 09:53:49 +01:00
Ettore Di Giacinto
b656d10556 chore(model gallery): add llama-song-stream-3b-instruct (#4431)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-19 09:48:33 +01:00
Ettore Di Giacinto
8c67f38ef6 chore(model gallery): add falcon3-10b-instruct (#4426)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-18 10:36:41 +01:00
Ettore Di Giacinto
4623728cd7 chore(model gallery): add qwen2-vl-72b-instruct (#4425)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-18 10:35:49 +01:00
Ettore Di Giacinto
5f804aa6e8 chore(model gallery): add falcon3-3b-instruct (#4424)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-18 10:32:31 +01:00
Ettore Di Giacinto
f52c6e3a31 chore(model gallery): add falcon3-1b-instruct (#4423)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-18 10:12:06 +01:00
Ettore Di Giacinto
0b4bb7a562 chore(model gallery): add llama-openreviewer-8b (#4422)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-18 09:49:48 +01:00
mintyleaf
2bc4b56a79 feat: stream tokens usage (#4415)
* Use pb.Reply instead of []byte (obtained via Reply.GetMessage()) in the llama gRPC backend, so the proper usage data reaches streaming replies at the last [DONE] frame (illustrated after this entry)

* Fix 'hang' on empty message from the start

It seems the empty-message marker trick was unnecessary

---------

Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-12-18 09:48:50 +01:00
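An illustrative sketch of the idea in the commit above; the `Reply` type stands in for the actual protobuf message and is an assumption, not LocalAI's real definition:

```go
// Passing the whole reply (rather than just its message bytes) through the
// streaming path preserves the usage counters carried by the final frame.
package main

import "fmt"

// Reply stands in for the llama backend's streaming reply: text plus
// token-usage counters that are only populated on the last frame.
type Reply struct {
	Message          []byte
	PromptTokens     int32
	CompletionTokens int32
}

func main() {
	frames := []Reply{
		{Message: []byte("Hello")},
		{Message: []byte(", world")},
		{PromptTokens: 12, CompletionTokens: 7}, // final [DONE]-style frame
	}

	// Before: the callback received only reply.GetMessage() ([]byte), so the
	// usage fields of the last frame were discarded. After: it receives the
	// Reply itself and can surface usage when the stream finishes.
	for i, r := range frames {
		fmt.Printf("chunk %d: %q\n", i, r.Message)
		if i == len(frames)-1 {
			fmt.Printf("usage: prompt=%d completion=%d\n", r.PromptTokens, r.CompletionTokens)
		}
	}
}
```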
LocalAI [bot]
fc920cc58a chore: ⬆️ Update ggerganov/llama.cpp to 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 (#4421)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-17 22:15:14 +00:00
Ettore Di Giacinto
fdb560b8e5 chore(model gallery): add qwq-lcot-7b-instruct (#4419)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-17 10:10:37 +01:00
Ettore Di Giacinto
708cba0c1b chore(llama.cpp): bump, drop penalize_nl (#4418)
deps(llama.cpp): bump, drop penalize_nl

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-17 00:47:52 +01:00
Ettore Di Giacinto
24abf568cb chore(tests): stabilize tts test (#4417)
chore(tests): stabilize test

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-17 00:46:48 +01:00
Ettore Di Giacinto
7ca0e2d925 fix(python): remove pin to setuptools, pin python version (#4395)
fix(setuptools): remove pin

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-16 10:55:02 +01:00
Ettore Di Giacinto
037e8030bf chore(model gallery): add qwen2-7b-multilingual-rp (#4394)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-16 09:48:33 +01:00
Ettore Di Giacinto
472d11f884 chore(model gallery): add marco-o1-uncensored (#4393)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-16 09:48:23 +01:00
Ettore Di Giacinto
b40d5d12b7 chore(model gallery): add naturallm-7b-instruct (#4392)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-16 09:47:49 +01:00
LocalAI [bot]
6938618e30 chore: ⬆️ Update ggerganov/llama.cpp to a0974156f334acf8af5858d7ede5ab7d7490d415 (#4391)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-15 22:01:44 +00:00
Ettore Di Giacinto
5d9c530eaa fix(gallery): disable default embeddings
Do not always enable embeddings on llama32, but let specific models settings

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-12-15 18:43:39 +01:00
Ettore Di Giacinto
9429a53db7 chore(model gallery): add neumind-math-7b-instruct (#4388)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-15 10:07:56 +01:00
Ettore Di Giacinto
1d6d301370 chore(model gallery): add fusechat-llama-3.1-8b-instruct (#4387)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-15 10:07:42 +01:00
Ettore Di Giacinto
8f2be82667 chore(model gallery): add fusechat-llama-3.2-3b-instruct (#4386)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-15 10:07:30 +01:00
LocalAI [bot]
cca911f3e5 chore: ⬆️ Update ggerganov/llama.cpp to e52aba537a34d51a65cddec6bc6dafc9031edc63 (#4385)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-12-15 09:59:20 +01:00
Jason Godsey
e37bbbaacc fix: correct gallery/index.yaml (#4384)
Update index.yaml

DBG YAML errors: line 2025: cannot unmarshal !!str `-https:...` into []string

Signed-off-by: Jason Godsey <godsey@users.noreply.github.com>
2024-12-14 21:25:51 +01:00
Ettore Di Giacinto
59cbf38b4b fix(gallery): correct syntax typo
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-12-14 21:21:27 +01:00
Ettore Di Giacinto
432c31d904 chore(model gallery): add chronos-gold-12b-1.0 (#4381)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-14 11:27:25 +01:00
Ettore Di Giacinto
af33483687 chore(model gallery): add fusechat-qwen-2.5-7b-instruct (#4380)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-14 11:27:11 +01:00
Ettore Di Giacinto
5051074845 chore(model gallery): add fusechat-gemma-2-9b-instruct (#4379)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-14 11:26:40 +01:00
Ettore Di Giacinto
fc4a714992 feat(llama.cpp): bump and adapt to upstream changes (#4378)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-14 00:30:52 +01:00
Ettore Di Giacinto
0429e00746 chore(model gallery): add hermes-3-llama-3.2-3b (#4376)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-13 09:51:26 +01:00
Ettore Di Giacinto
73f1f25b9a chore(model gallery): add evathene-v1.3 (#4375)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-13 09:51:13 +01:00
Ettore Di Giacinto
044570fa85 chore(model gallery): add l3.3-ms-evayale-70b (#4374)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-13 09:50:41 +01:00
LocalAI [bot]
37527420de chore: ⬆️ Update ggerganov/llama.cpp to 274ec65af6e54039eb95cb44904af5c945dca1fa (#4372)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-12 21:44:54 +00:00
Ettore Di Giacinto
1854b8c612 chore(model gallery): add l3.3-70b-euryale-v2.3 (#4371)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-12 12:22:48 +01:00
Ettore Di Giacinto
b8824f2ad9 chore(model gallery): add deepthought-8b-llama-v0.01-alpha (#4370)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-12 12:07:57 +01:00
Ettore Di Giacinto
3ab83e91df chore(model gallery): add 72b-qwen2.5-kunou-v1 (#4369)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-12 12:07:41 +01:00
LocalAI [bot]
f2cb261797 chore: ⬆️ Update ggerganov/llama.cpp to 235f6e14bf0ed0211c51aeff14139038ae1000aa (#4366)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-12 09:23:36 +01:00
Ettore Di Giacinto
c85f46a71d chore(model gallery): add sailor2-20b-chat (#4365)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-11 10:55:04 +01:00
Ettore Di Giacinto
75b283d83c chore(model gallery): add sailor2-8b-chat (#4364)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-11 10:51:39 +01:00
Ettore Di Giacinto
1918efdfdd chore(model gallery): add sailor2-1b-chat (#4363)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-11 10:32:18 +01:00
LocalAI [bot]
ec239a0cd0 docs: ⬆️ update docs version mudler/LocalAI (#4359)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-11 10:04:16 +01:00
LocalAI [bot]
b74a936178 chore: ⬆️ Update ggerganov/llama.cpp to dafae66cc242eb766797194d3c85c5e502625623 (#4360)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-10 21:45:42 +00:00
Ettore Di Giacinto
de1ddb8ba6 chore(model gallery): add b-nimita-l3-8b-v0.02 (#4357)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-10 09:42:47 +01:00
Ettore Di Giacinto
272763f625 chore(model gallery): add intellect-1-instruct (#4356)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-10 09:42:37 +01:00
Ettore Di Giacinto
3aff87a5cf chore(model gallery): add qwen2.5-math-14b-instruct (#4355)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-10 09:42:24 +01:00
LocalAI [bot]
885118e863 chore: ⬆️ Update ggerganov/llama.cpp to 26a8406ba9198eb6fdd8329fa717555b4f77f05f (#4353)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-10 09:10:58 +01:00
dependabot[bot]
a03a9b9e51 chore(deps): Bump docs/themes/hugo-theme-relearn from be85052 to bd1f3d3 (#4348)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `be85052` to `bd1f3d3`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](be85052efe...bd1f3d3432)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-12-09 20:09:26 +00:00
Ettore Di Giacinto
f45d6c746a chore(model gallery): add tulu-3.1-8b-supernova-smart (#4347)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-09 15:58:29 +01:00
Ettore Di Giacinto
5eceb5f67c chore(model gallery): add impish_mind_8b (#4344)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-09 10:24:30 +01:00
Ettore Di Giacinto
a9c0dd3a1e chore(model gallery): add qwen2.5-7b-homeranvita-nerdmix (#4343)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-09 10:24:15 +01:00
LocalAI [bot]
fb17e737f0 docs: ⬆️ update docs version mudler/LocalAI (#4341)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-09 09:19:25 +01:00
LocalAI [bot]
b5a21202ed chore: ⬆️ Update ggerganov/llama.cpp to e52522b8694ae73abf12feb18d29168674aa1c1b (#4342)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-08 22:54:06 +00:00
Ettore Di Giacinto
e147f1bd3e chore(model gallery): add bio-medical-llama-3-8b (#4339)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-08 18:43:26 +01:00
Ettore Di Giacinto
61839efed2 chore(model gallery): add virtuoso-small (#4338)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-08 18:01:25 +01:00
Ettore Di Giacinto
a0fe050055 chore(model gallery): add mn-chunky-lotus-12b (#4337)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-08 18:01:16 +01:00
Ettore Di Giacinto
f943c4b803 Revert "feat: include tokens usage for streamed output" (#4336)
Revert "feat: include tokens usage for streamed output (#4282)"

This reverts commit 0d6c3a7d57.
2024-12-08 17:53:36 +01:00
Ettore Di Giacinto
cea5a0ea42 feat(template): read jinja templates from gguf files (#4332)
* Read jinja templates as fallback

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Move templating out of model loader

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Test TemplateMessages

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Set role and content from transformers

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Tests: be more flexible

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* More jinja

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Small refactoring and adaptations

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-08 13:50:33 +01:00
LocalAI [bot]
f5e1527a5a chore: ⬆️ Update ggerganov/llama.cpp to 3573fa8e7b7f0865638b52b4e9b4d2006f0558a2 (#4335)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-07 21:51:45 +00:00
Ettore Di Giacinto
7184ca546f chore(model gallery): add llama-3.3-70b-instruct (#4333)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-07 10:39:20 +01:00
LocalAI [bot]
5592f5e820 chore: ⬆️ Update ggerganov/llama.cpp to c5ede3849fc021174862f9c0bf8273808d8f0d39 (#4330)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-06 21:46:51 +00:00
Ettore Di Giacinto
d4c1746c7d feat(llama.cpp): expose cache_type_k and cache_type_v for quant of kv cache (#4329)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-06 10:23:59 +01:00
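A hedged example of what this exposes in a model's YAML configuration: the `cache_type_k`/`cache_type_v` field names come from this change, while the model name and quantization values below are illustrative assumptions:

```yaml
# Sketch of a model config enabling a quantized KV cache (values illustrative).
name: my-model
parameters:
  model: my-model.gguf
cache_type_k: q8_0   # quantization type for the K cache
cache_type_v: q8_0   # quantization type for the V cache
```

Quantizing the KV cache trades a small amount of accuracy for a substantially smaller memory footprint on long contexts.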
LocalAI [bot]
88737e1d76 chore: ⬆️ Update ggerganov/llama.cpp to c9c6e01daedac542b174c235872569fce5385982 (#4328)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-06 09:15:21 +01:00
LocalAI [bot]
ba225f660b docs: ⬆️ update docs version mudler/LocalAI (#4327)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-05 21:54:00 +00:00
Ettore Di Giacinto
3127cd1352 chore(docs): update available backends (#4325)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-05 16:57:56 +01:00
PetrFlegr
b90d78d9f6 Updated links of yamls (#4324)
Updated links

Links to deployment*.yaml were changed

Signed-off-by: PetrFlegr <ptrflegr@gmail.com>
2024-12-05 16:06:51 +01:00
Ettore Di Giacinto
b86a3e4fa6 chore(model gallery): add math-iio-7b-instruct (#4323)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-05 10:05:35 +01:00
Ettore Di Giacinto
be907d993f chore(model gallery): add loki-v2.6-8b-1024k (#4321)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-05 10:02:02 +01:00
Ettore Di Giacinto
ab0f8648a3 chore(model gallery): add rp-naughty-v1.0c-8b (#4322)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-05 10:01:49 +01:00
LocalAI [bot]
c226149503 chore: ⬆️ Update leejet/stable-diffusion.cpp to 9578fdcc4632dc3de5565f28e2fb16b7c18f8d48 (#4320)
⬆️ Update leejet/stable-diffusion.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-05 09:09:11 +01:00
LocalAI [bot]
4a079f893c chore: ⬆️ Update ggerganov/llama.cpp to 59f4db10883a4f3e855cffbf2c3ab68430e95272 (#4319)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-04 22:19:35 +00:00
Ettore Di Giacinto
87b7648591 chore(ci): set auto-labeler for dependencies
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-12-04 18:35:54 +01:00
Ettore Di Giacinto
cf4f024420 Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-12-04 11:31:08 +01:00
Ettore Di Giacinto
3c0ac49d90 chore(model gallery): add bark-cpp-small (#4318)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-04 09:32:42 +01:00
Ettore Di Giacinto
4307ae5d52 chore(model gallery): add flux.1-dev-ggml (#4317)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-04 09:26:51 +01:00
Ettore Di Giacinto
50f71f73d7 chore(model gallery): add homercreativeanvita-mix-qw7b (#4316)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-04 09:17:23 +01:00
Ettore Di Giacinto
dc04a43868 chore(model gallery): add chatty-harry_v3.0 (#4315)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-04 09:15:47 +01:00
Ettore Di Giacinto
cc04b62d3a chore(model gallery): add cybercore-qwen-2.1-7b (#4314)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-04 09:15:35 +01:00
LocalAI [bot]
feb54e65c2 chore: ⬆️ Update ggerganov/llama.cpp to cc98896db858df7aa40d0e16a505883ef196a482 (#4312)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-12-03 23:53:11 +00:00
Ettore Di Giacinto
44a5dac312 feat(backend): add stablediffusion-ggml (#4289)
* feat(backend): add stablediffusion-ggml

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(ci): track stablediffusion-ggml

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Use default scheduler and sampler if not specified

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Move cfg scale out of diffusers block

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Make it working

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: set free_params_immediately to false to call the model in sequence

https://github.com/leejet/stable-diffusion.cpp/issues/366

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-03 22:41:22 +01:00
Ettore Di Giacinto
074b52bbfe chore(model gallery): add bggpt-gemma-2-2.6b-it-v1.0 (#4311)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-03 18:56:53 +01:00
Ettore Di Giacinto
236a60bab8 chore(model gallery): add qwen2.5-7b-homercreative-mix (#4310)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-03 18:56:03 +01:00
Ettore Di Giacinto
7b70f0543b chore(model gallery): add sparse-llama-3.1-8b-2of4 (#4309)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-03 18:55:49 +01:00
LocalAI [bot]
5f33962932 chore: ⬆️ Update ggerganov/llama.cpp to 8648c521010620c2daccfa1d26015c668ba2c717 (#4307)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-03 09:20:59 +01:00
dependabot[bot]
45b91d501e chore(deps): Bump docs/themes/hugo-theme-relearn from 28fce6b to be85052 (#4305)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `28fce6b` to `be85052`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](28fce6b04c...be85052efe)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-12-02 22:47:37 +00:00
Ettore Di Giacinto
e51792784a chore(deps): bump grpcio to 1.68.1 (#4301)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-02 19:13:26 +01:00
LocalAI [bot]
28594336e9 chore: ⬆️ Update ggerganov/llama.cpp to 5e1ed95583ca552a98d8528b73e1ff81249c2bf9 (#4299)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-01 21:44:59 +00:00
Ettore Di Giacinto
9c9359fc96 chore(model gallery): add teleut-7b (#4298)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-01 10:12:48 +01:00
Ettore Di Giacinto
bc5d1f255b chore(model gallery): add skywork-o1-open-llama-3.1-8b (#4297)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-01 10:12:35 +01:00
Ettore Di Giacinto
0fcefbc168 chore(model gallery): add volare-i1 (#4296)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-01 10:12:17 +01:00
LocalAI [bot]
9044b17e4d chore: ⬆️ Update ggerganov/llama.cpp to 0c39f44d70d058940fe2afe50cfc789e3e44d756 (#4295)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-11-30 21:46:07 +00:00
Ettore Di Giacinto
ad31daf03b chore(model gallery): add qwestion-24b (#4294)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-11-30 11:18:25 +01:00
Ettore Di Giacinto
1167487f5e chore(model gallery): add freyja-v4.95-maldv-7b-non-fiction-i1 (#4293)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-11-30 11:15:05 +01:00
Ettore Di Giacinto
61358e4d35 chore(model gallery): add q2.5-32b-slush-i1 (#4292)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-11-30 11:02:41 +01:00
LocalAI [bot]
2c8a87b1e4 chore: ⬆️ Update ggerganov/llama.cpp to 3a8e9af402f7893423bdab444aa16c5d9a2d429a (#4290)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-11-29 21:58:24 +00:00
LocalAI [bot]
55aad5f525 chore: ⬆️ Update ggerganov/llama.cpp to dc22344088a7ee81a1e4f096459b03a72f24ccdc (#4288)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-11-28 23:04:31 +00:00
Ettore Di Giacinto
58ff47de26 feat(bark-cpp): add new bark.cpp backend (#4287)
* feat(bark-cpp): add new bark.cpp backend

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* build on linux only for now

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* track bark.cpp in CI bumps

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Drop old entries from bumper

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* No need to test rwkv specifically, now part of llama.cpp

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-11-28 22:16:44 +01:00
mintyleaf
0d6c3a7d57 feat: include tokens usage for streamed output (#4282)
Use pb.Reply instead of []byte (obtained via Reply.GetMessage()) in the llama gRPC backend, so the proper usage data reaches streaming replies at the last [DONE] frame

Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-11-28 14:47:56 +01:00
Ettore Di Giacinto
e001fada6c chore(model gallery): add llama-smoltalk-3.2-1b-instruct (#4285)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-11-28 09:35:38 +01:00
Ettore Di Giacinto
f4547fcf8a chore(model gallery): add qwq-32b-preview (#4284)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-11-28 09:34:44 +01:00
Ettore Di Giacinto
7b75e9de2d fix(rwkv model): add stoptoken (#4283)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-11-28 09:34:35 +01:00
Ettore Di Giacinto
cbedf2f428 fix(llama.cpp): embed metal file into result binary for darwin (#4279)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-11-28 04:17:00 +00:00
LocalAI [bot]
0597f3b9e9 chore: ⬆️ Update ggerganov/llama.cpp to 3ad5451f3b75809e3033e4e577b9f60bcaf6676a (#4280)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-11-27 22:24:47 +00:00
Ettore Di Giacinto
5f688d7a8d chore(model): add eva-qwen2.5-72b-v0.2 to the gallery (#4278)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-11-27 19:02:05 +01:00
Ettore Di Giacinto
fa20628b3a chore(model): add llama-3.1-tulu-3-8b-sft to the gallery (#4277)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-11-27 19:01:54 +01:00
Ettore Di Giacinto
13bf048cfc chore(model): add llama-3.1-tulu-3-70b-dpo model config (#4276)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-11-27 18:44:53 +01:00
Ettore Di Giacinto
bdd6920910 chore(model): add q2.5-ms-mistoria-72b-v2 model config (#4275)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-11-27 16:56:55 +01:00
Ettore Di Giacinto
3c3050f68e feat(backends): Drop bert.cpp (#4272)
* feat(backends): Drop bert.cpp

use llama.cpp 3.2 as a drop-in replacement for bert.cpp

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(tests): make test more robust

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-11-27 16:34:28 +01:00
LocalAI [bot]
1688ba7f2a chore: ⬆️ Update ggerganov/llama.cpp to 30ec39832165627dd6ed98938df63adfc6e6a21a (#4273)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-11-27 15:19:15 +01:00
Ettore Di Giacinto
e8128a339a chore(scripts): handle summarization errors (#4271)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-11-26 14:51:55 +01:00
Ettore Di Giacinto
369110e6bf chore(model): add rwkv-6-world-7b to the gallery (#4270)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-11-26 14:51:37 +01:00
592 changed files with 271619 additions and 39183 deletions

View File

@@ -1,23 +0,0 @@
meta {
name: musicgen
type: http
seq: 1
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/sound-generation
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model_id": "facebook/musicgen-small",
"text": "Exciting 80s Newscast Interstitial",
"duration_seconds": 8
}
}

View File

@@ -1,17 +0,0 @@
meta {
name: backend monitor
type: http
seq: 4
}
get {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/monitor
body: json
auth: none
}
body:json {
{
"model": "{{DEFAULT_MODEL}}"
}
}

View File

@@ -1,21 +0,0 @@
meta {
name: backend-shutdown
type: http
seq: 3
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/shutdown
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}"
}
}

View File

@@ -1,5 +0,0 @@
{
"version": "1",
"name": "LocalAI Test Requests",
"type": "collection"
}

View File

@@ -1,6 +0,0 @@
vars {
HOST: localhost
PORT: 8080
DEFAULT_MODEL: gpt-3.5-turbo
PROTOCOL: http://
}

View File

@@ -1,11 +0,0 @@
meta {
name: get models list
type: http
seq: 2
}
get {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models
body: none
auth: none
}

View File

@@ -1,25 +0,0 @@
meta {
name: Generate image
type: http
seq: 1
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/images/generations
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"prompt": "<positive prompt>|<negative prompt>",
"model": "model-name",
"step": 51,
"size": "1024x1024",
"image": ""
}
}

View File

@@ -1,24 +0,0 @@
meta {
name: -completions
type: http
seq: 4
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/completions
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"prompt": "function downloadFile(string url, string outputPath) {",
"max_tokens": 256,
"temperature": 0.5
}
}

View File

@@ -1,23 +0,0 @@
meta {
name: -edits
type: http
seq: 5
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/edits
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"input": "What day of the wek is it?",
"instruction": "Fix the spelling mistakes"
}
}

View File

@@ -1,22 +0,0 @@
meta {
name: -embeddings
type: http
seq: 6
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/embeddings
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
}
}

View File

@@ -1,30 +0,0 @@
meta {
name: chat completion -simple- 1 message-
type: http
seq: 4
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"messages": [
{
"role": "user",
"content": "How could one use friction to cook an egg?"
}
],
"max_tokens": 256,
"temperature": 0.2,
"grammar": ""
}
}

View File

@@ -1,29 +0,0 @@
meta {
name: chat-completions -long-
type: http
seq: 5
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"messages": [{"role": "system", "content": "You are a helpful, intelligent pirate assistant. Always answer as helpfully as possible, while being a pirate and using pirate language. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."},
{"role": "user", "content": "How could one use electricity to cook an egg?"},
{"role": "assistant",
"content": "Shiver me timbers! Using electricity to cook an egg? Well, matey, I reckon that be a right curious idea! *adjusts spectacles* Now, I ain't sure if this be possible, but I'll do me best to help ye out.\n\nFirst things first, ye gotta understand that electricity be a powerful force, and it can't just cook an egg on its own. Ye see, electricity be like a mighty wind that can make things happen, but it needs somethin' to work with. So, if ye want to use electricity to cook an egg, ye gotta find a way to harness that power and make it do the cookin'.\n\nNow, I know what ye might be thinkin': \"How do I harness the power of electricity to cook an egg?\" Well, matey, there be a few ways to do it. One way be to use a special device called an \"electric frying pan.\" This be a pan that has a built-in heating element that gets hot when ye plug it into a wall socket. When the element gets hot, ye can crack an egg into the pan and watch as it cook"
},
{"role": "user", "content": "I don't have one of those, just a raw wire and plenty of power! How do we get it done?"}],
"max_tokens": 1024,
"temperature": 0.5
}
}

View File

@@ -1,25 +0,0 @@
meta {
name: chat-completions -stream-
type: http
seq: 6
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"messages": [{"role": "user", "content": "Explain how I can set sail on the ocean using only power generated by seagulls?"}],
"max_tokens": 256,
"temperature": 0.9,
"stream": true
}
}

View File

@@ -1,22 +0,0 @@
meta {
name: add model gallery
type: http
seq: 10
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"url": "file:///home/dave/projects/model-gallery/huggingface/TheBloke__CodeLlama-7B-Instruct-GGML.yaml",
"name": "test"
}
}

View File

@@ -1,21 +0,0 @@
meta {
name: delete model gallery
type: http
seq: 11
}
delete {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"name": "test"
}
}

View File

@@ -1,11 +0,0 @@
meta {
name: list MODELS in galleries
type: http
seq: 7
}
get {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/available
body: none
auth: none
}

View File

@@ -1,11 +0,0 @@
meta {
name: list model GALLERIES
type: http
seq: 8
}
get {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
body: none
auth: none
}

View File

@@ -1,11 +0,0 @@
meta {
name: model delete
type: http
seq: 7
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
body: none
auth: none
}

View File

@@ -1,21 +0,0 @@
meta {
name: model gallery apply -gist-
type: http
seq: 12
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"id": "TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q2_K.bin"
}
}


@@ -1,22 +0,0 @@
meta {
name: model gallery apply
type: http
seq: 9
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"id": "dave@TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q3_K_S.bin",
"name": "codellama7b"
}
}
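
Taken together, the gallery requests above form a small workflow: register a gallery, browse what it offers, then apply (install) a model from it. A rough curl sketch, assuming a local instance on localhost:8080; the gallery path and model id below are the illustrative values from this collection, not entries guaranteed to exist:

# 1. Register a gallery file under the name "test"
curl http://localhost:8080/models/galleries \
  -H "Content-Type: application/json" \
  -d '{"url": "file:///home/dave/projects/model-gallery/huggingface/TheBloke__CodeLlama-7B-Instruct-GGML.yaml", "name": "test"}'
# 2. List the models now available across registered galleries
curl http://localhost:8080/models/available
# 3. Apply one of them, optionally renaming it locally
curl http://localhost:8080/models/apply \
  -H "Content-Type: application/json" \
  -d '{"id": "dave@TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q3_K_S.bin", "name": "codellama7b"}'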

Binary file not shown.

@@ -1,16 +0,0 @@
meta {
name: transcribe
type: http
seq: 1
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/audio/transcriptions
body: multipartForm
auth: none
}
body:multipart-form {
file: @file(transcription/gb1.ogg)
model: whisper-1
}
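
The multipart request above is equivalent to a short curl invocation. A minimal sketch, assuming a local instance on localhost:8080 and that transcription/gb1.ogg exists relative to the working directory:

# Upload an audio file for transcription
curl http://localhost:8080/v1/audio/transcriptions \
  -F file=@transcription/gb1.ogg \
  -F model=whisper-1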


@@ -1,22 +0,0 @@
meta {
name: -tts
type: http
seq: 2
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
}
}
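
The TTS request above can be exercised the same way. A minimal sketch, assuming localhost:8080 and writing the synthesized audio to disk; the model name is a placeholder for {{DEFAULT_MODEL}}, and the output format depends on the configured backend:

curl http://localhost:8080/tts \
  -H "Content-Type: application/json" \
  -d '{"model": "<your-tts-model>", "input": "A STRANGE GAME."}' \
  -o output.audio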


@@ -1,23 +0,0 @@
meta {
name: musicgen
type: http
seq: 2
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"backend": "transformers-musicgen",
"model": "facebook/musicgen-small",
"input": "80s Synths playing Jazz"
}
}


@@ -6,8 +6,7 @@ services:
target: devcontainer
args:
- FFMPEG=true
- IMAGE_TYPE=extras
- GO_TAGS=stablediffusion p2p tts
- GO_TAGS=p2p tts
env_file:
- ../.env
ports:

.env

@@ -29,6 +29,9 @@
## Enable/Disable single backend (useful if only one GPU is available)
# LOCALAI_SINGLE_ACTIVE_BACKEND=true
# Forces shutdown of the backends if busy (only if LOCALAI_SINGLE_ACTIVE_BACKEND is set)
# LOCALAI_FORCE_BACKEND_SHUTDOWN=true
## Specify a build type. Available: cublas, openblas, clblas.
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
## OpenBLAS: This is an open-source implementation of the BLAS library that aims to provide highly optimized code for various platforms. It includes support for multi-threading and can be compiled to use hardware-specific features for additional performance. OpenBLAS can run on many kinds of hardware, including CPUs from Intel, AMD, and ARM.
@@ -38,12 +41,12 @@
## Uncomment and set to true to enable rebuilding from source
# REBUILD=true
## Enable go tags, available: stablediffusion, tts
## stablediffusion: image generation with stablediffusion
## Enable go tags, available: p2p, tts
## p2p: enable distributed inferencing
## tts: enables text-to-speech with go-piper
## (requires REBUILD=true)
#
# GO_TAGS=stablediffusion
# GO_TAGS=p2p
## Path where to store generated images
# LOCALAI_IMAGE_PATH=/tmp/generated/images
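
As the comments above note, GO_TAGS only takes effect together with REBUILD=true. A hedged sketch of wiring both through the container environment (image tag and port mapping are illustrative):

# Rebuild from source inside the container with p2p and tts enabled
docker run -e REBUILD=true -e GO_TAGS="p2p tts" \
  -p 8080:8080 localai/localai:latest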
@@ -73,7 +76,7 @@
### Define a list of GRPC Servers for llama-cpp workers to distribute the load
# https://github.com/ggerganov/llama.cpp/pull/6829
# https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md
# https://github.com/ggerganov/llama.cpp/blob/master/tools/rpc/README.md
# LLAMACPP_GRPC_SERVERS=""
### Enable to run parallel requests
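
The LLAMACPP_GRPC_SERVERS value above appears to take a comma-separated list of host:port pairs pointing at llama.cpp RPC workers (see the rpc README linked alongside it); an illustrative sketch, addresses assumed:

LLAMACPP_GRPC_SERVERS="192.168.1.10:50052,192.168.1.11:50052"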
@@ -82,6 +85,15 @@
# Enable to allow p2p mode
# LOCALAI_P2P=true
# Enable to use federated mode
# LOCALAI_FEDERATED=true
# Enable to start federation server
# FEDERATED_SERVER=true
# Define to use federation token
# TOKEN=""
### Watchdog settings
###
# Enables watchdog to kill backends that are inactive for too much time
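
A hedged example of switching on the p2p/federation toggles above at container start (token value and image tag are illustrative):

# Join the p2p mesh in federated mode with a shared token
docker run -e LOCALAI_P2P=true -e LOCALAI_FEDERATED=true \
  -e TOKEN="<shared-token>" -p 8080:8080 localai/localai:latest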


@@ -29,10 +29,6 @@ updates:
schedule:
# Check for updates to GitHub Actions every weekday
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/autogptq"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/bark"
schedule:
@@ -65,10 +61,6 @@ updates:
directory: "/backend/python/openvoice"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/parler-tts"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/rerankers"
schedule:
@@ -81,14 +73,6 @@ updates:
directory: "/backend/python/transformers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/transformers-musicgen"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/vall-e-x"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/vllm"
schedule:

.github/labeler.yml

@@ -1,6 +1,15 @@
enhancements:
enhancement:
- head-branch: ['^feature', 'feature']
dependencies:
- any:
- changed-files:
- any-glob-to-any-file: 'Makefile'
- changed-files:
- any-glob-to-any-file: '*.mod'
- changed-files:
- any-glob-to-any-file: '*.sum'
kind/documentation:
- any:
- changed-files:

.github/workflows/backend.yml (new file)

@@ -0,0 +1,511 @@
---
name: 'build backend container images'
on:
push:
branches:
- master
tags:
- '*'
#pull_request:
concurrency:
group: ci-backends-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
backend-jobs:
uses: ./.github/workflows/backend_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
backend: ${{ matrix.backend }}
dockerfile: ${{ matrix.dockerfile }}
context: ${{ matrix.context }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
fail-fast: false
#max-parallel: ${{ github.event_name != 'pull_request' && 6 || 4 }}
matrix:
include:
# CUDA 11 builds
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-rerankers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "rerankers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-vllm'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "vllm"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-transformers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "transformers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-diffusers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "diffusers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# CUDA 11 additional backends
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-kokoro'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "kokoro"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "faster-whisper"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-coqui'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "coqui"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-bark'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "bark"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-11-chatterbox'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "chatterbox"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# CUDA 12 builds
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "rerankers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-vllm'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "vllm"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-transformers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "transformers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "diffusers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# CUDA 12 additional backends
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "kokoro"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "faster-whisper"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-coqui'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "coqui"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-bark'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "bark"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "chatterbox"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# hipblas builds
- build-type: 'hipblas'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-rocm-hipblas-rerankers'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
backend: "rerankers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'hipblas'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-rocm-hipblas-vllm'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
backend: "vllm"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'hipblas'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-rocm-hipblas-transformers'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
backend: "transformers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'hipblas'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-rocm-hipblas-diffusers'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
backend: "diffusers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# ROCm additional backends
- build-type: 'hipblas'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-rocm-hipblas-kokoro'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
backend: "kokoro"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'hipblas'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
backend: "faster-whisper"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'hipblas'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-rocm-hipblas-coqui'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
backend: "coqui"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'hipblas'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-rocm-hipblas-bark'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
backend: "bark"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# sycl builds
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-intel-sycl-f32-rerankers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "rerankers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-intel-sycl-f16-rerankers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "rerankers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-intel-sycl-f32-vllm'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "vllm"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-intel-sycl-f16-vllm'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "vllm"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-intel-sycl-f32-transformers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "transformers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-intel-sycl-f16-transformers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "transformers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-intel-sycl-f32-diffusers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "diffusers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# SYCL additional backends
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-intel-sycl-f32-kokoro'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "kokoro"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-intel-sycl-f16-kokoro'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "kokoro"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-intel-sycl-f32-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "faster-whisper"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-intel-sycl-f16-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "faster-whisper"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-intel-sycl-f32-coqui'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "coqui"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-intel-sycl-f16-coqui'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "coqui"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-intel-sycl-f32-bark'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "bark"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-gpu-intel-sycl-f16-bark'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
backend: "bark"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# bark-cpp
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-suffix: '-bark-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
backend: "bark"
dockerfile: "./backend/Dockerfile.go"
context: "./"

.github/workflows/backend_build.yml (new file)

@@ -0,0 +1,235 @@
---
name: 'build python backend container images (reusable)'
on:
workflow_call:
inputs:
base-image:
description: 'Base image'
required: true
type: string
build-type:
description: 'Build type'
default: ''
type: string
cuda-major-version:
description: 'CUDA major version'
default: "12"
type: string
cuda-minor-version:
description: 'CUDA minor version'
default: "1"
type: string
platforms:
description: 'Platforms'
default: ''
type: string
tag-latest:
description: 'Tag latest'
default: ''
type: string
tag-suffix:
description: 'Tag suffix'
default: ''
type: string
runs-on:
description: 'Runs on'
required: true
default: ''
type: string
backend:
description: 'Backend to build'
required: true
type: string
context:
description: 'Build context'
required: true
type: string
dockerfile:
description: 'Build Dockerfile'
required: true
type: string
secrets:
dockerUsername:
required: true
dockerPassword:
required: true
quayUsername:
required: true
quayPassword:
required: true
jobs:
reusable_python_backend-build:
runs-on: ${{ inputs.runs-on }}
steps:
- name: Free Disk Space (Ubuntu)
if: inputs.runs-on == 'ubuntu-latest'
uses: jlumbroso/free-disk-space@main
with:
# this might remove tools that are actually needed,
# if set to "true" but frees about 6 GB
tool-cache: true
# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true
- name: Force Install GIT latest
run: |
sudo apt-get update \
&& sudo apt-get install -y software-properties-common \
&& sudo apt-get update \
&& sudo add-apt-repository -y ppa:git-core/ppa \
&& sudo apt-get update \
&& sudo apt-get install -y git
- name: Checkout
uses: actions/checkout@v4
- name: Release space from worker
if: inputs.runs-on == 'ubuntu-latest'
run: |
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
df -h
echo
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
sudo rm -rf /usr/local/lib/android
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
sudo rm -rf /usr/share/dotnet
sudo apt-get remove -y '^mono-.*' || true
sudo apt-get remove -y '^ghc-.*' || true
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
sudo apt-get remove -y 'php.*' || true
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
sudo apt-get remove -y '^google-.*' || true
sudo apt-get remove -y azure-cli || true
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
sudo apt-get remove -y '^gfortran-.*' || true
sudo apt-get remove -y microsoft-edge-stable || true
sudo apt-get remove -y firefox || true
sudo apt-get remove -y powershell || true
sudo apt-get remove -y r-base-core || true
sudo apt-get autoremove -y
sudo apt-get clean
echo
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
sudo rm -rfv build || true
sudo rm -rf /usr/share/dotnet || true
sudo rm -rf /opt/ghc || true
sudo rm -rf "/usr/local/share/boost" || true
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
df -h
- name: Docker meta
id: meta
if: github.event_name != 'pull_request'
uses: docker/metadata-action@v5
with:
images: |
quay.io/go-skynet/local-ai-backends
localai/localai-backends
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
type=sha
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.tag-suffix }},onlatest=true
- name: Docker meta for PR
id: meta_pull_request
if: github.event_name == 'pull_request'
uses: docker/metadata-action@v5
with:
images: |
quay.io/go-skynet/ci-tests
tags: |
type=ref,event=branch,suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
type=semver,pattern={{raw}},suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
type=sha,suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.tag-suffix }},onlatest=true
## End testing image
- name: Set up QEMU
uses: docker/setup-qemu-action@master
with:
platforms: all
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@master
- name: Login to DockerHub
if: github.event_name != 'pull_request'
uses: docker/login-action@v3
with:
username: ${{ secrets.dockerUsername }}
password: ${{ secrets.dockerPassword }}
- name: Login to Quay.io
# if: github.event_name != 'pull_request'
uses: docker/login-action@v3
with:
registry: quay.io
username: ${{ secrets.quayUsername }}
password: ${{ secrets.quayPassword }}
- name: Build and push
uses: docker/build-push-action@v6
if: github.event_name != 'pull_request'
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
BUILD_TYPE=${{ inputs.build-type }}
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
BASE_IMAGE=${{ inputs.base-image }}
BACKEND=${{ inputs.backend }}
context: ./backend
file: ./backend/Dockerfile.python
cache-from: type=gha
platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
- name: Build and push (PR)
uses: docker/build-push-action@v6
if: github.event_name == 'pull_request'
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
BUILD_TYPE=${{ inputs.build-type }}
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
BASE_IMAGE=${{ inputs.base-image }}
BACKEND=${{ inputs.backend }}
context: ./backend
file: ./backend/Dockerfile.python
cache-from: type=gha
platforms: ${{ inputs.platforms }}
push: true
tags: ${{ steps.meta_pull_request.outputs.tags }}
labels: ${{ steps.meta_pull_request.outputs.labels }}
- name: job summary
run: |
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY


@@ -9,26 +9,17 @@ jobs:
fail-fast: false
matrix:
include:
- repository: "ggerganov/llama.cpp"
- repository: "ggml-org/llama.cpp"
variable: "CPPLLAMA_VERSION"
branch: "master"
- repository: "go-skynet/go-ggml-transformers.cpp"
variable: "GOGGMLTRANSFORMERS_VERSION"
branch: "master"
- repository: "donomii/go-rwkv.cpp"
variable: "RWKV_VERSION"
branch: "main"
- repository: "ggerganov/whisper.cpp"
- repository: "ggml-org/whisper.cpp"
variable: "WHISPER_CPP_VERSION"
branch: "master"
- repository: "go-skynet/go-bert.cpp"
variable: "BERT_VERSION"
branch: "master"
- repository: "go-skynet/bloomz.cpp"
variable: "BLOOMZ_VERSION"
- repository: "PABannier/bark.cpp"
variable: "BARKCPP_VERSION"
branch: "main"
- repository: "mudler/go-ggllm.cpp"
variable: "GOGGLLM_VERSION"
- repository: "leejet/stable-diffusion.cpp"
variable: "STABLEDIFFUSION_GGML_VERSION"
branch: "master"
- repository: "mudler/go-stable-diffusion"
variable: "STABLEDIFFUSION_VERSION"


@@ -5,7 +5,7 @@ on:
workflow_dispatch:
jobs:
checksum_check:
runs-on: arc-runner-set
runs-on: ubuntu-latest
steps:
- name: Force Install GIT latest
run: |


@@ -14,7 +14,7 @@ jobs:
steps:
- name: Dependabot metadata
id: metadata
uses: dependabot/fetch-metadata@v2.2.0
uses: dependabot/fetch-metadata@v2.4.0
with:
github-token: "${{ secrets.GITHUB_TOKEN }}"
skip-commit-verification: true


@@ -33,7 +33,7 @@ jobs:
run: |
CGO_ENABLED=0 make build-api
- name: rm
uses: appleboy/ssh-action@v1.2.0
uses: appleboy/ssh-action@v1.2.2
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
@@ -42,7 +42,7 @@ jobs:
script: |
sudo rm -rf local-ai/ || true
- name: copy file via ssh
uses: appleboy/scp-action@v0.1.7
uses: appleboy/scp-action@v1.0.0
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
@@ -53,7 +53,7 @@ jobs:
rm: true
target: ./local-ai
- name: restarting
uses: appleboy/ssh-action@v1.2.0
uses: appleboy/ssh-action@v1.2.2
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}


@@ -2,9 +2,10 @@ name: 'generate and publish GRPC docker caches'
on:
workflow_dispatch:
push:
branches:
- master
schedule:
# daily at midnight
- cron: '0 0 * * *'
concurrency:
group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}


@@ -15,7 +15,7 @@ jobs:
strategy:
matrix:
include:
- base-image: intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04
- base-image: intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04
runs-on: 'ubuntu-latest'
platforms: 'linux/amd64'
runs-on: ${{matrix.runs-on}}


@@ -9,13 +9,12 @@ concurrency:
cancel-in-progress: true
jobs:
extras-image-build:
image-build:
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
image-type: ${{ matrix.image-type }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
@@ -33,108 +32,42 @@ jobs:
# Pushing with all jobs in parallel
# eats the bandwidth of all the nodes
max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
fail-fast: false
matrix:
include:
# This is basically covered by the AIO test
# - build-type: ''
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-ffmpeg'
# ffmpeg: 'true'
# image-type: 'extras'
# runs-on: 'arc-runner-set'
# base-image: "ubuntu:22.04"
# makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg'
tag-suffix: '-gpu-nvidia-cuda12-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
# - build-type: 'hipblas'
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-hipblas'
# ffmpeg: 'false'
# image-type: 'extras'
# base-image: "rocm/dev-ubuntu-22.04:6.1"
# grpc-base-image: "ubuntu:22.04"
# runs-on: 'arc-runner-set'
# makeflags: "--jobs=3 --output-sync=target"
# - build-type: 'sycl_f16'
# platforms: 'linux/amd64'
# tag-latest: 'false'
# base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
# grpc-base-image: "ubuntu:22.04"
# tag-suffix: 'sycl-f16-ffmpeg'
# ffmpeg: 'true'
# image-type: 'extras'
# runs-on: 'arc-runner-set'
# makeflags: "--jobs=3 --output-sync=target"
# core-image-build:
# uses: ./.github/workflows/image_build.yml
# with:
# tag-latest: ${{ matrix.tag-latest }}
# tag-suffix: ${{ matrix.tag-suffix }}
# ffmpeg: ${{ matrix.ffmpeg }}
# image-type: ${{ matrix.image-type }}
# build-type: ${{ matrix.build-type }}
# cuda-major-version: ${{ matrix.cuda-major-version }}
# cuda-minor-version: ${{ matrix.cuda-minor-version }}
# platforms: ${{ matrix.platforms }}
# runs-on: ${{ matrix.runs-on }}
# base-image: ${{ matrix.base-image }}
# grpc-base-image: ${{ matrix.grpc-base-image }}
# makeflags: ${{ matrix.makeflags }}
# secrets:
# dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
# dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
# quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
# quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
# strategy:
# matrix:
# include:
# - build-type: ''
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-ffmpeg-core'
# ffmpeg: 'true'
# image-type: 'core'
# runs-on: 'ubuntu-latest'
# base-image: "ubuntu:22.04"
# makeflags: "--jobs=4 --output-sync=target"
# - build-type: 'sycl_f16'
# platforms: 'linux/amd64'
# tag-latest: 'false'
# base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
# grpc-base-image: "ubuntu:22.04"
# tag-suffix: 'sycl-f16-ffmpeg-core'
# ffmpeg: 'true'
# image-type: 'core'
# runs-on: 'arc-runner-set'
# makeflags: "--jobs=3 --output-sync=target"
# - build-type: 'cublas'
# cuda-major-version: "12"
# cuda-minor-version: "0"
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-cublas-cuda12-ffmpeg-core'
# ffmpeg: 'true'
# image-type: 'core'
# runs-on: 'ubuntu-latest'
# base-image: "ubuntu:22.04"
# makeflags: "--jobs=4 --output-sync=target"
# - build-type: 'vulkan'
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-vulkan-ffmpeg-core'
# ffmpeg: 'true'
# image-type: 'core'
# runs-on: 'ubuntu-latest'
# base-image: "ubuntu:22.04"
# makeflags: "--jobs=4 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
ffmpeg: 'false'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: 'sycl-f16-ffmpeg'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'vulkan'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-vulkan-ffmpeg-core'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"


@@ -19,7 +19,6 @@ jobs:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
image-type: ${{ matrix.image-type }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
@@ -29,238 +28,24 @@ jobs:
grpc-base-image: ${{ matrix.grpc-base-image }}
aio: ${{ matrix.aio }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
# Pushing with all jobs in parallel
# eats the bandwidth of all the nodes
max-parallel: 2
matrix:
include:
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-hipblas-ffmpeg'
tag-suffix: '-gpu-hipblas'
ffmpeg: 'true'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
aio: "-aio-gpu-hipblas"
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
latest-image: 'latest-gpu-hipblas'
latest-image-aio: 'latest-aio-gpu-hipblas'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-core'
ffmpeg: 'false'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
self-hosted-jobs:
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
image-type: ${{ matrix.image-type }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
aio: ${{ matrix.aio }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
# Pushing with all jobs in parallel
# eats the bandwidth of all the nodes
max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
matrix:
include:
# Extra images
- build-type: ''
#platforms: 'linux/amd64,linux/arm64'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: ''
ffmpeg: ''
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
- build-type: ''
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11'
ffmpeg: ''
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12'
ffmpeg: ''
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda11-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
aio: "-aio-gpu-nvidia-cuda-11"
latest-image: 'latest-gpu-nvidia-cuda-11'
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda12-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
aio: "-aio-gpu-nvidia-cuda-12"
latest-image: 'latest-gpu-nvidia-cuda-12'
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
makeflags: "--jobs=3 --output-sync=target"
- build-type: ''
#platforms: 'linux/amd64,linux/arm64'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: ''
ffmpeg: ''
image-type: 'extras'
base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'auto'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f16-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
aio: "-aio-gpu-intel-f16"
latest-image: 'latest-gpu-intel-f16'
latest-image-aio: 'latest-aio-gpu-intel-f16'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'auto'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f32-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
aio: "-aio-gpu-intel-f32"
latest-image: 'latest-gpu-intel-f32'
latest-image-aio: 'latest-aio-gpu-intel-f32'
makeflags: "--jobs=3 --output-sync=target"
# Core images
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f16-core'
ffmpeg: 'false'
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f32-core'
ffmpeg: 'false'
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f16-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f32-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
core-image-build:
uses: ./.github/workflows/image_build.yml
@@ -268,7 +53,6 @@ jobs:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
image-type: ${{ matrix.image-type }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
@@ -278,80 +62,113 @@ jobs:
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
skip-drivers: ${{ matrix.skip-drivers }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
#max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
matrix:
include:
- build-type: ''
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto'
tag-suffix: '-ffmpeg-core'
tag-suffix: ''
ffmpeg: 'true'
image-type: 'core'
base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
runs-on: 'ubuntu-latest'
aio: "-aio-cpu"
latest-image: 'latest-cpu'
latest-image-aio: 'latest-aio-cpu'
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-core'
ffmpeg: ''
image-type: 'core'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda11'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
aio: "-aio-gpu-nvidia-cuda-11"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-core'
ffmpeg: ''
image-type: 'core'
base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-ffmpeg-core'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda12'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
aio: "-aio-gpu-nvidia-cuda-12"
- build-type: 'vulkan'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-vulkan-ffmpeg-core'
latest-image: 'latest-vulkan-ffmpeg-core'
tag-latest: 'auto'
tag-suffix: '-vulkan'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
aio: "-aio-gpu-vulkan"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'auto'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-gpu-intel-f16'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
aio: "-aio-gpu-intel-f16"
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'auto'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-gpu-intel-f32'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
aio: "-aio-gpu-intel-f32"
gh-runner:
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
aio: ${{ matrix.aio }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
skip-drivers: ${{ matrix.skip-drivers }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
matrix:
include:
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64'
ffmpeg: 'true'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm'
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'true'
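
With tag-suffix '-nvidia-l4t-arm64' and tag-latest 'auto', the Jetson entry above should publish arm64 images along these lines (tag inferred from the matrix, not verified):

docker pull localai/localai:master-nvidia-l4t-arm64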


@@ -33,14 +33,6 @@ on:
description: 'Tag latest'
default: ''
type: string
latest-image:
description: 'Tag latest'
default: ''
type: string
latest-image-aio:
description: 'Tag latest'
default: ''
type: string
tag-suffix:
description: 'Tag suffix'
default: ''
@@ -49,9 +41,9 @@ on:
description: 'FFMPEG'
default: ''
type: string
image-type:
description: 'Image type'
default: ''
skip-drivers:
description: 'Skip drivers by default'
default: 'false'
type: string
runs-on:
description: 'Runs on'
@@ -81,6 +73,22 @@ jobs:
reusable_image-build:
runs-on: ${{ inputs.runs-on }}
steps:
- name: Free Disk Space (Ubuntu)
if: inputs.runs-on == 'ubuntu-latest'
uses: jlumbroso/free-disk-space@main
with:
# this might remove tools that are actually needed,
# if set to "true" but frees about 6 GB
tool-cache: true
# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true
- name: Force Install GIT latest
run: |
sudo apt-get update \
@@ -102,8 +110,8 @@ jobs:
df -h
echo
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
sudo rm -rf /usr/local/lib/android
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
sudo rm -rf /usr/share/dotnet
@@ -148,18 +156,18 @@ jobs:
type=sha
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.tag-suffix }}
suffix=${{ inputs.tag-suffix }},onlatest=true
- name: Docker meta for PR
id: meta_pull_request
if: github.event_name == 'pull_request'
uses: docker/metadata-action@v5
with:
images: |
ttl.sh/localai-ci-pr-${{ github.event.number }}
quay.io/go-skynet/ci-tests
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
type=sha
type=ref,event=branch,suffix=localai${{ github.event.number }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
type=semver,pattern={{raw}},suffix=localai${{ github.event.number }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
type=sha,suffix=localai${{ github.event.number }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.tag-suffix }}
@@ -175,7 +183,7 @@ jobs:
type=semver,pattern={{raw}}
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.aio }}
suffix=${{ inputs.aio }},onlatest=true
- name: Docker meta AIO (dockerhub)
if: inputs.aio != ''
@@ -188,7 +196,8 @@ jobs:
type=ref,event=branch
type=semver,pattern={{raw}}
flavor: |
suffix=${{ inputs.aio }}
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.aio }},onlatest=true
- name: Set up QEMU
uses: docker/setup-qemu-action@master
@@ -228,12 +237,12 @@ jobs:
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
FFMPEG=${{ inputs.ffmpeg }}
IMAGE_TYPE=${{ inputs.image-type }}
BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.65.0
MAKEFLAGS=${{ inputs.makeflags }}
SKIP_DRIVERS=${{ inputs.skip-drivers }}
context: .
file: ./Dockerfile
cache-from: type=gha
@@ -256,23 +265,19 @@ jobs:
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
FFMPEG=${{ inputs.ffmpeg }}
IMAGE_TYPE=${{ inputs.image-type }}
BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.65.0
MAKEFLAGS=${{ inputs.makeflags }}
SKIP_DRIVERS=${{ inputs.skip-drivers }}
context: .
file: ./Dockerfile
cache-from: type=gha
platforms: ${{ inputs.platforms }}
push: true
#push: true
tags: ${{ steps.meta_pull_request.outputs.tags }}
labels: ${{ steps.meta_pull_request.outputs.labels }}
- name: Testing image
if: github.event_name == 'pull_request'
run: |
echo "Image is available at ttl.sh/localai-ci-pr-${{ github.event.number }}:${{ steps.meta_pull_request.outputs.version }}" >> $GITHUB_STEP_SUMMARY
## End testing image
- name: Build and push AIO image
if: inputs.aio != ''
@@ -304,27 +309,6 @@ jobs:
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
- name: Latest tag
# run this on branches, when it is a tag and there is a latest-image defined
if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
run: |
docker pull localai/localai:${{ steps.meta.outputs.version }}
docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }}
docker push localai/localai:${{ inputs.latest-image }}
docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
- name: Latest AIO tag
# run this on branches, when it is a tag and there is a latest-image defined
if: github.event_name != 'pull_request' && inputs.latest-image-aio != '' && github.ref_type == 'tag'
run: |
docker pull localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }}
docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }}
docker push localai/localai:${{ inputs.latest-image-aio }}
docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
- name: job summary
run: |
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY


@@ -8,7 +8,7 @@ jobs:
notify-discord:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
MODEL_NAME: gemma-3-12b-it
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
@@ -16,9 +16,9 @@ jobs:
fetch-depth: 0 # needed to checkout all branches for this Action to work
- uses: mudler/localai-github-action@v1
with:
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
model: 'gemma-3-12b-it' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
- uses: GrantBirki/git-diff-action@v2.8.1
id: git-diff-action
with:
json_diff_file_output: diff.json
@@ -79,7 +79,7 @@ jobs:
args: ${{ steps.summarize.outputs.message }}
- name: Setup tmate session if fails
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
@@ -87,7 +87,7 @@ jobs:
notify-twitter:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
MODEL_NAME: gemma-3-12b-it
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
@@ -99,7 +99,7 @@ jobs:
docker run -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
- uses: GrantBirki/git-diff-action@v2.8.1
id: git-diff-action
with:
json_diff_file_output: diff.json
@@ -161,7 +161,7 @@ jobs:
TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
- name: Setup tmate session if fails
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180


@@ -14,7 +14,7 @@ jobs:
steps:
- uses: mudler/localai-github-action@v1
with:
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
model: 'gemma-3-12b-it' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
- name: Summarize
id: summarize
run: |
@@ -60,4 +60,4 @@ jobs:
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}
args: ${{ steps.summarize.outputs.message }}


@@ -36,6 +36,7 @@ jobs:
sudo apt-get update
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
make install-go-tools
- name: Install CUDA Dependencies
run: |
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
@@ -123,14 +124,70 @@ jobs:
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
build-linux:
runs-on: arc-runner-set
runs-on: ubuntu-latest
steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
# this might remove tools that are actually needed,
# if set to "true" but frees about 6 GB
tool-cache: true
# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true
- name: Release space from worker
run: |
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
df -h
echo
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
sudo rm -rf /usr/local/lib/android
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
sudo rm -rf /usr/share/dotnet
sudo apt-get remove -y '^mono-.*' || true
sudo apt-get remove -y '^ghc-.*' || true
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
sudo apt-get remove -y 'php.*' || true
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
sudo apt-get remove -y '^google-.*' || true
sudo apt-get remove -y azure-cli || true
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
sudo apt-get remove -y '^gfortran-.*' || true
sudo apt-get remove -y microsoft-edge-stable || true
sudo apt-get remove -y firefox || true
sudo apt-get remove -y powershell || true
sudo apt-get remove -y r-base-core || true
sudo apt-get autoremove -y
sudo apt-get clean
echo
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
sudo rm -rfv build || true
sudo rm -rf /usr/share/dotnet || true
sudo rm -rf /opt/ghc || true
sudo rm -rf "/usr/local/share/boost" || true
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
df -h
- name: Force Install GIT latest
run: |
sudo apt-get update \
@@ -151,6 +208,7 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
make install-go-tools
- name: Intel Dependencies
run: |
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -175,17 +233,12 @@ jobs:
sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
sudo apt update
wget https://repo.radeon.com/amdgpu-install/6.4.1/ubuntu/noble/amdgpu-install_6.4.60401-1_all.deb
sudo apt install ./amdgpu-install_6.4.60401-1_all.deb
sudo apt update
printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list
printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
hipblas-dev rocm-dev \
rocblas-dev
sudo amdgpu-install --usecase=rocm
sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*
@@ -232,45 +285,12 @@ jobs:
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
build-stablediffusion:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- name: Build stablediffusion
run: |
export PATH=$PATH:$GOPATH/bin
make backend-assets/grpc/stablediffusion
mkdir -p release && cp backend-assets/grpc/stablediffusion release
env:
GO_TAGS: stablediffusion
- uses: actions/upload-artifact@v4
with:
name: stablediffusion
path: release/
- name: Release
uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
build-macOS-x86_64:
runs-on: macos-13
@@ -286,8 +306,7 @@ jobs:
- name: Dependencies
run: |
brew install protobuf grpc
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
make install-go-tools
- name: Build
id: build
run: |
@@ -308,7 +327,7 @@ jobs:
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
@@ -328,8 +347,7 @@ jobs:
- name: Dependencies
run: |
brew install protobuf grpc libomp llvm
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
make install-go-tools
- name: Build
id: build
run: |
@@ -350,7 +368,7 @@ jobs:
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180


@@ -18,7 +18,7 @@ jobs:
if: ${{ github.actor != 'dependabot[bot]' }}
- name: Run Gosec Security Scanner
if: ${{ github.actor != 'dependabot[bot]' }}
uses: securego/gosec@v2.21.4
uses: securego/gosec@v2.22.5
with:
# we let the report content trigger a failure using the GitHub Security features.
args: '-no-fail -fmt sarif -out results.sarif ./...'

24
.github/workflows/stalebot.yml vendored Normal file

@@ -0,0 +1,24 @@
name: 'Close stale issues and PRs'
permissions:
issues: write
pull-requests: write
on:
schedule:
- cron: '30 1 * * *'
jobs:
stale:
runs-on: ubuntu-latest
steps:
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9
with:
stale-issue-message: 'This issue is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 5 days.'
stale-pr-message: 'This PR is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 10 days.'
close-issue-message: 'This issue was closed because it has been stalled for 5 days with no activity.'
close-pr-message: 'This PR was closed because it has been stalled for 10 days with no activity.'
days-before-issue-stale: 90
days-before-pr-stale: 90
days-before-issue-close: 5
days-before-pr-close: 10
exempt-issue-labels: 'roadmap'
exempt-pr-labels: 'roadmap'


@@ -14,6 +14,28 @@ concurrency:
cancel-in-progress: true
jobs:
# Requires CUDA
# tests-chatterbox-tts:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# submodules: true
# - name: Dependencies
# run: |
# sudo apt-get update
# sudo apt-get install build-essential ffmpeg
# # Install UV
# curl -LsSf https://astral.sh/uv/install.sh | sh
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
# pip install --user --no-cache-dir grpcio-tools==1.64.1
# - name: Test chatterbox-tts
# run: |
# make --jobs=5 --output-sync=target -C backend/python/chatterbox
# make --jobs=5 --output-sync=target -C backend/python/chatterbox test
tests-transformers:
runs-on: ubuntu-latest
steps:
@@ -35,30 +57,6 @@ jobs:
run: |
make --jobs=5 --output-sync=target -C backend/python/transformers
make --jobs=5 --output-sync=target -C backend/python/transformers test
tests-sentencetransformers:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
- name: Test sentencetransformers
run: |
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
tests-rerankers:
runs-on: ubuntu-latest
steps:
@@ -102,78 +100,47 @@ jobs:
make --jobs=5 --output-sync=target -C backend/python/diffusers
make --jobs=5 --output-sync=target -C backend/python/diffusers test
tests-parler-tts:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
#tests-vllm:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# submodules: true
# - name: Dependencies
# run: |
# sudo apt-get update
# sudo apt-get install -y build-essential ffmpeg
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
# # Install UV
# curl -LsSf https://astral.sh/uv/install.sh | sh
# pip install --user --no-cache-dir grpcio-tools==1.64.1
# - name: Test vllm backend
# run: |
# make --jobs=5 --output-sync=target -C backend/python/vllm
# make --jobs=5 --output-sync=target -C backend/python/vllm test
# tests-transformers-musicgen:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# submodules: true
# - name: Dependencies
# run: |
# sudo apt-get update
# sudo apt-get install build-essential ffmpeg
# # Install UV
# curl -LsSf https://astral.sh/uv/install.sh | sh
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
# pip install --user --no-cache-dir grpcio-tools==1.64.1
- name: Test parler-tts
run: |
make --jobs=5 --output-sync=target -C backend/python/parler-tts
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
tests-openvoice:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
- name: Test openvoice
run: |
make --jobs=5 --output-sync=target -C backend/python/openvoice
make --jobs=5 --output-sync=target -C backend/python/openvoice test
tests-transformers-musicgen:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
- name: Test transformers-musicgen
run: |
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
# - name: Test transformers-musicgen
# run: |
# make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
# make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
# tests-bark:
# runs-on: ubuntu-latest
@@ -260,26 +227,6 @@ jobs:
# run: |
# make --jobs=5 --output-sync=target -C backend/python/vllm
# make --jobs=5 --output-sync=target -C backend/python/vllm test
tests-vallex:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
- name: Test vall-e-x
run: |
make --jobs=5 --output-sync=target -C backend/python/vall-e-x
make --jobs=5 --output-sync=target -C backend/python/vall-e-x test
tests-coqui:
runs-on: ubuntu-latest


@@ -71,7 +71,7 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
sudo apt-get install -y libgmock-dev
sudo apt-get install -y libgmock-dev clang
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
@@ -96,19 +96,17 @@ jobs:
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install github.com/GeertJohan/go.rice/rice@latest
# The python3-grpc-tools package in 22.04 is too old
pip install --user grpcio-tools
pip install --user grpcio-tools==1.71.0 grpcio==1.71.0
sudo rm -rfv /usr/bin/conda || true
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
make -C backend/python/transformers
# Pre-build piper before we start tests in order to have shared libraries in place
make sources/go-piper && \
GO_TAGS="tts" make -C sources/go-piper piper.o && \
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with the ncnn-based stablediffusion)
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/
env:
CUDA_VERSION: 12-4
- name: Cache grpc
@@ -130,10 +128,10 @@ jobs:
cd grpc && cd cmake/build && sudo make --jobs 5 install
- name: Test
run: |
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
@@ -186,10 +184,11 @@ jobs:
rm protoc.zip
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install github.com/GeertJohan/go.rice/rice@latest
PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Build images
run: |
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
docker build --build-arg FFMPEG=true --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
- name: Test
run: |
@@ -197,7 +196,7 @@ jobs:
make run-e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
@@ -224,7 +223,8 @@ jobs:
- name: Dependencies
run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
pip install --user --no-cache-dir grpcio-tools
pip install --user --no-cache-dir grpcio-tools==1.71.0 grpcio==1.71.0
go install github.com/GeertJohan/go.rice/rice@latest
- name: Test
run: |
export C_INCLUDE_PATH=/usr/local/include
@@ -235,7 +235,7 @@ jobs:
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180


@@ -8,7 +8,7 @@ jobs:
steps:
- name: 'Checkout'
uses: actions/checkout@master
- name: 'Yamllint'
- name: 'Yamllint model gallery'
uses: karancode/yamllint-github-action@master
with:
yamllint_file_or_dir: 'gallery'
@@ -16,3 +16,11 @@ jobs:
yamllint_comment: true
env:
GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: 'Yamllint Backend gallery'
uses: karancode/yamllint-github-action@master
with:
yamllint_file_or_dir: 'backend'
yamllint_strict: false
yamllint_comment: true
env:
GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}

1
.gitignore vendored

@@ -2,6 +2,7 @@
/sources/
__pycache__/
*.a
*.o
get-sources
prepare-sources
/backend/cpp/llama/grpc-server

2
.vscode/launch.json vendored

@@ -26,7 +26,7 @@
"LOCALAI_P2P": "true",
"LOCALAI_FEDERATED": "true"
},
"buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
"buildFlags": ["-tags", "p2p tts", "-v"],
"envFile": "${workspaceFolder}/.env",
"cwd": "${workspaceRoot}"
}


@@ -1,126 +1,32 @@
ARG IMAGE_TYPE=extras
ARG BASE_IMAGE=ubuntu:22.04
ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
# The requirements-core target is common to all images. Nothing should be placed in it unless every single build will use it.
FROM ${BASE_IMAGE} AS requirements-core
USER root
ARG GO_VERSION=1.22.6
ARG CMAKE_VERSION=3.26.4
ARG CMAKE_FROM_SOURCE=false
ARG TARGETARCH
ARG TARGETVARIANT
FROM ${BASE_IMAGE} AS requirements
ENV DEBIAN_FRONTEND=noninteractive
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
ccache \
ca-certificates \
curl libssl-dev \
git \
unzip upx-ucl && \
ca-certificates curl wget espeak-ng libgomp1 \
python3 python-is-python3 ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Install CMake (the version in 22.04 is too old)
RUN <<EOT bash
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
else
apt-get update && \
apt-get install -y \
cmake && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
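# Hedged usage sketch (not in the original file): the source build above can be
# requested at image build time via the CMAKE_FROM_SOURCE build-arg; the image
# tag below is a placeholder:
#   docker build --build-arg CMAKE_FROM_SOURCE=true -t localai:dev .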
# Install Go
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
# Install grpc compilers
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
RUN update-ca-certificates
RUN test -n "$TARGETARCH" \
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
# Use the variables in subsequent instructions
RUN echo "Target Architecture: $TARGETARCH"
RUN echo "Target Variant: $TARGETVARIANT"
# Cuda
ENV PATH=/usr/local/cuda/bin:${PATH}
# HipBLAS requirements
ENV PATH=/opt/rocm/bin:${PATH}
# OpenBLAS requirements and stable diffusion
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libopenblas-dev \
libopencv-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Set up OpenCV
RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
WORKDIR /build
###################################
###################################
# The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it
FROM requirements-core AS requirements-extras
# Install uv as a system package
RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh
ENV PATH="/root/.cargo/bin:${PATH}"
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
RUN apt-get update && \
apt-get install -y --no-install-recommends \
espeak-ng \
espeak \
python3-pip \
python-is-python3 \
python3-dev llvm \
python3-venv && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
pip install --upgrade pip
# Install grpcio-tools (the version in 22.04 is too old)
RUN pip install --user grpcio-tools
###################################
###################################
# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
# This target will be built on top of requirements-core or requirements-extras as determined by the IMAGE_TYPE build-arg
FROM requirements-${IMAGE_TYPE} AS requirements-drivers
FROM requirements AS requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=0
ARG SKIP_DRIVERS=false
ARG TARGETARCH
ARG TARGETVARIANT
ENV BUILD_TYPE=${BUILD_TYPE}
# Vulkan requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "vulkan" ]; then
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils wget gpg-agent && \
@@ -136,7 +42,7 @@ EOT
# CuBLAS requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "cublas" ]; then
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils
@@ -162,7 +68,7 @@ RUN <<EOT bash
EOT
# If we are building with clblas support, we need the libraries for the builds
RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
libclblast-dev && \
@@ -170,7 +76,7 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
rm -rf /var/lib/apt/lists/* \
; fi
RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
hipblas-dev \
@@ -182,6 +88,83 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
ldconfig \
; fi
# Cuda
ENV PATH=/usr/local/cuda/bin:${PATH}
# HipBLAS requirements
ENV PATH=/opt/rocm/bin:${PATH}
###################################
###################################
# The build-requirements target collects the build toolchain common to all builder images. Nothing should be placed here unless every single builder image will use it.
FROM requirements-drivers AS build-requirements
ARG GO_VERSION=1.22.6
ARG CMAKE_VERSION=3.26.4
ARG CMAKE_FROM_SOURCE=false
ARG TARGETARCH
ARG TARGETVARIANT
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
ccache \
ca-certificates espeak-ng \
curl libssl-dev \
git \
git-lfs \
unzip upx-ucl python3 python-is-python3 && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Install CMake (the version in 22.04 is too old)
RUN <<EOT bash
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
else
apt-get update && \
apt-get install -y \
cmake && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# Install Go
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
# Install grpc compilers and rice
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af && \
go install github.com/GeertJohan/go.rice/rice@latest
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
RUN update-ca-certificates
# OpenBLAS requirements and stable diffusion
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libopenblas-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN test -n "$TARGETARCH" \
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
# Use the variables in subsequent instructions
RUN echo "Target Architecture: $TARGETARCH"
RUN echo "Target Variant: $TARGETVARIANT"
WORKDIR /build
###################################
###################################
@@ -248,13 +231,14 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer
FROM requirements-drivers AS builder-base
FROM build-requirements AS builder-base
ARG GO_TAGS="stablediffusion tts p2p"
ARG GO_TAGS="tts p2p"
ARG GRPC_BACKENDS
ARG MAKEFLAGS
ARG LD_FLAGS="-s -w"
ARG TARGETARCH
ARG TARGETVARIANT
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
ENV GO_TAGS=${GO_TAGS}
ENV MAKEFLAGS=${MAKEFLAGS}
@@ -284,53 +268,51 @@ RUN <<EOT bash
fi
EOT
###################################
###################################
# This first portion of builder holds the layers specifically used to build backend-assets/grpc/stablediffusion
# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
FROM builder-base AS builder-sd
# Compile backends first in a separate stage
FROM builder-base AS builder-backends
ARG TARGETARCH
ARG TARGETVARIANT
# stablediffusion does not tolerate a newer version of abseil, so copy over only enough elements to build it
COPY Makefile .
COPY go.mod .
COPY go.sum .
COPY backend/backend.proto ./backend/backend.proto
COPY backend/go/image/stablediffusion ./backend/go/image/stablediffusion
COPY pkg/grpc ./pkg/grpc
COPY pkg/stablediffusion ./pkg/stablediffusion
RUN git init
RUN make sources/go-stable-diffusion
RUN touch prepare-sources
COPY --from=grpc /opt/grpc /usr/local
# Actually build the backend
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make backend-assets/grpc/stablediffusion
WORKDIR /build
###################################
###################################
COPY ./Makefile .
COPY ./backend ./backend
COPY ./go.mod .
COPY ./go.sum .
COPY ./.git ./.git
# Some of the Go backends use libs from the main src; we could further optimize the caching by building the CPP backends before this point
COPY ./pkg/grpc ./pkg/grpc
COPY ./pkg/utils ./pkg/utils
COPY ./pkg/langchain ./pkg/langchain
RUN ls -l ./
RUN make backend-assets
RUN make prepare
RUN if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make grpcs; \
else \
make grpcs; \
fi
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
# Adjustments to the build process should likely be made here.
FROM builder-sd AS builder
FROM builder-backends AS builder
# Install the pre-built GRPC
COPY --from=grpc /opt/grpc /usr/local
# Rebuild with defaults backends
WORKDIR /build
COPY . .
COPY .git .
RUN make prepare
## Build the binary
## If it's CUDA or hipblas, we want to skip some of the llama-compat backends to save space
## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
## (both will use CUDA or hipblas for the actual computation)
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
## If we're on arm64 AND using cublas/hipblas, skip some of the llama-compat backends to save space
## Otherwise just run the normal build
RUN if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
else \
make build; \
fi
@@ -348,26 +330,13 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
FROM builder-base AS devcontainer
ARG FFMPEG
COPY --from=grpc /opt/grpc /usr/local
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion /build/backend-assets/grpc/stablediffusion
COPY .devcontainer-scripts /.devcontainer-scripts
# Add FFmpeg
RUN if [ "${FFMPEG}" = "true" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
; fi
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ssh less wget
ssh less
# For the devcontainer, leave apt functional in case additional devtools are needed at runtime.
RUN go install github.com/go-delve/delve/cmd/dlv@latest
@@ -381,44 +350,16 @@ RUN go install github.com/mikefarah/yq/v4@latest
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
FROM requirements-drivers
ARG FFMPEG
ARG BUILD_TYPE
ARG TARGETARCH
ARG IMAGE_TYPE=extras
ARG EXTRA_BACKENDS
ARG MAKEFLAGS
ENV BUILD_TYPE=${BUILD_TYPE}
ENV REBUILD=false
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
ENV MAKEFLAGS=${MAKEFLAGS}
ARG CUDA_MAJOR_VERSION=12
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all
# Add FFmpeg
RUN if [ "${FFMPEG}" = "true" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
; fi
WORKDIR /
WORKDIR /build
# we start fresh & re-copy all assets because `make build` does not clean up nicely after itself
# so when `entrypoint.sh` runs `make build` again (which it does by default), the build would fail
# see https://github.com/go-skynet/LocalAI/pull/658#discussion_r1241971626 and
# https://github.com/go-skynet/LocalAI/pull/434
COPY . .
COPY --from=builder /build/sources ./sources/
COPY --from=grpc /opt/grpc /usr/local
RUN make prepare-sources
COPY ./entrypoint.sh .
# Copy the binary
COPY --from=builder /build/local-ai ./
@@ -426,67 +367,13 @@ COPY --from=builder /build/local-ai ./
# Copy shared libraries for piper
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
# do not let stablediffusion rebuild (requires an older version of absl)
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
# Change the shell to bash so we can use [[ tests below
SHELL ["/bin/bash", "-c"]
# We try to strike a balance between individual layer size (as that affects total push time) and total image size
# Splitting the backends into more groups with fewer items results in a larger image, but a smaller size for the largest layer
# Splitting the backends into fewer groups with more items results in a smaller image, but a larger size for the largest layer
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/coqui \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/parler-tts \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/diffusers \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/transformers-musicgen \
; fi
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/vall-e-x \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/openvoice \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/sentencetransformers \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/exllama2 \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/transformers \
; fi
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/vllm \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "autogptq" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/autogptq \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "bark" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/bark \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/rerankers \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "mamba" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/mamba \
; fi
# Make sure the models directory exists
RUN mkdir -p /build/models
RUN mkdir -p /models /backends
# Define the health check command
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
VOLUME /build/models
VOLUME /models /backends
EXPOSE 8080
ENTRYPOINT [ "/build/entrypoint.sh" ]
ENTRYPOINT [ "/entrypoint.sh" ]


@@ -1,6 +1,6 @@
MIT License
Copyright (c) 2023-2024 Ettore Di Giacinto (mudler@localai.io)
Copyright (c) 2023-2025 Ettore Di Giacinto (mudler@localai.io)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

353
Makefile

@@ -6,29 +6,26 @@ BINARY_NAME=local-ai
DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=47f931c8f9a26c072d71224bc8013cc66ea9e445
CPPLLAMA_VERSION?=72babea5dea56c8a8e8420ccf731b12a5cf37854
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
WHISPER_CPP_VERSION?=c88ffbf9baeaae8c2cc0a4f496618314bb2ee9e0
# go-piper version
PIPER_REPO?=https://github.com/mudler/go-piper
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
# stablediffusion version
STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
# bark.cpp
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
BARKCPP_VERSION?=v1.0.0
# tinydream version
TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/richiejp/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=53e3b17eb3d0b5760ced06a1f98320b68b34aaae
# ONEAPI variables for SYCL
export ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
ONNX_VERSION?=1.20.0
ONNX_ARCH?=x64
@@ -36,8 +33,12 @@ ONNX_OS?=linux
export BUILD_TYPE?=
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
export CMAKE_ARGS?=
export CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
export WHISPER_CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
export BACKEND_LIBS?=
export WHISPER_DIR=$(abspath ./sources/whisper.cpp)
export WHISPER_INCLUDE_PATH=$(WHISPER_DIR)/include:$(WHISPER_DIR)/ggml/include
export WHISPER_LIBRARY_PATH=$(WHISPER_DIR)/build/src/:$(WHISPER_DIR)/build/ggml/src
CGO_LDFLAGS?=
CGO_LDFLAGS_WHISPER?=
@@ -87,6 +88,7 @@ endif
# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
ifeq ($(NATIVE),false)
CMAKE_ARGS+=-DGGML_NATIVE=OFF
WHISPER_CMAKE_ARGS+=-DGGML_NATIVE=OFF
endif
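# Hedged usage sketch: since the flag is only appended when NATIVE=false, a
# portable (non-native) binary can be requested from the command line with:
#   make NATIVE=false build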
# Detect if we are running on arm64
@@ -114,13 +116,31 @@ ifeq ($(OS),Darwin)
# disable metal if on Darwin and any other value is explicitly passed.
else ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
WHISPER_CMAKE_ARGS+=-DGGML_METAL=OFF
export GGML_NO_ACCELERATE=1
export GGML_NO_METAL=1
GO_LDFLAGS_WHISPER+=-lggml-blas
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
endif
ifeq ($(BUILD_TYPE),metal)
# -lcblas removed: it seems to always be listed as a duplicate flag.
CGO_LDFLAGS += -framework Accelerate
CGO_LDFLAGS_WHISPER+=-lggml-metal -lggml-blas
CMAKE_ARGS+=-DGGML_METAL=ON
CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
CMAKE_ARGS+=-DGGML_OPENMP=OFF
WHISPER_CMAKE_ARGS+=-DGGML_METAL=ON
WHISPER_CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
WHISPER_CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_EXAMPLES=OFF
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_TESTS=OFF
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_SERVER=OFF
WHISPER_CMAKE_ARGS+=-DGGML_OPENMP=OFF
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-metal/:$(WHISPER_DIR)/build/ggml/src/ggml-blas
else
CGO_LDFLAGS_WHISPER+=-lggml-blas
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
endif
else
CGO_LDFLAGS_WHISPER+=-lgomp
@@ -132,21 +152,29 @@ ifeq ($(BUILD_TYPE),openblas)
endif
ifeq ($(BUILD_TYPE),cublas)
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) -L$(CUDA_LIBPATH)/stubs/ -lcuda
export GGML_CUDA=1
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
CMAKE_ARGS+=-DGGML_CUDA=ON
WHISPER_CMAKE_ARGS+=-DGGML_CUDA=ON
CGO_LDFLAGS_WHISPER+=-lcufft -lggml-cuda
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-cuda/
endif
ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=1
WHISPER_CMAKE_ARGS+=-DGGML_VULKAN=1
CGO_LDFLAGS_WHISPER+=-lggml-vulkan -lvulkan
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-vulkan/
endif
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export GGML_SYCL=1
CMAKE_ARGS+=-DGGML_SYCL=ON
endif
ifeq ($(BUILD_TYPE),sycl_f16)
export GGML_SYCL_F16=1
CMAKE_ARGS+=-DGGML_SYCL_F16=ON
endif
ifeq ($(BUILD_TYPE),hipblas)
@@ -155,10 +183,9 @@ ifeq ($(BUILD_TYPE),hipblas)
LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
export CXX=$(ROCM_HOME)/llvm/bin/clang++
export CC=$(ROCM_HOME)/llvm/bin/clang
# llama-ggml has no hipblas support, so override it here.
export STABLE_BUILD_TYPE=
export GGML_HIP=1
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
GPU_TARGETS ?= gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
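# Hedged usage sketch: GPU_TARGETS is assigned with ?=, so a single-architecture
# ROCm build can override it on the command line (the target below is illustrative):
#   make BUILD_TYPE=hipblas GPU_TARGETS=gfx1100 build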
@@ -179,16 +206,6 @@ ifeq ($(STATIC),true)
LD_FLAGS+=-linkmode external -extldflags -static
endif
ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
# OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
OPTIONAL_GRPC+=backend-assets/grpc/stablediffusion
endif
ifeq ($(findstring tinydream,$(GO_TAGS)),tinydream)
# OPTIONAL_TARGETS+=go-tiny-dream/libtinydream.a
OPTIONAL_GRPC+=backend-assets/grpc/tinydream
endif
ifeq ($(findstring tts,$(GO_TAGS)),tts)
# OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
# OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
@@ -198,14 +215,20 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
endif
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
ifeq ($(ONNX_OS),linux)
ifeq ($(ONNX_ARCH),x64)
ALL_GRPC_BACKENDS+=backend-assets/grpc/stablediffusion-ggml
endif
endif
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
@@ -228,31 +251,22 @@ endif
all: help
## BERT embeddings
sources/go-bert.cpp:
mkdir -p sources/go-bert.cpp
cd sources/go-bert.cpp && \
git init && \
git remote add origin $(BERT_REPO) && \
git fetch origin && \
git checkout $(BERT_VERSION) && \
## bark.cpp
sources/bark.cpp:
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
cd sources/bark.cpp && \
git checkout $(BARKCPP_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
$(MAKE) -C sources/go-bert.cpp libgobert.a
sources/bark.cpp/build/libbark.a: sources/bark.cpp
cd sources/bark.cpp && \
mkdir -p build && \
cd build && \
cmake $(CMAKE_ARGS) .. && \
cmake --build . --config Release
## go-llama.cpp
sources/go-llama.cpp:
mkdir -p sources/go-llama.cpp
cd sources/go-llama.cpp && \
git init && \
git remote add origin $(GOLLAMA_REPO) && \
git fetch origin && \
git checkout $(GOLLAMA_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
backend/go/bark/libbark.a: sources/bark.cpp/build/libbark.a
$(MAKE) -C backend/go/bark libbark.a
## go-piper
sources/go-piper:
@@ -267,18 +281,19 @@ sources/go-piper:
sources/go-piper/libpiper_binding.a: sources/go-piper
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
## stable diffusion
sources/go-stable-diffusion:
mkdir -p sources/go-stable-diffusion
cd sources/go-stable-diffusion && \
git init && \
git remote add origin $(STABLEDIFFUSION_REPO) && \
git fetch origin && \
git checkout $(STABLEDIFFUSION_VERSION) && \
## stablediffusion (ggml)
sources/stablediffusion-ggml.cpp:
git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
cd sources/stablediffusion-ggml.cpp && \
git checkout $(STABLEDIFFUSION_GGML_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp
$(MAKE) -C backend/go/image/stablediffusion-ggml build/libstable-diffusion.a
$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a
backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
$(MAKE) -C backend/go/image/stablediffusion-ggml CGO_LDFLAGS="$(CGO_LDFLAGS)" stablediffusion-ggml
sources/onnxruntime:
mkdir -p sources/onnxruntime
@@ -294,19 +309,6 @@ else
mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
endif
## tiny-dream
sources/go-tiny-dream:
mkdir -p sources/go-tiny-dream
cd sources/go-tiny-dream && \
git init && \
git remote add origin $(TINYDREAM_REPO) && \
git fetch origin && \
git checkout $(TINYDREAM_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
$(MAKE) -C sources/go-tiny-dream libtinydream.a
## whisper
sources/whisper.cpp:
mkdir -p sources/whisper.cpp
@@ -317,28 +319,21 @@ sources/whisper.cpp:
git checkout $(WHISPER_CPP_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
sources/whisper.cpp/build/src/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && cmake $(WHISPER_CMAKE_ARGS) . -B ./build
cd sources/whisper.cpp/build && cmake --build . --config Release
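# For reference, a hedged manual equivalent of the CMake-based build above, run
# from the repository root (assumes the sources/whisper.cpp checkout exists and
# the default WHISPER_CMAKE_ARGS of -DBUILD_SHARED_LIBS=OFF):
#   cd sources/whisper.cpp && cmake -DBUILD_SHARED_LIBS=OFF . -B ./build
#   cd build && cmake --build . --config Release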
get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
replace:
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
dropreplace:
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
prepare-sources: get-sources replace
$(GOCMD) mod download
@@ -346,12 +341,8 @@ prepare-sources: get-sources replace
## GENERIC
rebuild: ## Rebuilds the project
$(GOCMD) clean -cache
$(MAKE) -C sources/go-llama.cpp clean
$(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-stable-diffusion clean
$(MAKE) -C sources/go-bert.cpp clean
$(MAKE) -C sources/go-piper clean
$(MAKE) -C sources/go-tiny-dream clean
$(MAKE) build
prepare: prepare-sources $(OPTIONAL_TARGETS)
@@ -364,7 +355,9 @@ clean: ## Remove build related file
rm -rf release/
rm -rf backend-assets/*
$(MAKE) -C backend/cpp/grpc clean
$(MAKE) -C backend/go/bark clean
$(MAKE) -C backend/cpp/llama clean
$(MAKE) -C backend/go/image/stablediffusion-ggml clean
rm -rf backend/cpp/llama-* || true
$(MAKE) dropreplace
$(MAKE) protogen-clean
@@ -378,8 +371,14 @@ clean-tests:
clean-dc: clean
cp -r /build/backend-assets /workspace/backend-assets
## Install Go tools
install-go-tools:
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install github.com/GeertJohan/go.rice/rice@latest
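# Hedged usage note: install-go-tools is also wired in as a prerequisite of
# `build` and `protogen-go` below, but it can be run on its own (assumes
# $(go env GOPATH)/bin is on PATH):
#   make install-go-tools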
## Build:
build: prepare backend-assets grpcs ## Build the project
build: prepare backend-assets grpcs install-go-tools ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
@@ -389,7 +388,9 @@ ifneq ($(BACKEND_LIBS),)
$(MAKE) backend-assets/lib
cp -f $(BACKEND_LIBS) backend-assets/lib/
endif
rm -rf $(BINARY_NAME) || true
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
rice append --exec $(BINARY_NAME)
build-minimal:
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
@@ -451,7 +452,7 @@ run: prepare ## run local-ai
test-models/testmodel.ggml:
mkdir test-models
mkdir test-dir
wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel.ggml
wget -q https://huggingface.co/mradermacher/gpt2-alpaca-gpt4-GGUF/resolve/main/gpt2-alpaca-gpt4.Q4_K_M.gguf -O test-models/testmodel.ggml
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
@@ -461,13 +462,13 @@ prepare-test: grpcs
cp -rf backend-assets core/http
cp tests/models_fixtures/* test-models
## Test targets
test: prepare test-models/testmodel.ggml grpcs
@echo 'Running tests'
export GO_TAGS="tts stablediffusion debug"
export GO_TAGS="tts debug"
$(MAKE) prepare-test
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
$(MAKE) test-llama
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
$(MAKE) test-llama-gguf
$(MAKE) test-tts
$(MAKE) test-stablediffusion
@@ -496,10 +497,6 @@ teardown-e2e:
rm -rf $(TEST_DIR) || true
docker stop $$(docker ps -q --filter ancestor=localai-tests)
test-llama: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
test-llama-gguf: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
@@ -540,7 +537,7 @@ protogen: protogen-go protogen-python
protogen-clean: protogen-go-clean protogen-python-clean
.PHONY: protogen-go
protogen-go:
protogen-go: install-go-tools
mkdir -p pkg/grpc/proto
protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
backend/backend.proto
@@ -551,18 +548,10 @@ protogen-go-clean:
$(RM) bin/*
.PHONY: protogen-python
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
protogen-python: bark-protogen coqui-protogen chatterbox-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
.PHONY: protogen-python-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
.PHONY: autogptq-protogen
autogptq-protogen:
$(MAKE) -C backend/python/autogptq protogen
.PHONY: autogptq-protogen-clean
autogptq-protogen-clean:
$(MAKE) -C backend/python/autogptq protogen-clean
protogen-python-clean: bark-protogen-clean coqui-protogen-clean chatterbox-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
.PHONY: bark-protogen
bark-protogen:
@@ -584,10 +573,26 @@ coqui-protogen-clean:
diffusers-protogen:
$(MAKE) -C backend/python/diffusers protogen
.PHONY: chatterbox-protogen
chatterbox-protogen:
$(MAKE) -C backend/python/chatterbox protogen
.PHONY: diffusers-protogen-clean
diffusers-protogen-clean:
$(MAKE) -C backend/python/diffusers protogen-clean
.PHONY: chatterbox-protogen-clean
chatterbox-protogen-clean:
$(MAKE) -C backend/python/chatterbox protogen-clean
.PHONY: faster-whisper-protogen
faster-whisper-protogen:
$(MAKE) -C backend/python/faster-whisper protogen
.PHONY: faster-whisper-protogen-clean
faster-whisper-protogen-clean:
$(MAKE) -C backend/python/faster-whisper protogen-clean
.PHONY: exllama2-protogen
exllama2-protogen:
$(MAKE) -C backend/python/exllama2 protogen
@@ -596,14 +601,6 @@ exllama2-protogen:
exllama2-protogen-clean:
$(MAKE) -C backend/python/exllama2 protogen-clean
.PHONY: mamba-protogen
mamba-protogen:
$(MAKE) -C backend/python/mamba protogen
.PHONY: mamba-protogen-clean
mamba-protogen-clean:
$(MAKE) -C backend/python/mamba protogen-clean
.PHONY: rerankers-protogen
rerankers-protogen:
$(MAKE) -C backend/python/rerankers protogen
@@ -612,14 +609,6 @@ rerankers-protogen:
rerankers-protogen-clean:
$(MAKE) -C backend/python/rerankers protogen-clean
.PHONY: sentencetransformers-protogen
sentencetransformers-protogen:
$(MAKE) -C backend/python/sentencetransformers protogen
.PHONY: sentencetransformers-protogen-clean
sentencetransformers-protogen-clean:
$(MAKE) -C backend/python/sentencetransformers protogen-clean
.PHONY: transformers-protogen
transformers-protogen:
$(MAKE) -C backend/python/transformers protogen
@@ -628,37 +617,13 @@ transformers-protogen:
transformers-protogen-clean:
$(MAKE) -C backend/python/transformers protogen-clean
.PHONY: parler-tts-protogen
parler-tts-protogen:
$(MAKE) -C backend/python/parler-tts protogen
.PHONY: kokoro-protogen
kokoro-protogen:
$(MAKE) -C backend/python/kokoro protogen
.PHONY: parler-tts-protogen-clean
parler-tts-protogen-clean:
$(MAKE) -C backend/python/parler-tts protogen-clean
.PHONY: transformers-musicgen-protogen
transformers-musicgen-protogen:
$(MAKE) -C backend/python/transformers-musicgen protogen
.PHONY: transformers-musicgen-protogen-clean
transformers-musicgen-protogen-clean:
$(MAKE) -C backend/python/transformers-musicgen protogen-clean
.PHONY: vall-e-x-protogen
vall-e-x-protogen:
$(MAKE) -C backend/python/vall-e-x protogen
.PHONY: vall-e-x-protogen-clean
vall-e-x-protogen-clean:
$(MAKE) -C backend/python/vall-e-x protogen-clean
.PHONY: openvoice-protogen
openvoice-protogen:
$(MAKE) -C backend/python/openvoice protogen
.PHONY: openvoice-protogen-clean
openvoice-protogen-clean:
$(MAKE) -C backend/python/openvoice protogen-clean
.PHONY: kokoro-protogen-clean
kokoro-protogen-clean:
$(MAKE) -C backend/python/kokoro protogen-clean
.PHONY: vllm-protogen
vllm-protogen:
@@ -671,28 +636,28 @@ vllm-protogen-clean:
## GRPC
# Note: it is duplicated in the Dockerfile
prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/autogptq
$(MAKE) -C backend/python/bark
$(MAKE) -C backend/python/coqui
$(MAKE) -C backend/python/diffusers
$(MAKE) -C backend/python/chatterbox
$(MAKE) -C backend/python/faster-whisper
$(MAKE) -C backend/python/vllm
$(MAKE) -C backend/python/mamba
$(MAKE) -C backend/python/sentencetransformers
$(MAKE) -C backend/python/rerankers
$(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/transformers-musicgen
$(MAKE) -C backend/python/parler-tts
$(MAKE) -C backend/python/vall-e-x
$(MAKE) -C backend/python/openvoice
$(MAKE) -C backend/python/kokoro
$(MAKE) -C backend/python/exllama2
prepare-test-extra: protogen-python
$(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/diffusers
$(MAKE) -C backend/python/chatterbox
$(MAKE) -C backend/python/vllm
test-extra: prepare-test-extra
$(MAKE) -C backend/python/transformers test
$(MAKE) -C backend/python/diffusers test
$(MAKE) -C backend/python/chatterbox test
$(MAKE) -C backend/python/vllm test
backend-assets:
mkdir -p backend-assets
@@ -707,13 +672,6 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
backend-assets/grpc: protogen-go replace
mkdir -p backend-assets/grpc
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/bert-embeddings
endif
backend-assets/grpc/huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
ifneq ($(UPX),)
@@ -760,6 +718,13 @@ backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
backend-assets/grpc/llama-cpp-avx512: backend-assets/grpc backend/cpp/llama/llama.cpp
cp -rf backend/cpp/llama backend/cpp/llama-avx512
$(MAKE) -C backend/cpp/llama-avx512 purge
$(info ${GREEN}I llama-cpp build info:avx512${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx512" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-avx512/grpc-server backend-assets/grpc/llama-cpp-avx512
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
cp -rf backend/cpp/llama backend/cpp/llama-avx
$(MAKE) -C backend/cpp/llama-avx purge
@@ -773,10 +738,6 @@ backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/ll
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
# TODO: every binary should have its own folder instead, so can have different metal implementations
ifeq ($(BUILD_TYPE),metal)
cp backend/cpp/llama-fallback/llama.cpp/build/bin/ggml-metal.metal backend-assets/grpc/
endif
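Each CPU variant above is an ordinary Make target and can be built in isolation; a sketch, assuming a configured local build environment (cmake, a C++ toolchain, and the fetched llama.cpp sources):
```bash
# Build only the AVX2 variant of the llama.cpp gRPC server
make backend-assets/grpc/llama-cpp-avx2
# Or the conservative fallback with all SIMD extensions disabled
make backend-assets/grpc/llama-cpp-fallback
```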
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
cp -rf backend/cpp/llama backend/cpp/llama-cuda
@@ -817,11 +778,11 @@ backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
mkdir -p backend-assets/util/
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/llama-ggml
$(UPX) backend-assets/grpc/bark-cpp
endif
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
@@ -831,13 +792,6 @@ ifneq ($(UPX),)
$(UPX) backend-assets/grpc/piper
endif
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/stablediffusion
endif
backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
@@ -845,15 +799,8 @@ ifneq ($(UPX),)
$(UPX) backend-assets/grpc/silero-vad
endif
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/tinydream
endif
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/build/src/libwhisper.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="${WHISPER_INCLUDE_PATH}" LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" LD_LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/whisper
@@ -905,17 +852,17 @@ docker-aio-all:
docker-image-intel:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.0-0-devel-ubuntu24.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
docker-image-intel-xpu:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
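Either Intel image can be rebuilt locally through these targets; a minimal sketch (DOCKER_IMAGE is a Makefile variable, and the tag here is illustrative):
```bash
make docker-image-intel DOCKER_IMAGE=localai:sycl-f32
```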
@@ -925,7 +872,7 @@ swagger:
.PHONY: gen-assets
gen-assets:
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
$(GOCMD) run core/dependencies_manager/manager.go webui_static.yaml core/http/static/assets
## Documentation
docs/layouts/_default:

README.md

@@ -1,7 +1,6 @@
<h1 align="center">
<br>
<img height="300" src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"> <br>
LocalAI
<img height="300" src="./core/http/static/logo.png"> <br>
<br>
</h1>
@@ -31,7 +30,7 @@
<p align="center">
<a href="https://twitter.com/LocalAI_API" target="blank">
<img src="https://img.shields.io/twitter/follow/LocalAI_API?label=Follow: LocalAI_API&style=social" alt="Follow LocalAI_API"/>
<img src="https://img.shields.io/badge/X-%23000000.svg?style=for-the-badge&logo=X&logoColor=white&label=LocalAI_API" alt="Follow LocalAI_API"/>
</a>
<a href="https://discord.gg/uJAeKSAGDy" target="blank">
<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
@@ -39,40 +38,142 @@
</p>
<p align="center">
<a href="https://trendshift.io/repositories/1484" target="_blank"><img src="https://trendshift.io/api/badge/repositories/1484" alt="go-skynet%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
<a href="https://trendshift.io/repositories/5539" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5539" alt="mudler%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
</p>
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
>
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples)
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on
[![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white)](https://t.me/localaiofficial_bot)
[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API thats compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that's compatible with OpenAI (Elevenlabs, Anthropic, ...) API specifications for local AI inferencing. It allows you to run LLMs, generate images and audio (and more) locally or on-prem with consumer-grade hardware, supporting multiple model families. It does not require a GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
![screen](https://github.com/mudler/LocalAI/assets/2420543/20b5ccd2-8393-44f0-aaf6-87a23806381e)
## 📚🆕 Local Stack Family
🆕 LocalAI is now part of a comprehensive suite of AI tools designed to work together:
<table>
<tr>
<td width="50%" valign="top">
<a href="https://github.com/mudler/LocalAGI">
<img src="https://raw.githubusercontent.com/mudler/LocalAGI/refs/heads/main/webui/react-ui/public/logo_2.png" width="300" alt="LocalAGI Logo">
</a>
</td>
<td width="50%" valign="top">
<h3><a href="https://github.com/mudler/LocalAGI">LocalAGI</a></h3>
<p>A powerful Local AI agent management platform that serves as a drop-in replacement for OpenAI's Responses API, enhanced with advanced agentic capabilities.</p>
</td>
</tr>
<tr>
<td width="50%" valign="top">
<a href="https://github.com/mudler/LocalRecall">
<img src="https://raw.githubusercontent.com/mudler/LocalRecall/refs/heads/main/static/localrecall_horizontal.png" width="300" alt="LocalRecall Logo">
</a>
</td>
<td width="50%" valign="top">
<h3><a href="https://github.com/mudler/LocalRecall">LocalRecall</a></h3>
<p>A REST-ful API and knowledge base management system that provides persistent memory and storage capabilities for AI agents.</p>
</td>
</tr>
</table>
## Screenshots
| Talk Interface | Generate Audio |
| --- | --- |
| ![Screenshot 2025-03-31 at 12-01-36 LocalAI - Talk](./docs/assets/images/screenshots/screenshot_tts.png) | ![Screenshot 2025-03-31 at 12-01-29 LocalAI - Generate audio with voice-en-us-ryan-low](./docs/assets/images/screenshots/screenshot_tts.png) |
| Models Overview | Generate Images |
| --- | --- |
| ![Screenshot 2025-03-31 at 12-01-20 LocalAI - Models](./docs/assets/images/screenshots/screenshot_gallery.png) | ![Screenshot 2025-03-31 at 12-31-41 LocalAI - Generate images with flux 1-dev](./docs/assets/images/screenshots/screenshot_image.png) |
| Chat Interface | Home |
| --- | --- |
| ![Screenshot 2025-03-31 at 11-57-44 LocalAI - Chat with localai-functioncall-qwen2 5-7b-v0 5](./docs/assets/images/screenshots/screenshot_chat.png) | ![Screenshot 2025-03-31 at 11-57-23 LocalAI API - c2a39e3 (c2a39e3639227cfd94ffffe9f5691239acc275a8)](./docs/assets/images/screenshots/screenshot_home.png) |
| Login | Swarm |
| --- | --- |
|![Screenshot 2025-03-31 at 12-09-59 ](./docs/assets/images/screenshots/screenshot_login.png) | ![Screenshot 2025-03-31 at 12-10-39 LocalAI - P2P dashboard](./docs/assets/images/screenshots/screenshot_p2p.png) |
## 💻 Quickstart
Run the installer script:
```bash
# Basic installation
curl https://localai.io/install.sh | sh
```
Or run with docker:
```bash
# CPU only image:
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu
# Nvidia GPU:
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
# CPU and GPU image (bigger size):
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
# AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
```
For more installation options, see [Installer Options](https://localai.io/docs/advanced/installer/).
Or run with docker:
### CPU only image:
```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
```
### NVIDIA GPU Images:
```bash
# CUDA 12.0
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
# CUDA 11.7
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11
# NVIDIA Jetson (L4T) ARM64
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-nvidia-l4t-arm64
```
### AMD GPU Images (ROCm):
```bash
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas
```
### Intel GPU Images (oneAPI):
```bash
# Intel GPU with FP16 support
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16
# Intel GPU with FP32 support
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32
```
### Vulkan GPU Images:
```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-vulkan
```
### AIO Images (pre-downloaded models):
```bash
# CPU version
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
# NVIDIA CUDA 12 version
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
# NVIDIA CUDA 11 version
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11
# Intel GPU version
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel-f16
# AMD GPU version
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
```
For more information about the AIO images and pre-downloaded models, see [Container Documentation](https://localai.io/basics/container/).
To load models:
```bash
local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
local-ai run oci://localai/phi-2:latest
```
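Once the server is up and a model is loaded, the OpenAI-compatible API can be exercised directly; a minimal sketch, assuming the default port 8080 and a model named `gpt-4` as in the AIO presets:
```bash
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello!"}]}'
```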
[💻 Getting started](https://localai.io/basics/getting_started/index.html)
For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html)
## 📰 Latest project news
- June 2025: [Backend management](https://github.com/mudler/LocalAI/pull/5607) has been added. Attention: extras images are going to be deprecated from the next release! Read [the backend management PR](https://github.com/mudler/LocalAI/pull/5607).
- May 2025: [Audio input](https://github.com/mudler/LocalAI/pull/5466) and [Reranking](https://github.com/mudler/LocalAI/pull/5396) in llama.cpp backend, [Realtime API](https://github.com/mudler/LocalAI/pull/5392), support for Gemma, SmollVLM, and more multimodal models (available in the gallery).
- May 2025: Important: image name changes [See release](https://github.com/mudler/LocalAI/releases/tag/v2.29.0)
- Apr 2025: Rebrand, WebUI enhancements
- Apr 2025: [LocalAGI](https://github.com/mudler/LocalAGI) and [LocalRecall](https://github.com/mudler/LocalRecall) join the LocalAI family stack.
- Apr 2025: WebUI overhaul, AIO images updates
- Feb 2025: Backend cleanup, breaking changes, new backends (kokoro, OuteTTS, faster-whisper), Nvidia L4T images
- Jan 2025: LocalAI model release: https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3, SANA support in diffusers: https://github.com/mudler/LocalAI/pull/4603
- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )
- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 )
- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204
- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples)
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
- June 2024: 🆕 You can now browse the model gallery without LocalAI! Check out https://models.localai.io
- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723. P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
- May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
- May 2024: 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
- May 2024: 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
- May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
## 🔥🔥 Hot topics (looking for help):
- Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729
- Realtime API https://github.com/mudler/LocalAI/issues/3714
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
- Assistant API: https://github.com/mudler/LocalAI/issues/1273
- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999
- Vulkan: https://github.com/mudler/LocalAI/issues/1647
- Anthropic API: https://github.com/mudler/LocalAI/issues/1808
If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
## 🚀 [Features](https://localai.io/features/)
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
- 🎨 [Image generation](https://localai.io/features/image-generation)
- 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/)
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
@@ -135,11 +226,10 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
- 📈 [Reranker API](https://localai.io/features/reranker/)
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
- [Agentic capabilities](https://github.com/mudler/LocalAGI)
- 🔊 Voice activity detection (Silero-VAD support)
- 🌍 Integrated WebUI!
## 💻 Usage
Check out the [Getting started](https://localai.io/basics/getting_started/index.html) section in our documentation.
### 🔗 Community and integrations
@@ -157,6 +247,7 @@ Model galleries
Other:
- Helm chart https://github.com/go-skynet/helm-charts
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
- Langchain: https://python.langchain.com/docs/integrations/providers/localai/
- Terminal utility https://github.com/djcopley/ShellOracle
- Local Smart assistant https://github.com/mudler/LocalAGI
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision
@@ -214,7 +305,7 @@ A huge thank you to our generous sponsors who support this project covering CI e
<p align="center">
<a href="https://www.spectrocloud.com/" target="blank">
<img height="200" src="https://github.com/go-skynet/LocalAI/assets/2420543/68a6f3cb-8a65-4a4d-99b5-6417a8905512">
<img height="200" src="https://github.com/user-attachments/assets/72eab1dd-8b93-4fc0-9ade-84db49f24962">
</a>
<a href="https://www.premai.io/" target="blank">
<img height="200" src="https://github.com/mudler/LocalAI/assets/2420543/42e4ca83-661e-4f79-8e46-ae43689683d6"> <br>


@@ -1,7 +1,7 @@
embeddings: true
name: text-embedding-ada-002
backend: bert-embeddings
parameters:
model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
usage: |
You can test this model with curl like this:
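(The usage snippet is truncated in this view; for reference, a minimal embeddings request could look like the sketch below, assuming a local instance on the default port.)
```bash
curl http://localhost:8080/v1/embeddings \
  -H "Content-Type: application/json" \
  -d '{"model": "text-embedding-ada-002", "input": "A test sentence"}'
```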


@@ -1,56 +1,17 @@
name: stablediffusion
backend: stablediffusion
backend: stablediffusion-ggml
cfg_scale: 4.5
options:
- sampler:euler
parameters:
model: stablediffusion_assets
license: "BSD-3"
urls:
- https://github.com/EdVince/Stable-Diffusion-NCNN
- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
description: |
Stable Diffusion in NCNN with c++, supported txt2img and img2img
model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf
step: 25
download_files:
- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
- filename: "stablediffusion_assets/log_sigmas.bin"
sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
- filename: "stablediffusion_assets/vocab.txt"
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
- filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
usage: |
curl http://localhost:8080/v1/images/generations \


@@ -1,7 +1,13 @@
name: jina-reranker-v1-base-en
backend: rerankers
reranking: true
f16: true
parameters:
model: cross-encoder
model: jina-reranker-v1-tiny-en.f16.gguf
download_files:
- filename: jina-reranker-v1-tiny-en.f16.gguf
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf
usage: |
You can test this model with curl like this:
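(Truncated here as well; a hedged sketch of a rerank request against the `/v1/rerank` route documented for the reranker feature:)
```bash
curl http://localhost:8080/v1/rerank \
  -H "Content-Type: application/json" \
  -d '{"model": "jina-reranker-v1-base-en", "query": "Organic skincare", "documents": ["doc one", "doc two"], "top_n": 2}'
```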


@@ -1,101 +1,57 @@
name: gpt-4
mmap: true
parameters:
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
context_size: 8192
stopwords:
- "<|im_end|>"
- "<dummy32000>"
- "</tool_call>"
- "<|eot_id|>"
- "<|end_of_text|>"
f16: true
function:
# disable injecting the "answer" tool
disable_no_action: true
grammar:
# This allows the grammar to also return messages
mixed_mode: true
# Suffix to add to the grammar
#prefix: '<tool_call>\n'
# Force parallel calls in the grammar
# parallel_calls: true
return_name_in_function_response: true
# Without grammar uncomment the lines below
# Warning: this is relying only on the capability of the
# LLM model to generate the correct function call.
json_regex_match:
- "(?s)<tool_call>(.*?)</tool_call>"
- "(?s)<tool_call>(.*?)"
replace_llm_results:
# Drop the scratchpad content from responses
- key: "(?s)<scratchpad>.*</scratchpad>"
value: ""
replace_function_results:
# Replace everything that is not JSON array or object
#
- key: '(?s)^[^{\[]*'
value: ""
- key: '(?s)[^}\]]*$'
value: ""
- key: "'([^']*?)'"
value: "_DQUOTE_${1}_DQUOTE_"
- key: '\\"'
value: "__TEMP_QUOTE__"
- key: "\'"
value: "'"
- key: "_DQUOTE_"
value: '"'
- key: "__TEMP_QUOTE__"
value: '"'
# Drop the scratchpad content from responses
- key: "(?s)<scratchpad>.*</scratchpad>"
value: ""
no_mixed_free_string: true
schema_type: llama3.1 # or JSON is supported too (json)
response_regex:
- <function=(?P<name>\w+)>(?P<arguments>.*)</function>
mmap: true
name: gpt-4
parameters:
model: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- <|eot_id|>
- <|end_of_text|>
template:
chat: |
{{.Input -}}
<|im_start|>assistant
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
{{- if .FunctionCall }}
<tool_call>
{{- else if eq .RoleName "tool" }}
<tool_response>
{{- end }}
{{- if .Content}}
{{.Content }}
{{- end }}
{{- if .FunctionCall}}
{{toJson .FunctionCall}}
{{- end }}
{{- if .FunctionCall }}
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}<|im_end|>
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
{{ if .FunctionCall -}}
{{ else if eq .RoleName "tool" -}}
The Function was executed and the response was:
{{ end -}}
{{ if .Content -}}
{{.Content -}}
{{ else if .FunctionCall -}}
{{ range .FunctionCall }}
[{{.FunctionCall.Name}}({{.FunctionCall.Arguments}})]
{{ end }}
{{ end -}}
<|eot_id|>
completion: |
{{.Input}}
function: |-
<|im_start|>system
You are a function calling AI model.
Here are the available tools:
<tools>
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
</tools>
You should call the tools provided to you sequentially
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
<scratchpad>
{step-by-step reasoning and plan in bullet points}
</scratchpad>
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
<tool_call>
{"arguments": <args-dict>, "name": <function-name>}
</tool_call><|im_end|>
{{.Input -}}
<|im_start|>assistant
function: |
<|start_header_id|>system<|end_header_id|>
You are an expert in composing functions. You are given a question and a set of possible functions.
Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
If none of the functions can be used, point it out. If the given question lacks the parameters required by the function, also point it out. You should only return the function call in tools call sections.
If you decide to invoke any of the function(s), you MUST put it in the format as follows:
[func_name1(params_name1=params_value1,params_name2=params_value2,...),func_name2(params_name1=params_value1,params_name2=params_value2,...)]
You SHOULD NOT include any other text in the response.
Here is a list of functions in JSON format that you can invoke.
{{toJson .Functions}}
<|eot_id|><|start_header_id|>user<|end_header_id|>
{{.Input}}
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
download_files:
- filename: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
sha256: 2e220a14ba4328fee38cf36c2c068261560f999fadb5725ce5c6d977cb5126b5
uri: huggingface://bartowski/Hermes-3-Llama-3.2-3B-GGUF/Hermes-3-Llama-3.2-3B-Q4_K_M.gguf

aio/cpu/vad.yaml (new file)

@@ -0,0 +1,8 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
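A request sketch for the new VAD model (hedged: the `/vad` route and raw-float `audio` payload follow the VAD API introduced in https://github.com/mudler/LocalAI/pull/4204; the exact shape may differ):
```bash
curl http://localhost:8080/vad \
  -H "Content-Type: application/json" \
  -d '{"model": "silero-vad", "audio": [0.0, 0.1, 0.2]}'
```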


@@ -1,31 +1,49 @@
backend: llama-cpp
context_size: 4096
f16: true
mmap: true
mmproj: minicpm-v-2_6-mmproj-f16.gguf
name: gpt-4o
roles:
user: "USER:"
assistant: "ASSISTANT:"
system: "SYSTEM:"
mmproj: bakllava-mmproj.gguf
parameters:
model: bakllava.gguf
model: minicpm-v-2_6-Q4_K_M.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- </s>
- <|endoftext|>
template:
chat: |
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>
completion: |
{{.Input}}
ASSISTANT:
function: |
<|im_start|>system
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
For each function call return a json object with function name and arguments
<|im_end|>
{{.Input -}}
<|im_start|>assistant
download_files:
- filename: bakllava.gguf
uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
- filename: bakllava-mmproj.gguf
uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-4-vision-preview",
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
- filename: minicpm-v-2_6-Q4_K_M.gguf
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
- filename: minicpm-v-2_6-mmproj-f16.gguf
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd


@@ -129,10 +129,10 @@ detect_gpu
detect_gpu_size
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vad.yaml,/aio/${PROFILE}/vision.yaml}"
check_vars
echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS"
exec /build/entrypoint.sh "$@"
exec /entrypoint.sh "$@"
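The profile and model list remain overridable at run time; a sketch using the AIO image and the profile names above:
```bash
# Force the gpu-8g profile and a custom model set instead of the auto-detected defaults
docker run -ti -p 8080:8080 \
  -e PROFILE=gpu-8g \
  -e MODELS=/aio/gpu-8g/text-to-text.yaml \
  localai/localai:latest-aio-gpu-nvidia-cuda-12
```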


@@ -1,7 +1,7 @@
embeddings: true
name: text-embedding-ada-002
backend: sentencetransformers
parameters:
model: all-MiniLM-L6-v2
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
usage: |
You can test this model with curl like this:


@@ -1,7 +1,13 @@
name: jina-reranker-v1-base-en
backend: rerankers
reranking: true
f16: true
parameters:
model: cross-encoder
model: jina-reranker-v1-tiny-en.f16.gguf
download_files:
- filename: jina-reranker-v1-tiny-en.f16.gguf
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf
usage: |
You can test this model with curl like this:


@@ -1,101 +1,53 @@
name: gpt-4
mmap: true
parameters:
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
context_size: 8192
stopwords:
- "<|im_end|>"
- "<dummy32000>"
- "</tool_call>"
- "<|eot_id|>"
- "<|end_of_text|>"
context_size: 4096
f16: true
function:
# disable injecting the "answer" tool
disable_no_action: true
capture_llm_results:
- (?s)<Thought>(.*?)</Thought>
grammar:
# This allows the grammar to also return messages
mixed_mode: true
# Suffix to add to the grammar
#prefix: '<tool_call>\n'
# Force parallel calls in the grammar
# parallel_calls: true
return_name_in_function_response: true
# Without grammar uncomment the lines below
# Warning: this is relying only on the capability of the
# LLM model to generate the correct function call.
json_regex_match:
- "(?s)<tool_call>(.*?)</tool_call>"
- "(?s)<tool_call>(.*?)"
properties_order: name,arguments
json_regex_match:
- (?s)<Output>(.*?)</Output>
replace_llm_results:
# Drop the scratchpad content from responses
- key: "(?s)<scratchpad>.*</scratchpad>"
- key: (?s)<Thought>(.*?)</Thought>
value: ""
replace_function_results:
# Replace everything that is not JSON array or object
#
- key: '(?s)^[^{\[]*'
value: ""
- key: '(?s)[^}\]]*$'
value: ""
- key: "'([^']*?)'"
value: "_DQUOTE_${1}_DQUOTE_"
- key: '\\"'
value: "__TEMP_QUOTE__"
- key: "\'"
value: "'"
- key: "_DQUOTE_"
value: '"'
- key: "__TEMP_QUOTE__"
value: '"'
# Drop the scratchpad content from responses
- key: "(?s)<scratchpad>.*</scratchpad>"
value: ""
mmap: true
name: gpt-4
parameters:
model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- </s>
template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
{{- if .FunctionCall }}
<tool_call>
{{- else if eq .RoleName "tool" }}
<tool_response>
{{- end }}
{{- if .Content}}
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{- end }}
{{- if .FunctionCall}}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{- end }}
{{- if .FunctionCall }}
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}<|im_end|>
{{ end -}}<|im_end|>
completion: |
{{.Input}}
function: |-
function: |
<|im_start|>system
You are a function calling AI model.
Here are the available tools:
<tools>
You are an AI assistant that executes function calls, and these are the tools at your disposal:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
</tools>
You should call the tools provided to you sequentially
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
<scratchpad>
{step-by-step reasoning and plan in bullet points}
</scratchpad>
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
<tool_call>
{"arguments": <args-dict>, "name": <function-name>}
</tool_call><|im_end|>
<|im_end|>
{{.Input -}}
<|im_start|>assistant
<|im_start|>assistant
download_files:
- filename: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
sha256: 4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4
uri: huggingface://mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf

aio/gpu-8g/vad.yaml (new file)

@@ -0,0 +1,8 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808


@@ -1,35 +1,49 @@
backend: llama-cpp
context_size: 4096
f16: true
mmap: true
mmproj: minicpm-v-2_6-mmproj-f16.gguf
name: gpt-4o
roles:
user: "USER:"
assistant: "ASSISTANT:"
system: "SYSTEM:"
mmproj: llava-v1.6-7b-mmproj-f16.gguf
parameters:
model: llava-v1.6-mistral-7b.Q5_K_M.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
seed: -1
model: minicpm-v-2_6-Q4_K_M.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- </s>
- <|endoftext|>
template:
chat: |
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>
completion: |
{{.Input}}
ASSISTANT:
function: |
<|im_start|>system
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
For each function call return a json object with function name and arguments
<|im_end|>
{{.Input -}}
<|im_start|>assistant
download_files:
- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
- filename: llava-v1.6-7b-mmproj-f16.gguf
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-4-vision-preview",
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
- filename: minicpm-v-2_6-Q4_K_M.gguf
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
- filename: minicpm-v-2_6-mmproj-f16.gguf
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd


@@ -1,7 +1,7 @@
embeddings: true
name: text-embedding-ada-002
backend: sentencetransformers
parameters:
model: all-MiniLM-L6-v2
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
usage: |
You can test this model with curl like this:


@@ -1,7 +1,13 @@
name: jina-reranker-v1-base-en
backend: rerankers
reranking: true
f16: true
parameters:
model: cross-encoder
model: jina-reranker-v1-tiny-en.f16.gguf
download_files:
- filename: jina-reranker-v1-tiny-en.f16.gguf
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf
usage: |
You can test this model with curl like this:


@@ -1,103 +1,53 @@
name: gpt-4
mmap: false
context_size: 8192
f16: false
parameters:
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
stopwords:
- "<|im_end|>"
- "<dummy32000>"
- "</tool_call>"
- "<|eot_id|>"
- "<|end_of_text|>"
context_size: 4096
f16: true
function:
# disable injecting the "answer" tool
disable_no_action: true
capture_llm_results:
- (?s)<Thought>(.*?)</Thought>
grammar:
# This allows the grammar to also return messages
mixed_mode: true
# Suffix to add to the grammar
#prefix: '<tool_call>\n'
# Force parallel calls in the grammar
# parallel_calls: true
return_name_in_function_response: true
# Without grammar uncomment the lines below
# Warning: this is relying only on the capability of the
# LLM model to generate the correct function call.
json_regex_match:
- "(?s)<tool_call>(.*?)</tool_call>"
- "(?s)<tool_call>(.*?)"
properties_order: name,arguments
json_regex_match:
- (?s)<Output>(.*?)</Output>
replace_llm_results:
# Drop the scratchpad content from responses
- key: "(?s)<scratchpad>.*</scratchpad>"
- key: (?s)<Thought>(.*?)</Thought>
value: ""
replace_function_results:
# Replace everything that is not JSON array or object
#
- key: '(?s)^[^{\[]*'
value: ""
- key: '(?s)[^}\]]*$'
value: ""
- key: "'([^']*?)'"
value: "_DQUOTE_${1}_DQUOTE_"
- key: '\\"'
value: "__TEMP_QUOTE__"
- key: "\'"
value: "'"
- key: "_DQUOTE_"
value: '"'
- key: "__TEMP_QUOTE__"
value: '"'
# Drop the scratchpad content from responses
- key: "(?s)<scratchpad>.*</scratchpad>"
value: ""
mmap: true
name: gpt-4
parameters:
model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- </s>
template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
{{- if .FunctionCall }}
<tool_call>
{{- else if eq .RoleName "tool" }}
<tool_response>
{{- end }}
{{- if .Content}}
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{- end }}
{{- if .FunctionCall}}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{- end }}
{{- if .FunctionCall }}
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}<|im_end|>
{{ end -}}<|im_end|>
completion: |
{{.Input}}
function: |-
function: |
<|im_start|>system
You are a function calling AI model.
Here are the available tools:
<tools>
You are an AI assistant that executes function calls, and these are the tools at your disposal:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
</tools>
You should call the tools provided to you sequentially
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
<scratchpad>
{step-by-step reasoning and plan in bullet points}
</scratchpad>
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
<tool_call>
{"arguments": <args-dict>, "name": <function-name>}
</tool_call><|im_end|>
<|im_end|>
{{.Input -}}
<|im_start|>assistant
download_files:
- filename: localai-functioncall-phi-4-v0.3-q4_k_m.gguf
sha256: 23fee048ded2a6e2e1a7b6bbefa6cbf83068f194caa9552aecbaa00fec8a16d5
uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.3-q4_k_m.gguf

aio/intel/vad.yaml (new file)

@@ -0,0 +1,8 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808


@@ -1,35 +1,50 @@
backend: llama-cpp
context_size: 4096
mmap: false
f16: false
f16: true
mmap: true
mmproj: minicpm-v-2_6-mmproj-f16.gguf
name: gpt-4o
roles:
user: "USER:"
assistant: "ASSISTANT:"
system: "SYSTEM:"
mmproj: llava-v1.6-7b-mmproj-f16.gguf
parameters:
model: llava-v1.6-mistral-7b.Q5_K_M.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
seed: -1
model: minicpm-v-2_6-Q4_K_M.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- </s>
- <|endoftext|>
template:
chat: |
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>
completion: |
{{.Input}}
ASSISTANT:
function: |
<|im_start|>system
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
For each function call return a json object with function name and arguments
<|im_end|>
{{.Input -}}
<|im_start|>assistant
download_files:
- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
- filename: llava-v1.6-7b-mmproj-f16.gguf
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-4-vision-preview",
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
- filename: minicpm-v-2_6-Q4_K_M.gguf
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
- filename: minicpm-v-2_6-mmproj-f16.gguf
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd


@@ -1,6 +1,15 @@
package main
import "embed"
import (
rice "github.com/GeertJohan/go.rice"
)
//go:embed backend-assets/*
var backendAssets embed.FS
var backendAssets *rice.Box
func init() {
var err error
backendAssets, err = rice.FindBox("backend-assets")
if err != nil {
panic(err)
}
}
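With go.rice the assets are no longer compiled in via `//go:embed`: the box is resolved at run time, either from the `backend-assets` folder on disk or from data appended to the binary. A post-build sketch, assuming the `rice` CLI from github.com/GeertJohan/go.rice:
```bash
go install github.com/GeertJohan/go.rice/rice@latest
# Append the backend-assets box to the built binary so it ships as a single file
rice append --exec local-ai
```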

backend/Dockerfile.go (new file)

@@ -0,0 +1,131 @@
ARG BASE_IMAGE=ubuntu:22.04
FROM ${BASE_IMAGE} AS builder
ARG BACKEND=rerankers
ARG BUILD_TYPE
ENV BUILD_TYPE=${BUILD_TYPE}
ARG CUDA_MAJOR_VERSION
ARG CUDA_MINOR_VERSION
ARG SKIP_DRIVERS=false
ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}
ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
ENV DEBIAN_FRONTEND=noninteractive
ARG TARGETARCH
ARG TARGETVARIANT
ARG GO_VERSION=1.22.6
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
ccache \
ca-certificates \
make \
curl unzip \
libssl-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Cuda
ENV PATH=/usr/local/cuda/bin:${PATH}
# HipBLAS requirements
ENV PATH=/opt/rocm/bin:${PATH}
# Vulkan requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils wget gpg-agent && \
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
apt-get update && \
apt-get install -y \
vulkan-sdk && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# CuBLAS requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils
if [ "amd64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
fi
if [ "arm64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
fi
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
apt-get install -y --no-install-recommends \
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# If we are building with clblas support, we need the libraries for the builds
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
libclblast-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
; fi
RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
hipblas-dev \
rocblas-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
ldconfig \
; fi
# Install Go
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin:/usr/local/bin
# Install grpc compilers
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
RUN echo "TARGETARCH: $TARGETARCH"
# We need protoc installed, and the version in 22.04 is too old. We will create one as part of installing the GRPC build below,
# but that will also bring in a newer version of absl, which stablediffusion cannot compile with. This version of protoc is only
# here so that we can generate the grpc code for the stablediffusion build
RUN <<EOT bash
if [ "amd64" = "$TARGETARCH" ]; then
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
rm protoc.zip
fi
if [ "arm64" = "$TARGETARCH" ]; then
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-aarch_64.zip -o protoc.zip && \
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
rm protoc.zip
fi
EOT
COPY . /LocalAI
RUN cd /LocalAI && make backend-assets/grpc/bark-cpp
FROM scratch
COPY --from=builder /LocalAI/backend-assets/grpc/bark-cpp ./
COPY --from=builder /LocalAI/backend/go/bark/run.sh ./
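A build sketch for this Go backend image (assumptions: run from the repository root, which the `COPY . /LocalAI` step expects as context; the tag is illustrative):
```bash
docker build -f backend/Dockerfile.go \
  --build-arg BACKEND=bark-cpp \
  -t localai-backend-bark-cpp .
```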

backend/Dockerfile.python (new file)

@@ -0,0 +1,123 @@
ARG BASE_IMAGE=ubuntu:22.04
FROM ${BASE_IMAGE} AS builder
ARG BACKEND=rerankers
ARG BUILD_TYPE
ENV BUILD_TYPE=${BUILD_TYPE}
ARG CUDA_MAJOR_VERSION
ARG CUDA_MINOR_VERSION
ARG SKIP_DRIVERS=false
ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}
ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
ENV DEBIAN_FRONTEND=noninteractive
ARG TARGETARCH
ARG TARGETVARIANT
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
ccache \
ca-certificates \
espeak-ng \
curl \
libssl-dev \
git \
git-lfs \
unzip \
upx-ucl \
curl python3-pip \
python-is-python3 \
python3-dev llvm \
python3-venv make && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
pip install --upgrade pip
# Cuda
ENV PATH=/usr/local/cuda/bin:${PATH}
# HipBLAS requirements
ENV PATH=/opt/rocm/bin:${PATH}
# Vulkan requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils wget gpg-agent && \
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
apt-get update && \
apt-get install -y \
vulkan-sdk && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# CuBLAS requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils
if [ "amd64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
fi
if [ "arm64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
fi
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
apt-get install -y --no-install-recommends \
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# If we are building with clblas support, we need the libraries for the builds
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
libclblast-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
; fi
RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
hipblas-dev \
rocblas-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
ldconfig \
; fi
# Install uv as a system package
RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh
ENV PATH="/root/.cargo/bin:${PATH}"
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
# Install grpcio-tools (the version in 22.04 is too old)
RUN pip install --user grpcio-tools==1.71.0 grpcio==1.71.0
COPY python/${BACKEND} /${BACKEND}
COPY backend.proto /${BACKEND}/backend.proto
COPY python/common/ /${BACKEND}/common
RUN cd /${BACKEND} && make
FROM scratch
ARG BACKEND=rerankers
COPY --from=builder /${BACKEND}/ /

View File

@@ -14,6 +14,7 @@ service Backend {
rpc PredictStream(PredictOptions) returns (stream Reply) {}
rpc Embedding(PredictOptions) returns (EmbeddingResult) {}
rpc GenerateImage(GenerateImageRequest) returns (Result) {}
rpc GenerateVideo(GenerateVideoRequest) returns (Result) {}
rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
rpc TTS(TTSRequest) returns (Result) {}
rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
@@ -159,6 +160,13 @@ message Reply {
bytes message = 1;
int32 tokens = 2;
int32 prompt_tokens = 3;
double timing_prompt_processing = 4;
double timing_token_generation = 5;
bytes audio = 6;
}
message GrammarTrigger {
string word = 1;
}
message ModelOptions {
@@ -184,11 +192,7 @@ message ModelOptions {
int32 NGQA = 20;
string ModelFile = 21;
// AutoGPTQ
string Device = 22;
bool UseTriton = 23;
string ModelBaseName = 24;
bool UseFastTokenizer = 25;
// Diffusers
string PipelineType = 26;
@@ -222,6 +226,11 @@ message ModelOptions {
int32 MaxModelLen = 54;
int32 TensorParallelSize = 55;
string LoadFormat = 58;
bool DisableLogStatus = 66;
string DType = 67;
int32 LimitImagePerPrompt = 68;
int32 LimitVideoPerPrompt = 69;
int32 LimitAudioPerPrompt = 70;
string MMProj = 41;
@@ -240,6 +249,15 @@ message ModelOptions {
repeated string LoraAdapters = 60;
repeated float LoraScales = 61;
repeated string Options = 62;
string CacheTypeKey = 63;
string CacheTypeValue = 64;
repeated GrammarTrigger GrammarTriggers = 65;
bool Reranking = 71;
}
message Result {
@@ -287,6 +305,19 @@ message GenerateImageRequest {
int32 CLIPSkip = 11;
}
message GenerateVideoRequest {
string prompt = 1;
string start_image = 2; // Path or base64 encoded image for the start frame
string end_image = 3; // Path or base64 encoded image for the end frame
int32 width = 4;
int32 height = 5;
int32 num_frames = 6; // Number of frames to generate
int32 fps = 7; // Frames per second
int32 seed = 8;
float cfg_scale = 9; // Classifier-free guidance scale
string dst = 10; // Output path for the generated video
}
message TTSRequest {
string text = 1;
string model = 2;
@@ -343,4 +374,4 @@ message StatusResponse {
message Message {
string role = 1;
string content = 2;
}
}
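The new GenerateVideoRequest message is consumed through the generated bindings. A minimal sketch of populating it from Go, assuming the protoc-gen-go bindings in github.com/mudler/LocalAI/pkg/grpc/proto expose the usual CamelCase field names (the exact identifiers are assumptions, not confirmed by this diff):

package main

import (
	"fmt"

	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)

func main() {
	// Field names assume the standard protoc-gen-go mapping of the proto above.
	req := &pb.GenerateVideoRequest{
		Prompt:     "a sailboat drifting at sunset",
		StartImage: "start.png", // path or base64-encoded start frame
		Width:      512,
		Height:     288,
		NumFrames:  24, // number of frames to generate
		Fps:        8,  // frames per second
		Seed:       42,
		CfgScale:   7.0,       // classifier-free guidance scale
		Dst:        "out.mp4", // output path for the generated video
	}
	fmt.Printf("%+v\n", req)
}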

View File

@@ -1,20 +1,3 @@
## XXX: In some versions of CMake clip wasn't being built before llama.
## This is a hack for now, but it should be fixed in the future.
set(TARGET myclip)
add_library(${TARGET} clip.cpp clip.h llava.cpp llava.h)
install(TARGETS ${TARGET} LIBRARY)
target_include_directories(myclip PUBLIC .)
target_include_directories(myclip PUBLIC ../..)
target_include_directories(myclip PUBLIC ../../common)
target_link_libraries(${TARGET} PRIVATE common ggml llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if (NOT MSVC)
target_compile_options(${TARGET} PRIVATE -Wno-cast-qual) # stb_image.h
endif()
# END CLIP hack
set(TARGET grpc-server)
set(CMAKE_CXX_STANDARD 17)
cmake_minimum_required(VERSION 3.15)
@@ -74,8 +57,12 @@ add_library(hw_grpc_proto
${hw_proto_srcs}
${hw_proto_hdrs} )
add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp)
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp httplib.h)
target_include_directories(${TARGET} PRIVATE ../llava)
target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})
target_link_libraries(${TARGET} PRIVATE common llama mtmd ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
absl::flags_parse
gRPC::${_REFLECTION}
gRPC::${_GRPC_GRPCPP}

View File

@@ -8,7 +8,7 @@ ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
TARGET?=--target grpc-server
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
@@ -30,19 +30,24 @@ else ifeq ($(OS),Darwin)
CMAKE_ARGS+=-DGGML_METAL=OFF
else
CMAKE_ARGS+=-DGGML_METAL=ON
# Until this is tested properly, we disable embedded metal file
# as we already embed it as part of the LocalAI assets
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=OFF
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
TARGET+=--target ggml-metal
endif
endif
ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
CMAKE_ARGS+=-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx \
-DCMAKE_CXX_FLAGS="-fsycl" \
-DGGML_SYCL_F16=ON
endif
ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
CMAKE_ARGS+=-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx \
-DCMAKE_CXX_FLAGS="-fsycl"
endif
llama.cpp:
@@ -54,8 +59,8 @@ llama.cpp:
git checkout -b build $(LLAMA_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
llama.cpp/examples/grpc-server: llama.cpp
mkdir -p llama.cpp/examples/grpc-server
llama.cpp/tools/grpc-server: llama.cpp
mkdir -p llama.cpp/tools/grpc-server
bash prepare.sh
rebuild:
@@ -65,13 +70,13 @@ rebuild:
purge:
rm -rf llama.cpp/build
rm -rf llama.cpp/examples/grpc-server
rm -rf llama.cpp/tools/grpc-server
rm -rf grpc-server
clean: purge
rm -rf llama.cpp
grpc-server: llama.cpp llama.cpp/examples/grpc-server
grpc-server: llama.cpp llama.cpp/tools/grpc-server
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+bash -c "source $(ONEAPI_VARS); \
@@ -79,4 +84,4 @@ ifneq (,$(findstring sycl,$(BUILD_TYPE)))
else
+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
endif
cp llama.cpp/build/bin/grpc-server .
cp llama.cpp/build/bin/grpc-server .

View File

File diff suppressed because it is too large

View File

File diff suppressed because it is too large

View File

@@ -1,13 +1,13 @@
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 342042ff..224db9b5 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
int* patches_data = (int*)malloc(ggml_nbytes(patches));
for (int i = 0; i < num_patches; i++) {
- patches_data[i] = i + 1;
+ patches_data[i] = i;
}
ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
free(patches_data);
diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
index 3cd0d2fa..6c5e811a 100644
--- a/tools/mtmd/clip.cpp
+++ b/tools/mtmd/clip.cpp
@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
int* patches_data = (int*)malloc(ggml_nbytes(patches));
for (int i = 0; i < num_patches; i++) {
- patches_data[i] = i + 1;
+ patches_data[i] = i;
}
ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
free(patches_data);

View File

@@ -7,21 +7,46 @@ for patch in $(ls patches); do
patch -d llama.cpp/ -p1 < patches/$patch
done
cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
cp -rfv json.hpp llama.cpp/examples/grpc-server/
cp -rfv utils.hpp llama.cpp/examples/grpc-server/
if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
set -e
cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/
cp -rfv llama.cpp/tools/server/utils.hpp llama.cpp/tools/grpc-server/
cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/
set +e
if grep -q "grpc-server" llama.cpp/tools/CMakeLists.txt; then
echo "grpc-server already added"
else
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
fi
set -e
## XXX: In some versions of CMake clip wasn't being built before llama.
## This is a hack for now, but it should be fixed in the future.
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
# Now to keep maximum compatibility with the original server.cpp, we need to remove the index.html.gz.hpp and loading.html.hpp includes
# and remove the main function
# TODO: upstream this to the original server.cpp by extracting the upstream main function to a separate file
awk '
/int[ \t]+main[ \t]*\(/ { # If the line starts the main function
in_main=1; # Set a flag
open_braces=0; # Track number of open braces
}
in_main {
open_braces += gsub(/\{/, "{"); # Count opening braces
open_braces -= gsub(/\}/, "}"); # Count closing braces
if (open_braces == 0) { # If all braces are closed
in_main=0; # End skipping
}
next; # Skip lines inside main
}
!in_main # Print lines not inside main
' "llama.cpp/tools/server/server.cpp" > llama.cpp/tools/grpc-server/server.cpp
# remove index.html.gz.hpp and loading.html.hpp includes
if [[ "$OSTYPE" == "darwin"* ]]; then
# macOS
sed -i '' '/#include "index\.html\.gz\.hpp"/d; /#include "loading\.html\.hpp"/d' llama.cpp/tools/grpc-server/server.cpp
else
# Linux and others
sed -i '/#include "index\.html\.gz\.hpp"/d; /#include "loading\.html\.hpp"/d' llama.cpp/tools/grpc-server/server.cpp
fi

View File

@@ -1,483 +0,0 @@
// https://github.com/ggerganov/llama.cpp/blob/master/examples/server/utils.hpp
#pragma once
#include <string>
#include <vector>
#include <set>
#include <mutex>
#include <condition_variable>
#include <unordered_map>
#include "json.hpp"
#include "../llava/clip.h"
using json = nlohmann::json;
extern bool server_verbose;
#ifndef SERVER_VERBOSE
#define SERVER_VERBOSE 1
#endif
#if SERVER_VERBOSE != 1
#define LOG_VERBOSE(MSG, ...)
#else
#define LOG_VERBOSE(MSG, ...) \
do \
{ \
if (server_verbose) \
{ \
server_log("VERBOSE", __func__, __LINE__, MSG, __VA_ARGS__); \
} \
} while (0)
#endif
#define LOG_ERROR( MSG, ...) server_log("ERROR", __func__, __LINE__, MSG, __VA_ARGS__)
#define LOG_WARNING(MSG, ...) server_log("WARNING", __func__, __LINE__, MSG, __VA_ARGS__)
#define LOG_INFO( MSG, ...) server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__)
//
// parallel
//
enum server_state {
SERVER_STATE_LOADING_MODEL, // Server is starting up, model not fully loaded yet
SERVER_STATE_READY, // Server is ready and model is loaded
SERVER_STATE_ERROR // An error occurred, load_model failed
};
enum task_type {
TASK_TYPE_COMPLETION,
TASK_TYPE_CANCEL,
TASK_TYPE_NEXT_RESPONSE
};
struct task_server {
int id = -1; // to be filled by llama_server_queue
int target_id;
task_type type;
json data;
bool infill_mode = false;
bool embedding_mode = false;
int multitask_id = -1;
};
struct task_result {
int id;
int multitask_id = -1;
bool stop;
bool error;
json result_json;
};
struct task_multi {
int id;
std::set<int> subtasks_remaining{};
std::vector<task_result> results{};
};
// TODO: can become a bool if we don't find a use for more states
enum slot_state
{
IDLE,
PROCESSING,
};
enum slot_command
{
NONE,
LOAD_PROMPT,
RELEASE,
};
struct slot_params
{
bool stream = true;
bool cache_prompt = false; // remember the prompt to avoid reprocessing the whole prompt
uint32_t seed = -1; // RNG seed
int32_t n_keep = 0; // number of tokens to keep from initial prompt
int32_t n_predict = -1; // new tokens to predict
std::vector<std::string> antiprompt;
json input_prefix;
json input_suffix;
};
struct slot_image
{
int32_t id;
bool request_encode_image = false;
float * image_embedding = nullptr;
int32_t image_tokens = 0;
clip_image_u8 * img_data;
std::string prefix_prompt; // prompt that comes before this image
};
// completion token output with probabilities
struct completion_token_output
{
struct token_prob
{
llama_token tok;
float prob;
};
std::vector<token_prob> probs;
llama_token tok;
std::string text_to_send;
};
static inline void server_log(const char *level, const char *function, int line,
const char *message, const nlohmann::ordered_json &extra)
{
nlohmann::ordered_json log
{
{"timestamp", time(nullptr)},
{"level", level},
{"function", function},
{"line", line},
{"message", message},
};
if (!extra.empty())
{
log.merge_patch(extra);
}
const std::string str = log.dump(-1, ' ', false, json::error_handler_t::replace);
printf("%.*s\n", (int)str.size(), str.data());
fflush(stdout);
}
//
// server utils
//
template <typename T>
static T json_value(const json &body, const std::string &key, const T &default_value)
{
// Fallback null to default value
return body.contains(key) && !body.at(key).is_null()
? body.value(key, default_value)
: default_value;
}
inline std::string format_chatml(std::vector<json> messages)
{
std::ostringstream chatml_msgs;
for (auto it = messages.begin(); it != messages.end(); ++it) {
chatml_msgs << "<|im_start|>"
<< json_value(*it, "role", std::string("user")) << '\n';
chatml_msgs << json_value(*it, "content", std::string(""))
<< "<|im_end|>\n";
}
chatml_msgs << "<|im_start|>assistant" << '\n';
return chatml_msgs.str();
}
//
// work queue utils
//
struct llama_server_queue {
int id = 0;
std::mutex mutex_tasks;
// queues
std::vector<task_server> queue_tasks;
std::vector<task_server> queue_tasks_deferred;
std::vector<task_multi> queue_multitasks;
std::condition_variable condition_tasks;
// callback functions
std::function<void(task_server&)> callback_new_task;
std::function<void(task_multi&)> callback_finish_multitask;
std::function<void(void)> callback_all_task_finished;
// Add a new task to the end of the queue
int post(task_server task) {
std::unique_lock<std::mutex> lock(mutex_tasks);
if (task.id == -1) {
task.id = id++;
}
queue_tasks.push_back(std::move(task));
condition_tasks.notify_one();
return task.id;
}
// Add a new task, but defer until one slot is available
void defer(task_server task) {
std::unique_lock<std::mutex> lock(mutex_tasks);
queue_tasks_deferred.push_back(std::move(task));
}
// Get the next id for creating a new task
int get_new_id() {
std::unique_lock<std::mutex> lock(mutex_tasks);
return id++;
}
// Register function to process a new task
void on_new_task(std::function<void(task_server&)> callback) {
callback_new_task = callback;
}
// Register function to process a multitask
void on_finish_multitask(std::function<void(task_multi&)> callback) {
callback_finish_multitask = callback;
}
// Register the function to be called when the batch of tasks is finished
void on_all_tasks_finished(std::function<void(void)> callback) {
callback_all_task_finished = callback;
}
// Call when the state of one slot is changed
void notify_slot_changed() {
// move deferred tasks back to main loop
std::unique_lock<std::mutex> lock(mutex_tasks);
for (auto & task : queue_tasks_deferred) {
queue_tasks.push_back(std::move(task));
}
queue_tasks_deferred.clear();
}
// Start the main loop. This call is blocking
[[noreturn]]
void start_loop() {
while (true) {
// new task arrived
LOG_VERBOSE("have new task", {});
{
while (true)
{
std::unique_lock<std::mutex> lock(mutex_tasks);
if (queue_tasks.empty()) {
lock.unlock();
break;
}
task_server task = queue_tasks.front();
queue_tasks.erase(queue_tasks.begin());
lock.unlock();
LOG_VERBOSE("callback_new_task", {});
callback_new_task(task);
}
LOG_VERBOSE("callback_all_task_finished", {});
// process and update all the multitasks
auto queue_iterator = queue_multitasks.begin();
while (queue_iterator != queue_multitasks.end())
{
if (queue_iterator->subtasks_remaining.empty())
{
// all subtasks done == multitask is done
task_multi current_multitask = *queue_iterator;
callback_finish_multitask(current_multitask);
// remove this multitask
queue_iterator = queue_multitasks.erase(queue_iterator);
}
else
{
++queue_iterator;
}
}
// all tasks in the current loop are finished
callback_all_task_finished();
}
LOG_VERBOSE("wait for new task", {});
// wait for new task
{
std::unique_lock<std::mutex> lock(mutex_tasks);
if (queue_tasks.empty()) {
condition_tasks.wait(lock, [&]{
return !queue_tasks.empty();
});
}
}
}
}
//
// functions to manage multitasks
//
// add a multitask by specifying the ids of all its subtasks (a subtask is a task_server)
void add_multitask(int multitask_id, std::vector<int>& sub_ids)
{
std::lock_guard<std::mutex> lock(mutex_tasks);
task_multi multi;
multi.id = multitask_id;
std::copy(sub_ids.begin(), sub_ids.end(), std::inserter(multi.subtasks_remaining, multi.subtasks_remaining.end()));
queue_multitasks.push_back(multi);
}
// update the remaining subtasks, appending results to the multitask
void update_multitask(int multitask_id, int subtask_id, task_result& result)
{
std::lock_guard<std::mutex> lock(mutex_tasks);
for (auto& multitask : queue_multitasks)
{
if (multitask.id == multitask_id)
{
multitask.subtasks_remaining.erase(subtask_id);
multitask.results.push_back(result);
}
}
}
};
struct llama_server_response {
typedef std::function<void(int, int, task_result&)> callback_multitask_t;
callback_multitask_t callback_update_multitask;
// for keeping track of all tasks waiting for the result
std::set<int> waiting_task_ids;
// the main result queue
std::vector<task_result> queue_results;
std::mutex mutex_results;
std::condition_variable condition_results;
void add_waiting_task_id(int task_id) {
std::unique_lock<std::mutex> lock(mutex_results);
waiting_task_ids.insert(task_id);
}
void remove_waiting_task_id(int task_id) {
std::unique_lock<std::mutex> lock(mutex_results);
waiting_task_ids.erase(task_id);
}
// This function blocks the thread until there is a response for this task_id
task_result recv(int task_id) {
while (true)
{
std::unique_lock<std::mutex> lock(mutex_results);
condition_results.wait(lock, [&]{
return !queue_results.empty();
});
LOG_VERBOSE("condition_results unblock", {});
for (int i = 0; i < (int) queue_results.size(); i++)
{
if (queue_results[i].id == task_id)
{
assert(queue_results[i].multitask_id == -1);
task_result res = queue_results[i];
queue_results.erase(queue_results.begin() + i);
return res;
}
}
}
// should never reach here
}
// Register the function to update multitask
void on_multitask_update(callback_multitask_t callback) {
callback_update_multitask = callback;
}
// Send a new result to a waiting task_id
void send(task_result result) {
std::unique_lock<std::mutex> lock(mutex_results);
LOG_VERBOSE("send new result", {});
for (auto& task_id : waiting_task_ids) {
// LOG_TEE("waiting task id %i \n", task_id);
// for now, tasks that have associated parent multitasks just get erased once multitask picks up the result
if (result.multitask_id == task_id)
{
LOG_VERBOSE("callback_update_multitask", {});
callback_update_multitask(task_id, result.id, result);
continue;
}
if (result.id == task_id)
{
LOG_VERBOSE("queue_results.push_back", {});
queue_results.push_back(result);
condition_results.notify_one();
return;
}
}
}
};
//
// base64 utils (TODO: move to common in the future)
//
static const std::string base64_chars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789+/";
static inline bool is_base64(uint8_t c)
{
return (isalnum(c) || (c == '+') || (c == '/'));
}
static inline std::vector<uint8_t> base64_decode(const std::string & encoded_string)
{
int i = 0;
int j = 0;
int in_ = 0;
int in_len = encoded_string.size();
uint8_t char_array_4[4];
uint8_t char_array_3[3];
std::vector<uint8_t> ret;
while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_]))
{
char_array_4[i++] = encoded_string[in_]; in_++;
if (i == 4)
{
for (i = 0; i <4; i++)
{
char_array_4[i] = base64_chars.find(char_array_4[i]);
}
char_array_3[0] = ((char_array_4[0] ) << 2) + ((char_array_4[1] & 0x30) >> 4);
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
for (i = 0; (i < 3); i++)
{
ret.push_back(char_array_3[i]);
}
i = 0;
}
}
if (i)
{
for (j = i; j <4; j++)
{
char_array_4[j] = 0;
}
for (j = 0; j <4; j++)
{
char_array_4[j] = base64_chars.find(char_array_4[j]);
}
char_array_3[0] = ((char_array_4[0] ) << 2) + ((char_array_4[1] & 0x30) >> 4);
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
for (j = 0; (j < i - 1); j++)
{
ret.push_back(char_array_3[j]);
}
}
return ret;
}

25
backend/go/bark/Makefile Normal file
View File

@@ -0,0 +1,25 @@
INCLUDE_PATH := $(abspath ./)
LIBRARY_PATH := $(abspath ./)
AR?=ar
BUILD_TYPE?=
# keep the standard at C++17
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../sources/bark.cpp/examples -I$(INCLUDE_PATH)/../../../sources/bark.cpp/spm-headers -I$(INCLUDE_PATH)/../../../sources/bark.cpp -O3 -DNDEBUG -std=c++17 -fPIC
LDFLAGS = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/../../../sources/bark.cpp/build/examples -lbark -lstdc++ -lm
# warnings
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
gobark.o:
$(CXX) $(CXXFLAGS) gobark.cpp -o gobark.o -c $(LDFLAGS)
libbark.a: gobark.o
cp $(INCLUDE_PATH)/../../../sources/bark.cpp/build/libbark.a ./
$(AR) rcs libbark.a gobark.o
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml.c.o
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-alloc.c.o
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-backend.c.o
clean:
rm -f gobark.o libbark.a

View File

@@ -0,0 +1,85 @@
#include <iostream>
#include <tuple>
#include "bark.h"
#include "gobark.h"
#include "common.h"
#include "ggml.h"
struct bark_context *c;
void bark_print_progress_callback(struct bark_context *bctx, enum bark_encoding_step step, int progress, void *user_data) {
if (step == bark_encoding_step::SEMANTIC) {
printf("\rGenerating semantic tokens... %d%%", progress);
} else if (step == bark_encoding_step::COARSE) {
printf("\rGenerating coarse tokens... %d%%", progress);
} else if (step == bark_encoding_step::FINE) {
printf("\rGenerating fine tokens... %d%%", progress);
}
fflush(stdout);
}
int load_model(char *model) {
// initialize bark context
struct bark_context_params ctx_params = bark_context_default_params();
bark_params params;
params.model_path = model;
// ctx_params.verbosity = verbosity;
ctx_params.progress_callback = bark_print_progress_callback;
ctx_params.progress_callback_user_data = nullptr;
struct bark_context *bctx = bark_load_model(params.model_path.c_str(), ctx_params, params.seed);
if (!bctx) {
fprintf(stderr, "%s: Could not load model\n", __func__);
return 1;
}
c = bctx;
return 0;
}
int tts(char *text,int threads, char *dst ) {
ggml_time_init();
const int64_t t_main_start_us = ggml_time_us();
// generate audio
if (!bark_generate_audio(c, text, threads)) {
fprintf(stderr, "%s: An error occurred. If the problem persists, feel free to open an issue to report it.\n", __func__);
return 1;
}
const float *audio_data = bark_get_audio_data(c);
if (audio_data == NULL) {
fprintf(stderr, "%s: Could not get audio data\n", __func__);
return 1;
}
const int audio_arr_size = bark_get_audio_data_size(c);
std::vector<float> audio_arr(audio_data, audio_data + audio_arr_size);
write_wav_on_disk(audio_arr, dst);
// report timing
{
const int64_t t_main_end_us = ggml_time_us();
const int64_t t_load_us = bark_get_load_time(c);
const int64_t t_eval_us = bark_get_eval_time(c);
printf("\n\n");
printf("%s: load time = %8.2f ms\n", __func__, t_load_us / 1000.0f);
printf("%s: eval time = %8.2f ms\n", __func__, t_eval_us / 1000.0f);
printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us) / 1000.0f);
}
return 0;
}
int unload() {
bark_free(c);
return 0; // declared to return int, so report success explicitly
}

52
backend/go/bark/gobark.go Normal file
View File

@@ -0,0 +1,52 @@
package main
// #cgo CXXFLAGS: -I${SRCDIR}/../../../sources/bark.cpp/ -I${SRCDIR}/../../../sources/bark.cpp/encodec.cpp -I${SRCDIR}/../../../sources/bark.cpp/examples -I${SRCDIR}/../../../sources/bark.cpp/spm-headers
// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../sources/bark.cpp/build/examples -L${SRCDIR}/../../../sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon
// #include <gobark.h>
// #include <stdlib.h>
import "C"
import (
"fmt"
"unsafe"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
type Bark struct {
base.SingleThread
threads int
}
func (sd *Bark) Load(opts *pb.ModelOptions) error {
sd.threads = int(opts.Threads)
modelFile := C.CString(opts.ModelFile)
defer C.free(unsafe.Pointer(modelFile))
ret := C.load_model(modelFile)
if ret != 0 {
return fmt.Errorf("inference failed")
}
return nil
}
func (sd *Bark) TTS(opts *pb.TTSRequest) error {
t := C.CString(opts.Text)
defer C.free(unsafe.Pointer(t))
dst := C.CString(opts.Dst)
defer C.free(unsafe.Pointer(dst))
threads := C.int(sd.threads)
ret := C.tts(t, threads, dst)
if ret != 0 {
return fmt.Errorf("inference failed")
}
return nil
}
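Both Load and TTS above rely on the standard cgo ownership pattern: every Go string crossing into C is copied with C.CString and must be freed by the caller, since Go's garbage collector does not track C allocations. A self-contained sketch of just that pattern, with a hypothetical C stub standing in for load_model/tts:

package main

/*
#include <stdlib.h>
// hypothetical stub standing in for load_model / tts
static int consume(const char *s) { return s == NULL; }
*/
import "C"

import "unsafe"

func main() {
	s := C.CString("hello") // malloc'd copy; Go's GC does not manage it
	defer C.free(unsafe.Pointer(s))
	_ = C.consume(s)
}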

8
backend/go/bark/gobark.h Normal file
View File

@@ -0,0 +1,8 @@
#ifdef __cplusplus
extern "C" {
#endif
int load_model(char *model);
int tts(char *text,int threads, char *dst );
#ifdef __cplusplus
}
#endif

View File

@@ -1,7 +1,6 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
@@ -15,7 +14,7 @@ var (
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &Image{}); err != nil {
if err := grpc.StartServer(*addr, &Bark{}); err != nil {
panic(err)
}
}

3
backend/go/bark/run.sh Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/bash
set -ex
exec ./bark-cpp

View File

@@ -0,0 +1,135 @@
INCLUDE_PATH := $(abspath ./)
LIBRARY_PATH := $(abspath ./)
AR?=ar
CMAKE_ARGS?=
BUILD_TYPE?=
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
# keep the standard at C++17
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
GOCMD?=go
CGO_LDFLAGS?=
# Avoid parent make file overwriting CGO_LDFLAGS which is needed for hipblas
CGO_LDFLAGS_SYCL=
GO_TAGS?=
LD_FLAGS?=
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
CMAKE_ARGS+=-DSD_CUDA=ON
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# to CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblas)
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DSD_HIPBLAS=ON
# On OSX, Metal is disabled unless the build type is metal; with metal we
# enable it and embed the metal library (-DGGML_METAL_EMBED_LIBRARY=ON)
else ifeq ($(OS),Darwin)
ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DSD_METAL=OFF
else
CMAKE_ARGS+=-DSD_METAL=ON
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
TARGET+=--target ggml-metal
endif
endif
ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx \
-DSD_SYCL=ON \
-DGGML_SYCL_F16=ON
CC=icx
CXX=icpx
CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
CGO_CXXFLAGS += $(shell pkg-config --cflags mkl-static-lp64-gomp )
endif
ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx \
-DSD_SYCL=ON
CC=icx
CXX=icpx
CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
CGO_CXXFLAGS += $(shell pkg-config --cflags mkl-static-lp64-gomp )
endif
# warnings
# CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
# Find all .a archives in ARCHIVE_DIR
# (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive)
GGML_ARCHIVE_DIR := build/ggml/src/
ALL_ARCHIVES := $(shell find $(GGML_ARCHIVE_DIR) -type f -name '*.a')
# Name of the single merged library
COMBINED_LIB := libggmlall.a
# Rule to merge all the .a files into one
$(COMBINED_LIB): $(ALL_ARCHIVES)
@echo "Merging all .a into $(COMBINED_LIB)"
rm -f $@
mkdir -p merge-tmp
for a in $(ALL_ARCHIVES); do \
( cd merge-tmp && ar x ../$$a ); \
done
( cd merge-tmp && ar rcs ../$@ *.o )
# Ensure we have a proper index
ranlib $@
# Clean up
rm -rf merge-tmp
build/libstable-diffusion.a:
@echo "Building SD with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+bash -c "source $(ONEAPI_VARS); \
mkdir -p build && \
cd build && \
cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
cmake --build . --config Release"
else
mkdir -p build && \
cd build && \
cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
cmake --build . --config Release
endif
$(MAKE) $(COMBINED_LIB)
gosd.o:
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+bash -c "source $(ONEAPI_VARS); \
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c"
else
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
endif
libsd.a: gosd.o
cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a
$(AR) rcs libsd.a gosd.o
stablediffusion-ggml:
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_SYCL)" C_INCLUDE_PATH="$(INCLUDE_PATH)" LIBRARY_PATH="$(LIBRARY_PATH)" \
CC="$(CC)" CXX="$(CXX)" CGO_CXXFLAGS="$(CGO_CXXFLAGS)" \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o ../../../../backend-assets/grpc/stablediffusion-ggml ./
ifneq ($(UPX),)
$(UPX) ../../../../backend-assets/grpc/stablediffusion-ggml
endif
clean:
rm -rf gosd.o libsd.a build $(COMBINED_LIB)

View File

@@ -0,0 +1,231 @@
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <iostream>
#include <random>
#include <string>
#include <vector>
#include "gosd.h"
// #include "preprocessing.hpp"
#include "flux.hpp"
#include "stable-diffusion.h"
#define STB_IMAGE_IMPLEMENTATION
#define STB_IMAGE_STATIC
#include "stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#define STB_IMAGE_WRITE_STATIC
#include "stb_image_write.h"
#define STB_IMAGE_RESIZE_IMPLEMENTATION
#define STB_IMAGE_RESIZE_STATIC
#include "stb_image_resize.h"
// Names of the sampler methods, same order as enum sample_method in stable-diffusion.h
const char* sample_method_str[] = {
"euler_a",
"euler",
"heun",
"dpm2",
"dpm++2s_a",
"dpm++2m",
"dpm++2mv2",
"ipndm",
"ipndm_v",
"lcm",
"ddim_trailing",
"tcd",
};
// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
const char* schedule_str[] = {
"default",
"discrete",
"karras",
"exponential",
"ays",
"gits",
};
sd_ctx_t* sd_c;
sample_method_t sample_method;
int load_model(char *model, char* options[], int threads, int diff) {
fprintf (stderr, "Loading model!\n");
char *stableDiffusionModel = "";
if (diff == 1 ) {
stableDiffusionModel = model;
model = "";
}
// decode options. Options are in the form optname:optval, or just optname for booleans.
char *clip_l_path = "";
char *clip_g_path = "";
char *t5xxl_path = "";
char *vae_path = "";
char *scheduler = "";
char *sampler = "";
// If options is not NULL, parse options
for (int i = 0; options[i] != NULL; i++) {
char *optname = strtok(options[i], ":");
char *optval = strtok(NULL, ":");
if (optval == NULL) {
optval = "true";
}
if (!strcmp(optname, "clip_l_path")) {
clip_l_path = optval;
}
if (!strcmp(optname, "clip_g_path")) {
clip_g_path = optval;
}
if (!strcmp(optname, "t5xxl_path")) {
t5xxl_path = optval;
}
if (!strcmp(optname, "vae_path")) {
vae_path = optval;
}
if (!strcmp(optname, "scheduler")) {
scheduler = optval;
}
if (!strcmp(optname, "sampler")) {
sampler = optval;
}
}
int sample_method_found = -1;
for (int m = 0; m < N_SAMPLE_METHODS; m++) {
if (!strcmp(sampler, sample_method_str[m])) {
sample_method_found = m;
}
}
if (sample_method_found == -1) {
fprintf(stderr, "Invalid sample method, default to EULER_A!\n");
sample_method_found = EULER_A;
}
sample_method = (sample_method_t)sample_method_found;
int schedule_found = -1;
for (int d = 0; d < N_SCHEDULES; d++) {
if (!strcmp(scheduler, schedule_str[d])) {
schedule_found = d;
fprintf (stderr, "Found scheduler: %s\n", scheduler);
}
}
if (schedule_found == -1) {
fprintf (stderr, "Invalid scheduler! using DEFAULT\n");
schedule_found = DEFAULT;
}
schedule_t schedule = (schedule_t)schedule_found;
fprintf (stderr, "Creating context\n");
sd_ctx_t* sd_ctx = new_sd_ctx(model,
clip_l_path,
clip_g_path,
t5xxl_path,
stableDiffusionModel,
vae_path,
"",
"",
"",
"",
"",
false,
false,
false,
threads,
SD_TYPE_COUNT,
STD_DEFAULT_RNG,
schedule,
false,
false,
false,
false);
if (sd_ctx == NULL) {
fprintf (stderr, "failed loading model (generic error)\n");
return 1;
}
fprintf (stderr, "Created context: OK\n");
sd_c = sd_ctx;
return 0;
}
int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed , char *dst, float cfg_scale) {
sd_image_t* results;
std::vector<int> skip_layers = {7, 8, 9};
fprintf (stderr, "Generating image\n");
results = txt2img(sd_c,
text,
negativeText,
-1, //clip_skip
cfg_scale, // cfg_scale
3.5f,
0, // eta
width,
height,
sample_method,
steps,
seed,
1,
NULL,
0.9f,
20.f,
false,
"",
skip_layers.data(),
skip_layers.size(),
0,
0.01,
0.2);
if (results == NULL) {
fprintf (stderr, "NO results\n");
return 1;
}
if (results[0].data == NULL) {
fprintf (stderr, "Results with no data\n");
return 1;
}
fprintf (stderr, "Writing PNG\n");
fprintf (stderr, "DST: %s\n", dst);
fprintf (stderr, "Width: %d\n", results[0].width);
fprintf (stderr, "Height: %d\n", results[0].height);
fprintf (stderr, "Channel: %d\n", results[0].channel);
fprintf (stderr, "Data: %p\n", results[0].data);
stbi_write_png(dst, results[0].width, results[0].height, results[0].channel,
results[0].data, 0, NULL);
fprintf (stderr, "Saved resulting image to '%s'\n", dst);
// TODO: free results. Why does it crash?
free(results[0].data);
results[0].data = NULL;
free(results);
fprintf (stderr, "gen_image is done", dst);
return 0;
}
int unload() {
free_sd_ctx(sd_c);
return 0; // declared to return int, so report success explicitly
}

View File

@@ -0,0 +1,96 @@
package main
// #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include
// #cgo LDFLAGS: -L${SRCDIR}/ -lsd -lstdc++ -lm -lggmlall -lgomp
// #include <gosd.h>
// #include <stdlib.h>
import "C"
import (
"fmt"
"os"
"path/filepath"
"strings"
"unsafe"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/utils"
)
type SDGGML struct {
base.SingleThread
threads int
sampleMethod string
cfgScale float32
}
func (sd *SDGGML) Load(opts *pb.ModelOptions) error {
sd.threads = int(opts.Threads)
modelFile := C.CString(opts.ModelFile)
defer C.free(unsafe.Pointer(modelFile))
var options **C.char
// prepare the options array to pass to C
size := C.size_t(unsafe.Sizeof((*C.char)(nil)))
// allocate one extra slot for the NULL terminator expected by load_model's option loop on the C side
length := C.size_t(len(opts.Options) + 1)
options = (**C.char)(C.malloc(length * size))
view := (*[1 << 30]*C.char)(unsafe.Pointer(options))[0 : len(opts.Options)+1 : len(opts.Options)+1]
var diffusionModel int
var oo []string
for _, op := range opts.Options {
if op == "diffusion_model" {
diffusionModel = 1
continue
}
// If it's an option path, we resolve absolute path from the model path
if strings.Contains(op, ":") && strings.Contains(op, "path") {
data := strings.Split(op, ":")
data[1] = filepath.Join(opts.ModelPath, data[1])
if err := utils.VerifyPath(data[1], opts.ModelPath); err == nil {
oo = append(oo, strings.Join(data, ":"))
}
} else {
oo = append(oo, op)
}
}
fmt.Fprintf(os.Stderr, "Options: %+v\n", oo)
for i, x := range oo {
view[i] = C.CString(x)
}
view[len(oo)] = nil // terminate the array: C.malloc does not zero memory, and the C loop stops at NULL
sd.cfgScale = opts.CFGScale
ret := C.load_model(modelFile, options, C.int(opts.Threads), C.int(diffusionModel))
if ret != 0 {
return fmt.Errorf("could not load model")
}
return nil
}
func (sd *SDGGML) GenerateImage(opts *pb.GenerateImageRequest) error {
t := C.CString(opts.PositivePrompt)
defer C.free(unsafe.Pointer(t))
dst := C.CString(opts.Dst)
defer C.free(unsafe.Pointer(dst))
negative := C.CString(opts.NegativePrompt)
defer C.free(unsafe.Pointer(negative))
ret := C.gen_image(t, negative, C.int(opts.Width), C.int(opts.Height), C.int(opts.Step), C.int(opts.Seed), dst, C.float(sd.cfgScale))
if ret != 0 {
return fmt.Errorf("inference failed")
}
return nil
}
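The option-handling loop in Load can be read in isolation. Below is a simplified, self-contained sketch of the same idea; resolveOptions is illustrative only, and deliberately simplifies the original (SplitN instead of Split, and no utils.VerifyPath containment check):

package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

// resolveOptions splits "name:value" pairs, flags the diffusion_model marker,
// and rebases *_path values onto the model directory, mirroring Load above.
func resolveOptions(modelPath string, options []string) (diffusionModel bool, resolved []string) {
	for _, op := range options {
		if op == "diffusion_model" {
			diffusionModel = true
			continue
		}
		// Options that carry a path are made absolute relative to the model dir.
		if strings.Contains(op, ":") && strings.Contains(op, "path") {
			parts := strings.SplitN(op, ":", 2)
			parts[1] = filepath.Join(modelPath, parts[1])
			resolved = append(resolved, strings.Join(parts, ":"))
			continue
		}
		resolved = append(resolved, op)
	}
	return
}

func main() {
	diff, oo := resolveOptions("/models/sd", []string{"diffusion_model", "vae_path:vae.gguf", "sampler:euler"})
	fmt.Println(diff, oo) // true [vae_path:/models/sd/vae.gguf sampler:euler]
}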

View File

@@ -0,0 +1,8 @@
#ifdef __cplusplus
extern "C" {
#endif
int load_model(char *model, char* options[], int threads, int diffusionModel);
int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed, char *dst, float cfg_scale);
#ifdef __cplusplus
}
#endif

View File

@@ -1,7 +1,6 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
@@ -15,7 +14,7 @@ var (
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &Image{}); err != nil {
if err := grpc.StartServer(*addr, &SDGGML{}); err != nil {
panic(err)
}
}

View File

@@ -1,33 +0,0 @@
package main
// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/stablediffusion"
)
type Image struct {
base.SingleThread
stablediffusion *stablediffusion.StableDiffusion
}
func (image *Image) Load(opts *pb.ModelOptions) error {
var err error
// Note: the Model here is a path to a directory containing the model files
image.stablediffusion, err = stablediffusion.New(opts.ModelFile)
return err
}
func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
return image.stablediffusion.GenerateImage(
int(opts.Height),
int(opts.Width),
int(opts.Mode),
int(opts.Step),
int(opts.Seed),
opts.PositivePrompt,
opts.NegativePrompt,
opts.Dst)
}

View File

@@ -1,32 +0,0 @@
package main
// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/tinydream"
)
type Image struct {
base.SingleThread
tinydream *tinydream.TinyDream
}
func (image *Image) Load(opts *pb.ModelOptions) error {
var err error
// Note: the Model here is a path to a directory containing the model files
image.tinydream, err = tinydream.New(opts.ModelFile)
return err
}
func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
return image.tinydream.GenerateImage(
int(opts.Height),
int(opts.Width),
int(opts.Step),
int(opts.Seed),
opts.PositivePrompt,
opts.NegativePrompt,
opts.Dst)
}

View File

@@ -1,34 +0,0 @@
package main
// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
bert "github.com/go-skynet/go-bert.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
type Embeddings struct {
base.SingleThread
bert *bert.Bert
}
func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
model, err := bert.New(opts.ModelFile)
llm.bert = model
return err
}
func (llm *Embeddings) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
if len(opts.EmbeddingTokens) > 0 {
tokens := []int{}
for _, t := range opts.EmbeddingTokens {
tokens = append(tokens, int(t))
}
return llm.bert.TokenEmbeddings(tokens, bert.SetThreads(int(opts.Threads)))
}
return llm.bert.Embeddings(opts.Embeddings, bert.SetThreads(int(opts.Threads)))
}

View File

@@ -1,21 +0,0 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &Embeddings{}); err != nil {
panic(err)
}
}

View File

@@ -1,204 +0,0 @@
package main
// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/go-skynet/go-llama.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
type LLM struct {
base.SingleThread
llama *llama.LLama
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
ropeFreqBase := float32(10000)
ropeFreqScale := float32(1)
if opts.RopeFreqBase != 0 {
ropeFreqBase = opts.RopeFreqBase
}
if opts.RopeFreqScale != 0 {
ropeFreqScale = opts.RopeFreqScale
}
llamaOpts := []llama.ModelOption{
llama.WithRopeFreqBase(ropeFreqBase),
llama.WithRopeFreqScale(ropeFreqScale),
}
if opts.NGQA != 0 {
llamaOpts = append(llamaOpts, llama.WithGQA(int(opts.NGQA)))
}
if opts.RMSNormEps != 0 {
llamaOpts = append(llamaOpts, llama.WithRMSNormEPS(opts.RMSNormEps))
}
if opts.ContextSize != 0 {
llamaOpts = append(llamaOpts, llama.SetContext(int(opts.ContextSize)))
}
if opts.F16Memory {
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
}
if opts.Embeddings {
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
}
if opts.NGPULayers != 0 {
llamaOpts = append(llamaOpts, llama.SetGPULayers(int(opts.NGPULayers)))
}
llamaOpts = append(llamaOpts, llama.SetMMap(opts.MMap))
llamaOpts = append(llamaOpts, llama.SetMainGPU(opts.MainGPU))
llamaOpts = append(llamaOpts, llama.SetTensorSplit(opts.TensorSplit))
if opts.NBatch != 0 {
llamaOpts = append(llamaOpts, llama.SetNBatch(int(opts.NBatch)))
} else {
llamaOpts = append(llamaOpts, llama.SetNBatch(512))
}
if opts.NUMA {
llamaOpts = append(llamaOpts, llama.EnableNUMA)
}
if opts.LowVRAM {
llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
}
model, err := llama.New(opts.ModelFile, llamaOpts...)
llm.llama = model
return err
}
func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
ropeFreqBase := float32(10000)
ropeFreqScale := float32(1)
if opts.RopeFreqBase != 0 {
ropeFreqBase = opts.RopeFreqBase
}
if opts.RopeFreqScale != 0 {
ropeFreqScale = opts.RopeFreqScale
}
predictOptions := []llama.PredictOption{
llama.SetTemperature(opts.Temperature),
llama.SetTopP(opts.TopP),
llama.SetTopK(int(opts.TopK)),
llama.SetTokens(int(opts.Tokens)),
llama.SetThreads(int(opts.Threads)),
llama.WithGrammar(opts.Grammar),
llama.SetRopeFreqBase(ropeFreqBase),
llama.SetRopeFreqScale(ropeFreqScale),
llama.SetNegativePromptScale(opts.NegativePromptScale),
llama.SetNegativePrompt(opts.NegativePrompt),
}
if opts.PromptCacheAll {
predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
}
if opts.PromptCacheRO {
predictOptions = append(predictOptions, llama.EnablePromptCacheRO)
}
// Expected absolute path
if opts.PromptCachePath != "" {
predictOptions = append(predictOptions, llama.SetPathPromptCache(opts.PromptCachePath))
}
if opts.Mirostat != 0 {
predictOptions = append(predictOptions, llama.SetMirostat(int(opts.Mirostat)))
}
if opts.MirostatETA != 0 {
predictOptions = append(predictOptions, llama.SetMirostatETA(opts.MirostatETA))
}
if opts.MirostatTAU != 0 {
predictOptions = append(predictOptions, llama.SetMirostatTAU(opts.MirostatTAU))
}
if opts.Debug {
predictOptions = append(predictOptions, llama.Debug)
}
predictOptions = append(predictOptions, llama.SetStopWords(opts.StopPrompts...))
if opts.PresencePenalty != 0 {
predictOptions = append(predictOptions, llama.SetPenalty(opts.PresencePenalty))
}
if opts.NKeep != 0 {
predictOptions = append(predictOptions, llama.SetNKeep(int(opts.NKeep)))
}
if opts.Batch != 0 {
predictOptions = append(predictOptions, llama.SetBatch(int(opts.Batch)))
}
if opts.F16KV {
predictOptions = append(predictOptions, llama.EnableF16KV)
}
if opts.IgnoreEOS {
predictOptions = append(predictOptions, llama.IgnoreEOS)
}
if opts.Seed != 0 {
predictOptions = append(predictOptions, llama.SetSeed(int(opts.Seed)))
}
//predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))
predictOptions = append(predictOptions, llama.SetFrequencyPenalty(opts.FrequencyPenalty))
predictOptions = append(predictOptions, llama.SetMlock(opts.MLock))
predictOptions = append(predictOptions, llama.SetMemoryMap(opts.MMap))
predictOptions = append(predictOptions, llama.SetPredictionMainGPU(opts.MainGPU))
predictOptions = append(predictOptions, llama.SetPredictionTensorSplit(opts.TensorSplit))
predictOptions = append(predictOptions, llama.SetTailFreeSamplingZ(opts.TailFreeSamplingZ))
predictOptions = append(predictOptions, llama.SetTypicalP(opts.TypicalP))
return predictOptions
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
return llm.llama.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
predictOptions := buildPredictOptions(opts)
predictOptions = append(predictOptions, llama.SetTokenCallback(func(token string) bool {
results <- token
return true
}))
go func() {
_, err := llm.llama.Predict(opts.Prompt, predictOptions...)
if err != nil {
fmt.Println("err: ", err)
}
close(results)
}()
return nil
}
func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
predictOptions := buildPredictOptions(opts)
if len(opts.EmbeddingTokens) > 0 {
tokens := []int{}
for _, t := range opts.EmbeddingTokens {
tokens = append(tokens, int(t))
}
return llm.llama.TokenEmbeddings(tokens, predictOptions...)
}
return llm.llama.Embeddings(opts.Embeddings, predictOptions...)
}

View File

@@ -1,19 +0,0 @@
package main
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
panic(err)
}
}

View File

@@ -58,6 +58,9 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
if opts.Embeddings {
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
}
if opts.Reranking {
llamaOpts = append(llamaOpts, llama.EnableReranking)
}
if opts.NGPULayers != 0 {
llamaOpts = append(llamaOpts, llama.SetGPULayers(int(opts.NGPULayers)))
}
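On the client side, the counterpart of this hunk is the new Reranking flag on ModelOptions (field 71 in the proto change earlier in this diff). A minimal sketch, with the field names assumed from the usual generated bindings and a hypothetical model file:

package main

import (
	"fmt"

	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)

func main() {
	opts := &pb.ModelOptions{
		ModelFile: "bge-reranker-v2-m3.gguf", // hypothetical reranker model
		Reranking: true,                      // wired to llama.EnableReranking in the hunk above
	}
	fmt.Printf("%+v\n", opts)
}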

View File

@@ -311,12 +311,16 @@ func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error)
}
func isNormalized(k []float32) bool {
var sum float32
var sum float64
for _, v := range k {
sum += v
v64 := float64(v)
sum += v64*v64
}
return sum == 1.0
s := math.Sqrt(sum)
return s >= 0.99 && s <= 1.01
}
// TODO: This we could replace with handwritten SIMD code
@@ -328,7 +332,7 @@ func normalizedCosineSimilarity(k1, k2 []float32) float32 {
dot += k1[i] * k2[i]
}
assert(dot >= -1 && dot <= 1, fmt.Sprintf("dot = %f", dot))
assert(dot >= -1.01 && dot <= 1.01, fmt.Sprintf("dot = %f", dot))
// 2.0 * (1.0 - dot) would be the Euclidean distance
return dot
@@ -418,7 +422,7 @@ func cosineSimilarity(k1, k2 []float32, mag1 float64) float32 {
sim := float32(dot / (mag1 * math.Sqrt(mag2)))
assert(sim >= -1 && sim <= 1, fmt.Sprintf("sim = %f", sim))
assert(sim >= -1.01 && sim <= 1.01, fmt.Sprintf("sim = %f", sim))
return sim
}
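Assembled into a runnable form, the corrected check from this hunk looks as follows. The old version compared a float32 running sum to exactly 1.0, which almost never holds after rounding; the fix accumulates squared components in float64 and accepts a small tolerance on the resulting magnitude:

package main

import (
	"fmt"
	"math"
)

// isNormalized reports whether k has (approximately) unit length.
func isNormalized(k []float32) bool {
	var sum float64
	for _, v := range k {
		v64 := float64(v)
		sum += v64 * v64
	}
	s := math.Sqrt(sum)
	return s >= 0.99 && s <= 1.01
}

func main() {
	v := []float32{0.6, 0.8} // 0.36 + 0.64 = 1.0
	fmt.Println(isNormalized(v)) // true
}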

Some files were not shown because too many files have changed in this diff