Compare commits


193 Commits

Author SHA1 Message Date
Ettore Di Giacinto
455aee4eaf chore(model gallery): add qihoo360_tinyr1-32b-preview
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-02 10:23:17 +01:00
Ettore Di Giacinto
326be287da chore(model gallery): add ibm-granite_granite-3.2-2b-instruct (#4928)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-02 10:22:35 +01:00
Ettore Di Giacinto
0404d98190 chore(model gallery): add ibm-granite_granite-3.2-8b-instruct (#4927)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-02 10:19:27 +01:00
LocalAI [bot]
0a8ec1eb22 chore: ⬆️ Update ggml-org/llama.cpp to 1782cdfed60952f9ff333fc2ab5245f2be702453 (#4926)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-03-02 10:02:49 +01:00
Ettore Di Giacinto
d860932dcd fix(chatml): add endoftext stopword
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-01 21:16:10 +01:00
Ettore Di Giacinto
1cb137bd2d fix(deephermes): correct typo
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-03-01 17:07:12 +01:00
Ettore Di Giacinto
3c279e5568 chore(model gallery): add allenai_olmocr-7b-0225-preview (#4924)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-01 09:10:04 +01:00
Ettore Di Giacinto
fb55e3df57 chore(model gallery): add ozone-research_0x-lite (#4923)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-01 09:07:01 +01:00
Ettore Di Giacinto
de46fb6e2e chore(model gallery): add ozone-research_chirp-01 (#4922)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-01 09:05:03 +01:00
Ettore Di Giacinto
d7a0e3c5ea chore(model gallery): add microsoft_phi-4-mini-instruct (#4921)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-03-01 08:58:01 +01:00
LocalAI [bot]
0533ea817d chore: ⬆️ Update ggml-org/llama.cpp to 06c2b1561d8b882bc018554591f8c35eb04ad30e (#4920)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-28 22:18:14 +00:00
Ettore Di Giacinto
755e4fb5f4 feat(ui): improvements to index and models page (#4918)
- mobile-friendly index
- adjust color palette
- improve search experience

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-28 19:23:32 +01:00
LocalAI [bot]
e4fdde158f chore: ⬆️ Update ggml-org/llama.cpp to b95c8af37ccf169b0a3216b7ed691af0534e5091 (#4916)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-28 00:00:39 +00:00
Ettore Di Giacinto
6d0712fa6d fix(ui): not all models come from the gallery (#4915)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-27 19:12:41 +01:00
Ettore Di Giacinto
bbbb28e3ca fix(models): unify usecases identifications (#4914)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-27 15:51:12 +01:00
Ettore Di Giacinto
3bf2e9d065 fix(ui): not all models have an Icon (#4913)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-27 10:52:19 +01:00
Ettore Di Giacinto
1461fd8777 chore(model gallery): add locutusque_thespis-llama-3.1-8b (#4912)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-27 10:02:44 +01:00
LocalAI [bot]
054860539a chore: ⬆️ Update ggml-org/llama.cpp to a800ae46da2ed7dac236aa6bf2b595da6b6294b5 (#4911)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-26 22:43:49 +00:00
Ettore Di Giacinto
c87870b18e feat(ui): improve chat interface (#4910)
* feat(ui): show more information in the chat view, minor adjustments to the model gallery

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(ui): UI improvements

Visual improvements and bugfixes including:
- disable pagination during search
- fix scrolling on new message

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-26 18:27:18 +01:00
Ettore Di Giacinto
5ad2be9c45 feat(ui): small improvements to chat interface (#4907)
- Change chat colors
- Improve layout on small windows

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-26 11:10:40 +01:00
LocalAI [bot]
61a24746a1 chore: ⬆️ Update ggml-org/llama.cpp to d7cfe1ffe0f435d0048a6058d529daf76e072d9c (#4908)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-25 21:58:37 +00:00
Ettore Di Giacinto
d557eb9361 chore(model gallery): add latitudegames_wayfarer-large-70b-llama-3.3 (#4903)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-25 10:21:54 +01:00
Ettore Di Giacinto
a9a1a361a9 chore(model gallery): add perplexity-ai_r1-1776-distill-llama-70b (#4902)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-25 09:59:21 +01:00
Ettore Di Giacinto
12d070af80 chore(model gallery): add sicariussicariistuff_phi-line_14b (#4901)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-25 09:56:44 +01:00
LocalAI [bot]
8d40557bc8 chore: ⬆️ Update ggml-org/llama.cpp to 7a2c913e66353362d7f28d612fd3c9d51a831eda (#4899)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-25 09:51:02 +01:00
dependabot[bot]
5a5f3a899a chore(deps): Bump docs/themes/hugo-theme-relearn from 66bc366 to 02bba0f (#4898)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `66bc366` to `02bba0f`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](66bc366c47...02bba0f199)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-02-25 09:50:46 +01:00
dependabot[bot]
a2d1f133c8 chore(deps): Bump appleboy/ssh-action from 1.2.0 to 1.2.1 (#4896)
Bumps [appleboy/ssh-action](https://github.com/appleboy/ssh-action) from 1.2.0 to 1.2.1.
- [Release notes](https://github.com/appleboy/ssh-action/releases)
- [Changelog](https://github.com/appleboy/ssh-action/blob/master/.goreleaser.yaml)
- [Commits](https://github.com/appleboy/ssh-action/compare/v1.2.0...v1.2.1)

---
updated-dependencies:
- dependency-name: appleboy/ssh-action
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-02-24 21:01:39 +00:00
LocalAI [bot]
0ae6420c31 chore: ⬆️ Update ggml-org/llama.cpp to 7ad0779f5de84a68143b2c00ab5dc94a948925d3 (#4890)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-24 11:26:09 +01:00
Ettore Di Giacinto
3a3e05cf18 chore(model gallery): add flux.1dev-abliteratedv2 (#4895)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-24 10:11:32 +01:00
Ettore Di Giacinto
6a20388e25 chore(model gallery): add nohobby_l3.3-prikol-70b-extra (#4894)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-24 09:43:50 +01:00
Ettore Di Giacinto
06c836a937 chore(model gallery): add steelskull_l3.3-san-mai-r1-70b (#4893)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-24 09:41:06 +01:00
Ettore Di Giacinto
049a13fe78 chore(model gallery): add steelskull_l3.3-cu-mai-r1-70b (#4892)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-24 09:39:12 +01:00
Ettore Di Giacinto
30bf6c962f chore(stable-diffusion-ggml): update, adapt upstream changes (#4889)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-23 08:36:41 +01:00
LocalAI [bot]
a72b3a23c3 chore: ⬆️ Update ggml-org/llama.cpp to a28e0d5eb18c18e6a4598286158f427269b1444e (#4887)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-23 08:36:25 +01:00
Ettore Di Giacinto
e9971b168a feat(ui): paginate model gallery (#4886)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-22 21:38:00 +01:00
Ettore Di Giacinto
5b59b5e0c1 chore(model gallery): add steelskull_l3.3-mokume-gane-r1-70b (#4885)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-22 18:58:06 +01:00
Ettore Di Giacinto
8cfd712428 chore(model gallery): add arcee-ai_arcee-maestro-7b-preview (#4884)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-22 11:32:25 +01:00
Ettore Di Giacinto
21f7faa80d chore(model gallery): add ozone-ai_reverb-7b (#4883)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-22 11:28:27 +01:00
Ettore Di Giacinto
a6a0121118 chore(model gallery): add rombo-org_rombo-llm-v3.0-qwen-72b (#4882)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-22 11:19:04 +01:00
LocalAI [bot]
ba66aa33c5 chore: ⬆️ Update ggml-org/llama.cpp to 51f311e057723b7454d0ebe20f545a1a2c4db6b2 (#4881)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-21 21:51:02 +00:00
Ettore Di Giacinto
8fc024a770 chore(model gallery): add pocketdoc_dans-personalityengine-v1.2.0-24b (#4880)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-21 10:00:23 +01:00
Ettore Di Giacinto
52aa9d08aa chore(model gallery): add l3.1-8b-rp-ink (#4879)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-21 09:56:57 +01:00
Ettore Di Giacinto
4c9379c39e chore(model gallery): add smirki_uigen-t1.1-qwen-7b (#4878)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-21 09:54:42 +01:00
Ettore Di Giacinto
0ff2c39364 chore(model gallery): add smirki_uigen-t1.1-qwen-14b (#4877)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-21 09:52:20 +01:00
LocalAI [bot]
1af7e5dc49 chore: ⬆️ Update ggml-org/llama.cpp to c392e5094deaf2d1a7c18683214f007fad3fe42b (#4876)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-20 22:03:52 +00:00
Ettore Di Giacinto
af3bb64e42 fix(coqui): pin transformers (#4875)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-20 16:16:54 +01:00
Ettore Di Giacinto
77281f836e chore(model gallery): add internlm_oreal-7b (#4874)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-20 15:57:21 +01:00
Ettore Di Giacinto
550275811d chore(model gallery): add internlm_oreal-deepseek-r1-distill-qwen-7b (#4873)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-20 15:55:13 +01:00
Ettore Di Giacinto
c27ce6c54d chore(model gallery): add internlm_oreal-32b (#4872)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-20 15:52:28 +01:00
Ettore Di Giacinto
ac4991b069 chore(docs): update sponsor logo
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-20 15:31:41 +01:00
Ettore Di Giacinto
25bee71bb8 feat(ui): do also filter tts and image models (#4871)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-20 15:02:18 +01:00
LocalAI [bot]
b993780a3b chore: ⬆️ Update ggml-org/llama.cpp to d04e7163c85a847bc61d58c22f2c503596db7aa8 (#4870)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-20 09:42:57 +01:00
Ettore Di Giacinto
ea0c9f1168 feat(ui): show only text models in the chat interface (#4869)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-19 17:34:30 +01:00
Ettore Di Giacinto
08311f275a chore(model gallery): add sentientagi_dobby-unhinged-llama-3.3-70b (#4868)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-19 10:36:36 +01:00
Ettore Di Giacinto
4de0f2f737 chore(model gallery): add open-r1_openr1-qwen-7b (#4867)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-19 10:04:01 +01:00
Ettore Di Giacinto
42ae807c41 chore(model gallery): add pygmalionai_pygmalion-3-12b (#4866)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-19 10:02:35 +01:00
LocalAI [bot]
94593ba4c3 chore: ⬆️ Update ggml-org/llama.cpp to 63e489c025d61c7ca5ec06c5d10f36e2b76aaa1d (#4865)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-19 09:19:28 +01:00
Brandon Beiler
6a6e1a0ea9 feat(vllm): Additional vLLM config options (Disable logging, dtype, and Per-Prompt media limits) (#4855)
* Adding the following vLLM config options: disable_log_status, dtype, limit_mm_per_prompt

Signed-off-by: TheDropZone <brandonbeiler@gmail.com>

* using " marks in the config.yaml file

Signed-off-by: TheDropZone <brandonbeiler@gmail.com>

* adding in missing colon

Signed-off-by: TheDropZone <brandonbeiler@gmail.com>

---------

Signed-off-by: TheDropZone <brandonbeiler@gmail.com>
2025-02-18 19:27:58 +01:00
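
A minimal sketch of how these new options might look in a model config; the field placement, nesting, and values below are illustrative assumptions based only on the option names in the commit message, not the merged schema:

```yaml
# Hypothetical model config for the vLLM backend; names and values are examples only.
name: my-vlm                          # assumed model name
backend: vllm
parameters:
  model: Qwen/Qwen2-VL-7B-Instruct    # assumed Hugging Face repo, for illustration
dtype: "float16"                      # new option: overrides vLLM dtype selection
disable_log_status: true              # new option: silences per-request status logging
limit_mm_per_prompt:                  # new option: per-prompt media limits
  image: 2
```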
Ettore Di Giacinto
5b19af99ff feat(ui): detect model usage and display link (#4864)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 19:27:07 +01:00
Ettore Di Giacinto
28fb8e607a chore(model gallery): add nbeerbower_dumpling-qwen2.5-72b (#4862)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 12:44:59 +01:00
Ettore Di Giacinto
bb85b6ef00 feat: improve ui models list in the index (#4863)
* feat(ui): improve index

- Redirect to the chat view when clicking on a model

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Display chat icon nearby the model

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 12:44:44 +01:00
Ettore Di Giacinto
b9b5a635ca chore(model gallery): add nbeerbower_dumpling-qwen2.5-32b-v2 (#4861)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 11:53:23 +01:00
Ettore Di Giacinto
131ea5b627 chore(model gallery): add nbeerbower_dumpling-qwen2.5-14b (#4860)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 11:51:29 +01:00
Ettore Di Giacinto
fac70e9642 chore(model gallery): add allenai_llama-3.1-tulu-3.1-8b (#4859)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 11:49:26 +01:00
Ettore Di Giacinto
7e76ea40fb chore(model gallery): add kubeguru-llama3.2-3b-v0.1 (#4858)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-18 11:47:00 +01:00
LocalAI [bot]
de09ae42ef chore: ⬆️ Update ggml-org/llama.cpp to 73e2ed3ce3492d3ed70193dd09ae8aa44779651d (#4854)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-18 09:11:07 +01:00
Ettore Di Giacinto
6424f0666d chore(deps): Bump edgevpn to v0.30.1 (#4840)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-17 16:51:22 +01:00
Ettore Di Giacinto
f3ae94ca70 chore: update Image generation docs and examples (#4841)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-17 16:51:06 +01:00
LocalAI [bot]
09c9f67a02 chore: ⬆️ Update ggml-org/llama.cpp to 2eea03d86a2d132c8245468c26290ce07a27a8e8 (#4839)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-17 10:55:30 +01:00
Ettore Di Giacinto
c264ca542d fix(ci): update repository for llama.cpp
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-02-17 09:33:34 +01:00
Bas Hulsken
bbf30d416d fix: change initialization order of llama-cpp-avx512 to go before avx2 variant (#4837)
changed the initialization order so that the avx512 version of llama.cpp is now tried before the avx2 variant

Signed-off-by: Bas Hulsken <bhulsken@hotmail.com>
2025-02-17 09:32:21 +01:00
Ettore Di Giacinto
27617a1b06 chore(model gallery): add ozone-ai_0x-lite (#4835)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-16 09:23:26 +01:00
Ettore Di Giacinto
e84081769e chore(ci): cleanup before pulling images again
2025-02-16 09:20:22 +01:00
LocalAI [bot]
20119fc580 docs: ⬆️ update docs version mudler/LocalAI (#4834)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-15 22:45:11 +00:00
Ettore Di Giacinto
09941c0bfb chore(docs): update license year
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-15 18:17:15 +01:00
Ettore Di Giacinto
cabe0f4993 chore(model gallery): add davidbrowne17_llamathink-8b-instruct (#4833)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-15 17:31:46 +01:00
Ettore Di Giacinto
1977c7f190 chore(model gallery): add pygmalionai_eleusis-12b (#4832)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-15 17:21:30 +01:00
Ettore Di Giacinto
061e7c4eae chore(model gallery): add rombo-org_rombo-llm-v3.0-qwen-32b (#4830)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-15 10:58:27 +01:00
LocalAI [bot]
5313e660f6 chore: ⬆️ Update ggerganov/llama.cpp to 300907b2110cc17b4337334dc397e05de2d8f5e0 (#4829)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-14 21:51:49 +00:00
Ettore Di Giacinto
9e32fda304 fix(llama.cpp): improve context shift handling (#4820)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-14 14:55:03 +01:00
Ettore Di Giacinto
83202cae54 chore(model gallery): add nousresearch_deephermes-3-llama-3-8b-preview (#4828)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-14 12:25:00 +01:00
Ettore Di Giacinto
d96addfa9d chore(model gallery): add open-thoughts_openthinker-32b (#4827)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-14 12:03:46 +01:00
Ettore Di Giacinto
a715fe588d chore(model gallery): add sicariussicariistuff_phi-lthy4 (#4826)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-14 11:57:14 +01:00
LocalAI [bot]
2ac4a86bb4 chore: ⬆️ Update ggerganov/llama.cpp to 8a8c4ceb6050bd9392609114ca56ae6d26f5b8f5 (#4825)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-13 21:49:57 +00:00
Ettore Di Giacinto
8670d480a6 chore(model gallery): add nvidia_aceinstruct-72b (#4822)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-13 09:58:34 +01:00
Ettore Di Giacinto
af0b4ff237 chore(ci): update labels
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-02-13 09:58:19 +01:00
Ettore Di Giacinto
e694764065 chore(model gallery): add nvidia_aceinstruct-7b (#4821)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-13 09:44:53 +01:00
Ettore Di Giacinto
f3c27e0381 chore(model gallery): add nvidia_aceinstruct-1.5b (#4819)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-13 09:33:40 +01:00
LocalAI [bot]
bf44319d0d chore: ⬆️ Update ggerganov/llama.cpp to 0fb77f821f6e70ad8b8247a97d1022f0fef78991 (#4814)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-12 22:41:53 +00:00
Ettore Di Giacinto
5b133a640b chore(model gallery): add theskullery_l3.3-exp-unnamed-model-70b-v0.5 (#4813)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-12 11:05:51 +01:00
Ettore Di Giacinto
0030a3fe75 chore(model gallery): add simplescaling_s1.1-32b (#4812)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-12 11:03:05 +01:00
Ettore Di Giacinto
0a748b009e chore(ci): avoid cache hits until the CI gRPC job is fixed
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-12 09:11:40 +01:00
LocalAI [bot]
257e951def chore: ⬆️ Update ggerganov/llama.cpp to 90e4dba461b07e635fd1daf3b491c978c7dd0013 (#4810)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-12 00:13:28 +01:00
LocalAI [bot]
fbd82a2dd0 feat(swagger): update swagger (#4809)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-11 21:54:40 +00:00
Ettore Di Giacinto
5db321dad2 chore(ci): do not always regenerate the cache
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-11 16:36:00 +01:00
Ettore Di Giacinto
f5638a6354 feat(diffusers): allow to override image gen options (#4807)
Use the options field in the model config to override kwargs if needed.

This allows specifying, from the model YAML config:

```yaml

options:
- foo:bar

```

Each option will then be passed directly when calling the diffusers
pipeline, e.g.:

```python
pipe(
  foo="bar",
)
```

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-11 10:16:32 +01:00
Ettore Di Giacinto
5f64cc6328 Revert "chore(deps): Bump docs/themes/lotusdocs from f5785a2 to 975da91" (#4808)
Revert "chore(deps): Bump docs/themes/lotusdocs from `f5785a2` to `975da91` (…"

This reverts commit e57b750ca3.
2025-02-11 10:05:57 +01:00
Ettore Di Giacinto
28b10e8804 chore(swagger): update (#4805)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-11 09:51:01 +01:00
Ettore Di Giacinto
3277f5095d chore(model gallery): add agentica-org_deepscaler-1.5b-preview (#4804)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-11 09:47:19 +01:00
Ettore Di Giacinto
fe3ced2919 chore(ci): try again to bump parallelism in grpc jobs
As we moved these out to self-hosted runners

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-11 09:31:00 +01:00
LocalAI [bot]
45e37a07bb chore: ⬆️ Update ggerganov/llama.cpp to 19b392d58dc08c366d0b29bd3b9c6991fa4e1662 (#4803)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-11 09:17:02 +01:00
dependabot[bot]
e57b750ca3 chore(deps): Bump docs/themes/lotusdocs from f5785a2 to 975da91 (#4801)
Bumps [docs/themes/lotusdocs](https://github.com/colinwilson/lotusdocs) from `f5785a2` to `975da91`.
- [Release notes](https://github.com/colinwilson/lotusdocs/releases)
- [Commits](f5785a2399...975da91e83)

---
updated-dependencies:
- dependency-name: docs/themes/lotusdocs
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-02-10 22:27:14 +00:00
Ettore Di Giacinto
49df492268 chore(ci): run grpc build on self-hosted
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-10 19:44:50 +01:00
Ettore Di Giacinto
516cd660f1 chore(grpcio): reduce parallelism (#4799)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-10 18:56:13 +01:00
Ettore Di Giacinto
8fd3ace9a1 chore(grpcio): bump to 1.70 (#4798)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-10 18:38:53 +01:00
Ettore Di Giacinto
099469cb05 chore(tests): decrease parallelism for gRPC builds (#4797)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-10 12:59:59 +01:00
Ettore Di Giacinto
6be8c0c618 chore(model gallery): add localai-functioncall-qwen2.5-7b-v0.5 (#4796)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-10 12:07:35 +01:00
Dave
3cddf24747 feat: Centralized Request Processing middleware (#3847)
* squash past, centralize request middleware PR

Signed-off-by: Dave Lee <dave@gray101.com>

* migrate bruno request files to examples repo

Signed-off-by: Dave Lee <dave@gray101.com>

* fix

Signed-off-by: Dave Lee <dave@gray101.com>

* Update tests/e2e-aio/e2e_test.go

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

---------

Signed-off-by: Dave Lee <dave@gray101.com>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-02-10 12:06:16 +01:00
Ettore Di Giacinto
c330360785 chore(model gallery): add ilsp_llama-krikri-8b-instruct (#4795)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-10 09:54:54 +01:00
LocalAI [bot]
8cd51570e5 chore: ⬆️ Update ggerganov/llama.cpp to 19d3c8293b1f61acbe2dab1d49a17950fd788a4a (#4793)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-09 22:12:01 +00:00
Ettore Di Giacinto
0e7aa5cd15 chore(model gallery): add subtleone_qwen2.5-32b-erudite-writer (#4792)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-09 10:59:46 +01:00
Ettore Di Giacinto
e06a5f49de chore(model gallery): add huihui-ai_deepseek-r1-distill-llama-70b-abliterated (#4790)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-09 10:53:48 +01:00
Dave
fb2f847507 chore: migrate bruno request files to examples repo (#4788)
migrate bruno request files to examples repo

Signed-off-by: Dave Lee <dave@gray101.com>
2025-02-09 10:52:28 +01:00
LocalAI [bot]
e01acc88c9 chore: ⬆️ Update ggerganov/llama.cpp to e6e658319952f7ad269dc11275b9edddc721fc6d (#4787)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-08 21:57:40 +00:00
LocalAI [bot]
7a5912908a chore: ⬆️ Update ggerganov/llama.cpp to d2fe216fb2fb7ca8627618c9ea3a2e7886325780 (#4780)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-08 09:44:34 +01:00
Ettore Di Giacinto
4b1b942a7f chore(model gallery): add sicariussicariistuff_redemption_wind_24b (#4781)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-08 09:04:18 +01:00
Ettore Di Giacinto
230fe0098f chore(model gallery): add cognitivecomputations_dolphin3.0-mistral-24b (#4779)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-07 13:33:24 +01:00
Ettore Di Giacinto
cc163429dc chore(model gallery): add cognitivecomputations_dolphin3.0-r1-mistral-24b (#4778)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-07 13:31:49 +01:00
Ettore Di Giacinto
f670e0a91c chore(model gallery): add nohobby_l3.3-prikol-70b-v0.5 (#4777)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-07 13:29:53 +01:00
LocalAI [bot]
731674eee7 chore: ⬆️ Update ggerganov/llama.cpp to 8a59053f63fffc24e730cd3ea067760abfe4a919 (#4776)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-06 22:02:00 +00:00
Ettore Di Giacinto
cc1f6f913f fix(llama.cpp): disable mirostat as default (#2911)
Even though it can increase output quality, it has shown performance
drawbacks noticeable enough to confuse users about the speed of LocalAI
(see also https://github.com/mudler/LocalAI/issues/2780).

This changeset disables Mirostat by default (it can still be enabled
manually).

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Dave <dave@gray101.com>
2025-02-06 19:39:59 +01:00
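
A minimal sketch of re-enabling it manually for a single model; the YAML field names below (mirostat, mirostat_tau, mirostat_eta) are assumed and may differ from the actual config schema:

```yaml
# Hypothetical model config that opts back into Mirostat (values illustrative).
name: my-llama
parameters:
  model: my-model.gguf      # assumed local GGUF file
mirostat: 2                 # assumption: 0 = disabled (new default), 2 = Mirostat 2.0
mirostat_tau: 5.0           # target entropy
mirostat_eta: 0.1           # learning rate
```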
Ettore Di Giacinto
7f90ff7aec chore(llama-ggml): drop deprecated backend (#4775)
The GGML format is now dead: the next version of LocalAI already brings
many breaking compatibility changes, so we take the occasion to also drop
ggml (pre-gguf) support.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-06 18:36:23 +01:00
Ettore Di Giacinto
8d45670e41 fix(openai): consistently return stop reason (#4771)
We were not returning a stop reason when no tool was actually called
(even if tools were specified in the request).

Fixes: https://github.com/mudler/LocalAI/issues/4716

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-06 12:41:08 +01:00
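
As a rough illustration of the intended behaviour (response shape follows the OpenAI-style chat completion format; values are made up), a request that declares tools but produces no tool call should now still carry a finish reason:

```json
{
  "choices": [
    {
      "index": 0,
      "message": { "role": "assistant", "content": "No tool call was needed here." },
      "finish_reason": "stop"
    }
  ]
}
```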
Ettore Di Giacinto
e4b8ddb6a1 chore(model gallery): add black-ink-guild_pernicious_prophecy_70b (#4774)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-06 12:03:59 +01:00
Ettore Di Giacinto
a801561f81 chore(model gallery): add tiger-lab_qwen2.5-32b-instruct-cft (#4773)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-06 12:01:56 +01:00
Ettore Di Giacinto
16ced07102 chore(model gallery): add arliai_llama-3.3-70b-arliai-rpmax-v1.4 (#4772)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-06 11:59:14 +01:00
LocalAI [bot]
d35595372d chore: ⬆️ Update ggerganov/llama.cpp to d774ab3acc4fee41fbed6dbfc192b57d5f79f34b (#4770)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-06 09:02:51 +01:00
LocalAI [bot]
81be192279 chore: ⬆️ Update leejet/stable-diffusion.cpp to d46ed5e184b97c2018dc2e8105925bdb8775e02c (#4769)
⬆️ Update leejet/stable-diffusion.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-05 23:49:15 +00:00
Ettore Di Giacinto
28a1310890 chore(docs): enhance visibility
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 19:50:32 +01:00
Ettore Di Giacinto
2a702e9ca4 chore(docs): small updates
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 19:49:11 +01:00
Ettore Di Giacinto
3ecaea1b6e chore(docs): update sponsors in the website
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 19:41:55 +01:00
Ettore Di Giacinto
7daf5ac3e3 fix(gallery): do not return overrides and additional config (#4768)
When hitting /models/available we are interested in the model
description, name and a small amount of metadata. Configuration and
overrides are internals that are only required for installation.

This also solves a current bug where hitting /models/available fails if
one of the gallery items has overrides with parameters defined.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 18:37:09 +01:00
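
A quick way to inspect the slimmed-down listing, assuming the default host/port from the test collection and that entries expose name and description fields as the commit message suggests:

```bash
# Illustrative only: host, port and jq filter are assumptions.
curl -s http://localhost:8080/models/available | jq '.[0] | {name, description}'
```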
Ettore Di Giacinto
7bc80c17f8 chore(model gallery): add LocalAI-functioncall-llama3.2-3b-v0.5 (#4766)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 10:19:31 +01:00
Ettore Di Giacinto
1996ceb293 chore(model gallery): add krutrim-ai-labs_krutrim-2-instruct (#4765)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 10:17:05 +01:00
Ettore Di Giacinto
0bc3dc43da chore(model gallery): add rubenroy_gilgamesh-72b (#4764)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 10:13:21 +01:00
Ettore Di Giacinto
3324c4e6cb chore(model gallery): add agi-0_art-skynet-3b (#4763)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-05 10:09:33 +01:00
LocalAI [bot]
7329db4e78 chore: ⬆️ Update ggerganov/llama.cpp to 3ec9fd4b77b6aca03a3c2bf678eae3f9517d6904 (#4762)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-04 21:48:49 +00:00
Ettore Di Giacinto
464686aee6 chore(model gallery): add suayptalha_maestro-10b (#4760)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-04 09:51:54 +01:00
Ettore Di Giacinto
bfa3d4ccff chore(model gallery): add nohobby_l3.3-prikol-70b-v0.4 (#4759)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-04 09:50:18 +01:00
Ettore Di Giacinto
6a91288c8c chore(model gallery): add fblgit_miniclaus-qw1.5b-unamgs-grpo (#4758)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-04 09:45:52 +01:00
dependabot[bot]
96cb407ee0 chore(deps): Bump docs/themes/hugo-theme-relearn from 5bcb9fe to 66bc366 (#4750)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `5bcb9fe` to `66bc366`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](5bcb9fe5e6...66bc366c47)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-02-04 08:57:19 +01:00
dependabot[bot]
5a19094d3a chore(deps): Bump sentence-transformers from 3.4.0 to 3.4.1 in /backend/python/transformers (#4748)
chore(deps): Bump sentence-transformers in /backend/python/transformers

Bumps [sentence-transformers](https://github.com/UKPLab/sentence-transformers) from 3.4.0 to 3.4.1.
- [Release notes](https://github.com/UKPLab/sentence-transformers/releases)
- [Commits](https://github.com/UKPLab/sentence-transformers/compare/v3.4.0...v3.4.1)

---
updated-dependencies:
- dependency-name: sentence-transformers
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-02-04 08:56:51 +01:00
LocalAI [bot]
e3b943ffcb chore: ⬆️ Update ggerganov/llama.cpp to 5598f475be3e31430fbe17ebb85654ec90dc201e (#4757)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-04 08:56:11 +01:00
dependabot[bot]
df30d6a482 chore(deps): Bump GrantBirki/git-diff-action from 2.7.0 to 2.8.0 (#4746)
Bumps [GrantBirki/git-diff-action](https://github.com/grantbirki/git-diff-action) from 2.7.0 to 2.8.0.
- [Release notes](https://github.com/grantbirki/git-diff-action/releases)
- [Commits](https://github.com/grantbirki/git-diff-action/compare/v2.7.0...v2.8.0)

---
updated-dependencies:
- dependency-name: GrantBirki/git-diff-action
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-02-03 22:21:40 +00:00
Ettore Di Giacinto
c3c27b7e3d chore(model gallery): small fixups to llama3.2-fcall template
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-03 17:58:57 +01:00
Ettore Di Giacinto
431716d4d6 fix(gallery): remove box token from llama3.2-fcall
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-02-03 16:10:33 +01:00
Ettore Di Giacinto
d290fd159f chore(model gallery): add LocalAI-functioncall-llama3.2-1b-v0.4 (#4740)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-03 15:55:49 +01:00
Ettore Di Giacinto
051faaf771 chore(model gallery): add uncensoredai_uncensoredlm-deepseek-r1-distill-qwen-14b (#4739)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-03 10:46:47 +01:00
Ettore Di Giacinto
41a2dfb0d9 chore(model gallery): add thedrummer_gemmasutra-pro-27b-v1.1 (#4738)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-03 10:37:24 +01:00
Ettore Di Giacinto
ed0094c3d0 chore(model gallery): add steelskull_l3.3-damascus-r1 (#4737)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-03 10:30:07 +01:00
LocalAI [bot]
52fadeded1 feat(swagger): update swagger (#4735)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-03 10:16:42 +01:00
LocalAI [bot]
a37fa8d9c4 chore: ⬆️ Update ggerganov/llama.cpp to 90f9b88afb6447d3929843a2aa98c0f11074762d (#4736)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-02 22:18:30 +00:00
Shraddha
03974a4dd4 feat: tokenization with llama.cpp (#4724)
feat: tokenization

Signed-off-by: shraddhazpy <shraddha@shraddhafive.in>
2025-02-02 17:39:43 +00:00
Ettore Di Giacinto
1d6afbd65d feat(llama.cpp): Add support to grammar triggers (#4733)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-02 13:25:03 +01:00
LocalAI [bot]
d79f02ea09 chore: ⬆️ Update ggerganov/llama.cpp to 53debe6f3c9cca87e9520a83ee8c14d88977afa4 (#4732)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-02-01 21:45:26 +00:00
Ettore Di Giacinto
ba2f426e3e chore(model gallery): add fuseo1-deekseekr1-qwq-skyt1-32b-preview (#4731)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-02-01 10:12:15 +01:00
LocalAI [bot]
732042e5c6 chore: ⬆️ Update ggerganov/llama.cpp to aa6fb1321333fae8853d0cdc26bcb5d438e650a1 (#4728)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-31 22:31:00 +00:00
Ettore Di Giacinto
f1763aabf2 chore(model gallery): add taid-llm-1.5b (#4727)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-31 14:53:39 +01:00
Ettore Di Giacinto
e0d90b173b chore(model gallery): add tinyswallow-1.5b-instruct (#4726)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-31 14:49:02 +01:00
Ettore Di Giacinto
ff07612bfa chore(model gallery): add mistral-small-24b-instruct-2501 (#4725)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-31 14:45:42 +01:00
LocalAI [bot]
7badaf78a0 chore: ⬆️ Update ggerganov/llama.cpp to 8b576b6c55bc4e6be898b47522f0ef402b93ef62 (#4722)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-01-31 11:31:46 +00:00
Ettore Di Giacinto
af41436f1b fix(tests): pin to branch for config used in tests (#4721)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-31 09:57:58 +01:00
LocalAI [bot]
cd5489ce47 chore(model-gallery): ⬆️ update checksum (#4723)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-31 08:51:32 +01:00
Ettore Di Giacinto
60ec2cf751 chore(model gallery): add openthinker-7b (#4720)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-30 16:44:44 +01:00
Ettore Di Giacinto
244f4b564f chore(model gallery): add selene-1-mini-llama-3.1-8b (#4719)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-30 16:42:48 +01:00
Ettore Di Giacinto
f1d6d65417 chore(model gallery): add virtuoso-lite (#4718)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-30 16:38:35 +01:00
Ettore Di Giacinto
72e52c4f6a chore: drop embedded models (#4715)
Since the remote gallery was introduced, embedded models are now completely
superseded by it. To keep the code clean and remove redundant parts, let's
simplify the usage.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-30 00:03:01 +01:00
LocalAI [bot]
1656e1a88e chore: ⬆️ Update ggerganov/llama.cpp to eb7cf15a808d4d7a71eef89cc6a9b96fe82989dc (#4717)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-29 21:45:38 +00:00
Ettore Di Giacinto
7f62b418a4 chore(docs): add documentation for l4t images
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-29 15:16:07 +01:00
Maximilian Kenfenheuer
1f4e66d638 chore(model gallery): add specific message templates for llama3.2 based models (#4707)
* chore(model gallery): add specific message templates for llama3.2 based models

Signed-off-by: Maximilian Kenfenheuer <maximilian.kenfenheuer@ksol.it>

* fix: yaml lint in llama3.2-quantized.yaml

Signed-off-by: Maximilian Kenfenheuer <maximilian.kenfenheuer@ksol.it>

* fix: yaml lint in llama3.2-quantized.yaml

Signed-off-by: Maximilian Kenfenheuer <maximilian.kenfenheuer@ksol.it>

---------

Signed-off-by: Maximilian Kenfenheuer <maximilian.kenfenheuer@ksol.it>
2025-01-29 10:19:48 +01:00
Maximilian Kenfenheuer
a37b2c765c docs: update advanced-usage.md to reflect changes in #4700 (#4709)
Signed-off-by: Maximilian Kenfenheuer <maximilian.kenfenheuer@ksol.it>
2025-01-28 22:58:35 +01:00
Maximilian Kenfenheuer
b4b67e00bd refactor: function argument parsing using named regex (#4708)
Signed-off-by: Maximilian Kenfenheuer <maximilian.kenfenheuer@ksol.it>
2025-01-28 22:58:02 +01:00
LocalAI [bot]
91e1ff5a95 chore: ⬆️ Update ggerganov/llama.cpp to cae9fb4361138b937464524eed907328731b81f6 (#4711)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-28 21:45:14 +00:00
dependabot[bot]
d9204ea3b5 chore(deps): Bump dependabot/fetch-metadata from 2.2.0 to 2.3.0 (#4701)
Bumps [dependabot/fetch-metadata](https://github.com/dependabot/fetch-metadata) from 2.2.0 to 2.3.0.
- [Release notes](https://github.com/dependabot/fetch-metadata/releases)
- [Commits](https://github.com/dependabot/fetch-metadata/compare/v2.2.0...v2.3.0)

---
updated-dependencies:
- dependency-name: dependabot/fetch-metadata
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-01-28 11:50:09 +01:00
LocalAI [bot]
3d0fbcb4f7 chore: ⬆️ Update ggerganov/llama.cpp to a4417ddda98fd0558fb4d802253e68a933704b59 (#4705)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-28 09:13:43 +01:00
dependabot[bot]
03f3df9a82 chore(deps): Bump docs/themes/hugo-theme-relearn from 8dad5ee to 5bcb9fe (#4704)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `8dad5ee` to `5bcb9fe`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](8dad5ee419...5bcb9fe5e6)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-01-28 09:13:00 +01:00
dependabot[bot]
fff35d5528 chore(deps): Bump sentence-transformers from 3.3.1 to 3.4.0 in /backend/python/transformers (#4702)
chore(deps): Bump sentence-transformers in /backend/python/transformers

Bumps [sentence-transformers](https://github.com/UKPLab/sentence-transformers) from 3.3.1 to 3.4.0.
- [Release notes](https://github.com/UKPLab/sentence-transformers/releases)
- [Commits](https://github.com/UKPLab/sentence-transformers/compare/v3.3.1...v3.4.0)

---
updated-dependencies:
- dependency-name: sentence-transformers
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-01-27 21:09:50 +00:00
Maximilian Kenfenheuer
539e94db73 feat: function argument parsing using named regex (#4700)
Signed-off-by: Maximilian Kenfenheuer <maximilian.kenfenheuer@ksol.it>
2025-01-27 15:53:05 +00:00
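
The commit title is terse; the sketch below shows the general named-capture-group idea in Go. The regex, the call format, and all identifiers are illustrative assumptions, not the project's actual parsing code:

```go
package main

import (
	"fmt"
	"regexp"
)

// Hypothetical pattern: extract a function name and its raw argument string
// from text like `get_weather(city="Rome")` using named capture groups.
var callRe = regexp.MustCompile(`(?P<name>\w+)\((?P<args>[^)]*)\)`)

func main() {
	match := callRe.FindStringSubmatch(`get_weather(city="Rome", unit="celsius")`)
	fields := map[string]string{}
	for i, groupName := range callRe.SubexpNames() {
		if i > 0 && groupName != "" && match != nil {
			fields[groupName] = match[i]
		}
	}
	fmt.Println(fields["name"]) // get_weather
	fmt.Println(fields["args"]) // city="Rome", unit="celsius"
}
```

Named groups keep the extraction readable compared to positional indices, which is presumably the motivation behind the refactor.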
Ettore Di Giacinto
0f4f62cf3c chore(model gallery): add fuseo1-deepseekr1-qwq-32b-preview (#4699)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-27 09:51:06 +01:00
Ettore Di Giacinto
e7cffd7afa chore(model gallery): add fuseo1-deepseekr1-qwen2.5-instruct-32b-preview (#4698)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-27 09:31:47 +01:00
Ettore Di Giacinto
26d790a2b6 chore(model gallery): add fuseo1-deepseekr1-qwen2.5-coder-32b-preview-v0.1 (#4697)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-27 09:28:29 +01:00
Ettore Di Giacinto
5cf838c08d chore(model gallery): add confucius-o1-14b (#4696)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-27 09:26:00 +01:00
LocalAI [bot]
4db8f5cbce chore: ⬆️ Update ggerganov/llama.cpp to 178a7eb952d211b8d4232d5e50ae1b64519172a9 (#4694)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-26 21:44:54 +00:00
Ettore Di Giacinto
3b6b37a81b chore(model gallery): add deepseek-r1-qwen-2.5-32b-ablated (#4693)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-26 10:06:06 +01:00
Ettore Di Giacinto
8f5aa2d9de chore(model gallery): add dumpling-qwen2.5-32b (#4692)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-26 10:03:46 +01:00
Ettore Di Giacinto
a6bc8aa7c7 chore(model gallery): add l3.3-nevoria-r1-70b (#4691)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-26 10:01:37 +01:00
LocalAI [bot]
4ab107bc1a chore: ⬆️ Update ggerganov/llama.cpp to 26771a1491f3a4c3d5b99c4c267b81aca9a7dfa0 (#4690)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-25 21:44:14 +00:00
Ettore Di Giacinto
4c3710a531 chore(model gallery): add chuluun-qwen2.5-72b-v0.08 (#4689)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-25 11:07:31 +01:00
Ettore Di Giacinto
901b06284a chore(model gallery): add art-v0-3b (#4688)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-25 11:06:05 +01:00
Ettore Di Giacinto
8eef5a2c5e chore(model gallery): add lamarck-14b-v0.7 (#4687)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-25 11:04:12 +01:00
Gianluca Boiano
e9cace137b chore(model gallery): update deepseek-r1 prompt template (#4686)
Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
2025-01-25 09:04:38 +01:00
LocalAI [bot]
9409c99738 chore: ⬆️ Update ggerganov/llama.cpp to c5d9effb49649db80a52caf5c0626de6f342f526 (#4685)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-01-24 21:45:54 +00:00
Ettore Di Giacinto
4d44ebc2f2 chore(deps): bump grpcio to 1.70.0 (#4682)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-24 10:18:22 +01:00
196 changed files with 244605 additions and 3808 deletions


@@ -1,23 +0,0 @@
meta {
name: musicgen
type: http
seq: 1
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/sound-generation
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model_id": "facebook/musicgen-small",
"text": "Exciting 80s Newscast Interstitial",
"duration_seconds": 8
}
}


@@ -1,17 +0,0 @@
meta {
name: backend monitor
type: http
seq: 4
}
get {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/monitor
body: json
auth: none
}
body:json {
{
"model": "{{DEFAULT_MODEL}}"
}
}


@@ -1,21 +0,0 @@
meta {
name: backend-shutdown
type: http
seq: 3
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/shutdown
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}"
}
}


@@ -1,5 +0,0 @@
{
"version": "1",
"name": "LocalAI Test Requests",
"type": "collection"
}


@@ -1,6 +0,0 @@
vars {
HOST: localhost
PORT: 8080
DEFAULT_MODEL: gpt-3.5-turbo
PROTOCOL: http://
}


@@ -1,11 +0,0 @@
meta {
name: get models list
type: http
seq: 2
}
get {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models
body: none
auth: none
}


@@ -1,25 +0,0 @@
meta {
name: Generate image
type: http
seq: 1
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/images/generations
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"prompt": "<positive prompt>|<negative prompt>",
"model": "model-name",
"step": 51,
"size": "1024x1024",
"image": ""
}
}


@@ -1,24 +0,0 @@
meta {
name: -completions
type: http
seq: 4
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/completions
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"prompt": "function downloadFile(string url, string outputPath) {",
"max_tokens": 256,
"temperature": 0.5
}
}


@@ -1,23 +0,0 @@
meta {
name: -edits
type: http
seq: 5
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/edits
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"input": "What day of the wek is it?",
"instruction": "Fix the spelling mistakes"
}
}


@@ -1,22 +0,0 @@
meta {
name: -embeddings
type: http
seq: 6
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/embeddings
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
}
}


@@ -1,30 +0,0 @@
meta {
name: chat completion -simple- 1 message-
type: http
seq: 4
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"messages": [
{
"role": "user",
"content": "How could one use friction to cook an egg?"
}
],
"max_tokens": 256,
"temperature": 0.2,
"grammar": ""
}
}


@@ -1,29 +0,0 @@
meta {
name: chat-completions -long-
type: http
seq: 5
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"messages": [{"role": "system", "content": "You are a helpful, intelligent pirate assistant. Always answer as helpfully as possible, while being a pirate and using pirate language. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."},
{"role": "user", "content": "How could one use electricity to cook an egg?"},
{"role": "assistant",
"content": "Shiver me timbers! Using electricity to cook an egg? Well, matey, I reckon that be a right curious idea! *adjusts spectacles* Now, I ain't sure if this be possible, but I'll do me best to help ye out.\n\nFirst things first, ye gotta understand that electricity be a powerful force, and it can't just cook an egg on its own. Ye see, electricity be like a mighty wind that can make things happen, but it needs somethin' to work with. So, if ye want to use electricity to cook an egg, ye gotta find a way to harness that power and make it do the cookin'.\n\nNow, I know what ye might be thinkin': \"How do I harness the power of electricity to cook an egg?\" Well, matey, there be a few ways to do it. One way be to use a special device called an \"electric frying pan.\" This be a pan that has a built-in heating element that gets hot when ye plug it into a wall socket. When the element gets hot, ye can crack an egg into the pan and watch as it cook"
},
{"role": "user", "content": "I don't have one of those, just a raw wire and plenty of power! How do we get it done?"}],
"max_tokens": 1024,
"temperature": 0.5
}
}


@@ -1,25 +0,0 @@
meta {
name: chat-completions -stream-
type: http
seq: 6
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"messages": [{"role": "user", "content": "Explain how I can set sail on the ocean using only power generated by seagulls?"}],
"max_tokens": 256,
"temperature": 0.9,
"stream": true
}
}


@@ -1,22 +0,0 @@
meta {
name: add model gallery
type: http
seq: 10
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"url": "file:///home/dave/projects/model-gallery/huggingface/TheBloke__CodeLlama-7B-Instruct-GGML.yaml",
"name": "test"
}
}


@@ -1,21 +0,0 @@
meta {
name: delete model gallery
type: http
seq: 11
}
delete {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"name": "test"
}
}


@@ -1,11 +0,0 @@
meta {
name: list MODELS in galleries
type: http
seq: 7
}
get {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/available
body: none
auth: none
}


@@ -1,11 +0,0 @@
meta {
name: list model GALLERIES
type: http
seq: 8
}
get {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
body: none
auth: none
}


@@ -1,11 +0,0 @@
meta {
name: model delete
type: http
seq: 7
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
body: none
auth: none
}


@@ -1,21 +0,0 @@
meta {
name: model gallery apply -gist-
type: http
seq: 12
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"id": "TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q2_K.bin"
}
}


@@ -1,22 +0,0 @@
meta {
name: model gallery apply
type: http
seq: 9
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"id": "dave@TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q3_K_S.bin",
"name": "codellama7b"
}
}


Binary file not shown.


@@ -1,16 +0,0 @@
meta {
name: transcribe
type: http
seq: 1
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/audio/transcriptions
body: multipartForm
auth: none
}
body:multipart-form {
file: @file(transcription/gb1.ogg)
model: whisper-1
}


@@ -1,22 +0,0 @@
meta {
name: -tts
type: http
seq: 2
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"model": "{{DEFAULT_MODEL}}",
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
}
}


@@ -1,23 +0,0 @@
meta {
name: musicgen
type: http
seq: 2
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
body: json
auth: none
}
headers {
Content-Type: application/json
}
body:json {
{
"backend": "transformers",
"model": "facebook/musicgen-small",
"input": "80s Synths playing Jazz"
}
}

.github/labeler.yml (vendored) · 2 changes

@@ -1,4 +1,4 @@
enhancements:
enhancement:
- head-branch: ['^feature', 'feature']
dependencies:


@@ -9,7 +9,7 @@ jobs:
fail-fast: false
matrix:
include:
- repository: "ggerganov/llama.cpp"
- repository: "ggml-org/llama.cpp"
variable: "CPPLLAMA_VERSION"
branch: "master"
- repository: "ggerganov/whisper.cpp"


@@ -14,7 +14,7 @@ jobs:
steps:
- name: Dependabot metadata
id: metadata
uses: dependabot/fetch-metadata@v2.2.0
uses: dependabot/fetch-metadata@v2.3.0
with:
github-token: "${{ secrets.GITHUB_TOKEN }}"
skip-commit-verification: true


@@ -33,7 +33,7 @@ jobs:
run: |
CGO_ENABLED=0 make build-api
- name: rm
uses: appleboy/ssh-action@v1.2.0
uses: appleboy/ssh-action@v1.2.1
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
@@ -53,7 +53,7 @@ jobs:
rm: true
target: ./local-ai
- name: restarting
uses: appleboy/ssh-action@v1.2.0
uses: appleboy/ssh-action@v1.2.1
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}


@@ -2,9 +2,10 @@ name: 'generate and publish GRPC docker caches'
on:
workflow_dispatch:
push:
branches:
- master
schedule:
# daily at midnight
- cron: '0 0 * * *'
concurrency:
group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
@@ -16,7 +17,7 @@ jobs:
matrix:
include:
- grpc-base-image: ubuntu:22.04
runs-on: 'ubuntu-latest'
runs-on: 'arc-runner-set'
platforms: 'linux/amd64,linux/arm64'
runs-on: ${{matrix.runs-on}}
steps:

View File

@@ -310,6 +310,11 @@ jobs:
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
- name: Cleanup
run: |
docker builder prune -f
docker system prune --force --volumes --all
- name: Latest tag
# run this on branches, when it is a tag and there is a latest-image defined
if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'

View File

@@ -18,7 +18,7 @@ jobs:
with:
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
- uses: GrantBirki/git-diff-action@v2.8.0
id: git-diff-action
with:
json_diff_file_output: diff.json
@@ -99,7 +99,7 @@ jobs:
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
- uses: GrantBirki/git-diff-action@v2.8.0
id: git-diff-action
with:
json_diff_file_output: diff.json

View File

@@ -1,6 +1,6 @@
MIT License
Copyright (c) 2023-2024 Ettore Di Giacinto (mudler@localai.io)
Copyright (c) 2023-2025 Ettore Di Giacinto (mudler@localai.io)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@@ -6,9 +6,7 @@ BINARY_NAME=local-ai
DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=6152129d05870cb38162c422c6ba80434e021e9f
CPPLLAMA_VERSION?=1782cdfed60952f9ff333fc2ab5245f2be702453
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
@@ -24,7 +22,7 @@ BARKCPP_VERSION?=v1.0.0
# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=5eb15ef4d022bef4a391de4f5f6556e81fbb5024
STABLEDIFFUSION_GGML_VERSION?=19d876ee300a055629926ff836489901f734f2b7
ONNX_VERSION?=1.20.0
ONNX_ARCH?=x64
@@ -151,7 +149,6 @@ ifeq ($(BUILD_TYPE),hipblas)
LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
export CXX=$(ROCM_HOME)/llvm/bin/clang++
export CC=$(ROCM_HOME)/llvm/bin/clang
# llama-ggml has no hipblas support, so override it here.
export STABLE_BUILD_TYPE=
export GGML_HIP=1
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
@@ -188,7 +185,6 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
@@ -222,19 +218,6 @@ endif
all: help
## go-llama.cpp
sources/go-llama.cpp:
mkdir -p sources/go-llama.cpp
cd sources/go-llama.cpp && \
git init && \
git remote add origin $(GOLLAMA_REPO) && \
git fetch origin && \
git checkout $(GOLLAMA_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
## bark.cpp
sources/bark.cpp:
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
@@ -310,19 +293,17 @@ sources/whisper.cpp:
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
replace:
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
dropreplace:
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
prepare-sources: get-sources replace
$(GOCMD) mod download
@@ -330,7 +311,6 @@ prepare-sources: get-sources replace
## GENERIC
rebuild: ## Rebuilds the project
$(GOCMD) clean -cache
$(MAKE) -C sources/go-llama.cpp clean
$(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-piper clean
$(MAKE) build
@@ -434,7 +414,7 @@ run: prepare ## run local-ai
test-models/testmodel.ggml:
mkdir test-models
mkdir test-dir
wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel.ggml
wget -q https://huggingface.co/RichardErkhov/Qwen_-_Qwen2-1.5B-Instruct-gguf/resolve/main/Qwen2-1.5B-Instruct.Q2_K.gguf -O test-models/testmodel.ggml
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
@@ -449,8 +429,7 @@ test: prepare test-models/testmodel.ggml grpcs
export GO_TAGS="tts debug"
$(MAKE) prepare-test
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
$(MAKE) test-llama
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
$(MAKE) test-llama-gguf
$(MAKE) test-tts
$(MAKE) test-stablediffusion
@@ -479,10 +458,6 @@ teardown-e2e:
rm -rf $(TEST_DIR) || true
docker stop $$(docker ps -q --filter ancestor=localai-tests)
test-llama: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
test-llama-gguf: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
@@ -760,13 +735,6 @@ backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
mkdir -p backend-assets/util/
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/llama-ggml
endif
backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/
@@ -861,7 +829,7 @@ swagger:
.PHONY: gen-assets
gen-assets:
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
$(GOCMD) run core/dependencies_manager/manager.go webui_static.yaml core/http/static/assets
## Documentation
docs/layouts/_default:

View File

@@ -212,7 +212,7 @@ A huge thank you to our generous sponsors who support this project covering CI e
<p align="center">
<a href="https://www.spectrocloud.com/" target="blank">
<img height="200" src="https://github.com/go-skynet/LocalAI/assets/2420543/68a6f3cb-8a65-4a4d-99b5-6417a8905512">
<img height="200" src="https://github.com/user-attachments/assets/72eab1dd-8b93-4fc0-9ade-84db49f24962">
</a>
<a href="https://www.premai.io/" target="blank">
<img height="200" src="https://github.com/mudler/LocalAI/assets/2420543/42e4ca83-661e-4f79-8e46-ae43689683d6"> <br>

aio/cpu/vad.yaml (new file, 8 lines)
View File

@@ -0,0 +1,8 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808

View File

@@ -129,7 +129,7 @@ detect_gpu
detect_gpu_size
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vad.yaml,/aio/${PROFILE}/vision.yaml}"
check_vars

aio/gpu-8g/vad.yaml (new file, 8 lines)
View File

@@ -0,0 +1,8 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808

aio/intel/vad.yaml (new file, 8 lines)
View File

@@ -0,0 +1,8 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808

View File

@@ -163,6 +163,11 @@ message Reply {
double timing_token_generation = 5;
}
message GrammarTrigger {
string word = 1;
bool at_start = 2;
}
message ModelOptions {
string Model = 1;
int32 ContextSize = 2;
@@ -224,6 +229,11 @@ message ModelOptions {
int32 MaxModelLen = 54;
int32 TensorParallelSize = 55;
string LoadFormat = 58;
bool DisableLogStatus = 66;
string DType = 67;
int32 LimitImagePerPrompt = 68;
int32 LimitVideoPerPrompt = 69;
int32 LimitAudioPerPrompt = 70;
string MMProj = 41;
@@ -247,6 +257,8 @@ message ModelOptions {
string CacheTypeKey = 63;
string CacheTypeValue = 64;
repeated GrammarTrigger GrammarTriggers = 65;
}
message Result {

View File

@@ -468,6 +468,9 @@ struct llama_server_context
bool add_bos_token = true;
bool has_eos_token = true;
bool grammar_lazy = false;
std::vector<common_grammar_trigger> grammar_trigger_words;
int32_t n_ctx; // total context for all clients / slots
// system prompt
@@ -706,6 +709,8 @@ struct llama_server_context
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
slot->sparams.grammar_trigger_words = grammar_trigger_words;
slot->sparams.grammar_lazy = grammar_lazy;
if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
// Might be better to reject the request with a 400 ?
@@ -1150,6 +1155,14 @@ struct llama_server_context
slot.has_next_token = false;
}
if (slot.n_past >= slot.n_ctx) {
slot.truncated = true;
slot.stopped_limit = true;
slot.has_next_token = false;
LOG_VERBOSE("stopped due to running out of context capacity", {});
}
if (result.tok == llama_vocab_eos(vocab) || llama_vocab_is_eog(vocab, result.tok))
{
slot.stopped_eos = true;
@@ -1622,17 +1635,17 @@ struct llama_server_context
{
if (slot.is_processing() && system_tokens.size() + slot.cache_tokens.size() >= (size_t) slot.n_ctx)
{
// this check is redundant (for good)
// we should never get here, because generation should already stopped in process_token()
// START LOCALAI changes
// Temporary disable context-shifting as it can lead to infinite loops (issue: https://github.com/ggerganov/llama.cpp/issues/3969)
// See: https://github.com/mudler/LocalAI/issues/1333
// Context is exhausted, release the slot
slot.release();
send_final_response(slot);
slot.cache_tokens.clear();
slot.n_past = 0;
slot.truncated = false;
slot.has_next_token = true;
LOG("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
slot.has_next_token = false;
LOG_ERROR("context is exhausted, release the slot", {});
continue;
// END LOCALAI changes
@@ -2374,6 +2387,21 @@ static void params_parse(const backend::ModelOptions* request,
if ( request->ropefreqscale() != 0.0f ) {
params.rope_freq_scale = request->ropefreqscale();
}
if (request->grammartriggers_size() > 0) {
LOG_INFO("configuring grammar triggers", {});
llama.grammar_lazy = true;
for (int i = 0; i < request->grammartriggers_size(); i++) {
common_grammar_trigger trigger;
trigger.word = request->grammartriggers(i).word();
trigger.at_start = request->grammartriggers(i).at_start();
llama.grammar_trigger_words.push_back(trigger);
LOG_INFO("grammar trigger", {
{ "word", trigger.word },
{ "at_start", trigger.at_start }
});
}
}
}
@@ -2522,6 +2550,18 @@ public:
return grpc::Status::OK;
}
grpc::Status TokenizeString(ServerContext* context, const backend::PredictOptions* request, backend::TokenizationResponse* response){
json data = parse_options(false, request, llama);
std::vector<llama_token> tokens = llama.tokenize(data["prompt"],false);
for (int i=0 ; i< tokens.size(); i++){
response->add_tokens(tokens[i]);
}
return grpc::Status::OK;
}
grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
llama_client_slot* active_slot = llama.get_active_slot();

View File

@@ -35,6 +35,8 @@ const char* sample_method_str[] = {
"ipndm",
"ipndm_v",
"lcm",
"ddim_trailing",
"tcd",
};
// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
@@ -173,6 +175,7 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
-1, //clip_skip
cfg_scale, // sfg_scale
3.5f,
0, // eta
width,
height,
sample_method,

View File

@@ -1,204 +0,0 @@
package main
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/go-skynet/go-llama.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
type LLM struct {
base.SingleThread
llama *llama.LLama
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
ropeFreqBase := float32(10000)
ropeFreqScale := float32(1)
if opts.RopeFreqBase != 0 {
ropeFreqBase = opts.RopeFreqBase
}
if opts.RopeFreqScale != 0 {
ropeFreqScale = opts.RopeFreqScale
}
llamaOpts := []llama.ModelOption{
llama.WithRopeFreqBase(ropeFreqBase),
llama.WithRopeFreqScale(ropeFreqScale),
}
if opts.NGQA != 0 {
llamaOpts = append(llamaOpts, llama.WithGQA(int(opts.NGQA)))
}
if opts.RMSNormEps != 0 {
llamaOpts = append(llamaOpts, llama.WithRMSNormEPS(opts.RMSNormEps))
}
if opts.ContextSize != 0 {
llamaOpts = append(llamaOpts, llama.SetContext(int(opts.ContextSize)))
}
if opts.F16Memory {
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
}
if opts.Embeddings {
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
}
if opts.NGPULayers != 0 {
llamaOpts = append(llamaOpts, llama.SetGPULayers(int(opts.NGPULayers)))
}
llamaOpts = append(llamaOpts, llama.SetMMap(opts.MMap))
llamaOpts = append(llamaOpts, llama.SetMainGPU(opts.MainGPU))
llamaOpts = append(llamaOpts, llama.SetTensorSplit(opts.TensorSplit))
if opts.NBatch != 0 {
llamaOpts = append(llamaOpts, llama.SetNBatch(int(opts.NBatch)))
} else {
llamaOpts = append(llamaOpts, llama.SetNBatch(512))
}
if opts.NUMA {
llamaOpts = append(llamaOpts, llama.EnableNUMA)
}
if opts.LowVRAM {
llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
}
model, err := llama.New(opts.ModelFile, llamaOpts...)
llm.llama = model
return err
}
func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
ropeFreqBase := float32(10000)
ropeFreqScale := float32(1)
if opts.RopeFreqBase != 0 {
ropeFreqBase = opts.RopeFreqBase
}
if opts.RopeFreqScale != 0 {
ropeFreqScale = opts.RopeFreqScale
}
predictOptions := []llama.PredictOption{
llama.SetTemperature(opts.Temperature),
llama.SetTopP(opts.TopP),
llama.SetTopK(int(opts.TopK)),
llama.SetTokens(int(opts.Tokens)),
llama.SetThreads(int(opts.Threads)),
llama.WithGrammar(opts.Grammar),
llama.SetRopeFreqBase(ropeFreqBase),
llama.SetRopeFreqScale(ropeFreqScale),
llama.SetNegativePromptScale(opts.NegativePromptScale),
llama.SetNegativePrompt(opts.NegativePrompt),
}
if opts.PromptCacheAll {
predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
}
if opts.PromptCacheRO {
predictOptions = append(predictOptions, llama.EnablePromptCacheRO)
}
// Expected absolute path
if opts.PromptCachePath != "" {
predictOptions = append(predictOptions, llama.SetPathPromptCache(opts.PromptCachePath))
}
if opts.Mirostat != 0 {
predictOptions = append(predictOptions, llama.SetMirostat(int(opts.Mirostat)))
}
if opts.MirostatETA != 0 {
predictOptions = append(predictOptions, llama.SetMirostatETA(opts.MirostatETA))
}
if opts.MirostatTAU != 0 {
predictOptions = append(predictOptions, llama.SetMirostatTAU(opts.MirostatTAU))
}
if opts.Debug {
predictOptions = append(predictOptions, llama.Debug)
}
predictOptions = append(predictOptions, llama.SetStopWords(opts.StopPrompts...))
if opts.PresencePenalty != 0 {
predictOptions = append(predictOptions, llama.SetPenalty(opts.PresencePenalty))
}
if opts.NKeep != 0 {
predictOptions = append(predictOptions, llama.SetNKeep(int(opts.NKeep)))
}
if opts.Batch != 0 {
predictOptions = append(predictOptions, llama.SetBatch(int(opts.Batch)))
}
if opts.F16KV {
predictOptions = append(predictOptions, llama.EnableF16KV)
}
if opts.IgnoreEOS {
predictOptions = append(predictOptions, llama.IgnoreEOS)
}
if opts.Seed != 0 {
predictOptions = append(predictOptions, llama.SetSeed(int(opts.Seed)))
}
//predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))
predictOptions = append(predictOptions, llama.SetFrequencyPenalty(opts.FrequencyPenalty))
predictOptions = append(predictOptions, llama.SetMlock(opts.MLock))
predictOptions = append(predictOptions, llama.SetMemoryMap(opts.MMap))
predictOptions = append(predictOptions, llama.SetPredictionMainGPU(opts.MainGPU))
predictOptions = append(predictOptions, llama.SetPredictionTensorSplit(opts.TensorSplit))
predictOptions = append(predictOptions, llama.SetTailFreeSamplingZ(opts.TailFreeSamplingZ))
predictOptions = append(predictOptions, llama.SetTypicalP(opts.TypicalP))
return predictOptions
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
return llm.llama.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
predictOptions := buildPredictOptions(opts)
predictOptions = append(predictOptions, llama.SetTokenCallback(func(token string) bool {
results <- token
return true
}))
go func() {
_, err := llm.llama.Predict(opts.Prompt, predictOptions...)
if err != nil {
fmt.Println("err: ", err)
}
close(results)
}()
return nil
}
func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
predictOptions := buildPredictOptions(opts)
if len(opts.EmbeddingTokens) > 0 {
tokens := []int{}
for _, t := range opts.EmbeddingTokens {
tokens = append(tokens, int(t))
}
return llm.llama.TokenEmbeddings(tokens, predictOptions...)
}
return llm.llama.Embeddings(opts.Embeddings, predictOptions...)
}

View File

@@ -1,19 +0,0 @@
package main
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
panic(err)
}
}

View File

@@ -1,6 +1,6 @@
accelerate
auto-gptq==0.7.1
grpcio==1.69.0
grpcio==1.70.0
protobuf
certifi
transformers

View File

@@ -1,4 +1,4 @@
bark==0.1.5
grpcio==1.69.0
grpcio==1.70.0
protobuf
certifi

View File

@@ -1,3 +1,3 @@
grpcio==1.69.0
grpcio==1.70.0
protobuf
grpcio-tools

View File

@@ -1,4 +1,4 @@
transformers
transformers==4.48.3
accelerate
torch==2.4.1
coqui-tts

View File

@@ -1,6 +1,6 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
torchaudio==2.4.1+cu118
transformers
transformers==4.48.3
accelerate
coqui-tts

View File

@@ -1,5 +1,5 @@
torch==2.4.1
torchaudio==2.4.1
transformers
transformers==4.48.3
accelerate
coqui-tts

View File

@@ -1,6 +1,6 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0
torchaudio==2.4.1+rocm6.0
transformers
transformers==4.48.3
accelerate
coqui-tts

View File

@@ -5,6 +5,6 @@ torchaudio==2.3.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu
optimum[openvino]
setuptools
transformers
transformers==4.48.3
accelerate
coqui-tts

View File

@@ -1,4 +1,4 @@
grpcio==1.69.0
grpcio==1.70.0
protobuf
certifi
packaging==24.1

View File

@@ -159,6 +159,18 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
torchType = torch.float16
variant = "fp16"
options = request.Options
# empty dict
self.options = {}
# The options are a list of strings in this form optname:optvalue
# We are storing all the options in a dict so we can use it later when
# generating the images
for opt in options:
key, value = opt.split(":")
self.options[key] = value
local = False
modelFile = request.Model
@@ -441,6 +453,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
# create a dictionary of parameters by using the keys from EnableParameters and the values from defaults
kwargs = {key: options.get(key) for key in keys if key in options}
# populate kwargs from self.options.
kwargs.update(self.options)
# Set seed
if request.seed > 0:
kwargs["generator"] = torch.Generator(device=self.device).manual_seed(

View File

@@ -1,5 +1,5 @@
setuptools
grpcio==1.69.0
grpcio==1.70.0
pillow
protobuf
certifi

View File

@@ -1,4 +1,4 @@
grpcio==1.69.0
grpcio==1.70.0
protobuf
certifi
wheel

View File

@@ -1,3 +1,3 @@
grpcio==1.69.0
grpcio==1.70.0
protobuf
grpcio-tools

View File

@@ -1,4 +1,4 @@
grpcio==1.69.0
grpcio==1.70.0
protobuf
phonemizer
scipy

View File

@@ -1,3 +1,3 @@
grpcio==1.69.0
grpcio==1.70.0
protobuf
certifi

View File

@@ -5,4 +5,4 @@ accelerate
transformers
bitsandbytes
outetts
sentence-transformers==3.3.1
sentence-transformers==3.4.1

View File

@@ -6,4 +6,4 @@ accelerate
transformers
bitsandbytes
outetts
sentence-transformers==3.3.1
sentence-transformers==3.4.1

View File

@@ -5,4 +5,4 @@ numba==0.60.0
transformers
bitsandbytes
outetts
sentence-transformers==3.3.1
sentence-transformers==3.4.1

View File

@@ -7,4 +7,4 @@ numba==0.60.0
bitsandbytes
outetts
bitsandbytes
sentence-transformers==3.3.1
sentence-transformers==3.4.1

View File

@@ -8,4 +8,4 @@ numba==0.60.0
intel-extension-for-transformers
bitsandbytes
outetts
sentence-transformers==3.3.1
sentence-transformers==3.4.1

View File

@@ -1,4 +1,4 @@
grpcio==1.69.0
grpcio==1.70.0
protobuf
certifi
setuptools

View File

@@ -109,6 +109,17 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
engine_args.swap_space = request.SwapSpace
if request.MaxModelLen != 0:
engine_args.max_model_len = request.MaxModelLen
if request.DisableLogStatus:
engine_args.disable_log_status = request.DisableLogStatus
if request.DType != "":
engine_args.dtype = request.DType
if request.LimitImagePerPrompt != 0 or request.LimitVideoPerPrompt != 0 or request.LimitAudioPerPrompt != 0:
# limit-mm-per-prompt defaults to 1 per modality, based on vLLM docs
engine_args.limit_mm_per_prompt = {
"image": max(request.LimitImagePerPrompt, 1),
"video": max(request.LimitVideoPerPrompt, 1),
"audio": max(request.LimitAudioPerPrompt, 1)
}
try:
self.llm = AsyncLLMEngine.from_engine_args(engine_args)
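
These engine arguments are driven by the new vLLM fields added to the backend config later in this diff (DisableLogStatus, DType, LimitMMPerPrompt); as the comment notes, vLLM defaults to one item per modality, so max(..., 1) keeps unset values valid. A sketch of the corresponding model YAML, using the yaml tags from that struct; the values are illustrative only:

```yaml
# illustrative values; key names match the yaml tags on the new config fields
backend: vllm
dtype: float16
disable_log_stats: true
limit_mm_per_prompt:
  image: 2
  video: 1
  audio: 1
```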
@@ -269,7 +280,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
def load_image(self, image_path: str):
"""
Load an image from the given file path or base64 encoded data.
Args:
image_path (str): The path to the image file or base64 encoded data.
@@ -288,7 +299,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
def load_video(self, video_path: str):
"""
Load a video from the given file path.
Args:
video_path (str): The path to the image file.
@@ -335,4 +346,4 @@ if __name__ == "__main__":
)
args = parser.parse_args()
asyncio.run(serve(args.addr))
asyncio.run(serve(args.addr))

View File

@@ -1,4 +1,4 @@
grpcio==1.69.0
grpcio==1.70.0
protobuf
certifi
setuptools

View File

@@ -62,7 +62,7 @@ func New(opts ...config.AppOption) (*Application, error) {
}
}
if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
if err := pkgStartup.InstallModels(options.Galleries, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
log.Error().Err(err).Msg("error installing models")
}
@@ -145,13 +145,7 @@ func New(opts ...config.AppOption) (*Application, error) {
if options.LoadToMemory != nil {
for _, m := range options.LoadToMemory {
cfg, err := application.BackendLoader().LoadBackendConfigFileByName(m, options.ModelPath,
config.LoadOptionDebug(options.Debug),
config.LoadOptionThreads(options.Threads),
config.LoadOptionContextSize(options.ContextSize),
config.LoadOptionF16(options.F16),
config.ModelPath(options.ModelPath),
)
cfg, err := application.BackendLoader().LoadBackendConfigFileByNameDefaultOptions(m, options)
if err != nil {
return nil, err
}

View File

@@ -33,7 +33,7 @@ type TokenUsage struct {
TimingTokenGeneration float64
}
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c *config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
modelFile := c.Model
// Check if the modelFile exists, if it doesn't try to load it from the gallery
@@ -48,7 +48,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
}
}
opts := ModelOptions(c, o)
opts := ModelOptions(*c, o)
inferenceModel, err := loader.Load(opts...)
if err != nil {
return nil, err
@@ -84,7 +84,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
fn := func() (LLMResponse, error) {
opts := gRPCPredictOpts(c, loader.ModelPath)
opts := gRPCPredictOpts(*c, loader.ModelPath)
opts.Prompt = s
opts.Messages = protoMessages
opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate

View File

@@ -118,9 +118,19 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
nGPULayers = *c.NGPULayers
}
triggers := make([]*pb.GrammarTrigger, 0)
for _, t := range c.FunctionsConfig.GrammarConfig.GrammarTriggers {
triggers = append(triggers, &pb.GrammarTrigger{
Word: t.Word,
AtStart: t.AtStart,
})
}
return &pb.ModelOptions{
CUDA: c.CUDA || c.Diffusers.CUDA,
SchedulerType: c.Diffusers.SchedulerType,
GrammarTriggers: triggers,
PipelineType: c.Diffusers.PipelineType,
CFGScale: c.CFGScale,
LoraAdapter: c.LoraAdapter,
@@ -149,6 +159,12 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
SwapSpace: int32(c.SwapSpace),
MaxModelLen: int32(c.MaxModelLen),
TensorParallelSize: int32(c.TensorParallelSize),
DisableLogStatus: c.DisableLogStatus,
DType: c.DType,
// LimitMMPerPrompt vLLM
LimitImagePerPrompt: int32(c.LimitMMPerPrompt.LimitImagePerPrompt),
LimitVideoPerPrompt: int32(c.LimitMMPerPrompt.LimitVideoPerPrompt),
LimitAudioPerPrompt: int32(c.LimitMMPerPrompt.LimitAudioPerPrompt),
MMProj: c.MMProj,
FlashAttention: c.FlashAttention,
CacheTypeKey: c.CacheTypeK,
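
The triggers loop above is the Go side of the lazy-grammar plumbing: words declared under the functions/grammar section of a model config are copied into pb.ModelOptions, and, as shown in the llama.cpp server changes earlier in this diff, their presence switches grammar_lazy on. A rough sketch of the wiring; the concrete trigger type lives in the functions config package and is assumed here, and the trigger word is hypothetical:

```go
// sketch only: functions.GrammarTrigger is an assumed type name; fields follow the diff
cfg := config.BackendConfig{}
cfg.FunctionsConfig.GrammarConfig.GrammarTriggers = []functions.GrammarTrigger{
	{Word: "<tool_call>", AtStart: true}, // hypothetical trigger token
}
opts := grpcModelOpts(cfg)
// opts.GrammarTriggers now carries one *pb.GrammarTrigger; the C++ server
// enables lazy grammar evaluation when at least one trigger is configured.
```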

View File

@@ -9,10 +9,10 @@ import (
model "github.com/mudler/LocalAI/pkg/model"
)
func Rerank(modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
func Rerank(request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
opts := ModelOptions(backendConfig, appConfig)
rerankModel, err := loader.Load(opts...)
if err != nil {
return nil, err
}

View File

@@ -13,7 +13,6 @@ import (
)
func SoundGeneration(
modelFile string,
text string,
duration *float32,
temperature *float32,
@@ -25,8 +24,9 @@ func SoundGeneration(
backendConfig config.BackendConfig,
) (string, *proto.Result, error) {
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
opts := ModelOptions(backendConfig, appConfig)
soundGenModel, err := loader.Load(opts...)
if err != nil {
return "", nil, err
}
@@ -44,7 +44,7 @@ func SoundGeneration(
res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{
Text: text,
Model: modelFile,
Model: backendConfig.Model,
Dst: filePath,
Sample: doSample,
Duration: duration,

View File

@@ -4,24 +4,17 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/model"
)
func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {
modelFile := backendConfig.Model
var inferenceModel grpc.Backend
var err error
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
opts := ModelOptions(backendConfig, appConfig)
inferenceModel, err = loader.Load(opts...)
if backendConfig.Backend == "" {
inferenceModel, err = loader.Load(opts...)
} else {
opts = append(opts, model.WithBackendString(backendConfig.Backend))
inferenceModel, err = loader.Load(opts...)
}
if err != nil {
return schema.TokenizeResponse{}, err
}
@@ -35,6 +28,10 @@ func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.Bac
return schema.TokenizeResponse{}, err
}
if resp.Tokens == nil {
resp.Tokens = make([]int32, 0)
}
return schema.TokenizeResponse{
Tokens: resp.Tokens,
}, nil
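
The added nil check is about JSON shape: encoding/json marshals a nil slice as null but an empty, non-nil slice as [], so clients of the tokenize endpoint always receive an array. A quick standalone demonstration:

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	var nilTokens []int32
	a, _ := json.Marshal(nilTokens)        // nil slice
	b, _ := json.Marshal(make([]int32, 0)) // empty, non-nil slice
	fmt.Println(string(a), string(b))      // prints: null []
}
```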

View File

@@ -47,7 +47,7 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
tks = append(tks, int(t))
}
tr.Segments = append(tr.Segments,
schema.Segment{
schema.TranscriptionSegment{
Text: s.Text,
Id: int(s.Id),
Start: time.Duration(s.Start),

View File

@@ -14,28 +14,22 @@ import (
)
func ModelTTS(
backend,
text,
modelFile,
voice,
language string,
loader *model.ModelLoader,
appConfig *config.ApplicationConfig,
backendConfig config.BackendConfig,
) (string, *proto.Result, error) {
bb := backend
if bb == "" {
bb = model.PiperBackend
}
opts := ModelOptions(backendConfig, appConfig, model.WithBackendString(bb), model.WithModel(modelFile))
opts := ModelOptions(backendConfig, appConfig, model.WithDefaultBackendString(model.PiperBackend))
ttsModel, err := loader.Load(opts...)
if err != nil {
return "", nil, err
}
if ttsModel == nil {
return "", nil, fmt.Errorf("could not load piper model")
return "", nil, fmt.Errorf("could not load tts model %q", backendConfig.Model)
}
if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
@@ -45,22 +39,21 @@ func ModelTTS(
fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
filePath := filepath.Join(appConfig.AudioDir, fileName)
// If the model file is not empty, we pass it joined with the model path
// We join the model name to the model path here. This seems to only be done for TTS and is HIGHLY suspect.
// This should be addressed in a follow up PR soon.
// Copying it over nearly verbatim, as TTS backends are not functional without this.
modelPath := ""
if modelFile != "" {
// If the model file is not empty, we pass it joined with the model path
// Checking first that it exists and is not outside ModelPath
// TODO: we should actually first check if the modelFile is looking like
// a FS path
mp := filepath.Join(loader.ModelPath, modelFile)
if _, err := os.Stat(mp); err == nil {
if err := utils.VerifyPath(mp, appConfig.ModelPath); err != nil {
return "", nil, err
}
modelPath = mp
} else {
modelPath = modelFile
// Checking first that it exists and is not outside ModelPath
// TODO: we should actually first check if the modelFile is looking like
// a FS path
mp := filepath.Join(loader.ModelPath, backendConfig.Model)
if _, err := os.Stat(mp); err == nil {
if err := utils.VerifyPath(mp, appConfig.ModelPath); err != nil {
return "", nil, err
}
modelPath = mp
} else {
modelPath = backendConfig.Model // skip this step if it fails?????
}
res, err := ttsModel.TTS(context.Background(), &proto.TTSRequest{

core/backend/vad.go (new file, 38 lines)
View File

@@ -0,0 +1,38 @@
package backend
import (
"context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
)
func VAD(request *schema.VADRequest,
ctx context.Context,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
backendConfig config.BackendConfig) (*schema.VADResponse, error) {
opts := ModelOptions(backendConfig, appConfig)
vadModel, err := ml.Load(opts...)
if err != nil {
return nil, err
}
req := proto.VADRequest{
Audio: request.Audio,
}
resp, err := vadModel.VAD(ctx, &req)
if err != nil {
return nil, err
}
segments := []schema.VADSegment{}
for _, s := range resp.Segments {
segments = append(segments, schema.VADSegment{Start: s.Start, End: s.End})
}
return &schema.VADResponse{
Segments: segments,
}, nil
}
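
The new VAD helper mirrors the other backend wrappers: resolve model options from the backend config, load the gRPC backend, and map the proto segments back into schema types. A hypothetical caller sketch (the type of the audio samples field is assumed from the proto mapping, which is not shown in this diff):

```go
// sketch: samples is raw audio decoded elsewhere; silero-vad expects float samples
req := &schema.VADRequest{Audio: samples}
resp, err := backend.VAD(req, ctx, modelLoader, appConfig, vadBackendConfig)
if err != nil {
	return err
}
for _, seg := range resp.Segments {
	log.Debug().Msgf("speech segment: %v -> %v", seg.Start, seg.End)
}
```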

View File

@@ -100,7 +100,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
}
err = startup.InstallModels(galleries, "", mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
err = startup.InstallModels(galleries, mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
if err != nil {
return err
}

View File

@@ -32,7 +32,6 @@ type RunCMD struct {
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"`
RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"`
PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"`
PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"`
@@ -90,7 +89,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval),
config.WithF16(r.F16),
config.WithStringGalleries(r.Galleries),
config.WithModelLibraryURL(r.RemoteLibrary),
config.WithCors(r.CORS),
config.WithCorsAllowOrigins(r.CORSAllowOrigins),
config.WithCsrf(r.CSRF),

View File

@@ -86,13 +86,14 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
options := config.BackendConfig{}
options.SetDefaults()
options.Backend = t.Backend
options.Model = t.Model
var inputFile *string
if t.InputFile != "" {
inputFile = &t.InputFile
}
filePath, _, err := backend.SoundGeneration(t.Model, text,
filePath, _, err := backend.SoundGeneration(text,
parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)

View File

@@ -52,8 +52,10 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error {
options := config.BackendConfig{}
options.SetDefaults()
options.Backend = t.Backend
options.Model = t.Model
filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, t.Language, ml, opts, options)
filePath, _, err := backend.ModelTTS(text, t.Voice, t.Language, ml, opts, options)
if err != nil {
return err
}

View File

@@ -44,8 +44,6 @@ type ApplicationConfig struct {
DisableGalleryEndpoint bool
LoadToMemory []string
ModelLibraryURL string
Galleries []Gallery
BackendAssets embed.FS
@@ -126,12 +124,6 @@ func WithP2PToken(s string) AppOption {
}
}
func WithModelLibraryURL(url string) AppOption {
return func(o *ApplicationConfig) {
o.ModelLibraryURL = url
}
}
func WithLibPath(path string) AppOption {
return func(o *ApplicationConfig) {
o.LibPath = path

View File

@@ -130,25 +130,28 @@ type LLMConfig struct {
TrimSpace []string `yaml:"trimspace"`
TrimSuffix []string `yaml:"trimsuffix"`
ContextSize *int `yaml:"context_size"`
NUMA bool `yaml:"numa"`
LoraAdapter string `yaml:"lora_adapter"`
LoraBase string `yaml:"lora_base"`
LoraAdapters []string `yaml:"lora_adapters"`
LoraScales []float32 `yaml:"lora_scales"`
LoraScale float32 `yaml:"lora_scale"`
NoMulMatQ bool `yaml:"no_mulmatq"`
DraftModel string `yaml:"draft_model"`
NDraft int32 `yaml:"n_draft"`
Quantization string `yaml:"quantization"`
LoadFormat string `yaml:"load_format"`
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
EnforceEager bool `yaml:"enforce_eager"` // vLLM
SwapSpace int `yaml:"swap_space"` // vLLM
MaxModelLen int `yaml:"max_model_len"` // vLLM
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
MMProj string `yaml:"mmproj"`
ContextSize *int `yaml:"context_size"`
NUMA bool `yaml:"numa"`
LoraAdapter string `yaml:"lora_adapter"`
LoraBase string `yaml:"lora_base"`
LoraAdapters []string `yaml:"lora_adapters"`
LoraScales []float32 `yaml:"lora_scales"`
LoraScale float32 `yaml:"lora_scale"`
NoMulMatQ bool `yaml:"no_mulmatq"`
DraftModel string `yaml:"draft_model"`
NDraft int32 `yaml:"n_draft"`
Quantization string `yaml:"quantization"`
LoadFormat string `yaml:"load_format"`
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
EnforceEager bool `yaml:"enforce_eager"` // vLLM
SwapSpace int `yaml:"swap_space"` // vLLM
MaxModelLen int `yaml:"max_model_len"` // vLLM
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
DisableLogStatus bool `yaml:"disable_log_stats"` // vLLM
DType string `yaml:"dtype"` // vLLM
LimitMMPerPrompt LimitMMPerPrompt `yaml:"limit_mm_per_prompt"` // vLLM
MMProj string `yaml:"mmproj"`
FlashAttention bool `yaml:"flash_attention"`
NoKVOffloading bool `yaml:"no_kv_offloading"`
@@ -166,6 +169,13 @@ type LLMConfig struct {
CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale
}
// LimitMMPerPrompt is a struct that holds the configuration for the limit-mm-per-prompt config in vLLM
type LimitMMPerPrompt struct {
LimitImagePerPrompt int `yaml:"image"`
LimitVideoPerPrompt int `yaml:"video"`
LimitAudioPerPrompt int `yaml:"audio"`
}
// AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend
type AutoGPTQ struct {
ModelBaseName string `yaml:"model_base_name"`
@@ -212,7 +222,15 @@ func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {
return err
}
*c = BackendConfig(aux)
c.KnownUsecases = GetUsecasesFromYAML(c.KnownUsecaseStrings)
// Make sure the usecases are valid, we rewrite with what we identified
c.KnownUsecaseStrings = []string{}
for k, usecase := range GetAllBackendConfigUsecases() {
if c.HasUsecases(usecase) {
c.KnownUsecaseStrings = append(c.KnownUsecaseStrings, k)
}
}
return nil
}
@@ -287,7 +305,8 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
defaultTopP := 0.95
defaultTopK := 40
defaultTemp := 0.9
defaultMirostat := 2
// https://github.com/mudler/LocalAI/issues/2780
defaultMirostat := 0
defaultMirostatTAU := 5.0
defaultMirostatETA := 0.1
defaultTypicalP := 1.0
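
With defaultMirostat now 0, sampling no longer goes through mirostat unless a model explicitly asks for it (see the linked issue); the TAU and ETA defaults above are unchanged. A model that relied on the old behavior could opt back in per its config; the key name below is an assumption and does not appear in this diff:

```yaml
# assumed key name; re-enables mirostat v2 for this one model
mirostat: 2
```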
@@ -436,19 +455,21 @@ func (c *BackendConfig) HasTemplate() bool {
type BackendConfigUsecases int
const (
FLAG_ANY BackendConfigUsecases = 0b000000000
FLAG_CHAT BackendConfigUsecases = 0b000000001
FLAG_COMPLETION BackendConfigUsecases = 0b000000010
FLAG_EDIT BackendConfigUsecases = 0b000000100
FLAG_EMBEDDINGS BackendConfigUsecases = 0b000001000
FLAG_RERANK BackendConfigUsecases = 0b000010000
FLAG_IMAGE BackendConfigUsecases = 0b000100000
FLAG_TRANSCRIPT BackendConfigUsecases = 0b001000000
FLAG_TTS BackendConfigUsecases = 0b010000000
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b100000000
FLAG_ANY BackendConfigUsecases = 0b00000000000
FLAG_CHAT BackendConfigUsecases = 0b00000000001
FLAG_COMPLETION BackendConfigUsecases = 0b00000000010
FLAG_EDIT BackendConfigUsecases = 0b00000000100
FLAG_EMBEDDINGS BackendConfigUsecases = 0b00000001000
FLAG_RERANK BackendConfigUsecases = 0b00000010000
FLAG_IMAGE BackendConfigUsecases = 0b00000100000
FLAG_TRANSCRIPT BackendConfigUsecases = 0b00001000000
FLAG_TTS BackendConfigUsecases = 0b00010000000
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b00100000000
FLAG_TOKENIZE BackendConfigUsecases = 0b01000000000
FLAG_VAD BackendConfigUsecases = 0b10000000000
// Common Subsets
FLAG_LLM BackendConfigUsecases = FLAG_CHAT & FLAG_COMPLETION & FLAG_EDIT
FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
)
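
The operator change in FLAG_LLM is the substantive fix here: FLAG_CHAT, FLAG_COMPLETION and FLAG_EDIT are disjoint single bits, so AND-ing them yields 0, the same value as FLAG_ANY, and the old composite mask could never act as a real subset. OR-ing them builds the intended mask. A minimal, self-contained illustration:

```go
package main

import "fmt"

const (
	chat       = 0b001
	completion = 0b010
	edit       = 0b100
)

func main() {
	fmt.Printf("%03b\n", chat&completion&edit) // 000: AND of disjoint bits selects nothing
	fmt.Printf("%03b\n", chat|completion|edit) // 111: OR builds the combined LLM mask
}
```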
func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
@@ -463,10 +484,16 @@ func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
"FLAG_TRANSCRIPT": FLAG_TRANSCRIPT,
"FLAG_TTS": FLAG_TTS,
"FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION,
"FLAG_TOKENIZE": FLAG_TOKENIZE,
"FLAG_VAD": FLAG_VAD,
"FLAG_LLM": FLAG_LLM,
}
}
func stringToFlag(s string) string {
return "FLAG_" + strings.ToUpper(s)
}
func GetUsecasesFromYAML(input []string) *BackendConfigUsecases {
if len(input) == 0 {
return nil
@@ -474,7 +501,7 @@ func GetUsecasesFromYAML(input []string) *BackendConfigUsecases {
result := FLAG_ANY
flags := GetAllBackendConfigUsecases()
for _, str := range input {
flag, exists := flags["FLAG_"+strings.ToUpper(str)]
flag, exists := flags[stringToFlag(str)]
if exists {
result |= flag
}
@@ -548,5 +575,18 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
}
}
if (u & FLAG_TOKENIZE) == FLAG_TOKENIZE {
tokenizeCapableBackends := []string{"llama.cpp", "rwkv"}
if !slices.Contains(tokenizeCapableBackends, c.Backend) {
return false
}
}
if (u & FLAG_VAD) == FLAG_VAD {
if c.Backend != "silero-vad" {
return false
}
}
return true
}

View File

@@ -81,10 +81,10 @@ func readMultipleBackendConfigsFromFile(file string, opts ...ConfigLoaderOption)
c := &[]*BackendConfig{}
f, err := os.ReadFile(file)
if err != nil {
return nil, fmt.Errorf("cannot read config file: %w", err)
return nil, fmt.Errorf("readMultipleBackendConfigsFromFile cannot read config file %q: %w", file, err)
}
if err := yaml.Unmarshal(f, c); err != nil {
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
return nil, fmt.Errorf("readMultipleBackendConfigsFromFile cannot unmarshal config file %q: %w", file, err)
}
for _, cc := range *c {
@@ -101,10 +101,10 @@ func readBackendConfigFromFile(file string, opts ...ConfigLoaderOption) (*Backen
c := &BackendConfig{}
f, err := os.ReadFile(file)
if err != nil {
return nil, fmt.Errorf("cannot read config file: %w", err)
return nil, fmt.Errorf("readBackendConfigFromFile cannot read config file %q: %w", file, err)
}
if err := yaml.Unmarshal(f, c); err != nil {
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
return nil, fmt.Errorf("readBackendConfigFromFile cannot unmarshal config file %q: %w", file, err)
}
c.SetDefaults(opts...)
@@ -117,7 +117,9 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
// Load a config file if present after the model name
cfg := &BackendConfig{
PredictionOptions: schema.PredictionOptions{
Model: modelName,
BasicModelRequest: schema.BasicModelRequest{
Model: modelName,
},
},
}
@@ -145,6 +147,15 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
return cfg, nil
}
func (bcl *BackendConfigLoader) LoadBackendConfigFileByNameDefaultOptions(modelName string, appConfig *ApplicationConfig) (*BackendConfig, error) {
return bcl.LoadBackendConfigFileByName(modelName, appConfig.ModelPath,
LoadOptionDebug(appConfig.Debug),
LoadOptionThreads(appConfig.Threads),
LoadOptionContextSize(appConfig.ContextSize),
LoadOptionF16(appConfig.F16),
ModelPath(appConfig.ModelPath))
}
// This format is currently only used when reading a single file at startup, passed in via ApplicationConfig.ConfigFile
func (bcl *BackendConfigLoader) LoadMultipleBackendConfigsSingleFile(file string, opts ...ConfigLoaderOption) error {
bcl.Lock()
@@ -167,7 +178,7 @@ func (bcl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoa
defer bcl.Unlock()
c, err := readBackendConfigFromFile(file, opts...)
if err != nil {
return fmt.Errorf("cannot read config file: %w", err)
return fmt.Errorf("LoadBackendConfig cannot read config file %q: %w", file, err)
}
if c.Validate() {
@@ -324,9 +335,10 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error {
func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
bcl.Lock()
defer bcl.Unlock()
entries, err := os.ReadDir(path)
if err != nil {
return fmt.Errorf("cannot read directory '%s': %w", path, err)
return fmt.Errorf("LoadBackendConfigsFromPath cannot read directory '%s': %w", path, err)
}
files := make([]fs.FileInfo, 0, len(entries))
for _, entry := range entries {
@@ -344,13 +356,13 @@ func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...
}
c, err := readBackendConfigFromFile(filepath.Join(path, file.Name()), opts...)
if err != nil {
log.Error().Err(err).Msgf("cannot read config file: %s", file.Name())
log.Error().Err(err).Str("File Name", file.Name()).Msgf("LoadBackendConfigsFromPath cannot read config file")
continue
}
if c.Validate() {
bcl.configs[c.Name] = *c
} else {
log.Error().Err(err).Msgf("config is not valid")
log.Error().Err(err).Str("Name", c.Name).Msgf("config is not valid")
}
}

View File

@@ -48,9 +48,9 @@ parameters:
Expect(config.Name).To(Equal("bar-baz"))
Expect(config.Validate()).To(BeTrue())
// download https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml
// download https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml
httpClient := http.Client{}
resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml")
resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml")
Expect(err).To(BeNil())
defer resp.Body.Close()
tmp, err = os.CreateTemp("", "config.yaml")

View File

@@ -161,10 +161,11 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {
}
// We try to guess only if we don't have a template defined already
f, err := gguf.ParseGGUFFile(filepath.Join(modelPath, cfg.ModelFileName()))
guessPath := filepath.Join(modelPath, cfg.ModelFileName())
f, err := gguf.ParseGGUFFile(guessPath)
if err != nil {
// Only valid for gguf files
log.Debug().Msgf("guessDefaultsFromFile: %s", "not a GGUF file")
log.Debug().Str("filePath", guessPath).Msg("guessDefaultsFromFile: not a GGUF file")
return
}

View File

@@ -29,6 +29,8 @@ func InstallModelFromGallery(galleries []config.Gallery, name string, basePath s
if err != nil {
return err
}
config.Description = model.Description
config.License = model.License
} else if len(model.ConfigFile) > 0 {
// TODO: is this worse than using the override method with a blank cfg yaml?
reYamlConfig, err := yaml.Marshal(model.ConfigFile)
@@ -114,7 +116,7 @@ func FindModel(models []*GalleryModel, name string, basePath string) *GalleryMod
// List available models
// Models galleries are a list of yaml files that are hosted on a remote server (for example github).
// Each yaml file contains a list of models that can be downloaded and optionally overrides to define a new model setting.
func AvailableGalleryModels(galleries []config.Gallery, basePath string) ([]*GalleryModel, error) {
func AvailableGalleryModels(galleries []config.Gallery, basePath string) (GalleryModels, error) {
var models []*GalleryModel
// Get models from galleries

View File

@@ -48,8 +48,10 @@ var _ = Describe("Model test", func() {
defer os.RemoveAll(tempdir)
gallery := []GalleryModel{{
Name: "bert",
URL: bertEmbeddingsURL,
Metadata: Metadata{
Name: "bert",
URL: bertEmbeddingsURL,
},
}}
out, err := yaml.Marshal(gallery)
Expect(err).ToNot(HaveOccurred())

View File

@@ -11,6 +11,14 @@ import (
// It is used to install the model by resolving the URL and downloading the files.
// The other fields are used to override the configuration of the model.
type GalleryModel struct {
Metadata `json:",inline" yaml:",inline"`
// config_file is read in the situation where URL is blank - and therefore this is a base config.
ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"`
// Overrides are used to override the configuration of the model located at URL
Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"`
}
type Metadata struct {
URL string `json:"url,omitempty" yaml:"url,omitempty"`
Name string `json:"name,omitempty" yaml:"name,omitempty"`
Description string `json:"description,omitempty" yaml:"description,omitempty"`
@@ -18,10 +26,6 @@ type GalleryModel struct {
URLs []string `json:"urls,omitempty" yaml:"urls,omitempty"`
Icon string `json:"icon,omitempty" yaml:"icon,omitempty"`
Tags []string `json:"tags,omitempty" yaml:"tags,omitempty"`
// config_file is read in the situation where URL is blank - and therefore this is a base config.
ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"`
// Overrides are used to override the configuration of the model located at URL
Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"`
// AdditionalFiles are used to add additional files to the model
AdditionalFiles []File `json:"files,omitempty" yaml:"files,omitempty"`
// Gallery is a reference to the gallery which contains the model
@@ -58,3 +62,15 @@ func (gm GalleryModels) FindByName(name string) *GalleryModel {
}
return nil
}
func (gm GalleryModels) Paginate(pageNum int, itemsNum int) GalleryModels {
start := (pageNum - 1) * itemsNum
end := start + itemsNum
if start > len(gm) {
start = len(gm)
}
if end > len(gm) {
end = len(gm)
}
return gm[start:end]
}
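
Paginate uses 1-based page numbers and clamps both bounds to the slice length, so asking for a page past the end returns an empty slice rather than panicking; a pageNum below 1 is not guarded and would yield a negative start index. A usage sketch, assuming models was returned by AvailableGalleryModels:

```go
// Page 2 with 20 items per page: elements 20..39, clamped to len(models).
page := models.Paginate(2, 20)
for _, m := range page {
	fmt.Println(m.Name) // Name comes from the embedded Metadata
}
```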

View File

@@ -9,7 +9,11 @@ import (
var _ = Describe("Gallery API tests", func() {
Context("requests", func() {
It("parses github with a branch", func() {
req := GalleryModel{URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main"}
req := GalleryModel{
Metadata: Metadata{
URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main",
},
}
e, err := GetGalleryConfigFromURL(req.URL, "")
Expect(err).ToNot(HaveOccurred())
Expect(e.Name).To(Equal("gpt4all-j"))

View File

@@ -130,7 +130,6 @@ func API(application *application.Application) (*fiber.App, error) {
return metricsService.Shutdown()
})
}
}
// Health Checks should always be exempt from auth, so register these first
routes.HealthRoutes(router)
@@ -167,13 +166,15 @@ func API(application *application.Application) (*fiber.App, error) {
galleryService := services.NewGalleryService(application.ApplicationConfig())
galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader())
routes.RegisterElevenLabsRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
routes.RegisterLocalAIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
routes.RegisterOpenAIRoutes(router, application)
requestExtractor := middleware.NewRequestExtractor(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
routes.RegisterElevenLabsRoutes(router, requestExtractor, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
routes.RegisterLocalAIRoutes(router, requestExtractor, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
routes.RegisterOpenAIRoutes(router, requestExtractor, application)
if !application.ApplicationConfig().DisableWebUI {
routes.RegisterUIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
}
routes.RegisterJINARoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
routes.RegisterJINARoutes(router, requestExtractor, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
httpFS := http.FS(embedDirStatic)

View File

@@ -299,14 +299,18 @@ var _ = Describe("API test", func() {
g := []gallery.GalleryModel{
{
Name: "bert",
URL: bertEmbeddingsURL,
Metadata: gallery.Metadata{
Name: "bert",
URL: bertEmbeddingsURL,
},
},
{
Name: "bert2",
URL: bertEmbeddingsURL,
Overrides: map[string]interface{}{"foo": "bar"},
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
Metadata: gallery.Metadata{
Name: "bert2",
URL: bertEmbeddingsURL,
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
},
Overrides: map[string]interface{}{"foo": "bar"},
},
}
out, err := yaml.Marshal(g)
@@ -476,7 +480,7 @@ var _ = Describe("API test", func() {
})
It("apply models from config", func() {
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml",
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml",
})
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
@@ -522,77 +526,6 @@ var _ = Describe("API test", func() {
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
})
It("runs openllama(llama-ggml backend)", Label("llama"), func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "github:go-skynet/model-gallery/openllama_3b.yaml",
Name: "openllama_3b",
Overrides: map[string]interface{}{"backend": "llama-ggml", "mmap": true, "f16": true, "context_size": 128},
})
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
uuid := response["uuid"].(string)
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
return response["processed"].(bool)
}, "360s", "10s").Should(Equal(true))
By("testing completion")
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "openllama_3b", Prompt: "Count up to five: one, two, three, four, "})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).To(ContainSubstring("five"))
By("testing functions")
resp2, err := client.CreateChatCompletion(
context.TODO(),
openai.ChatCompletionRequest{
Model: "openllama_3b",
Messages: []openai.ChatCompletionMessage{
{
Role: "user",
Content: "What is the weather like in San Francisco (celsius)?",
},
},
Functions: []openai.FunctionDefinition{
openai.FunctionDefinition{
Name: "get_current_weather",
Description: "Get the current weather",
Parameters: jsonschema.Definition{
Type: jsonschema.Object,
Properties: map[string]jsonschema.Definition{
"location": {
Type: jsonschema.String,
Description: "The city and state, e.g. San Francisco, CA",
},
"unit": {
Type: jsonschema.String,
Enum: []string{"celcius", "fahrenheit"},
},
},
Required: []string{"location"},
},
},
},
})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp2.Choices)).To(Equal(1))
Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
var res map[string]string
err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
Expect(err).ToNot(HaveOccurred())
Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res))
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
})
It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
@@ -600,7 +533,7 @@ var _ = Describe("API test", func() {
modelName := "hermes-2-pro-mistral"
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml",
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml",
})
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))

View File

@@ -1,47 +0,0 @@
package fiberContext
import (
"fmt"
"strings"
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
)
// ModelFromContext returns the model from the context
// If no model is specified, it will take the first available
// Takes a model string as input which should be the one received from the user request.
// It returns the model name resolved from the context and an error if any.
func ModelFromContext(ctx *fiber.Ctx, cl *config.BackendConfigLoader, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) {
if ctx.Params("model") != "" {
modelInput = ctx.Params("model")
}
if ctx.Query("model") != "" {
modelInput = ctx.Query("model")
}
// Set model from bearer token, if available
bearer := strings.TrimLeft(ctx.Get("authorization"), "Bear ") // Reduced duplicate characters of Bearer
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
// If no model was specified, take the first available
if modelInput == "" && !bearerExists && firstModel {
models, _ := services.ListModels(cl, loader, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
if len(models) > 0 {
modelInput = models[0]
log.Debug().Msgf("No model specified, using: %s", modelInput)
} else {
log.Debug().Msgf("No model specified, returning error")
return "", fmt.Errorf("no model specified")
}
}
// If a model is found in bearer token takes precedence
if bearerExists {
log.Debug().Msgf("Using model from bearer token: %s", bearer)
modelInput = bearer
}
return modelInput, nil
}
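The endpoints later in this diff drop this helper in favour of the request-extractor middleware wired up in the route registration above: the middleware parses the body, resolves the model (path/query params, bearer token, or first available) and stores the typed request plus the backend config in Fiber's Locals. Below is a minimal sketch of the resulting handler shape, using only the CONTEXT_LOCALS_KEY_* constants visible in this diff; the endpoint name and the choice of schema.TTSRequest are illustrative, not part of the change.

package localai

import (
	"github.com/gofiber/fiber/v2"

	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/http/middleware"
	"github.com/mudler/LocalAI/core/schema"
)

// ExampleEndpoint is a hypothetical handler showing the post-refactor pattern:
// no BodyParser, no LoadBackendConfigFileByName; everything comes from Locals.
func ExampleEndpoint() func(c *fiber.Ctx) error {
	return func(c *fiber.Ctx) error {
		// Typed request already parsed by middleware.RequestExtractor.
		input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.TTSRequest)
		if !ok || input.Model == "" {
			return fiber.ErrBadRequest
		}
		// Backend config already resolved for the requested model.
		cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
		if !ok || cfg == nil {
			return fiber.ErrBadRequest
		}
		return c.JSON(fiber.Map{"model": cfg.Model, "backend": cfg.Backend})
	}
}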

View File

@@ -13,7 +13,7 @@ func installButton(galleryName string) elem.Node {
attrs.Props{
"data-twe-ripple-init": "",
"data-twe-ripple-color": "light",
"class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
"class": "float-right inline-flex items-center rounded-lg bg-blue-600 hover:bg-blue-700 px-4 py-2 text-sm font-medium text-white transition duration-300 ease-in-out shadow hover:shadow-lg",
"hx-swap": "outerHTML",
// post the Model ID as param
"hx-post": "browse/install/model/" + galleryName,
@@ -52,7 +52,7 @@ func infoButton(m *gallery.GalleryModel) elem.Node {
attrs.Props{
"data-twe-ripple-init": "",
"data-twe-ripple-color": "light",
"class": "float-left inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
"class": "inline-flex items-center rounded-lg bg-gray-700 hover:bg-gray-600 px-4 py-2 text-sm font-medium text-white transition duration-300 ease-in-out",
"data-modal-target": modalName(m),
"data-modal-toggle": modalName(m),
},

View File

@@ -17,7 +17,7 @@ const (
func cardSpan(text, icon string) elem.Node {
return elem.Span(
attrs.Props{
"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2",
"class": "inline-flex items-center px-3 py-1 rounded-lg text-xs font-medium bg-gray-700/70 text-gray-300 border border-gray-600/50 mr-2 mb-2",
},
elem.I(attrs.Props{
"class": icon + " pr-2",
@@ -39,19 +39,20 @@ func searchableElement(text, icon string) elem.Node {
),
elem.Span(
attrs.Props{
"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2",
"class": "inline-flex items-center text-xs px-3 py-1 rounded-full bg-gray-700/60 text-gray-300 border border-gray-600/50 hover:bg-gray-600 hover:text-gray-100 transition duration-200 ease-in-out",
},
elem.A(
attrs.Props{
// "name": "search",
// "value": text,
//"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2",
"href": "#!",
"hx-post": "browse/search/models",
"hx-target": "#search-results",
//"href": "#!",
"href": "browse?term=" + text,
//"hx-post": "browse/search/models",
//"hx-target": "#search-results",
// TODO: this doesn't work
// "hx-vals": `{ \"search\": \"` + text + `\" }`,
"hx-indicator": ".htmx-indicator",
//"hx-indicator": ".htmx-indicator",
},
elem.I(attrs.Props{
"class": icon + " pr-2",
@@ -101,7 +102,7 @@ func modalName(m *gallery.GalleryModel) string {
return m.Name + "-modal"
}
func modelDescription(m *gallery.GalleryModel) elem.Node {
func modelModal(m *gallery.GalleryModel) elem.Node {
urls := []elem.Node{}
for _, url := range m.URLs {
urls = append(urls,
@@ -116,6 +117,125 @@ func modelDescription(m *gallery.GalleryModel) elem.Node {
)
}
return elem.Div(
attrs.Props{
"id": modalName(m),
"tabindex": "-1",
"aria-hidden": "true",
"class": "hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full",
},
elem.Div(
attrs.Props{
"class": "relative p-4 w-full max-w-2xl max-h-full",
},
elem.Div(
attrs.Props{
"class": "relative p-4 w-full max-w-2xl max-h-full bg-white rounded-lg shadow dark:bg-gray-700",
},
// header
elem.Div(
attrs.Props{
"class": "flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600",
},
elem.H3(
attrs.Props{
"class": "text-xl font-semibold text-gray-900 dark:text-white",
},
elem.Text(bluemonday.StrictPolicy().Sanitize(m.Name)),
),
elem.Button( // close button
attrs.Props{
"class": "text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white",
"data-modal-hide": modalName(m),
},
elem.Raw(
`<svg class="w-3 h-3" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 14 14">
<path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6"/>
</svg>`,
),
elem.Span(
attrs.Props{
"class": "sr-only",
},
elem.Text("Close modal"),
),
),
),
// body
elem.Div(
attrs.Props{
"class": "p-4 md:p-5 space-y-4",
},
elem.Div(
attrs.Props{
"class": "flex justify-center items-center",
},
elem.Img(attrs.Props{
// "class": "rounded-t-lg object-fit object-center h-96",
"class": "lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3 entered loaded",
"src": m.Icon,
"loading": "lazy",
}),
),
elem.P(
attrs.Props{
"class": "text-base leading-relaxed text-gray-500 dark:text-gray-400",
},
elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)),
),
elem.Hr(
attrs.Props{},
),
elem.P(
attrs.Props{
"class": "text-sm font-semibold text-gray-900 dark:text-white",
},
elem.Text("Links"),
),
elem.Ul(
attrs.Props{},
urls...,
),
elem.If(
len(m.Tags) > 0,
elem.Div(
attrs.Props{},
elem.P(
attrs.Props{
"class": "text-sm mb-5 font-semibold text-gray-900 dark:text-white",
},
elem.Text("Tags"),
),
elem.Div(
attrs.Props{
"class": "flex flex-row flex-wrap content-center",
},
tagsNodes...,
),
),
elem.Div(attrs.Props{}),
),
),
// Footer
elem.Div(
attrs.Props{
"class": "flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600",
},
elem.Button(
attrs.Props{
"data-modal-hide": modalName(m),
"class": "py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700",
},
elem.Text("Close"),
),
),
),
),
)
}
func modelDescription(m *gallery.GalleryModel) elem.Node {
return elem.Div(
attrs.Props{
"class": "p-6 text-surface dark:text-white",
@@ -132,122 +252,6 @@ func modelDescription(m *gallery.GalleryModel) elem.Node {
},
elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)),
),
elem.Div(
attrs.Props{
"id": modalName(m),
"tabindex": "-1",
"aria-hidden": "true",
"class": "hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full",
},
elem.Div(
attrs.Props{
"class": "relative p-4 w-full max-w-2xl max-h-full",
},
elem.Div(
attrs.Props{
"class": "relative p-4 w-full max-w-2xl max-h-full bg-white rounded-lg shadow dark:bg-gray-700",
},
// header
elem.Div(
attrs.Props{
"class": "flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600",
},
elem.H3(
attrs.Props{
"class": "text-xl font-semibold text-gray-900 dark:text-white",
},
elem.Text(bluemonday.StrictPolicy().Sanitize(m.Name)),
),
elem.Button( // close button
attrs.Props{
"class": "text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white",
"data-modal-hide": modalName(m),
},
elem.Raw(
`<svg class="w-3 h-3" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 14 14">
<path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6"/>
</svg>`,
),
elem.Span(
attrs.Props{
"class": "sr-only",
},
elem.Text("Close modal"),
),
),
),
// body
elem.Div(
attrs.Props{
"class": "p-4 md:p-5 space-y-4",
},
elem.Div(
attrs.Props{
"class": "flex justify-center items-center",
},
elem.Img(attrs.Props{
// "class": "rounded-t-lg object-fit object-center h-96",
"class": "lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3 entered loaded",
"src": m.Icon,
"loading": "lazy",
}),
),
elem.P(
attrs.Props{
"class": "text-base leading-relaxed text-gray-500 dark:text-gray-400",
},
elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)),
),
elem.Hr(
attrs.Props{},
),
elem.P(
attrs.Props{
"class": "text-sm font-semibold text-gray-900 dark:text-white",
},
elem.Text("Links"),
),
elem.Ul(
attrs.Props{},
urls...,
),
elem.If(
len(m.Tags) > 0,
elem.Div(
attrs.Props{},
elem.P(
attrs.Props{
"class": "text-sm mb-5 font-semibold text-gray-900 dark:text-white",
},
elem.Text("Tags"),
),
elem.Div(
attrs.Props{
"class": "flex flex-row flex-wrap content-center",
},
tagsNodes...,
),
),
elem.Div(attrs.Props{}),
),
),
// Footer
elem.Div(
attrs.Props{
"class": "flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600",
},
elem.Button(
attrs.Props{
"data-modal-hide": modalName(m),
"class": "py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700",
},
elem.Text("Close"),
),
),
),
),
),
)
}
@@ -397,7 +401,7 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g
modelsElements = append(modelsElements,
elem.Div(
attrs.Props{
"class": " me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2",
"class": " me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2 bg-gray-800/90 border border-gray-700/50 rounded-xl overflow-hidden transition-all duration-300 hover:shadow-lg hover:shadow-blue-900/20 hover:-translate-y-1 hover:border-blue-700/50",
},
elem.Div(
attrs.Props{
@@ -406,6 +410,7 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g
elems...,
),
),
modelModal(m),
)
}

View File

@@ -4,7 +4,7 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
@@ -17,45 +17,21 @@ import (
// @Router /v1/sound-generation [post]
func SoundGenerationEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.ElevenLabsSoundGenerationRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.ElevenLabsSoundGenerationRequest)
if !ok || input.ModelID == "" {
return fiber.ErrBadRequest
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.ModelID, false)
if err != nil {
modelFile = input.ModelID
log.Warn().Str("ModelID", input.ModelID).Msg("Model not found in context")
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || cfg == nil {
return fiber.ErrBadRequest
}
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
config.LoadOptionDebug(appConfig.Debug),
config.LoadOptionThreads(appConfig.Threads),
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
if err != nil {
modelFile = input.ModelID
log.Warn().Str("Request ModelID", input.ModelID).Err(err).Msg("error during LoadBackendConfigFileByName, using request ModelID")
} else {
if input.ModelID != "" {
modelFile = input.ModelID
} else {
modelFile = cfg.Model
}
}
log.Debug().Str("modelFile", "modelFile").Str("backend", cfg.Backend).Msg("Sound Generation Request about to be sent to backend")
if input.Duration != nil {
log.Debug().Float32("duration", *input.Duration).Msg("duration set")
}
if input.Temperature != nil {
log.Debug().Float32("temperature", *input.Temperature).Msg("temperature set")
}
// TODO: Support uploading files?
filePath, _, err := backend.SoundGeneration(modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
filePath, _, err := backend.SoundGeneration(input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
if err != nil {
return err
}

View File

@@ -3,7 +3,7 @@ package elevenlabs
import (
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
@@ -20,39 +20,21 @@ import (
func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.ElevenLabsTTSRequest)
voiceID := c.Params("voice-id")
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.ElevenLabsTTSRequest)
if !ok || input.ModelID == "" {
return fiber.ErrBadRequest
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.ModelID, false)
if err != nil {
modelFile = input.ModelID
log.Warn().Msgf("Model not found in context: %s", input.ModelID)
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || cfg == nil {
return fiber.ErrBadRequest
}
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
config.LoadOptionDebug(appConfig.Debug),
config.LoadOptionThreads(appConfig.Threads),
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
if err != nil {
modelFile = input.ModelID
log.Warn().Msgf("Model not found in context: %s", input.ModelID)
} else {
if input.ModelID != "" {
modelFile = input.ModelID
} else {
modelFile = cfg.Model
}
}
log.Debug().Msgf("Request for model: %s", modelFile)
log.Debug().Str("modelName", input.ModelID).Msg("elevenlabs TTS request recieved")
filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, "", voiceID, ml, appConfig, *cfg)
filePath, _, err := backend.ModelTTS(input.Text, voiceID, input.LanguageCode, ml, appConfig, *cfg)
if err != nil {
return err
}

View File

@@ -3,9 +3,9 @@ package jina
import (
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/gofiber/fiber/v2"
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
@@ -19,58 +19,32 @@ import (
// @Router /v1/rerank [post]
func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
req := new(schema.JINARerankRequest)
if err := c.BodyParser(req); err != nil {
return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{
"error": "Cannot parse JSON",
})
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.JINARerankRequest)
if !ok || input.Model == "" {
return fiber.ErrBadRequest
}
input := new(schema.TTSRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || cfg == nil {
return fiber.ErrBadRequest
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
if err != nil {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
}
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
config.LoadOptionDebug(appConfig.Debug),
config.LoadOptionThreads(appConfig.Threads),
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
if err != nil {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
} else {
modelFile = cfg.Model
}
log.Debug().Msgf("Request for model: %s", modelFile)
if input.Backend != "" {
cfg.Backend = input.Backend
}
log.Debug().Str("model", input.Model).Msg("JINA Rerank Request recieved")
request := &proto.RerankRequest{
Query: req.Query,
TopN: int32(req.TopN),
Documents: req.Documents,
Query: input.Query,
TopN: int32(input.TopN),
Documents: input.Documents,
}
results, err := backend.Rerank(modelFile, request, ml, appConfig, *cfg)
results, err := backend.Rerank(request, ml, appConfig, *cfg)
if err != nil {
return err
}
response := &schema.JINARerankResponse{
Model: req.Model,
Model: input.Model,
}
for _, r := range results.Results {

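For reference, a hedged client-side sketch of what a rerank call looks like after this change. The JSON field names ("model", "query", "documents", "top_n") are assumptions inferred from the schema.JINARerankRequest fields used above (Model, Query, Documents, TopN), and the model name is a placeholder.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Field names assumed from schema.JINARerankRequest; adjust to the real JSON tags.
	payload, _ := json.Marshal(map[string]any{
		"model":     "jina-reranker-v1-base-en", // placeholder model name
		"query":     "What is LocalAI?",
		"documents": []string{"LocalAI is a local OpenAI-compatible API.", "Unrelated text."},
		"top_n":     1,
	})
	resp, err := http.Post("http://127.0.0.1:8080/v1/rerank", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body)) // schema.JINARerankResponse with the reranked results
}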
View File

@@ -117,19 +117,25 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
// @Router /models/available [get]
func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
models, err := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath)
if err != nil {
return err
}
log.Debug().Msgf("Models found from galleries: %+v", models)
for _, m := range models {
log.Debug().Msgf("Model found from galleries: %+v", m)
log.Debug().Msgf("Available %d models from %d galleries\n", len(models), len(mgs.galleries))
m := []gallery.Metadata{}
for _, mm := range models {
m = append(m, mm.Metadata)
}
dat, err := json.Marshal(models)
log.Debug().Msgf("Models %#v", m)
dat, err := json.Marshal(m)
if err != nil {
return err
return fmt.Errorf("could not marshal models: %w", err)
}
return c.Send(dat)
}
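Since the endpoint now marshals a slice of gallery.Metadata instead of the full GalleryModel structs, a consumer only sees the metadata fields. A small sketch of reading /models/available under that assumption; the lowercase JSON keys are inferred from the Metadata fields shown in the test diff above and may not match the actual tags.

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// metadata mirrors the gallery.Metadata fields visible in this diff; JSON tags are assumed.
type metadata struct {
	Name string `json:"name"`
	URL  string `json:"url"`
}

func main() {
	resp, err := http.Get("http://127.0.0.1:8080/models/available")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var models []metadata
	if err := json.NewDecoder(resp.Body).Decode(&models); err != nil {
		panic(err)
	}
	for _, m := range models {
		fmt.Printf("%s -> %s\n", m.Name, m.URL)
	}
}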

View File

@@ -4,13 +4,15 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/rs/zerolog/log"
"github.com/mudler/LocalAI/pkg/model"
)
// TODO: This is not yet in use. Needs middleware rework, since it is not referenced.
// TokenMetricsEndpoint is an endpoint to get TokensProcessed Per Second for Active SlotID
//
// @Summary Get TokenMetrics for Active Slot.
@@ -29,18 +31,13 @@ func TokenMetricsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader,
return err
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
if err != nil {
modelFile, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
if !ok || modelFile != "" {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
}
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
config.LoadOptionDebug(appConfig.Debug),
config.LoadOptionThreads(appConfig.Threads),
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
cfg, err := cl.LoadBackendConfigFileByNameDefaultOptions(modelFile, appConfig)
if err != nil {
log.Err(err)

View File

@@ -4,55 +4,32 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
)
// TokenizeEndpoint exposes a REST API to tokenize the content
// @Summary Tokenize the input.
// @Param request body schema.TokenizeRequest true "Request"
// @Success 200 {object} schema.TokenizeResponse "Response"
// @Router /v1/tokenize [post]
func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.TokenizeRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
return func(ctx *fiber.Ctx) error {
input, ok := ctx.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.TokenizeRequest)
if !ok || input.Model == "" {
return fiber.ErrBadRequest
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
if err != nil {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
cfg, ok := ctx.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || cfg == nil {
return fiber.ErrBadRequest
}
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
config.LoadOptionDebug(appConfig.Debug),
config.LoadOptionThreads(appConfig.Threads),
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
if err != nil {
log.Err(err)
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
} else {
modelFile = cfg.Model
}
log.Debug().Msgf("Request for model: %s", modelFile)
tokenResponse, err := backend.ModelTokenize(input.Content, ml, *cfg, appConfig)
if err != nil {
return err
}
c.JSON(tokenResponse)
return nil
return ctx.JSON(tokenResponse)
}
}
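A hedged usage sketch for the tokenize route, assuming the schema.TokenizeRequest fields used above (Model, Content) serialize as "model" and "content"; the response is whatever backend.ModelTokenize returns and is just printed raw here.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// "model" and "content" are assumed JSON tags for schema.TokenizeRequest.
	payload, _ := json.Marshal(map[string]string{
		"model":   "hermes-2-pro-mistral", // any configured model name
		"content": "Count up to five.",    // text to tokenize
	})
	resp, err := http.Post("http://127.0.0.1:8080/v1/tokenize", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body)) // raw schema.TokenizeResponse JSON
}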

View File

@@ -3,7 +3,7 @@ package localai
import (
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
@@ -24,37 +24,24 @@ import (
// @Router /tts [post]
func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.TTSRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.TTSRequest)
if !ok || input.Model == "" {
return fiber.ErrBadRequest
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
if err != nil {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || cfg == nil {
return fiber.ErrBadRequest
}
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
config.LoadOptionDebug(appConfig.Debug),
config.LoadOptionThreads(appConfig.Threads),
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
log.Debug().Str("model", input.Model).Msg("LocalAI TTS Request recieved")
if err != nil {
log.Err(err)
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
} else {
modelFile = cfg.Model
}
log.Debug().Msgf("Request for model: %s", modelFile)
if input.Backend != "" {
cfg.Backend = input.Backend
if cfg.Backend == "" {
if input.Backend != "" {
cfg.Backend = input.Backend
} else {
cfg.Backend = model.PiperBackend
}
}
if input.Language != "" {
@@ -65,7 +52,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
cfg.Voice = input.Voice
}
filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, cfg.Voice, cfg.Language, ml, appConfig, *cfg)
filePath, _, err := backend.ModelTTS(input.Input, cfg.Voice, cfg.Language, ml, appConfig, *cfg)
if err != nil {
return err
}
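And a matching sketch for the reworked /tts route. The request fields mirror what the handler reads above (Model, Input, Voice, Language, Backend); the JSON names and voice value are assumptions, and note the fallback shown in the diff: when neither the config nor the request names a backend, piper is used.

package main

import (
	"bytes"
	"encoding/json"
	"io"
	"net/http"
	"os"
)

func main() {
	// JSON names assumed from the schema.TTSRequest fields used in the handler.
	payload, _ := json.Marshal(map[string]string{
		"model": "voice-en-us",             // placeholder model name
		"input": "Hello from LocalAI",      // text to synthesize
		"voice": "en-us-kathleen-low.onnx", // optional; overrides cfg.Voice
	})
	resp, err := http.Post("http://127.0.0.1:8080/tts", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The endpoint responds with an audio file; write it to disk.
	out, err := os.Create("out.wav")
	if err != nil {
		panic(err)
	}
	defer out.Close()
	if _, err := io.Copy(out, resp.Body); err != nil {
		panic(err)
	}
}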

Some files were not shown because too many files have changed in this diff.