Compare commits

...

209 Commits

Author SHA1 Message Date
Dave
a1634b219a fix: roll out bluemonday Sanitize more widely (#3794)
* initial pass: roll out bluemonday sanitization more widely

Signed-off-by: Dave Lee <dave@gray101.com>

* add one additional sanitize - the overall models list used by the docs site

Signed-off-by: Dave Lee <dave@gray101.com>

---------

Signed-off-by: Dave Lee <dave@gray101.com>
2024-10-12 09:45:47 +02:00
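
The sanitization rolled out above relies on bluemonday. A minimal, hedged Go sketch of the kind of call involved (the policy choice and call site here are illustrative, not copied from the PR):

```go
package main

import (
	"fmt"

	"github.com/microcosm-cc/bluemonday"
)

// sanitize strips HTML from an untrusted string (e.g. a model name or
// description coming from a remote gallery) before it is rendered in a page.
func sanitize(s string) string {
	// StrictPolicy removes all HTML elements and attributes.
	return bluemonday.StrictPolicy().Sanitize(s)
}

func main() {
	unsafe := `<script>alert("xss")</script>my-model`
	// The script element and its contents are removed, leaving "my-model".
	fmt.Println(sanitize(unsafe))
}
```
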
Ettore Di Giacinto
6257e2f510 chore(deps): bump llama-cpp to 96776405a17034dcfd53d3ddf5d142d34bdbb657 (#3793)
This also adapts to upstream changes

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-12 01:25:03 +02:00
Dave
65ca754166 Fix: listmodelservice / welcome endpoint use LOOSE_ONLY (#3791)
* fix list model service and welcome

Signed-off-by: Dave Lee <dave@gray101.com>

* comment

Signed-off-by: Dave Lee <dave@gray101.com>

---------

Signed-off-by: Dave Lee <dave@gray101.com>
2024-10-11 23:49:00 +02:00
Ettore Di Giacinto
a0f0505f0d fix(welcome): do not list model twice if we have a config (#3790)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-11 17:30:14 +02:00
Ettore Di Giacinto
be6c4e6061 fix(llama-cpp): consistently select fallback (#3789)
* fix(llama-cpp): consistently select fallback

We didn't take into consideration the case where the host has the CPU
flagset, but the binaries were not actually present in the asset dir.

This made it possible, for instance, for models that specified the llama-cpp
backend directly in the config to never pick up the fallback binary when the
optimized binaries were not present. (A rough sketch of the selection logic
follows this entry.)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore: adjust and simplify selection

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: move failure recovery to BackendLoader()

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* comments

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* minor fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-11 16:55:57 +02:00
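
A hedged Go sketch of the fallback behaviour described above (function and binary names are illustrative, not the actual LocalAI code): an optimized llama-cpp variant is used only if its binary is actually present in the asset directory; otherwise the fallback build is selected.

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// selectLlamaCPPVariant is a hypothetical sketch of the selection logic:
// prefer the optimized variants supported by the host CPU, but only if the
// corresponding binary actually exists in the asset directory; otherwise
// fall back to the plain build.
func selectLlamaCPPVariant(assetDir string, cpuVariants []string) (string, error) {
	const fallback = "llama-cpp-fallback" // hypothetical fallback binary name

	for _, v := range cpuVariants {
		candidate := filepath.Join(assetDir, v)
		if _, err := os.Stat(candidate); err == nil {
			return candidate, nil // optimized binary is present: use it
		}
	}

	candidate := filepath.Join(assetDir, fallback)
	if _, err := os.Stat(candidate); err == nil {
		return candidate, nil
	}
	return "", fmt.Errorf("no llama-cpp binary found in %s", assetDir)
}

func main() {
	bin, err := selectLlamaCPPVariant("/tmp/assets", []string{"llama-cpp-avx2", "llama-cpp-avx"})
	fmt.Println(bin, err)
}
```
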
LocalAI [bot]
1996e6f4c9 chore: ⬆️ Update ggerganov/llama.cpp to 0e9f760eb12546704ef8fa72577bc1a3ffe1bc04 (#3786)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-10 21:46:50 +00:00
Ettore Di Giacinto
671cd42917 chore(gallery): do not specify backend with moondream
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-10-10 19:54:07 +02:00
Ettore Di Giacinto
568a01bf5c models(gallery): add gemma-2-ataraxy-v3i-9b (#3785)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-10 19:16:23 +02:00
Ettore Di Giacinto
164abb8c9f models(gallery): add fireball-meta-llama-3.2-8b-instruct-agent-003-128k-code-dpo (#3784)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-10 19:13:47 +02:00
Ettore Di Giacinto
ed2946feac models(gallery): add llama-3.2-3b-agent007-coder (#3783)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-10 19:11:50 +02:00
Ettore Di Giacinto
bdd351b372 models(gallery): add nihappy-l3.1-8b-v0.09 (#3782)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-10 19:09:49 +02:00
Ettore Di Giacinto
ad5e7d376a models(gallery): add llama-3.2-3b-agent007 (#3781)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-10 19:06:58 +02:00
Ettore Di Giacinto
6e78d8cd9d models(gallery): add dans-personalityengine-v1.0.0-8b (#3780)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-10 18:56:01 +02:00
Ettore Di Giacinto
614125f268 models(gallery): add qwen2.5-7b-ins-v3 (#3779)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-10 15:05:09 +02:00
Ettore Di Giacinto
f41965bfb5 models(gallery): add rombos-llm-v2.5.1-qwen-3b (#3778)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-10 10:47:41 +02:00
Josh Bennett
85a3cc8d8f feat(transformers): Use downloaded model for Transformers backend if it already exists. (#3777)
* signing commit

Signed-off-by: Josh Bennett <562773+joshbtn@users.noreply.github.com>

* Update transformers backend to check for existing model directory

Signed-off-by: Josh Bennett <562773+joshbtn@users.noreply.github.com>

---------

Signed-off-by: Josh Bennett <562773+joshbtn@users.noreply.github.com>
2024-10-10 08:42:59 +00:00
LocalAI [bot]
ea8675d473 chore: ⬆️ Update ggerganov/llama.cpp to c81f3bbb051f8b736e117dfc78c99d7c4e0450f6 (#3775)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-09 21:40:46 +00:00
Ettore Di Giacinto
08a54c1812 models(gallery): add llama-3.1-swallow-70b-v0.1-i1 (#3774)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-09 17:16:17 +02:00
Ettore Di Giacinto
8c7439b96e models(gallery): add llama3.2-3b-esper2 (#3773)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-09 17:08:13 +02:00
Ettore Di Giacinto
a9e42a76fa models(gallery): add llama3.2-3b-enigma (#3772)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-09 17:05:50 +02:00
Ettore Di Giacinto
1a3b3d3e67 models(gallery): add versatillama-llama-3.2-3b-instruct-abliterated (#3771)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-09 16:58:34 +02:00
LocalAI [bot]
759d35e6b5 chore: ⬆️ Update ggerganov/whisper.cpp to fdbfb460ed546452a5d53611bba66d10d842e719 (#3768)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-09 09:42:44 +02:00
LocalAI [bot]
825e85bcc5 chore: ⬆️ Update ggerganov/llama.cpp to dca1d4b58a7f1acf1bd253be84e50d6367f492fd (#3769)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-08 21:41:05 +00:00
Ettore Di Giacinto
62165d556c models(gallery): add archfunctions template
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-08 18:52:21 +02:00
Ettore Di Giacinto
78459889d8 models(gallery): add archfunctions models (#3767)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-08 18:51:30 +02:00
Ettore Di Giacinto
0fdc6a92f6 models(gallery): add moe-girl-1ba-7bt-i1 (#3766)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-08 18:38:27 +02:00
LocalAI [bot]
8586a0167a chore: ⬆️ Update ggerganov/whisper.cpp to ebca09a3d1033417b0c630bbbe607b0f185b1488 (#3764)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-08 09:35:18 +02:00
LocalAI [bot]
f1d16a45c5 chore: ⬆️ Update ggerganov/llama.cpp to 6374743747b14db4eb73ce82ae449a2978bc3b47 (#3763)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-08 09:35:01 +02:00
dependabot[bot]
2023627d7f chore(deps): Bump appleboy/ssh-action from 1.0.3 to 1.1.0 (#3762)
Bumps [appleboy/ssh-action](https://github.com/appleboy/ssh-action) from 1.0.3 to 1.1.0.
- [Release notes](https://github.com/appleboy/ssh-action/releases)
- [Changelog](https://github.com/appleboy/ssh-action/blob/master/.goreleaser.yaml)
- [Commits](https://github.com/appleboy/ssh-action/compare/v1.0.3...v1.1.0)

---
updated-dependencies:
- dependency-name: appleboy/ssh-action
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-08 09:34:43 +02:00
dependabot[bot]
d5e1958a1f chore(deps): Bump nginx from 1.27.0 to 1.27.2 in /examples/k8sgpt (#3761)
Bumps nginx from 1.27.0 to 1.27.2.

---
updated-dependencies:
- dependency-name: nginx
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-08 09:34:23 +02:00
dependabot[bot]
f9c58a01d3 chore(deps): Bump llama-index from 0.11.14 to 0.11.16 in /examples/langchain-chroma (#3760)
chore(deps): Bump llama-index in /examples/langchain-chroma

Bumps [llama-index](https://github.com/run-llama/llama_index) from 0.11.14 to 0.11.16.
- [Release notes](https://github.com/run-llama/llama_index/releases)
- [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md)
- [Commits](https://github.com/run-llama/llama_index/compare/v0.11.14...v0.11.16)

---
updated-dependencies:
- dependency-name: llama-index
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-08 09:34:05 +02:00
dependabot[bot]
4500650000 chore(deps): Bump openai from 1.50.2 to 1.51.1 in /examples/langchain-chroma (#3758)
chore(deps): Bump openai in /examples/langchain-chroma

Bumps [openai](https://github.com/openai/openai-python) from 1.50.2 to 1.51.1.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.50.2...v1.51.1)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-08 09:33:25 +02:00
dependabot[bot]
5674e671d0 chore(deps): Bump langchain from 0.3.1 to 0.3.2 in /examples/langchain/langchainpy-localai-example (#3752)
chore(deps): Bump langchain

Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.3.1 to 0.3.2.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.3.1...langchain==0.3.2)

---
updated-dependencies:
- dependency-name: langchain
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-08 09:33:13 +02:00
dependabot[bot]
0f44c3f69c chore(deps): Bump debugpy from 1.8.2 to 1.8.6 in /examples/langchain/langchainpy-localai-example (#3751)
chore(deps): Bump debugpy

Bumps [debugpy](https://github.com/microsoft/debugpy) from 1.8.2 to 1.8.6.
- [Release notes](https://github.com/microsoft/debugpy/releases)
- [Commits](https://github.com/microsoft/debugpy/compare/v1.8.2...v1.8.6)

---
updated-dependencies:
- dependency-name: debugpy
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-08 09:32:59 +02:00
dependabot[bot]
f9069daf03 chore(deps): Bump streamlit from 1.38.0 to 1.39.0 in /examples/streamlit-bot (#3757)
chore(deps): Bump streamlit in /examples/streamlit-bot

Bumps [streamlit](https://github.com/streamlit/streamlit) from 1.38.0 to 1.39.0.
- [Release notes](https://github.com/streamlit/streamlit/releases)
- [Commits](https://github.com/streamlit/streamlit/compare/1.38.0...1.39.0)

---
updated-dependencies:
- dependency-name: streamlit
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-08 09:32:40 +02:00
dependabot[bot]
5f58841a3a chore(deps): Bump llama-index from 0.11.14 to 0.11.16 in /examples/chainlit (#3753)
chore(deps): Bump llama-index in /examples/chainlit

Bumps [llama-index](https://github.com/run-llama/llama_index) from 0.11.14 to 0.11.16.
- [Release notes](https://github.com/run-llama/llama_index/releases)
- [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md)
- [Commits](https://github.com/run-llama/llama_index/compare/v0.11.14...v0.11.16)

---
updated-dependencies:
- dependency-name: llama-index
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-08 09:32:30 +02:00
dependabot[bot]
287200e687 chore(deps): Bump aiohttp from 3.10.8 to 3.10.9 in /examples/langchain/langchainpy-localai-example (#3750)
chore(deps): Bump aiohttp

Bumps [aiohttp](https://github.com/aio-libs/aiohttp) from 3.10.8 to 3.10.9.
- [Release notes](https://github.com/aio-libs/aiohttp/releases)
- [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst)
- [Commits](https://github.com/aio-libs/aiohttp/compare/v3.10.8...v3.10.9)

---
updated-dependencies:
- dependency-name: aiohttp
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-08 09:32:18 +02:00
dependabot[bot]
b653883c0a chore(deps): Bump multidict from 6.0.5 to 6.1.0 in /examples/langchain/langchainpy-localai-example (#3749)
chore(deps): Bump multidict

Bumps [multidict](https://github.com/aio-libs/multidict) from 6.0.5 to 6.1.0.
- [Release notes](https://github.com/aio-libs/multidict/releases)
- [Changelog](https://github.com/aio-libs/multidict/blob/master/CHANGES.rst)
- [Commits](https://github.com/aio-libs/multidict/compare/v6.0.5...v6.1.0)

---
updated-dependencies:
- dependency-name: multidict
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-08 09:32:08 +02:00
dependabot[bot]
6b8a402353 chore(deps): Bump openai from 1.45.1 to 1.51.1 in /examples/langchain/langchainpy-localai-example (#3748)
chore(deps): Bump openai

Bumps [openai](https://github.com/openai/openai-python) from 1.45.1 to 1.51.1.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.45.1...v1.51.1)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-08 09:24:56 +02:00
Ettore Di Giacinto
d9b63fae7c chore(tests): improve rwkv tests and consume TEST_FLAKES (#3765)
chore(tests): improve rwkv tests and consume TEST_FLAKES

Consistently use TEST_FLAKES and reduce the flakiness of the rwkv tests by
making the matching case-insensitive.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-08 09:24:19 +02:00
dependabot[bot]
377cdcabbf chore(deps): Bump openai from 1.50.2 to 1.51.1 in /examples/functions (#3754)
Bumps [openai](https://github.com/openai/openai-python) from 1.50.2 to 1.51.1.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.50.2...v1.51.1)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-08 00:05:53 +00:00
dependabot[bot]
92a7f40141 chore(deps): Bump langchain from 0.3.1 to 0.3.2 in /examples/functions (#3755)
Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.3.1 to 0.3.2.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.3.1...langchain==0.3.2)

---
updated-dependencies:
- dependency-name: langchain
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-07 21:04:18 +00:00
Ettore Di Giacinto
e06daf437a chore(Dockerfile): default to cmake from package manager (#3746)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-07 16:42:56 +02:00
Ettore Di Giacinto
d19bea4af2 chore(vllm): do not install from source (#3745)
chore(vllm): do not install from source by default

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-07 12:27:37 +02:00
Ettore Di Giacinto
fbca9f82fd fix(vllm): bump cmake - vllm requires it (#3744)
* fix(vllm): bump cmake - vllm requires it

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(tests): try to increase coqui timeout

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-07 11:22:55 +02:00
Ettore Di Giacinto
04f284d202 models(gallery): add gemma-2-9b-it-abliterated (#3743)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-07 09:56:33 +02:00
Ettore Di Giacinto
cfd6112256 models(gallery): add violet_twilight-v0.2-iq-imatrix (#3742)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-07 09:52:23 +02:00
Ettore Di Giacinto
debc0974a6 models(gallery): add t.e-8.1-iq-imatrix-request (#3741)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-07 09:51:57 +02:00
Ettore Di Giacinto
03bbbea039 models(gallery): add mn-backyardai-party-12b-v1-iq-arm-imatrix (#3740)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-07 09:46:44 +02:00
LocalAI [bot]
55af0b1c68 chore: ⬆️ Update ggerganov/whisper.cpp to 9f346d00840bcd7af62794871109841af40cecfb (#3739)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-07 09:33:28 +02:00
LocalAI [bot]
c8bfb72104 chore: ⬆️ Update ggerganov/llama.cpp to d5cb86844f26f600c48bf3643738ea68138f961d (#3738)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-06 21:40:25 +00:00
LocalAI [bot]
1b8a663001 chore: ⬆️ Update ggerganov/llama.cpp to 8c475b97b8ba7d678d4c9904b1161bd8811a9b44 (#3736)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-06 10:10:13 +02:00
LocalAI [bot]
a9abfa2b61 chore: ⬆️ Update ggerganov/whisper.cpp to 6a94163b913d8e974e60d9ac56c8930d19f45773 (#3735)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-06 10:09:57 +02:00
Ettore Di Giacinto
092bb0bd6b fix(base-grpc): close channel in base grpc server (#3734)
If the LLM does not implement any logic for PredictStream, we close the
channel immediately to not leave the process hanging.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-05 15:14:27 +02:00
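
A minimal sketch of the pattern this fix describes (hypothetical names, not the actual base gRPC server): when a backend has no PredictStream implementation, the results channel is closed immediately so the consumer's range loop terminates instead of hanging.

```go
package main

import (
	"errors"
	"fmt"
)

// predictStream is a hypothetical stand-in for a backend's streaming call.
// A base implementation with no streaming support returns an error and,
// crucially, closes the results channel so the consumer does not block.
func predictStream(results chan string) error {
	close(results) // nothing will ever be sent: close immediately
	return errors.New("PredictStream not implemented by this backend")
}

func main() {
	results := make(chan string)
	go func() {
		if err := predictStream(results); err != nil {
			fmt.Println("backend:", err)
		}
	}()
	// Without the close above, this range would hang forever.
	for token := range results {
		fmt.Print(token)
	}
	fmt.Println("stream finished")
}
```
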
Ettore Di Giacinto
e28e80857b feat(shutdown): allow force shutdown of backends (#3733)
We default to a soft kill; however, we might want to force-kill backends
after a while to avoid hanging requests (which may hallucinate
indefinitely). (A sketch of this pattern follows this entry.)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-05 10:41:35 +02:00
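
A hedged sketch of the soft-kill-then-force-kill pattern this feature describes (process handling simplified, names hypothetical):

```go
package main

import (
	"fmt"
	"os"
	"os/exec"
	"syscall"
	"time"
)

// stopBackend asks a backend process to exit gracefully (SIGTERM) and,
// if it is still around after the grace period, force-kills it (SIGKILL).
func stopBackend(p *os.Process, grace time.Duration) error {
	if err := p.Signal(syscall.SIGTERM); err != nil {
		return err
	}

	done := make(chan error, 1)
	go func() { _, err := p.Wait(); done <- err }()

	select {
	case err := <-done:
		return err // exited on its own within the grace period
	case <-time.After(grace):
		fmt.Println("backend did not exit in time, force killing")
		return p.Kill()
	}
}

func main() {
	cmd := exec.Command("sleep", "60")
	if err := cmd.Start(); err != nil {
		panic(err)
	}
	fmt.Println(stopBackend(cmd.Process, 2*time.Second))
}
```
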
LocalAI [bot]
905473c739 chore: ⬆️ Update ggerganov/whisper.cpp to 2944cb72d95282378037cb0eb45c9e2b2529ff2c (#3730)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-05 00:09:24 +02:00
LocalAI [bot]
aa0564a1c6 chore: ⬆️ Update ggerganov/llama.cpp to 71967c2a6d30da9f61580d3e2d4cb00e0223b6fa (#3731)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-05 00:09:02 +02:00
Ettore Di Giacinto
2553de0187 feat(vllm): add support for image-to-text and video-to-text (#3729)
* feat(vllm): add support for image-to-text

Related to https://github.com/mudler/LocalAI/issues/3670

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(vllm): add support for video-to-text

Closes: https://github.com/mudler/LocalAI/issues/2318

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(vllm): support CPU installations

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(vllm): add bnb

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore: add docs reference

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Apply suggestions from code review

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-10-04 23:42:05 +02:00
Ettore Di Giacinto
408dfe62ee Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-10-04 19:52:43 +02:00
Ettore Di Giacinto
648ffdf449 feat(multimodal): allow to template placeholders (#3728)
feat(multimodal): allow to template image placeholders

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-04 18:32:29 +02:00
LocalAI [bot]
04c0841ca9 chore: ⬆️ Update ggerganov/whisper.cpp to ccc2547210e09e3a1785817383ab770389bb442b (#3724)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-04 09:10:07 +02:00
LocalAI [bot]
43144c4743 chore: ⬆️ Update ggerganov/llama.cpp to d5ed2b929d85bbd7dbeecb690880f07d9d7a6077 (#3725)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-04 09:09:41 +02:00
Ettore Di Giacinto
a778668bcd models(gallery): add salamandra-7b-instruct (#3726)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-03 23:17:32 +02:00
JJ Asghar
4b131a7090 Update CONTRIBUTING.md (#3723)
Updated some formatting in the doc.

Signed-off-by: JJ Asghar <awesome@ibm.com>
2024-10-03 20:03:35 +02:00
Ettore Di Giacinto
d06a052d54 chore(federated): display a message when nodes are not available (#3721)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-03 18:38:41 +02:00
LocalAI [bot]
b5115903bf chore: ⬆️ Update ggerganov/whisper.cpp to ede1718f6d45aa3f7ad4a1e169dfbc9d51570c4e (#3719)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-03 09:39:49 +02:00
LocalAI [bot]
afaff175d0 chore: ⬆️ Update ggerganov/llama.cpp to a39ab216aa624308fda7fa84439c6b61dc98b87a (#3718)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-03 09:37:05 +02:00
Ettore Di Giacinto
4686877c6d fix(initializer): correctly reap dangling processes (#3717)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-02 20:37:40 +02:00
Ettore Di Giacinto
e5586e8781 chore: get model also from query (#3716)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-02 20:20:50 +02:00
Ettore Di Giacinto
3acd767ac4 chore: simplify model loading (#3715)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-02 08:59:06 +02:00
Shraddha
5488fc3bc1 feat: tokenization endpoint (#3710)
endpoint to access the tokenizer

Signed-off-by: shraddhazpy <shraddha@shraddhafive.in>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: Dave <dave@gray101.com>
2024-10-02 08:56:18 +02:00
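
As a usage illustration only: a hedged Go snippet calling the new tokenization endpoint, assuming the route is /v1/tokenize and a JSON body with model and content fields; check the project's swagger documentation for the exact schema.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Assumed request shape; verify the exact route and field names against
	// the LocalAI swagger documentation.
	body, _ := json.Marshal(map[string]string{
		"model":   "my-model",       // a model name configured in LocalAI
		"content": "Hello, tokens!", // text to tokenize
	})

	resp, err := http.Post("http://localhost:8080/v1/tokenize", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var out map[string]any
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out) // expected to contain the token IDs
}
```
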
Ettore Di Giacinto
0965c6cd68 feat: track internally started models by ID (#3693)
* chore(refactor): track internally started models by ID

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Just extend options, no need to copy

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Improve debugging for rerankers failures

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Simplify model loading with rerankers

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Be more consistent when generating model options

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Uncommitted code

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Make deleteProcess more idiomatic

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Adapt CLI for sound generation

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixup threads definition

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Handle corner case where c.Seed is nil

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Consistently use ModelOptions

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Adapt new code to refactoring

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Dave <dave@gray101.com>
2024-10-02 08:55:58 +02:00
LocalAI [bot]
db704199dc chore: ⬆️ Update ggerganov/llama.cpp to 3f1ae2e32cde00c39b96be6d01c2997c29bae555 (#3713)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-02 00:45:45 +02:00
LocalAI [bot]
2cc3b7128e chore: ⬆️ Update ggerganov/whisper.cpp to 2ef717b293fe93872cc3a03ca77942936a281959 (#3712)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-02 00:45:33 +02:00
Ettore Di Giacinto
88b99d30bb Update README.md
update hot topics
2024-10-01 22:47:51 +02:00
Dave
307a835199 groundwork: ListModels Filtering Upgrade (#2773)
* separate the filtering from the middleware changes

---------

Signed-off-by: Dave Lee <dave@gray101.com>
2024-10-01 18:55:46 +00:00
siddimore
f84b55d1ef feat: Add Get Token Metrics to GRPC server (#3687)
* Add Get Token Metrics to GRPC server

Signed-off-by: Siddharth More <siddimore@gmail.com>

* Expose LocalAI endpoint

Signed-off-by: Siddharth More <siddimore@gmail.com>

---------

Signed-off-by: Siddharth More <siddimore@gmail.com>
2024-10-01 14:41:20 +02:00
dependabot[bot]
139209353f chore(deps): Bump llama-index from 0.11.12 to 0.11.14 in /examples/chainlit (#3707)
chore(deps): Bump llama-index in /examples/chainlit

Bumps [llama-index](https://github.com/run-llama/llama_index) from 0.11.12 to 0.11.14.
- [Release notes](https://github.com/run-llama/llama_index/releases)
- [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md)
- [Commits](https://github.com/run-llama/llama_index/compare/v0.11.12...v0.11.14)

---
updated-dependencies:
- dependency-name: llama-index
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-01 10:41:30 +02:00
dependabot[bot]
a30058b80f chore(deps): Bump yarl from 1.11.1 to 1.13.1 in /examples/langchain/langchainpy-localai-example (#3706)
chore(deps): Bump yarl

Bumps [yarl](https://github.com/aio-libs/yarl) from 1.11.1 to 1.13.1.
- [Release notes](https://github.com/aio-libs/yarl/releases)
- [Changelog](https://github.com/aio-libs/yarl/blob/master/CHANGES.rst)
- [Commits](https://github.com/aio-libs/yarl/compare/v1.11.1...v1.13.1)

---
updated-dependencies:
- dependency-name: yarl
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-01 10:41:16 +02:00
dependabot[bot]
53f406dc35 chore(deps): Bump aiohttp from 3.10.3 to 3.10.8 in /examples/langchain/langchainpy-localai-example (#3705)
chore(deps): Bump aiohttp

Bumps [aiohttp](https://github.com/aio-libs/aiohttp) from 3.10.3 to 3.10.8.
- [Release notes](https://github.com/aio-libs/aiohttp/releases)
- [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst)
- [Commits](https://github.com/aio-libs/aiohttp/compare/v3.10.3...v3.10.8)

---
updated-dependencies:
- dependency-name: aiohttp
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-01 10:41:04 +02:00
dependabot[bot]
2649407f44 chore(deps): Bump llama-index from 0.11.12 to 0.11.14 in /examples/langchain-chroma (#3695)
chore(deps): Bump llama-index in /examples/langchain-chroma

Bumps [llama-index](https://github.com/run-llama/llama_index) from 0.11.12 to 0.11.14.
- [Release notes](https://github.com/run-llama/llama_index/releases)
- [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md)
- [Commits](https://github.com/run-llama/llama_index/compare/v0.11.12...v0.11.14)

---
updated-dependencies:
- dependency-name: llama-index
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-01 10:40:49 +02:00
dependabot[bot]
0a8f627cce chore(deps): Bump gradio from 4.38.1 to 4.44.1 in /backend/python/openvoice (#3701)
chore(deps): Bump gradio in /backend/python/openvoice

Bumps [gradio](https://github.com/gradio-app/gradio) from 4.38.1 to 4.44.1.
- [Release notes](https://github.com/gradio-app/gradio/releases)
- [Changelog](https://github.com/gradio-app/gradio/blob/main/CHANGELOG.md)
- [Commits](https://github.com/gradio-app/gradio/compare/gradio@4.38.1...gradio@4.44.1)

---
updated-dependencies:
- dependency-name: gradio
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-01 10:40:36 +02:00
dependabot[bot]
76d4e88e0c chore(deps): Bump langchain-community from 0.2.16 to 0.3.1 in /examples/langchain/langchainpy-localai-example (#3702)
chore(deps): Bump langchain-community

Bumps [langchain-community](https://github.com/langchain-ai/langchain) from 0.2.16 to 0.3.1.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain-community==0.2.16...langchain-community==0.3.1)

---
updated-dependencies:
- dependency-name: langchain-community
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-01 10:40:21 +02:00
dependabot[bot]
d4d2a76f8f chore(deps): Bump langchain from 0.3.0 to 0.3.1 in /examples/functions (#3700)
Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.3.0 to 0.3.1.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.3.0...langchain==0.3.1)

---
updated-dependencies:
- dependency-name: langchain
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-01 10:40:08 +02:00
dependabot[bot]
7d306c6431 chore(deps): Bump greenlet from 3.1.0 to 3.1.1 in /examples/langchain/langchainpy-localai-example (#3703)
chore(deps): Bump greenlet

Bumps [greenlet](https://github.com/python-greenlet/greenlet) from 3.1.0 to 3.1.1.
- [Changelog](https://github.com/python-greenlet/greenlet/blob/master/CHANGES.rst)
- [Commits](https://github.com/python-greenlet/greenlet/compare/3.1.0...3.1.1)

---
updated-dependencies:
- dependency-name: greenlet
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-01 10:39:55 +02:00
dependabot[bot]
44bdacac61 chore(deps): Bump langchain from 0.3.0 to 0.3.1 in /examples/langchain/langchainpy-localai-example (#3704)
chore(deps): Bump langchain

Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.3.0 to 0.3.1.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.3.0...langchain==0.3.1)

---
updated-dependencies:
- dependency-name: langchain
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-01 08:51:29 +02:00
dependabot[bot]
6bd6e2bdeb chore(deps): Bump openai from 1.47.1 to 1.50.2 in /examples/functions (#3699)
Bumps [openai](https://github.com/openai/openai-python) from 1.47.1 to 1.50.2.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.47.1...v1.50.2)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-01 08:51:07 +02:00
dependabot[bot]
2908ff3f6b chore(deps): Bump securego/gosec from 2.21.0 to 2.21.4 (#3698)
Bumps [securego/gosec](https://github.com/securego/gosec) from 2.21.0 to 2.21.4.
- [Release notes](https://github.com/securego/gosec/releases)
- [Changelog](https://github.com/securego/gosec/blob/master/.goreleaser.yml)
- [Commits](https://github.com/securego/gosec/compare/v2.21.0...v2.21.4)

---
updated-dependencies:
- dependency-name: securego/gosec
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-01 08:50:40 +02:00
LocalAI [bot]
f19277b8e2 chore: ⬆️ Update ggerganov/llama.cpp to 6f1d9d71f4c568778a7637ff6582e6f6ba5fb9d3 (#3708)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-10-01 08:47:48 +02:00
dependabot[bot]
32de75c683 chore(deps): Bump langchain from 0.3.0 to 0.3.1 in /examples/langchain-chroma (#3694)
chore(deps): Bump langchain in /examples/langchain-chroma

Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.3.0 to 0.3.1.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.3.0...langchain==0.3.1)

---
updated-dependencies:
- dependency-name: langchain
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-01 03:13:37 +00:00
dependabot[bot]
164a9e972f chore(deps): Bump chromadb from 0.5.7 to 0.5.11 in /examples/langchain-chroma (#3696)
chore(deps): Bump chromadb in /examples/langchain-chroma

Bumps [chromadb](https://github.com/chroma-core/chroma) from 0.5.7 to 0.5.11.
- [Release notes](https://github.com/chroma-core/chroma/releases)
- [Changelog](https://github.com/chroma-core/chroma/blob/main/RELEASE_PROCESS.md)
- [Commits](https://github.com/chroma-core/chroma/compare/0.5.7...0.5.11)

---
updated-dependencies:
- dependency-name: chromadb
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-01 01:37:30 +00:00
dependabot[bot]
d747f2c89b chore(deps): Bump openai from 1.47.1 to 1.50.2 in /examples/langchain-chroma (#3697)
chore(deps): Bump openai in /examples/langchain-chroma

Bumps [openai](https://github.com/openai/openai-python) from 1.47.1 to 1.50.2.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.47.1...v1.50.2)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-30 21:08:16 +00:00
Ettore Di Giacinto
58662db48e models(gallery): add calme-2.2-qwen2.5-72b-i1 (#3691)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-30 17:11:54 +02:00
Ettore Di Giacinto
078942fc9f chore(deps): bump grpcio to 1.66.2 (#3690)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-30 09:09:51 +02:00
LocalAI [bot]
6dfee99575 chore: ⬆️ Update ggerganov/llama.cpp to c919d5db39c8a7fcb64737f008e4b105ee0acd20 (#3686)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-09-30 09:09:18 +02:00
Ettore Di Giacinto
ad62156d54 models(gallery): add replete-llm-v2.5-qwen-7b (#3689)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-29 22:47:26 +02:00
Ettore Di Giacinto
1689740269 models(gallery): add replete-llm-v2.5-qwen-14b (#3688)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-29 20:39:39 +02:00
siddimore
50a3b54e34 feat(api): add correlationID to Track Chat requests (#3668)
* Add CorrelationID to chat request

Signed-off-by: Siddharth More <siddimore@gmail.com>

* remove get_token_metrics

Signed-off-by: Siddharth More <siddimore@gmail.com>

* Add CorrelationID to proto

Signed-off-by: Siddharth More <siddimore@gmail.com>

* fix correlation method name

Signed-off-by: Siddharth More <siddimore@gmail.com>

* Update core/http/endpoints/openai/chat.go

Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Signed-off-by: Siddharth More <siddimore@gmail.com>

* Update core/http/endpoints/openai/chat.go

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Signed-off-by: Siddharth More <siddimore@gmail.com>

---------

Signed-off-by: Siddharth More <siddimore@gmail.com>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-09-28 17:23:56 +02:00
LocalAI [bot]
e94a50e9db chore: ⬆️ Update ggerganov/whisper.cpp to 8feb375fbdf0277ad36958c218c6bf48fa0ba75a (#3680)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-09-28 08:02:19 +00:00
LocalAI [bot]
4e0f3cc980 chore: ⬆️ Update ggerganov/llama.cpp to b5de3b74a595cbfefab7eeb5a567425c6a9690cf (#3681)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-09-27 22:42:59 +00:00
Ettore Di Giacinto
2a8cbad122 models(gallery): add bigqwen2.5-52b-instruct (#3679)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-27 13:03:41 +02:00
Ettore Di Giacinto
453c45d022 models(gallery): add magnusintellectus-12b-v1-i1 (#3678)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-27 12:21:04 +02:00
LocalAI [bot]
4550abbfce chore(model-gallery): ⬆️ update checksum (#3675)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-09-27 08:54:36 +02:00
LocalAI [bot]
f2ba1cfb01 chore: ⬆️ Update ggerganov/llama.cpp to 95bc82fbc0df6d48cf66c857a4dda3d044f45ca2 (#3674)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-09-26 21:41:45 +00:00
LocalAI [bot]
8c4196faf3 chore: ⬆️ Update ggerganov/whisper.cpp to 69339af2d104802f3f201fd419163defba52890e (#3666)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-09-26 15:58:17 +02:00
LocalAI [bot]
b0f4556c0f chore: ⬆️ Update ggerganov/llama.cpp to ea9c32be71b91b42ecc538bd902e93cbb5fb36cb (#3667)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-09-26 12:52:26 +00:00
Ettore Di Giacinto
fa5c98549a chore(refactor): track grpcProcess in the model structure (#3663)
* chore(refactor): track grpcProcess in the model structure

This avoids having to handle the data relative to the same model in two
places, and makes it easier to track the model and guard it with a mutex.

This also fixes race conditions while accessing the model. (A minimal sketch
of the idea follows this entry.)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(tests): run protogen-go before starting aio tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(tests): install protoc in aio tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-26 12:44:55 +02:00
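
A hedged sketch of the idea behind this refactor (hypothetical types, not the actual model loader): the backend process handle lives on the model itself and a mutex guards access, which removes the second bookkeeping map and the races described above.

```go
package main

import (
	"fmt"
	"os"
	"sync"
)

// Model is a hypothetical sketch: the gRPC backend process is tracked on the
// model itself, and a mutex guards all access to it.
type Model struct {
	mu      sync.Mutex
	id      string
	process *os.Process
}

func (m *Model) SetProcess(p *os.Process) {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.process = p
}

func (m *Model) Process() *os.Process {
	m.mu.Lock()
	defer m.mu.Unlock()
	return m.process
}

func main() {
	m := &Model{id: "llama-3.2-3b"}
	var wg sync.WaitGroup
	// Concurrent readers and writers no longer race on a separate process map.
	wg.Add(2)
	go func() { defer wg.Done(); m.SetProcess(nil) }()
	go func() { defer wg.Done(); _ = m.Process() }()
	wg.Wait()
	fmt.Println("model", m.id, "tracked with its own process handle")
}
```
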
Ettore Di Giacinto
3d12d2037c models(gallery): add llama-3.2 3B and 1B (#3671)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-26 11:19:26 +02:00
LocalAI [bot]
d6522e69ca feat(swagger): update swagger (#3664)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-09-26 10:57:40 +02:00
LocalAI [bot]
ef1507d000 docs: ⬆️ update docs version mudler/LocalAI (#3665)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-09-26 10:50:20 +02:00
Ettore Di Giacinto
a3d69872e3 feat(api): list loaded models in /system (#3661)
feat(api): list loaded models in /system

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-25 18:00:23 +02:00
dependabot[bot]
33b2d38dd0 chore(deps): Bump chromadb from 0.5.5 to 0.5.7 in /examples/langchain-chroma (#3640)
chore(deps): Bump chromadb in /examples/langchain-chroma

Bumps [chromadb](https://github.com/chroma-core/chroma) from 0.5.5 to 0.5.7.
- [Release notes](https://github.com/chroma-core/chroma/releases)
- [Changelog](https://github.com/chroma-core/chroma/blob/main/RELEASE_PROCESS.md)
- [Commits](https://github.com/chroma-core/chroma/compare/0.5.5...0.5.7)

---
updated-dependencies:
- dependency-name: chromadb
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-25 12:44:32 +02:00
LocalAI [bot]
74408bdc77 chore: ⬆️ Update ggerganov/whisper.cpp to 0d2e2aed80109e8696791083bde3b58e190b7812 (#3658)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Dave <dave@gray101.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-09-25 08:54:37 +00:00
dependabot[bot]
8c4f720fb5 chore(deps): Bump llama-index from 0.11.9 to 0.11.12 in /examples/chainlit (#3642)
chore(deps): Bump llama-index in /examples/chainlit

Bumps [llama-index](https://github.com/run-llama/llama_index) from 0.11.9 to 0.11.12.
- [Release notes](https://github.com/run-llama/llama_index/releases)
- [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md)
- [Commits](https://github.com/run-llama/llama_index/compare/v0.11.9...v0.11.12)

---
updated-dependencies:
- dependency-name: llama-index
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-25 08:48:13 +02:00
dependabot[bot]
8002ad27cb chore(deps): Bump openai from 1.45.1 to 1.47.1 in /examples/langchain-chroma (#3641)
chore(deps): Bump openai in /examples/langchain-chroma

Bumps [openai](https://github.com/openai/openai-python) from 1.45.1 to 1.47.1.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.45.1...v1.47.1)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-25 08:47:57 +02:00
dependabot[bot]
1b8a77433a chore(deps): Bump llama-index from 0.11.7 to 0.11.12 in /examples/langchain-chroma (#3639)
chore(deps): Bump llama-index in /examples/langchain-chroma

Bumps [llama-index](https://github.com/run-llama/llama_index) from 0.11.7 to 0.11.12.
- [Release notes](https://github.com/run-llama/llama_index/releases)
- [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md)
- [Commits](https://github.com/run-llama/llama_index/compare/v0.11.7...v0.11.12)

---
updated-dependencies:
- dependency-name: llama-index
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-25 08:47:33 +02:00
LocalAI [bot]
a370a11115 docs: ⬆️ update docs version mudler/LocalAI (#3657)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-09-25 08:47:03 +02:00
LocalAI [bot]
aa87eff283 chore: ⬆️ Update ggerganov/llama.cpp to 70392f1f81470607ba3afef04aa56c9f65587664 (#3659)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Dave <dave@gray101.com>
2024-09-25 04:51:20 +00:00
dependabot[bot]
0d784f46e5 chore(deps): Bump openai from 1.45.1 to 1.47.1 in /examples/functions (#3645)
Bumps [openai](https://github.com/openai/openai-python) from 1.45.1 to 1.47.1.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.45.1...v1.47.1)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-25 01:15:53 +00:00
dependabot[bot]
c54cfd3609 chore(deps): Bump pydantic from 2.8.2 to 2.9.2 in /examples/langchain/langchainpy-localai-example (#3648)
chore(deps): Bump pydantic

Bumps [pydantic](https://github.com/pydantic/pydantic) from 2.8.2 to 2.9.2.
- [Release notes](https://github.com/pydantic/pydantic/releases)
- [Changelog](https://github.com/pydantic/pydantic/blob/main/HISTORY.md)
- [Commits](https://github.com/pydantic/pydantic/compare/v2.8.2...v2.9.2)

---
updated-dependencies:
- dependency-name: pydantic
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-24 22:59:11 +00:00
dependabot[bot]
6555994060 chore(deps): Bump sentence-transformers from 3.1.0 to 3.1.1 in /backend/python/sentencetransformers (#3651)
chore(deps): Bump sentence-transformers

Bumps [sentence-transformers](https://github.com/UKPLab/sentence-transformers) from 3.1.0 to 3.1.1.
- [Release notes](https://github.com/UKPLab/sentence-transformers/releases)
- [Commits](https://github.com/UKPLab/sentence-transformers/compare/v3.1.0...v3.1.1)

---
updated-dependencies:
- dependency-name: sentence-transformers
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-24 21:22:08 +00:00
Ettore Di Giacinto
0893d3cbbe fix(health): do not require auth for /healthz and /readyz (#3656)
* fix(health): do not require auth for /healthz and /readyz

Fixes: #3655

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Comment so I don’t forget

Adding a reminder here...

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Dave <dave@gray101.com>
2024-09-24 18:25:59 +00:00
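
A minimal Fiber middleware sketch of the exemption described above (a simplified API-key check, not the actual LocalAI auth code): /healthz and /readyz bypass authentication so liveness and readiness probes work without credentials.

```go
package main

import (
	"log"

	"github.com/gofiber/fiber/v2"
)

func main() {
	app := fiber.New()

	// Hypothetical API-key middleware: health probes are exempted so that
	// Kubernetes liveness/readiness checks work without credentials.
	app.Use(func(c *fiber.Ctx) error {
		if c.Path() == "/healthz" || c.Path() == "/readyz" {
			return c.Next()
		}
		if c.Get("Authorization") != "Bearer my-secret-key" {
			return c.SendStatus(fiber.StatusUnauthorized)
		}
		return c.Next()
	})

	app.Get("/healthz", func(c *fiber.Ctx) error { return c.SendString("ok") })
	app.Get("/readyz", func(c *fiber.Ctx) error { return c.SendString("ok") })

	log.Fatal(app.Listen(":8080"))
}
```
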
Dave
90cacb9692 test: preliminary tests and merge fix for authv2 (#3584)
* add api key to existing app tests, add preliminary auth test

Signed-off-by: Dave Lee <dave@gray101.com>

* small fix, run test

Signed-off-by: Dave Lee <dave@gray101.com>

* status on non-opaque

Signed-off-by: Dave Lee <dave@gray101.com>

* tweak auth error

Signed-off-by: Dave Lee <dave@gray101.com>

* exp

Signed-off-by: Dave Lee <dave@gray101.com>

* quick fix on real laptop

Signed-off-by: Dave Lee <dave@gray101.com>

* add downloader version that allows providing an auth header

Signed-off-by: Dave Lee <dave@gray101.com>

* stash some devcontainer fixes during testing

Signed-off-by: Dave Lee <dave@gray101.com>

* s2

Signed-off-by: Dave Lee <dave@gray101.com>

* s

Signed-off-by: Dave Lee <dave@gray101.com>

* done with experiment

Signed-off-by: Dave Lee <dave@gray101.com>

* done with experiment

Signed-off-by: Dave Lee <dave@gray101.com>

* after merge fix

Signed-off-by: Dave Lee <dave@gray101.com>

* rename and fix

Signed-off-by: Dave Lee <dave@gray101.com>

---------

Signed-off-by: Dave Lee <dave@gray101.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-09-24 09:32:48 +02:00
LocalAI [bot]
69d2902b0a chore: ⬆️ Update ggerganov/llama.cpp to f0c7b5edf82aa200656fd88c11ae3a805d7130bf (#3653)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-09-24 09:31:28 +02:00
dependabot[bot]
c1752cbb83 chore(deps): Bump sqlalchemy from 2.0.32 to 2.0.35 in /examples/langchain/langchainpy-localai-example (#3649)
chore(deps): Bump sqlalchemy

Bumps [sqlalchemy](https://github.com/sqlalchemy/sqlalchemy) from 2.0.32 to 2.0.35.
- [Release notes](https://github.com/sqlalchemy/sqlalchemy/releases)
- [Changelog](https://github.com/sqlalchemy/sqlalchemy/blob/main/CHANGES.rst)
- [Commits](https://github.com/sqlalchemy/sqlalchemy/commits)

---
updated-dependencies:
- dependency-name: sqlalchemy
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-24 04:30:05 +00:00
dependabot[bot]
b8e129f2a6 chore(deps): Bump idna from 3.8 to 3.10 in /examples/langchain/langchainpy-localai-example (#3644)
chore(deps): Bump idna

Bumps [idna](https://github.com/kjd/idna) from 3.8 to 3.10.
- [Release notes](https://github.com/kjd/idna/releases)
- [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst)
- [Commits](https://github.com/kjd/idna/compare/v3.8...v3.10)

---
updated-dependencies:
- dependency-name: idna
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-24 02:53:35 +00:00
dependabot[bot]
cc6fac1688 chore(deps): Bump urllib3 from 2.2.2 to 2.2.3 in /examples/langchain/langchainpy-localai-example (#3646)
chore(deps): Bump urllib3

Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.2.2 to 2.2.3.
- [Release notes](https://github.com/urllib3/urllib3/releases)
- [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst)
- [Commits](https://github.com/urllib3/urllib3/compare/2.2.2...2.2.3)

---
updated-dependencies:
- dependency-name: urllib3
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-24 01:16:39 +00:00
dependabot[bot]
043cb94436 chore(deps): Bump yarl from 1.11.0 to 1.11.1 in /examples/langchain/langchainpy-localai-example (#3643)
chore(deps): Bump yarl

Bumps [yarl](https://github.com/aio-libs/yarl) from 1.11.0 to 1.11.1.
- [Release notes](https://github.com/aio-libs/yarl/releases)
- [Changelog](https://github.com/aio-libs/yarl/blob/master/CHANGES.rst)
- [Commits](https://github.com/aio-libs/yarl/compare/v1.11.0...v1.11.1)

---
updated-dependencies:
- dependency-name: yarl
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-23 21:23:21 +00:00
Ettore Di Giacinto
bbdf78615e models(gallery): add acolyte-22b-i1 (#3636)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-23 19:24:14 +02:00
Ettore Di Giacinto
e332ff8066 models(gallery): add gemma-2-2b-arliai-rpmax-v1.1 (#3635)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-23 19:16:41 +02:00
Ettore Di Giacinto
26d99ed1c7 models(gallery): add gemma-2-9b-arliai-rpmax-v1.1 (#3634)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-23 19:12:54 +02:00
Ettore Di Giacinto
1da8d8b9db models(gallery): add nightygurps-14b-v1.1 (#3633)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-23 19:09:51 +02:00
Ettore Di Giacinto
bf8f8671d1 chore(ci): adjust parallelism
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-09-23 19:04:36 +02:00
Ettore Di Giacinto
51cba89682 fix(hipblas): do not push all variants to hipblas builds (#3630)
As with CUDA builds, we don't need all the variants when we are compiling
against the accelerated variants; this way we save space and avoid exceeding
Golang's embedFS size limits. (See the embed sketch after this entry.)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-23 11:49:07 +02:00
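
For context on the embedFS limit mentioned above: LocalAI ships backend binaries inside the Go executable via embed, roughly along these lines (a minimal sketch that assumes a local backend-assets/ directory exists at build time; not the actual build wiring), so every extra variant grows the final binary.

```go
package main

import (
	"embed"
	"fmt"
)

// Embedding the backend assets compiles them into the binary itself, so
// every extra llama-cpp variant directly grows the executable (and large
// embeds can hit practical size limits). Assumes a backend-assets/ directory
// exists next to this file.
//
//go:embed backend-assets/*
var backendAssets embed.FS

func main() {
	entries, err := backendAssets.ReadDir("backend-assets")
	if err != nil {
		panic(err)
	}
	for _, e := range entries {
		fmt.Println("embedded asset:", e.Name())
	}
}
```
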
Ettore Di Giacinto
3e8e71f8b6 fix(ci): fixup checksum scanning pipeline (#3631)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-23 10:56:10 +02:00
LocalAI [bot]
4edd8c80b4 chore: ⬆️ Update ggerganov/llama.cpp to c35e586ea57221844442c65a1172498c54971cb0 (#3629)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-09-22 21:41:34 +00:00
Ettore Di Giacinto
fd70a22196 chore(ci): adjust parallel jobs
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-09-22 15:21:16 +02:00
Ettore Di Giacinto
56f4deb938 chore(ci): split hipblas jobs
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-09-22 15:19:38 +02:00
Ettore Di Giacinto
9bd7f3f995 feat(coqui): switch to maintained community fork (#3625)
Fixes: https://github.com/mudler/LocalAI/issues/2513

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-22 10:04:20 +02:00
Sertaç Özercan
ee21b00a8d feat: auto load into memory on startup (#3627)
Signed-off-by: Sertac Ozercan <sozercan@gmail.com>
2024-09-22 10:03:30 +02:00
LocalAI [bot]
1f43678d53 chore: ⬆️ Update ggerganov/llama.cpp to d09770cae71b416c032ec143dda530f7413c4038 (#3626)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-09-21 22:03:23 +00:00
Ettore Di Giacinto
20c0e128c0 fix(sycl): downgrade pypinyin
melotts requires pypinyin 0.50

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-09-21 21:52:12 +02:00
Ettore Di Giacinto
5c3d1d81e6 fix(parler-tts): fix install with sycl (#3624)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-21 16:04:04 +02:00
lnyxaris
c22b3187a7 Fix NeuralDaredevil URL (#3621)
Signed-off-by: lnyxaris <nyxaris@anomalous.news>
2024-09-21 10:10:27 +02:00
LocalAI [bot]
54f2657870 chore: ⬆️ Update ggerganov/llama.cpp to 63351143b2ea5efe9f8b9c61f553af8a51f1deff (#3622)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-09-21 10:09:41 +02:00
LocalAI [bot]
cef7f8a014 chore: ⬆️ Update ggerganov/whisper.cpp to 34972dbe221709323714fc8402f2e24041d48213 (#3623)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-09-20 21:41:13 +00:00
Ettore Di Giacinto
bf8e50a11d chore(docs): add Vulkan images links (#3620)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-20 18:16:01 +02:00
Ettore Di Giacinto
6c6cd8bbe0 models(gallery): add llama-3.1-8b-arliai-rpmax-v1.1 (#3619)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-20 18:15:51 +02:00
Ettore Di Giacinto
00d6c2a966 models(gallery): add llama3.1-reflective config
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-20 17:35:06 +02:00
Ettore Di Giacinto
415cf31aa3 models(gallery): add buddy2 (#3618)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-20 17:33:29 +02:00
Ettore Di Giacinto
f55053bfba models(gallery): add llama3.1-8b-shiningvaliant2 (#3617)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-20 17:26:59 +02:00
Ettore Di Giacinto
e24654ada0 models(gallery): add llama-3.1-supernova-lite (#3616)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-20 17:23:30 +02:00
Ettore Di Giacinto
c4cecba07f models(gallery): add llama-3.1-supernova-lite-reflection-v1.0-i1 (#3615)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-20 17:19:53 +02:00
Ettore Di Giacinto
38cad0b8dc models(gallery): add qwen2.5 32B, 72B, 32B Instruct (#3614)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-20 17:10:43 +02:00
Ettore Di Giacinto
052af98dcd models(gallery): add qwen2.5-0.5b-instruct, qwen2.5-1.5b-instruct (#3613)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-20 15:45:23 +02:00
Ettore Di Giacinto
56d8f5163c models(gallery): add qwen2.5-math-72b-instruct (#3612)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-20 15:12:35 +02:00
Ettore Di Giacinto
b6af4f4467 models(gallery): add qwen2.5-coder-7b-instruct (#3611)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-20 15:08:57 +02:00
Ettore Di Giacinto
a5b08f43ff models(gallery): add qwen2.5-14b_uncencored (#3610)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-20 11:22:53 +02:00
Ettore Di Giacinto
c15f506fd5 models(gallery): add qwen2.5-math-7b-instruct (#3609)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-20 11:18:49 +02:00
Ettore Di Giacinto
a2a63460e9 models(gallery): add qwen2.5-14b-instruct (#3607)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-20 10:59:29 +02:00
LocalAI [bot]
2fcea486eb chore: ⬆️ Update ggerganov/llama.cpp to 6026da52d6942b253df835070619775d849d0258 (#3605)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-09-20 10:50:14 +02:00
LocalAI [bot]
5c9d26e39b feat(swagger): update swagger (#3604)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-09-20 10:49:32 +02:00
Ettore Di Giacinto
191bc2e50a feat(api): allow to pass audios to backends (#3603)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-19 12:26:53 +02:00
Ettore Di Giacinto
fbb9facda4 feat(api): allow to pass videos to backends (#3601)
This prepares the API to also receive videos, for video understanding.

It works similarly to images: the request should contain an entry of the form below (a minimal client sketch follows this entry):

{
 "type": "video_url",
 "video_url": { "url": "url or base64 data" }
}

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-19 11:21:59 +02:00
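For illustration, a minimal Go client that posts a chat request carrying a `video_url` entry of the shape above. The endpoint path, model name, and surrounding message layout are assumptions that mirror the OpenAI-style vision messages, not something taken from this commit:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Assumed: a LocalAI instance on localhost:8080 and a model that accepts video input.
	payload := map[string]any{
		"model": "gpt-4o",
		"messages": []map[string]any{
			{
				"role": "user",
				"content": []map[string]any{
					{"type": "text", "text": "Describe what happens in this clip."},
					{
						"type":      "video_url",
						"video_url": map[string]string{"url": "https://example.com/clip.mp4"},
					},
				},
			},
		},
	}

	body, _ := json.Marshal(payload)
	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewReader(body))
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}
```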
LocalAI [bot]
c6a819e92f chore: ⬆️ Update ggerganov/llama.cpp to 64c6af3195c3cd4aa3328a1282d29cd2635c34c9 (#3598)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-09-18 21:41:59 +00:00
Ettore Di Giacinto
a50cde69a2 chore(aio): rename gpt-4-vision-preview to gpt-4o (#3597)
Fixes: #3596

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-18 15:55:46 +02:00
LocalAI [bot]
e5bd74878e chore: ⬆️ Update ggerganov/whisper.cpp to 5b1ce40fa882e9cb8630b48032067a1ed2f1534f (#3592)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-09-18 00:02:02 +02:00
LocalAI [bot]
dc98b2ea44 chore: ⬆️ Update ggerganov/llama.cpp to 8b836ae731bbb2c5640bc47df5b0a78ffcb129cb (#3591)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-09-17 21:51:41 +00:00
Ettore Di Giacinto
acf119828f Revert "chore(deps): Bump securego/gosec from 2.21.0 to 2.21.2" (#3590)
Revert "chore(deps): Bump securego/gosec from 2.21.0 to 2.21.2 (#3561)"

This reverts commit 12a8d0e46f.
2024-09-17 17:22:56 +02:00
Ettore Di Giacinto
a53392f919 chore(refactor): drop duplicated shutdown logics (#3589)
* chore(refactor): drop duplicated shutdown logics

- Handle locking in Shutdown and CheckModelIsLoaded in a more Go-idiomatic way (see the sketch after this entry)
- Drop duplicated code and re-organize the shutdown code

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: drop leftover

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore: improve logging and add missing locks

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-17 16:51:40 +02:00
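A rough sketch of the kind of Go-idiomatic locking the refactor describes: one mutex guarding the loaded-model map, acquired with `defer` so every return path releases it. Type and method names here are illustrative stand-ins, not the actual LocalAI model loader API:

```go
package model

import (
	"fmt"
	"sync"
)

// loader is an illustrative stand-in for a model loader that guards its
// map of loaded backends with a single mutex.
type loader struct {
	mu     sync.Mutex
	models map[string]*backend
}

type backend struct{ busy bool }

// CheckModelIsLoaded reports whether a model is currently loaded.
func (l *loader) CheckModelIsLoaded(name string) bool {
	l.mu.Lock()
	defer l.mu.Unlock()
	_, ok := l.models[name]
	return ok
}

// Shutdown removes a model under the same lock instead of duplicating the
// check-and-delete logic elsewhere; busy backends are left alone.
func (l *loader) Shutdown(name string) error {
	l.mu.Lock()
	defer l.mu.Unlock()
	if b, ok := l.models[name]; ok && b.busy {
		return fmt.Errorf("model %s is busy", name)
	}
	delete(l.models, name)
	return nil
}
```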
dependabot[bot]
eee1fb2c75 chore(deps): Bump pypinyin from 0.50.0 to 0.53.0 in /backend/python/openvoice (#3562)
chore(deps): Bump pypinyin in /backend/python/openvoice

Bumps [pypinyin](https://github.com/mozillazg/python-pinyin) from 0.50.0 to 0.53.0.
- [Release notes](https://github.com/mozillazg/python-pinyin/releases)
- [Changelog](https://github.com/mozillazg/python-pinyin/blob/master/CHANGELOG.rst)
- [Commits](https://github.com/mozillazg/python-pinyin/compare/v0.50.0...v0.53.0)

---
updated-dependencies:
- dependency-name: pypinyin
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-17 10:24:34 +02:00
dependabot[bot]
8826ca93b3 chore(deps): Bump openai from 1.44.0 to 1.45.1 in /examples/langchain/langchainpy-localai-example (#3573)
chore(deps): Bump openai

Bumps [openai](https://github.com/openai/openai-python) from 1.44.0 to 1.45.1.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.44.0...v1.45.1)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-17 10:24:14 +02:00
dependabot[bot]
5049629381 chore(deps): Bump langchain from 0.2.16 to 0.3.0 in /examples/langchain/langchainpy-localai-example (#3577)
chore(deps): Bump langchain

Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.2.16 to 0.3.0.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.2.16...langchain==0.3.0)

---
updated-dependencies:
- dependency-name: langchain
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-17 10:24:01 +02:00
Dave
92136a5d34 fix: gallery/index.yaml comment spacing (#3585)
extremely minor fix: add a space to index.yaml for the scanner

Signed-off-by: Dave Lee <dave@gray101.com>
2024-09-17 09:23:58 +02:00
Ettore Di Giacinto
075e5015c0 Revert "chore(deps): Bump setuptools from 69.5.1 to 75.1.0 in /backend/python/transformers" (#3586)
Revert "chore(deps): Bump setuptools from 69.5.1 to 75.1.0 in /backend/python…"

This reverts commit e95cb8eaac.
2024-09-17 09:06:07 +02:00
dependabot[bot]
46fd4ff6db chore(deps): Bump openai from 1.44.0 to 1.45.1 in /examples/functions (#3560)
Bumps [openai](https://github.com/openai/openai-python) from 1.44.0 to 1.45.1.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.44.0...v1.45.1)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-17 06:19:52 +00:00
Alexander Izotov
4a4e44bf55 feat: allow setting trust_remote_code for sentencetransformers backend (#3552)
Allow setting trust_remote_code for SentenceTransformers backend

Signed-off-by: Nyralei <93216976+Nyralei@users.noreply.github.com>
2024-09-17 05:52:37 +00:00
dependabot[bot]
22247ad92c chore(deps): Bump langchain from 0.2.16 to 0.3.0 in /examples/langchain-chroma (#3557)
chore(deps): Bump langchain in /examples/langchain-chroma

Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.2.16 to 0.3.0.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.2.16...langchain==0.3.0)

---
updated-dependencies:
- dependency-name: langchain
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-17 05:50:31 +00:00
Ettore Di Giacinto
d0f2bf3181 fix(shutdown): do not shutdown immediately busy backends (#3543)
* fix(shutdown): do not shutdown immediately busy backends

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(refactor): avoid duplicate functions

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: multiplicative backoff for shutdown (#3547)

* multiplicative backoff for shutdown

Rather than always retrying every two seconds, back off the shutdown attempt rate (a minimal sketch follows this entry).

Signed-off-by: Dave <dave@gray101.com>

* Update loader.go

Signed-off-by: Dave <dave@gray101.com>

* add clamp of 2 minutes

Signed-off-by: Dave Lee <dave@gray101.com>

---------

Signed-off-by: Dave <dave@gray101.com>
Signed-off-by: Dave Lee <dave@gray101.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Dave <dave@gray101.com>
Signed-off-by: Dave Lee <dave@gray101.com>
Co-authored-by: Dave <dave@gray101.com>
2024-09-17 04:50:57 +00:00
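A minimal sketch of the multiplicative backoff with a two-minute clamp described above. The retry condition and the two-second starting interval are assumptions for illustration; the real loader checks whether the backend is still busy before giving up its attempt:

```go
package model

import "time"

// shutdownWithBackoff retries a shutdown attempt, doubling the wait between
// attempts and clamping it at two minutes, instead of polling every two seconds.
func shutdownWithBackoff(tryShutdown func() bool) {
	const maxWait = 2 * time.Minute
	wait := 2 * time.Second
	for !tryShutdown() { // returns true once the backend is no longer busy
		time.Sleep(wait)
		wait *= 2
		if wait > maxWait {
			wait = maxWait
		}
	}
}
```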
LocalAI [bot]
0e4e101101 chore: ⬆️ Update ggerganov/llama.cpp to 23e0d70bacaaca1429d365a44aa9e7434f17823b (#3581)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-09-17 03:52:15 +00:00
dependabot[bot]
f4b1bd8f6d chore(deps): Bump setuptools from 70.3.0 to 75.1.0 in /backend/python/vllm (#3580)
chore(deps): Bump setuptools in /backend/python/vllm

Bumps [setuptools](https://github.com/pypa/setuptools) from 70.3.0 to 75.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v70.3.0...v75.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-17 03:41:01 +00:00
dependabot[bot]
e95cb8eaac chore(deps): Bump setuptools from 69.5.1 to 75.1.0 in /backend/python/transformers (#3579)
chore(deps): Bump setuptools in /backend/python/transformers

Bumps [setuptools](https://github.com/pypa/setuptools) from 69.5.1 to 75.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v69.5.1...v75.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-17 03:33:52 +00:00
Dave
db1159b651 feat: auth v2 - supersedes #2894 (#3476)
feat: auth v2 - supersedes #2894, metrics to follow later

Signed-off-by: Dave Lee <dave@gray101.com>
2024-09-16 23:29:07 -04:00
dependabot[bot]
a9a3a07c3b chore(deps): Bump setuptools from 72.1.0 to 75.1.0 in /backend/python/rerankers (#3578)
chore(deps): Bump setuptools in /backend/python/rerankers

Bumps [setuptools](https://github.com/pypa/setuptools) from 72.1.0 to 75.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v72.1.0...v75.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-17 03:24:30 +00:00
dependabot[bot]
06c8339862 chore(deps): Bump setuptools from 70.3.0 to 75.1.0 in /backend/python/bark (#3574)
chore(deps): Bump setuptools in /backend/python/bark

Bumps [setuptools](https://github.com/pypa/setuptools) from 70.3.0 to 75.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v70.3.0...v75.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-17 02:32:33 +00:00
dependabot[bot]
2394f7833f chore(deps): Bump setuptools from 70.3.0 to 75.1.0 in /backend/python/diffusers (#3575)
chore(deps): Bump setuptools in /backend/python/diffusers

Bumps [setuptools](https://github.com/pypa/setuptools) from 70.3.0 to 75.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v70.3.0...v75.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-17 02:28:05 +00:00
dependabot[bot]
36e19928eb chore(deps): Bump greenlet from 3.0.3 to 3.1.0 in /examples/langchain/langchainpy-localai-example (#3571)
chore(deps): Bump greenlet

Bumps [greenlet](https://github.com/python-greenlet/greenlet) from 3.0.3 to 3.1.0.
- [Changelog](https://github.com/python-greenlet/greenlet/blob/master/CHANGES.rst)
- [Commits](https://github.com/python-greenlet/greenlet/compare/3.0.3...3.1.0)

---
updated-dependencies:
- dependency-name: greenlet
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-17 01:14:39 +00:00
dependabot[bot]
abc27e0dc4 chore(deps): Bump setuptools from 72.1.0 to 75.1.0 in /backend/python/vall-e-x (#3570)
chore(deps): Bump setuptools in /backend/python/vall-e-x

Bumps [setuptools](https://github.com/pypa/setuptools) from 72.1.0 to 75.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v72.1.0...v75.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-17 00:51:55 +00:00
dependabot[bot]
42d6b9e0cc chore(deps): Bump weaviate-client from 4.6.7 to 4.8.1 in /examples/chainlit (#3568)
chore(deps): Bump weaviate-client in /examples/chainlit

Bumps [weaviate-client](https://github.com/weaviate/weaviate-python-client) from 4.6.7 to 4.8.1.
- [Release notes](https://github.com/weaviate/weaviate-python-client/releases)
- [Changelog](https://github.com/weaviate/weaviate-python-client/blob/main/docs/changelog.rst)
- [Commits](https://github.com/weaviate/weaviate-python-client/compare/v4.6.7...v4.8.1)

---
updated-dependencies:
- dependency-name: weaviate-client
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-17 00:11:15 +00:00
dependabot[bot]
c866b77586 chore(deps): Bump llama-index from 0.11.7 to 0.11.9 in /examples/chainlit (#3567)
chore(deps): Bump llama-index in /examples/chainlit

Bumps [llama-index](https://github.com/run-llama/llama_index) from 0.11.7 to 0.11.9.
- [Release notes](https://github.com/run-llama/llama_index/releases)
- [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md)
- [Commits](https://github.com/run-llama/llama_index/compare/v0.11.7...v0.11.9)

---
updated-dependencies:
- dependency-name: llama-index
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-17 00:02:42 +00:00
dependabot[bot]
5356b81b7f chore(deps): Bump sentence-transformers from 3.0.1 to 3.1.0 in /backend/python/sentencetransformers (#3566)
chore(deps): Bump sentence-transformers

Bumps [sentence-transformers](https://github.com/UKPLab/sentence-transformers) from 3.0.1 to 3.1.0.
- [Release notes](https://github.com/UKPLab/sentence-transformers/releases)
- [Commits](https://github.com/UKPLab/sentence-transformers/compare/v3.0.1...v3.1.0)

---
updated-dependencies:
- dependency-name: sentence-transformers
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-16 23:40:39 +00:00
dependabot[bot]
30fe163100 chore(deps): Bump setuptools from 72.1.0 to 75.1.0 in /backend/python/parler-tts (#3565)
chore(deps): Bump setuptools in /backend/python/parler-tts

Bumps [setuptools](https://github.com/pypa/setuptools) from 72.1.0 to 75.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v72.1.0...v75.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-16 23:13:09 +00:00
dependabot[bot]
afb5bbc1b8 chore(deps): Bump setuptools from 69.5.1 to 75.1.0 in /backend/python/transformers-musicgen (#3564)
chore(deps): Bump setuptools in /backend/python/transformers-musicgen

Bumps [setuptools](https://github.com/pypa/setuptools) from 69.5.1 to 75.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v69.5.1...v75.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-16 23:03:06 +00:00
dependabot[bot]
12a8d0e46f chore(deps): Bump securego/gosec from 2.21.0 to 2.21.2 (#3561)
Bumps [securego/gosec](https://github.com/securego/gosec) from 2.21.0 to 2.21.2.
- [Release notes](https://github.com/securego/gosec/releases)
- [Changelog](https://github.com/securego/gosec/blob/master/.goreleaser.yml)
- [Commits](https://github.com/securego/gosec/compare/v2.21.0...v2.21.2)

---
updated-dependencies:
- dependency-name: securego/gosec
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-16 21:57:16 +00:00
dependabot[bot]
09c7d8d458 chore(deps): Bump setuptools from 72.1.0 to 75.1.0 in /backend/python/autogptq (#3553)
chore(deps): Bump setuptools in /backend/python/autogptq

Bumps [setuptools](https://github.com/pypa/setuptools) from 72.1.0 to 75.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v72.1.0...v75.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-16 21:46:26 +00:00
dependabot[bot]
149cc1eb13 chore(deps): Bump openai from 1.44.1 to 1.45.1 in /examples/langchain-chroma (#3556)
chore(deps): Bump openai in /examples/langchain-chroma

Bumps [openai](https://github.com/openai/openai-python) from 1.44.1 to 1.45.1.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.44.1...v1.45.1)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-16 21:44:34 +00:00
dependabot[bot]
a5ce987bdb chore(deps): Bump langchain from 0.2.16 to 0.3.0 in /examples/functions (#3559)
Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.2.16 to 0.3.0.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.2.16...langchain==0.3.0)

---
updated-dependencies:
- dependency-name: langchain
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-16 21:35:10 +00:00
dependabot[bot]
2edc732c33 chore(deps): Bump setuptools from 72.1.0 to 75.1.0 in /backend/python/coqui (#3554)
chore(deps): Bump setuptools in /backend/python/coqui

Bumps [setuptools](https://github.com/pypa/setuptools) from 72.1.0 to 75.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v72.1.0...v75.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-16 21:23:06 +00:00
dependabot[bot]
fec01d9e69 chore(deps): Bump docs/themes/hugo-theme-relearn from f696f60 to d5a0ee0 (#3558)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `f696f60` to `d5a0ee0`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](f696f60f4e...d5a0ee04ad)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-16 21:00:35 +00:00
Ettore Di Giacinto
9ca5ef339a models(gallery): add llama-3.1-8b-stheno-v3.4-iq-imatrix (#3551)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-16 16:44:00 +02:00
Ettore Di Giacinto
a8003f2b7c models(gallery): add l3.1-8b-niitama-v1.1-iq-imatrix (#3550)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-16 13:06:07 +02:00
Ettore Di Giacinto
25deb4ba95 chore(deps): update llama.cpp to 6262d13e0b2da91f230129a93a996609a2fa2f2 (#3549)
chore(deps): update llama.cpp to 6262d13e0b2da91f230129a93a996609a2f5a2f2

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-16 10:29:20 +02:00
LocalAI [bot]
3d3db1d74f chore: ⬆️ Update ggerganov/whisper.cpp to 049b3a0e53c8a8e4c4576c06a1a4fccf0063a73f (#3548)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-09-15 21:40:51 +00:00
LocalAI [bot]
cabb1602e8 chore: ⬆️ Update ggerganov/llama.cpp to feff4aa8461da7c432d144c11da4802e41fef3cf (#3542)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-09-13 22:05:38 +00:00
Ettore Di Giacinto
25e7661de2 chore(exllama): drop exllama backend (#3536)
* chore(exllama): drop exllama backend

As part of polishing and cleanup it now makes sense to drop exllama, which
is completely unmaintained and only supported the llamav1
architecture (nowadays superseded by newer architectures).

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(gosec): fix CI

downgrade to latest known version of the gosec action

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-13 19:52:13 +00:00
Ettore Di Giacinto
cbfab81c35 models(gallery): add datagemma models (#3540)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-13 21:49:18 +02:00
Ettore Di Giacinto
925315ab5c models(gallery): add mn-12b-lyra-v4-iq-imatrix (#3539)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-13 19:49:14 +02:00
Ettore Di Giacinto
5213e79f5c models(gallery): add azure_dusk-v0.2-iq-imatrix (#3538)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-13 19:48:54 +02:00
Ettore Di Giacinto
7fe6d0ad2b chore(gosec): fix CI (#3537)
downgrade to latest known version of the gosec action

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-13 19:19:26 +02:00
154 changed files with 3416 additions and 1325 deletions

View File

@@ -9,6 +9,7 @@
# Param 2: email
#
config_user() {
echo "Configuring git for $1 <$2>"
local gcn=$(git config --global user.name)
if [ -z "${gcn}" ]; then
echo "Setting up git user / remote"
@@ -24,6 +25,7 @@ config_user() {
# Param 2: remote url
#
config_remote() {
echo "Adding git remote and fetching $2 as $1"
local gr=$(git remote -v | grep $1)
if [ -z "${gr}" ]; then
git remote add $1 $2

View File

@@ -29,9 +29,14 @@ def calculate_sha256(file_path):
def manual_safety_check_hf(repo_id):
    scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
    scan = scanResponse.json()
    if scan['hasUnsafeFile']:
        return scan
    return None
    # Check if 'hasUnsafeFile' exists in the response
    if 'hasUnsafeFile' in scan:
        if scan['hasUnsafeFile']:
            return scan
        else:
            return None
    else:
        return None
download_type, repo_id_or_url = parse_uri(uri)

View File

@@ -6,6 +6,7 @@ import (
"io/ioutil"
"os"
"github.com/microcosm-cc/bluemonday"
"gopkg.in/yaml.v3"
)
@@ -279,6 +280,12 @@ func main() {
return
}
// Ensure that all arbitrary text content is sanitized before display
for i, m := range models {
models[i].Name = bluemonday.StrictPolicy().Sanitize(m.Name)
models[i].Description = bluemonday.StrictPolicy().Sanitize(m.Description)
}
// render the template
data := struct {
Models []*GalleryModel

View File

@@ -33,7 +33,7 @@ jobs:
run: |
CGO_ENABLED=0 make build-api
- name: rm
uses: appleboy/ssh-action@v1.0.3
uses: appleboy/ssh-action@v1.1.0
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
@@ -53,7 +53,7 @@ jobs:
rm: true
target: ./local-ai
- name: restarting
uses: appleboy/ssh-action@v1.0.3
uses: appleboy/ssh-action@v1.1.0
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}

View File

@@ -13,6 +13,78 @@ concurrency:
cancel-in-progress: true
jobs:
hipblas-jobs:
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
image-type: ${{ matrix.image-type }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
aio: ${{ matrix.aio }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
# Pushing with all jobs in parallel
# eats the bandwidth of all the nodes
max-parallel: 2
matrix:
include:
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-hipblas-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
aio: "-aio-gpu-hipblas"
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
latest-image: 'latest-gpu-hipblas'
latest-image-aio: 'latest-aio-gpu-hipblas'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-core'
ffmpeg: 'false'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
self-hosted-jobs:
uses: ./.github/workflows/image_build.yml
with:
@@ -39,7 +111,7 @@ jobs:
strategy:
# Pushing with all jobs in parallel
# eats the bandwidth of all the nodes
max-parallel: ${{ github.event_name != 'pull_request' && 6 || 10 }}
max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
matrix:
include:
# Extra images
@@ -122,29 +194,6 @@ jobs:
base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-hipblas-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
aio: "-aio-gpu-hipblas"
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
latest-image: 'latest-gpu-hipblas'
latest-image-aio: 'latest-aio-gpu-hipblas'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'auto'
@@ -212,26 +261,6 @@ jobs:
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-core'
ffmpeg: 'false'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
core-image-build:
uses: ./.github/workflows/image_build.yml

View File

@@ -18,7 +18,7 @@ jobs:
if: ${{ github.actor != 'dependabot[bot]' }}
- name: Run Gosec Security Scanner
if: ${{ github.actor != 'dependabot[bot]' }}
uses: securego/gosec@master
uses: securego/gosec@v2.21.4
with:
# we let the report trigger content trigger a failure using the GitHub Security features.
args: '-no-fail -fmt sarif -out results.sarif ./...'

View File

@@ -178,13 +178,22 @@ jobs:
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
# Install protoc
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
rm protoc.zip
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Build images
run: |
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
- name: Test
run: |
LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
make run-e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}

View File

@@ -15,8 +15,6 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
- [Documentation](#documentation)
- [Community and Communication](#community-and-communication)
## Getting Started
### Prerequisites
@@ -54,7 +52,7 @@ If you find a bug, have a feature request, or encounter any issues, please check
## Coding Guidelines
- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like []`golangci-lint`](https://golangci-lint.run) can help you here.
- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like [`golangci-lint`](https://golangci-lint.run) can help you here.
## Testing
@@ -84,5 +82,3 @@ We are welcome the contribution of the documents, please open new PR or create a
- You can reach out via the Github issue tracker.
- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
---

View File

@@ -9,11 +9,13 @@ FROM ${BASE_IMAGE} AS requirements-core
USER root
ARG GO_VERSION=1.22.6
ARG CMAKE_VERSION=3.26.4
ARG CMAKE_FROM_SOURCE=false
ARG TARGETARCH
ARG TARGETVARIANT
ENV DEBIAN_FRONTEND=noninteractive
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
RUN apt-get update && \
@@ -21,13 +23,25 @@ RUN apt-get update && \
build-essential \
ccache \
ca-certificates \
cmake \
curl \
curl libssl-dev \
git \
unzip upx-ucl && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Install CMake (the version in 22.04 is too old)
RUN <<EOT bash
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
else
apt-get update && \
apt-get install -y \
cmake && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# Install Go
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
@@ -188,6 +202,8 @@ FROM ${GRPC_BASE_IMAGE} AS grpc
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
ARG GRPC_VERSION=v1.65.0
ARG CMAKE_FROM_SOURCE=false
ARG CMAKE_VERSION=3.26.4
ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
@@ -196,12 +212,24 @@ WORKDIR /build
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates \
build-essential \
cmake \
build-essential curl libssl-dev \
git && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Install CMake (the version in 22.04 is too old)
RUN <<EOT bash
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
else
apt-get update && \
apt-get install -y \
cmake && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
# and running make install in the target container
@@ -297,10 +325,10 @@ COPY .git .
RUN make prepare
## Build the binary
## If it's CUDA, we want to skip some of the llama-compat backends to save space
## We only leave the most CPU-optimized variant and the fallback for the cublas build
## (both will use CUDA for the actual computation)
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
## If it's CUDA or hipblas, we want to skip some of the llama-compat backends to save space
## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
## (both will use CUDA or hipblas for the actual computation)
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
else \
make build; \
@@ -338,9 +366,8 @@ RUN if [ "${FFMPEG}" = "true" ]; then \
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ssh less && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
ssh less wget
# For the devcontainer, leave apt functional in case additional devtools are needed at runtime.
RUN go install github.com/go-delve/delve/cmd/dlv@latest
@@ -418,9 +445,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/transformers-musicgen \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama1" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/exllama \
; fi
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \

View File

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=e6b7801bd189d102d901d3e72035611a25456ef1
CPPLLAMA_VERSION?=96776405a17034dcfd53d3ddf5d142d34bdbb657
# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
@@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=a551933542d956ae84634937acd2942eb40efaaf
WHISPER_CPP_VERSION?=fdbfb460ed546452a5d53611bba66d10d842e719
# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -359,6 +359,9 @@ clean-tests:
rm -rf test-dir
rm -rf core/http/backend-assets
clean-dc: clean
cp -r /build/backend-assets /workspace/backend-assets
## Build:
build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
@@ -465,15 +468,15 @@ run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
run-e2e-aio:
run-e2e-aio: protogen-go
@echo 'Running e2e AIO tests'
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio
test-e2e:
@echo 'Running e2e tests'
BUILD_TYPE=$(BUILD_TYPE) \
LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e
teardown-e2e:
rm -rf $(TEST_DIR) || true
@@ -481,24 +484,24 @@ teardown-e2e:
test-llama: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
test-llama-gguf: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r $(TEST_PATHS)
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
test-tts: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts 1 -v -r $(TEST_PATHS)
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
test-stablediffusion: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts 1 -v -r $(TEST_PATHS)
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
test-stores: backend-assets/grpc/local-store
mkdir -p tests/integration/backend-assets/grpc
cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts 1 -v -r tests/integration
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration
test-container:
docker build --target requirements -t local-ai-test-container .
@@ -534,10 +537,10 @@ protogen-go-clean:
$(RM) bin/*
.PHONY: protogen-python
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
.PHONY: protogen-python-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
.PHONY: autogptq-protogen
autogptq-protogen:
@@ -571,14 +574,6 @@ diffusers-protogen:
diffusers-protogen-clean:
$(MAKE) -C backend/python/diffusers protogen-clean
.PHONY: exllama-protogen
exllama-protogen:
$(MAKE) -C backend/python/exllama protogen
.PHONY: exllama-protogen-clean
exllama-protogen-clean:
$(MAKE) -C backend/python/exllama protogen-clean
.PHONY: exllama2-protogen
exllama2-protogen:
$(MAKE) -C backend/python/exllama2 protogen
@@ -675,7 +670,6 @@ prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/parler-tts
$(MAKE) -C backend/python/vall-e-x
$(MAKE) -C backend/python/openvoice
$(MAKE) -C backend/python/exllama
$(MAKE) -C backend/python/exllama2
prepare-test-extra: protogen-python

View File

@@ -68,9 +68,7 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
[💻 Getting started](https://localai.io/basics/getting_started/index.html)
## 🔥🔥 Hot topics / Roadmap
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
## 📰 Latest project news
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
@@ -83,8 +81,12 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
Hot topics (looking for contributors):
Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
## 🔥🔥 Hot topics (looking for help):
- Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729
- Realtime API https://github.com/mudler/LocalAI/issues/3714
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
- Backends v2: https://github.com/mudler/LocalAI/issues/1126

View File

@@ -2,7 +2,7 @@ backend: llama-cpp
context_size: 4096
f16: true
mmap: true
name: gpt-4-vision-preview
name: gpt-4o
roles:
user: "USER:"

View File

@@ -2,7 +2,7 @@ backend: llama-cpp
context_size: 4096
f16: true
mmap: true
name: gpt-4-vision-preview
name: gpt-4o
roles:
user: "USER:"

View File

@@ -2,7 +2,7 @@ backend: llama-cpp
context_size: 4096
mmap: false
f16: false
name: gpt-4-vision-preview
name: gpt-4o
roles:
user: "USER:"

View File

@@ -26,6 +26,19 @@ service Backend {
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
rpc Rerank(RerankRequest) returns (RerankResult) {}
rpc GetMetrics(MetricsRequest) returns (MetricsResponse);
}
// Define the empty request
message MetricsRequest {}
message MetricsResponse {
int32 slot_id = 1;
string prompt_json_for_slot = 2; // Stores the prompt as a JSON string.
float tokens_per_second = 3;
int32 tokens_generated = 4;
int32 prompt_tokens_processed = 5;
}
message RerankRequest {
@@ -134,6 +147,9 @@ message PredictOptions {
repeated string Images = 42;
bool UseTokenizerTemplate = 43;
repeated Message Messages = 44;
repeated string Videos = 45;
repeated string Audios = 46;
string CorrelationId = 47;
}
// The response message containing the result
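The new GetMetrics RPC and MetricsResponse message above could be exercised from Go roughly as follows. The import path, server address, and field handling are assumptions based on standard protoc-gen-go-grpc output rather than code taken from this change:

```go
package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"

	pb "github.com/mudler/LocalAI/pkg/grpc/proto" // assumed import path for the generated stubs
)

func main() {
	// Assumed: a backend gRPC server listening locally without TLS.
	conn, err := grpc.Dial("127.0.0.1:50051", grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// GetMetrics takes the empty MetricsRequest and returns per-slot throughput data.
	resp, err := pb.NewBackendClient(conn).GetMetrics(ctx, &pb.MetricsRequest{})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("slot %d: %.2f tokens/s (%d generated)\n",
		resp.SlotId, resp.TokensPerSecond, resp.TokensGenerated)
}
```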

View File

@@ -13,6 +13,7 @@
#include <getopt.h>
#include "clip.h"
#include "llava.h"
#include "log.h"
#include "stb_image.h"
#include "common.h"
#include "json.hpp"
@@ -112,7 +113,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
std::string ret;
for (; begin != end; ++begin)
{
ret += llama_token_to_piece(ctx, *begin);
ret += common_token_to_piece(ctx, *begin);
}
return ret;
}
@@ -120,7 +121,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
// format incomplete utf-8 multibyte character for output
static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
{
std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
std::string out = token == -1 ? "" : common_token_to_piece(ctx, token);
// if the size is 1 and first bit is 1, meaning it's a partial character
// (size > 1 meaning it's already a known token)
if (out.size() == 1 && (out[0] & 0x80) == 0x80)
@@ -202,8 +203,8 @@ struct llama_client_slot
std::string stopping_word;
// sampling
struct gpt_sampler_params sparams;
gpt_sampler *ctx_sampling = nullptr;
struct common_sampler_params sparams;
common_sampler *ctx_sampling = nullptr;
int32_t ga_i = 0; // group-attention state
int32_t ga_n = 1; // group-attention factor
@@ -256,7 +257,7 @@ struct llama_client_slot
images.clear();
}
bool has_budget(gpt_params &global_params) {
bool has_budget(common_params &global_params) {
if (params.n_predict == -1 && global_params.n_predict == -1)
{
return true; // limitless
@@ -397,7 +398,7 @@ struct llama_server_context
clip_ctx *clp_ctx = nullptr;
gpt_params params;
common_params params;
llama_batch batch;
@@ -440,7 +441,7 @@ struct llama_server_context
}
}
bool load_model(const gpt_params &params_)
bool load_model(const common_params &params_)
{
params = params_;
if (!params.mmproj.empty()) {
@@ -448,7 +449,7 @@ struct llama_server_context
LOG_INFO("Multi Modal Mode Enabled", {});
clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1);
if(clp_ctx == nullptr) {
LOG_ERROR("unable to load clip model", {{"model", params.mmproj}});
LOG_ERR("unable to load clip model: %s", params.mmproj.c_str());
return false;
}
@@ -457,12 +458,12 @@ struct llama_server_context
}
}
llama_init_result llama_init = llama_init_from_gpt_params(params);
model = llama_init.model;
ctx = llama_init.context;
common_init_result common_init = common_init_from_params(params);
model = common_init.model;
ctx = common_init.context;
if (model == nullptr)
{
LOG_ERROR("unable to load model", {{"model", params.model}});
LOG_ERR("unable to load model: %s", params.model.c_str());
return false;
}
@@ -470,7 +471,7 @@ struct llama_server_context
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
const int n_embd_llm = llama_n_embd(model);
if (n_embd_clip != n_embd_llm) {
LOG_TEE("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
llama_free(ctx);
llama_free_model(model);
return false;
@@ -489,11 +490,21 @@ struct llama_server_context
std::vector<char> buf(1);
int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
if (res < 0) {
LOG_ERROR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", {});
LOG_ERR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", __func__);
sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
}
}
llama_client_slot* get_active_slot() {
for (llama_client_slot& slot : slots) {
// Check if the slot is currently processing
if (slot.is_processing()) {
return &slot; // Return the active slot
}
}
return nullptr; // No active slot found
}
void initialize() {
// create slots
all_slots_are_idle = true;
@@ -567,12 +578,12 @@ struct llama_server_context
std::vector<llama_token> p;
if (first)
{
-p = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
+p = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
first = false;
}
else
{
-p = ::llama_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
+p = common_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
}
prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
}
@@ -589,7 +600,7 @@ struct llama_server_context
else
{
auto s = json_prompt.template get<std::string>();
-prompt_tokens = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
+prompt_tokens = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
}
return prompt_tokens;
@@ -618,7 +629,7 @@ struct llama_server_context
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
slot_params default_params;
-gpt_sampler_params default_sparams;
+common_sampler_params default_sparams;
slot->params.stream = json_value(data, "stream", false);
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
@@ -758,7 +769,7 @@ struct llama_server_context
}
else if (el[0].is_string())
{
-auto toks = llama_tokenize(model, el[0].get<std::string>(), false);
+auto toks = common_tokenize(model, el[0].get<std::string>(), false);
for (auto tok : toks)
{
slot->sparams.logit_bias.push_back({tok, bias});
@@ -790,7 +801,7 @@ struct llama_server_context
sampler_names.emplace_back(name);
}
}
-slot->sparams.samplers = gpt_sampler_types_from_names(sampler_names, false);
+slot->sparams.samplers = common_sampler_types_from_names(sampler_names, false);
}
else
{
@@ -812,10 +823,11 @@ struct llama_server_context
img_sl.img_data = clip_image_u8_init();
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
{
-LOG_ERROR("failed to load image", {
-{"slot_id", slot->id},
-{"img_sl_id", img_sl.id}
-});
+LOG_ERR("%s: failed to load image, slot_id: %d, img_sl_id: %d",
+__func__,
+slot->id,
+img_sl.id
+);
return false;
}
LOG_VERBOSE("image loaded", {
@@ -853,12 +865,12 @@ struct llama_server_context
}
}
if (!found) {
-LOG_TEE("ERROR: Image with id: %i, not found.\n", img_id);
+LOG("ERROR: Image with id: %i, not found.\n", img_id);
slot->images.clear();
return false;
}
} catch (const std::invalid_argument& e) {
-LOG_TEE("Invalid image number id in prompt\n");
+LOG("Invalid image number id in prompt\n");
slot->images.clear();
return false;
}
@@ -873,9 +885,9 @@ struct llama_server_context
if (slot->ctx_sampling != nullptr)
{
-gpt_sampler_free(slot->ctx_sampling);
+common_sampler_free(slot->ctx_sampling);
}
-slot->ctx_sampling = gpt_sampler_init(model, slot->sparams);
+slot->ctx_sampling = common_sampler_init(model, slot->sparams);
//llama_set_rng_seed(ctx, slot->params.seed);
slot->command = LOAD_PROMPT;
@@ -886,7 +898,7 @@ struct llama_server_context
{"task_id", slot->task_id},
});
-// LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
+// LOG("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
return true;
}
@@ -902,13 +914,13 @@ struct llama_server_context
system_tokens.clear();
if (!system_prompt.empty()) {
-system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);
+system_tokens = common_tokenize(ctx, system_prompt, add_bos_token);
-llama_batch_clear(batch);
+common_batch_clear(batch);
for (int i = 0; i < (int)system_tokens.size(); ++i)
{
-llama_batch_add(batch, system_tokens[i], i, { 0 }, false);
+common_batch_add(batch, system_tokens[i], i, { 0 }, false);
}
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += params.n_batch)
@@ -926,7 +938,7 @@ struct llama_server_context
};
if (llama_decode(ctx, batch_view) != 0)
{
-LOG_TEE("%s: llama_decode() failed\n", __func__);
+LOG("%s: llama_decode() failed\n", __func__);
return;
}
}
@@ -938,7 +950,7 @@ struct llama_server_context
}
}
-LOG_TEE("system prompt updated\n");
+LOG("system prompt updated\n");
system_need_update = false;
}
@@ -997,7 +1009,7 @@ struct llama_server_context
bool process_token(completion_token_output &result, llama_client_slot &slot) {
// remember which tokens were sampled - used for repetition penalties during sampling
-const std::string token_str = llama_token_to_piece(ctx, result.tok);
+const std::string token_str = common_token_to_piece(ctx, result.tok);
slot.sampled = result.tok;
// search stop word and delete it
@@ -1120,7 +1132,7 @@ struct llama_server_context
}
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
-LOG_TEE("Error processing the given image");
+LOG("Error processing the given image");
return false;
}
@@ -1132,7 +1144,7 @@ struct llama_server_context
void send_error(task_server& task, const std::string &error)
{
-LOG_TEE("task %i - error: %s\n", task.id, error.c_str());
+LOG("task %i - error: %s\n", task.id, error.c_str());
task_result res;
res.id = task.id;
res.multitask_id = task.multitask_id;
@@ -1148,7 +1160,7 @@ struct llama_server_context
samplers.reserve(slot.sparams.samplers.size());
for (const auto & sampler : slot.sparams.samplers)
{
-samplers.emplace_back(gpt_sampler_type_to_str(sampler));
+samplers.emplace_back(common_sampler_type_to_str(sampler));
}
return json {
@@ -1204,7 +1216,7 @@ struct llama_server_context
if (slot.sparams.n_probs > 0)
{
std::vector<completion_token_output> probs_output = {};
-const std::vector<llama_token> to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false);
+const std::vector<llama_token> to_send_toks = common_tokenize(ctx, tkn.text_to_send, false);
size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size());
size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size());
if (probs_pos < probs_stop_pos)
@@ -1256,7 +1268,7 @@ struct llama_server_context
std::vector<completion_token_output> probs = {};
if (!slot.params.stream && slot.stopped_word)
{
-const std::vector<llama_token> stop_word_toks = llama_tokenize(ctx, slot.stopping_word, false);
+const std::vector<llama_token> stop_word_toks = common_tokenize(ctx, slot.stopping_word, false);
probs = std::vector<completion_token_output>(slot.generated_token_probs.begin(), slot.generated_token_probs.end() - stop_word_toks.size());
}
else
@@ -1371,7 +1383,7 @@ struct llama_server_context
};
if (llama_decode(ctx, batch_view))
{
-LOG_TEE("%s : failed to eval\n", __func__);
+LOG("%s : failed to eval\n", __func__);
return false;
}
}
@@ -1389,14 +1401,14 @@ struct llama_server_context
llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, slot.n_past, 1, 0, };
if (llama_decode(ctx, batch_img))
{
-LOG_TEE("%s : failed to eval image\n", __func__);
+LOG("%s : failed to eval image\n", __func__);
return false;
}
slot.n_past += n_eval;
}
image_idx++;
-llama_batch_clear(batch);
+common_batch_clear(batch);
// append prefix of next image
const auto json_prompt = (image_idx >= (int) slot.images.size()) ?
@@ -1406,7 +1418,7 @@ struct llama_server_context
std::vector<llama_token> append_tokens = tokenize(json_prompt, false); // has next image
for (int i = 0; i < (int) append_tokens.size(); ++i)
{
-llama_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true);
+common_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true);
slot.n_past += 1;
}
}
@@ -1538,7 +1550,7 @@ struct llama_server_context
update_system_prompt();
}
-llama_batch_clear(batch);
+common_batch_clear(batch);
if (all_slots_are_idle)
{
@@ -1572,7 +1584,7 @@ struct llama_server_context
slot.n_past = 0;
slot.truncated = false;
slot.has_next_token = true;
-LOG_TEE("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
+LOG("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
continue;
// END LOCALAI changes
@@ -1616,7 +1628,7 @@ struct llama_server_context
// TODO: we always have to take into account the "system_tokens"
// this is not great and needs to be improved somehow
-llama_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true);
+common_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true);
slot.n_past += 1;
}
@@ -1710,7 +1722,7 @@ struct llama_server_context
if (!slot.params.cache_prompt)
{
-gpt_sampler_reset(slot.ctx_sampling);
+common_sampler_reset(slot.ctx_sampling);
slot.n_past = 0;
slot.n_past_se = 0;
@@ -1722,7 +1734,7 @@ struct llama_server_context
// push the prompt into the sampling context (do not apply grammar)
for (auto &token : prompt_tokens)
{
-gpt_sampler_accept(slot.ctx_sampling, token, false);
+common_sampler_accept(slot.ctx_sampling, token, false);
}
slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
@@ -1814,16 +1826,17 @@ struct llama_server_context
ga_i += ga_w/ga_n;
}
}
-llama_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, {slot.id }, false);
+common_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, {slot.id }, false);
slot_npast++;
}
if (has_images && !ingest_images(slot, n_batch))
{
-LOG_ERROR("failed processing images", {
-"slot_id", slot.id,
-"task_id", slot.task_id,
-});
+LOG_ERR("%s: failed processing images Slot id : %d, Task id: %d",
+__func__,
+slot.id,
+slot.task_id
+);
// FIXME @phymbert: to be properly tested
// early returning without changing the slot state will block the slot for ever
// no one at the moment is checking the return value
@@ -1863,10 +1876,10 @@ struct llama_server_context
const int bd = (slot.ga_w / slot.ga_n) * (slot.ga_n - 1);
const int dd = (slot.ga_w / slot.ga_n) - ib * bd - slot.ga_w;
-LOG_TEE("\n");
-LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd);
-LOG_TEE("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n);
-LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd);
+LOG("\n");
+LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd);
+LOG("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n);
+LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd);
llama_kv_cache_seq_add(ctx, slot.id, slot.ga_i, slot.n_past_se, ib * bd);
llama_kv_cache_seq_div(ctx, slot.id, slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w,slot.ga_n);
@@ -1876,7 +1889,7 @@ struct llama_server_context
slot.ga_i += slot.ga_w / slot.ga_n;
-LOG_TEE("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i);
+LOG("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i);
}
slot.n_past_se += n_tokens;
}
@@ -1901,11 +1914,11 @@ struct llama_server_context
if (n_batch == 1 || ret < 0)
{
// if you get here, it means the KV cache is full - try increasing it via the context size
-LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
+LOG("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
return false;
}
-LOG_TEE("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
+LOG("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
// retry with half the batch size to try to find a free slot in the KV cache
n_batch /= 2;
@@ -1930,9 +1943,9 @@ struct llama_server_context
}
completion_token_output result;
-const llama_token id = gpt_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i);
+const llama_token id = common_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i);
-gpt_sampler_accept(slot.ctx_sampling, id, true);
+common_sampler_accept(slot.ctx_sampling, id, true);
slot.n_decoded += 1;
if (slot.n_decoded == 1)
@@ -1943,7 +1956,7 @@ struct llama_server_context
}
result.tok = id;
-const auto * cur_p = gpt_sampler_get_candidates(slot.ctx_sampling);
+const auto * cur_p = common_sampler_get_candidates(slot.ctx_sampling);
for (size_t i = 0; i < (size_t) slot.sparams.n_probs; ++i) {
result.probs.push_back({
@@ -1996,7 +2009,7 @@ static json format_partial_response(
struct token_translator
{
llama_context * ctx;
-std::string operator()(llama_token tok) const { return llama_token_to_piece(ctx, tok); }
+std::string operator()(llama_token tok) const { return common_token_to_piece(ctx, tok); }
std::string operator()(const completion_token_output &cto) const { return (*this)(cto.tok); }
};
@@ -2103,6 +2116,9 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
data["ignore_eos"] = predict->ignoreeos();
data["embeddings"] = predict->embeddings();
// Add the correlationid to json data
data["correlation_id"] = predict->correlationid();
// for each image in the request, add the image data
//
for (int i = 0; i < predict->images_size(); i++) {
@@ -2187,7 +2203,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
// }
static void params_parse(const backend::ModelOptions* request,
-gpt_params & params) {
+common_params & params) {
// this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809
@@ -2295,7 +2311,7 @@ public:
grpc::Status LoadModel(ServerContext* context, const backend::ModelOptions* request, backend::Result* result) {
// Implement LoadModel RPC
-gpt_params params;
+common_params params;
params_parse(request, params);
llama_backend_init();
@@ -2341,6 +2357,11 @@ public:
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
reply.set_prompt_tokens(tokens_evaluated);
// Log Request Correlation Id
LOG_VERBOSE("correlation:", {
{ "id", data["correlation_id"] }
});
// Send the reply
writer->Write(reply);
@@ -2364,6 +2385,12 @@ public:
std::string completion_text;
task_result result = llama.queue_results.recv(task_id);
if (!result.error && result.stop) {
// Log Request Correlation Id
LOG_VERBOSE("correlation:", {
{ "id", data["correlation_id"] }
});
completion_text = result.result_json.value("content", "");
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
@@ -2403,6 +2430,31 @@ public:
return grpc::Status::OK;
}
grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
llama_client_slot* active_slot = llama.get_active_slot();
if (active_slot != nullptr) {
// Calculate the tokens per second using existing logic
double tokens_per_second = 1e3 / active_slot->t_token_generation * active_slot->n_decoded;
// Populate the response with metrics
response->set_slot_id(active_slot->id);
response->set_prompt_json_for_slot(active_slot->prompt.dump());
response->set_tokens_per_second(tokens_per_second);
response->set_tokens_generated(active_slot->n_decoded);
response->set_prompt_tokens_processed(active_slot->num_prompt_tokens_processed);
} else {
// Handle case when no active slot exists
response->set_slot_id(0);
response->set_prompt_json_for_slot("");
response->set_tokens_per_second(0);
response->set_tokens_generated(0);
response->set_prompt_tokens_processed(0);
}
return grpc::Status::OK;
}
};
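A note on the metrics added above: tokens_per_second is simply the decoded-token count scaled by the slot's generation time, which the backend appears to track in milliseconds. A minimal sketch of the same arithmetic, shown in Go purely for illustration (the millisecond assumption and the helper name are not part of the change):

```go
// Illustrative only: mirrors GetMetrics' tokens-per-second computation,
// assuming t_token_generation is the slot's cumulative generation time in ms.
package main

import "fmt"

func tokensPerSecond(tokenGenerationMs float64, nDecoded int) float64 {
	if tokenGenerationMs == 0 {
		return 0 // nothing generated yet; avoid division by zero
	}
	return 1e3 / tokenGenerationMs * float64(nDecoded)
}

func main() {
	// e.g. 100 tokens decoded over 2000 ms of generation time -> 50 tok/s
	fmt.Println(tokensPerSecond(2000, 100))
}
```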
void RunServer(const std::string& server_address) {

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
-setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate
auto-gptq==0.7.1
-grpcio==1.66.1
+grpcio==1.66.2
protobuf
certifi
transformers

View File

@@ -3,6 +3,6 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers
accelerate

View File

@@ -1,4 +1,4 @@
bark==0.1.5
-grpcio==1.66.1
+grpcio==1.66.2
protobuf
certifi

View File

@@ -1,2 +1,2 @@
-grpcio==1.66.1
+grpcio==1.66.2
protobuf

View File

@@ -3,6 +3,6 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
-setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers
accelerate

View File

@@ -1,4 +1,4 @@
-TTS==0.22.0
-grpcio==1.66.1
+coqui-tts
+grpcio==1.66.2
protobuf
certifi

View File

@@ -19,7 +19,7 @@ class TestBackendServicer(unittest.TestCase):
This method sets up the gRPC service by starting the server
"""
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
-time.sleep(10)
+time.sleep(30)
def tearDown(self) -> None:
"""

View File

@@ -3,7 +3,7 @@ intel-extension-for-pytorch
torch
torchvision
optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
diffusers
opencv-python
transformers

View File

@@ -1,5 +1,5 @@
setuptools
-grpcio==1.66.1
+grpcio==1.66.2
pillow
protobuf
certifi

View File

@@ -1 +0,0 @@
source

View File

@@ -1,25 +0,0 @@
export CONDA_ENV_PATH = "exllama.yml"
.PHONY: exllama
exllama: protogen
bash install.sh ${CONDA_ENV_PATH}
.PHONY: run
run: protogen
@echo "Running exllama..."
bash run.sh
@echo "exllama run."
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean
clean: protogen-clean
$(RM) -r venv source __pycache__

View File

@@ -1,5 +0,0 @@
# Creating a separate environment for the exllama project
```
make exllama
```

View File

@@ -1,159 +0,0 @@
#!/usr/bin/env python3
import grpc
from concurrent import futures
import time
import backend_pb2
import backend_pb2_grpc
import argparse
import signal
import sys
import os, glob
from pathlib import Path
import torch
import torch.nn.functional as F
from torch import version as torch_version
from source.tokenizer import ExLlamaTokenizer
from source.generator import ExLlamaGenerator
from source.model import ExLlama, ExLlamaCache, ExLlamaConfig
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
def generate(self,prompt, max_new_tokens):
self.generator.end_beam_search()
# Tokenizing the input
ids = self.generator.tokenizer.encode(prompt)
self.generator.gen_begin_reuse(ids)
initial_len = self.generator.sequence[0].shape[0]
has_leading_space = False
decoded_text = ''
for i in range(max_new_tokens):
token = self.generator.gen_single_token()
if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith(''):
has_leading_space = True
decoded_text = self.generator.tokenizer.decode(self.generator.sequence[0][initial_len:])
if has_leading_space:
decoded_text = ' ' + decoded_text
if token.item() == self.generator.tokenizer.eos_token_id:
break
return decoded_text
def Health(self, request, context):
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
def LoadModel(self, request, context):
try:
# https://github.com/turboderp/exllama/blob/master/example_cfg.py
model_directory = request.ModelFile
# Locate files we need within that directory
tokenizer_path = os.path.join(model_directory, "tokenizer.model")
model_config_path = os.path.join(model_directory, "config.json")
st_pattern = os.path.join(model_directory, "*.safetensors")
model_path = glob.glob(st_pattern)[0]
# Create config, model, tokenizer and generator
config = ExLlamaConfig(model_config_path) # create config from config.json
config.model_path = model_path # supply path to model weights file
if (request.ContextSize):
config.max_seq_len = request.ContextSize # override max sequence length
config.max_attention_size = request.ContextSize**2 # Should be set to context_size^2.
# https://github.com/turboderp/exllama/issues/220#issuecomment-1720324163
# Set Rope scaling.
if (request.RopeFreqScale):
# Alpha value for Rope scaling.
# Higher value increases context but adds perplexity.
# alpha_value and compress_pos_emb are mutually exclusive.
# https://github.com/turboderp/exllama/issues/115
config.alpha_value = request.RopeFreqScale
config.calculate_rotary_embedding_base()
model = ExLlama(config) # create ExLlama instance and load the weights
tokenizer = ExLlamaTokenizer(tokenizer_path) # create tokenizer from tokenizer model file
cache = ExLlamaCache(model, batch_size = 2) # create cache for inference
generator = ExLlamaGenerator(model, tokenizer, cache) # create generator
self.generator= generator
self.model = model
self.tokenizer = tokenizer
self.cache = cache
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
return backend_pb2.Result(message="Model loaded successfully", success=True)
def Predict(self, request, context):
penalty = 1.15
if request.Penalty != 0.0:
penalty = request.Penalty
self.generator.settings.token_repetition_penalty_max = penalty
self.generator.settings.temperature = request.Temperature
self.generator.settings.top_k = request.TopK
self.generator.settings.top_p = request.TopP
tokens = 512
if request.Tokens != 0:
tokens = request.Tokens
if self.cache.batch_size == 1:
del self.cache
self.cache = ExLlamaCache(self.model, batch_size=2)
self.generator = ExLlamaGenerator(self.model, self.tokenizer, self.cache)
t = self.generate(request.Prompt, tokens)
# Remove prompt from response if present
if request.Prompt in t:
t = t.replace(request.Prompt, "")
return backend_pb2.Result(message=bytes(t, encoding='utf-8'))
def PredictStream(self, request, context):
# Implement PredictStream RPC
#for reply in some_data_generator():
# yield reply
# Not implemented yet
return self.Predict(request, context)
def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()
print("Server started. Listening on: " + address, file=sys.stderr)
# Define the signal handler function
def signal_handler(sig, frame):
print("Received termination signal. Shutting down...")
server.stop(0)
sys.exit(0)
# Set the signal handlers for SIGINT and SIGTERM
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
try:
while True:
time.sleep(_ONE_DAY_IN_SECONDS)
except KeyboardInterrupt:
server.stop(0)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the gRPC server.")
parser.add_argument(
"--addr", default="localhost:50051", help="The address to bind the server to."
)
args = parser.parse_args()
serve(args.addr)

View File

@@ -1,13 +0,0 @@
#!/bin/bash
set -e
LIMIT_TARGETS="cublas"
source $(dirname $0)/../common/libbackend.sh
installRequirements
git clone https://github.com/turboderp/exllama $MY_DIR/source
uv pip install ${BUILD_ISOLATION_FLAG} --requirement ${MY_DIR}/source/requirements.txt
cp -v ./*py $MY_DIR/source/

View File

@@ -1,3 +0,0 @@
transformers
accelerate
torch

View File

@@ -1,4 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
transformers
accelerate

View File

@@ -1,3 +0,0 @@
torch
transformers
accelerate

View File

@@ -1,4 +0,0 @@
grpcio==1.66.1
protobuf
certifi
setuptools

View File

@@ -1,7 +0,0 @@
#!/bin/bash
LIMIT_TARGETS="cublas"
BACKEND_FILE="${MY_DIR}/source/backend.py"
source $(dirname $0)/../common/libbackend.sh
startBackend $@

View File

@@ -1,6 +0,0 @@
#!/bin/bash
set -e
source $(dirname $0)/../common/libbackend.sh
runUnittests

View File

@@ -1,4 +1,4 @@
-grpcio==1.66.1
+grpcio==1.66.2
protobuf
certifi
wheel

View File

@@ -1,3 +1,3 @@
-grpcio==1.66.1
+grpcio==1.66.2
protobuf
certifi

View File

@@ -2,7 +2,7 @@
intel-extension-for-pytorch
torch
optimum[openvino]
-grpcio==1.66.1
+grpcio==1.66.2
protobuf
librosa==0.9.1
faster-whisper==1.0.3
@@ -18,6 +18,6 @@ python-dotenv
pypinyin==0.50.0
cn2an==0.5.22
jieba==0.42.1
-gradio==4.38.1
+gradio==4.44.1
langid==1.1.6
git+https://github.com/myshell-ai/MeloTTS.git

View File

@@ -1,4 +1,4 @@
-grpcio==1.66.1
+grpcio==1.66.2
protobuf
librosa
faster-whisper

View File

@@ -19,7 +19,7 @@ class TestBackendServicer(unittest.TestCase):
This method sets up the gRPC service by starting the server
"""
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
-time.sleep(10)
+time.sleep(30)
def tearDown(self) -> None:
"""

View File

@@ -15,5 +15,12 @@ installRequirements
# https://github.com/descriptinc/audiotools/issues/101
# incompatible protobuf versions.
PYDIR=$(ls ${MY_DIR}/venv/lib)
-curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/builder.py
+PYDIR=python3.10
+pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"
+if [ ! -d ${pyenv} ]; then
+echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
+exit 1
+fi
+curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py

View File

@@ -3,6 +3,6 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
-setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers
accelerate

View File

@@ -1,4 +1,4 @@
-grpcio==1.66.1
+grpcio==1.66.2
protobuf
certifi
llvmlite==0.43.0

View File

@@ -5,4 +5,4 @@ accelerate
torch
rerankers[transformers]
optimum[openvino]
-setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,3 +1,3 @@
-grpcio==1.66.1
+grpcio==1.66.2
protobuf
certifi

View File

@@ -55,7 +55,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
"""
model_name = request.Model
try:
-self.model = SentenceTransformer(model_name)
+self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

View File

@@ -2,5 +2,5 @@ torch
accelerate
transformers
bitsandbytes
-sentence-transformers==3.0.1
+sentence-transformers==3.1.1
transformers

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
accelerate
-sentence-transformers==3.0.1
+sentence-transformers==3.1.1
transformers

View File

@@ -1,4 +1,4 @@
torch
accelerate
-sentence-transformers==3.0.1
+sentence-transformers==3.1.1
transformers

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
accelerate
-sentence-transformers==3.0.1
+sentence-transformers==3.1.1
transformers

View File

@@ -4,5 +4,5 @@ torch
optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
accelerate
-sentence-transformers==3.0.1
+sentence-transformers==3.1.1
transformers

View File

@@ -1,3 +1,5 @@
-grpcio==1.66.1
+grpcio==1.66.2
protobuf
-certifi
+certifi
+datasets
+einops

View File

@@ -4,4 +4,4 @@ transformers
accelerate
torch
optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,4 +1,4 @@
-grpcio==1.66.1
+grpcio==1.66.2
protobuf
scipy==1.14.0
certifi

View File

@@ -72,7 +72,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
Returns:
A Result object that contains the result of the LoadModel operation.
"""
model_name = request.Model
# Check to see if the Model exists in the filesystem already.
if os.path.exists(request.ModelFile):
model_name = request.ModelFile
compute = torch.float16
if request.F16Memory == True:

View File

@@ -1,4 +1,4 @@
-grpcio==1.66.1
+grpcio==1.66.2
protobuf
certifi
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -4,4 +4,4 @@ accelerate
torch
torchaudio
optimum[openvino]
-setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,3 +1,3 @@
-grpcio==1.66.1
+grpcio==1.66.2
protobuf
certifi

View File

@@ -5,6 +5,8 @@ import argparse
import signal
import sys
import os
from typing import List
from PIL import Image
import backend_pb2
import backend_pb2_grpc
@@ -15,6 +17,8 @@ from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.sampling_params import SamplingParams
from vllm.utils import random_uuid
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.multimodal.utils import fetch_image
from vllm.assets.video import VideoAsset
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
@@ -105,6 +109,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
try:
self.llm = AsyncLLMEngine.from_engine_args(engine_args)
except Exception as err:
print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr)
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
try:
@@ -117,7 +122,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
)
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
print("Model loaded successfully", file=sys.stderr)
return backend_pb2.Result(message="Model loaded successfully", success=True)
async def Predict(self, request, context):
@@ -196,15 +201,33 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.Seed != 0:
sampling_params.seed = request.Seed
# Extract image paths and process images
prompt = request.Prompt
# If tokenizer template is enabled and messages are provided instead of prompt apply the tokenizer template
image_paths = request.Images
image_data = [self.load_image(img_path) for img_path in image_paths]
videos_path = request.Videos
video_data = [self.load_video(video_path) for video_path in videos_path]
# If tokenizer template is enabled and messages are provided instead of prompt, apply the tokenizer template
if not request.Prompt and request.UseTokenizerTemplate and request.Messages:
prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True)
-# Generate text
+# Generate text using the LLM engine
request_id = random_uuid()
-outputs = self.llm.generate(prompt, sampling_params, request_id)
+print(f"Generating text with request_id: {request_id}", file=sys.stderr)
+outputs = self.llm.generate(
+{
+"prompt": prompt,
+"multi_modal_data": {
+"image": image_data if image_data else None,
+"video": video_data if video_data else None,
+} if image_data or video_data else None,
+},
+sampling_params=sampling_params,
+request_id=request_id,
+)
# Stream the results
generated_text = ""
@@ -227,9 +250,49 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if streaming:
return
# Remove the image files from /tmp folder
for img_path in image_paths:
try:
os.remove(img_path)
except Exception as e:
print(f"Error removing image file: {img_path}, {e}", file=sys.stderr)
# Sending the final generated text
yield backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))
def load_image(self, image_path: str):
"""
Load an image from the given file path.
Args:
image_path (str): The path to the image file.
Returns:
Image: The loaded image.
"""
try:
return Image.open(image_path)
except Exception as e:
print(f"Error loading image {image_path}: {e}", file=sys.stderr)
return self.load_video(image_path)
def load_video(self, video_path: str):
"""
Load a video from the given file path.
Args:
video_path (str): The path to the video file.
Returns:
Video: The loaded video.
"""
try:
video = VideoAsset(name=video_path).np_ndarrays
return video
except Exception as e:
print(f"Error loading video {video_path}: {e}", file=sys.stderr)
return None
async def serve(address):
# Start asyncio gRPC server
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))

View File

@@ -13,4 +13,20 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi
installRequirements
# We don't embed this into the images as it is a large dependency and not always needed.
# Besides, the inference speed is not actually usable in the current state for production use-cases.
if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE}" == "xtrue" ]; then
ensureVenv
# https://docs.vllm.ai/en/v0.6.1/getting_started/cpu-installation.html
if [ ! -d vllm ]; then
git clone https://github.com/vllm-project/vllm
fi
pushd vllm
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.66.2 protobuf bitsandbytes
uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
VLLM_TARGET_DEVICE=cpu python setup.py install
popd
rm -rf vllm
else
installRequirements
fi

View File

@@ -1,4 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
accelerate
torch
-transformers
+transformers
+bitsandbytes

View File

@@ -1,3 +1,4 @@
accelerate
torch
-transformers
+transformers
+bitsandbytes

View File

@@ -1,4 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
accelerate
torch
-transformers
+transformers
+bitsandbytes

View File

@@ -4,4 +4,5 @@ accelerate
torch
transformers
optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+bitsandbytes

View File

@@ -1,4 +1,4 @@
-grpcio==1.66.1
+grpcio==1.66.2
protobuf
certifi
setuptools

View File

@@ -10,20 +10,11 @@ import (
)
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
modelFile := backendConfig.Model
-grpcOpts := gRPCModelOpts(backendConfig)
var inferenceModel interface{}
var err error
-opts := modelOpts(backendConfig, appConfig, []model.Option{
-model.WithLoadGRPCLoadModelOpts(grpcOpts),
-model.WithThreads(uint32(*backendConfig.Threads)),
-model.WithAssetDir(appConfig.AssetsDestination),
-model.WithModel(modelFile),
-model.WithContext(appConfig.Context),
-})
+opts := ModelOptions(backendConfig, appConfig, []model.Option{})
if backendConfig.Backend == "" {
inferenceModel, err = loader.GreedyLoader(opts...)

View File

@@ -8,19 +8,8 @@ import (
)
func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
-threads := backendConfig.Threads
-if *threads == 0 && appConfig.Threads != 0 {
-threads = &appConfig.Threads
-}
-gRPCOpts := gRPCModelOpts(backendConfig)
-opts := modelOpts(backendConfig, appConfig, []model.Option{
-model.WithBackendString(backendConfig.Backend),
-model.WithAssetDir(appConfig.AssetsDestination),
-model.WithThreads(uint32(*threads)),
-model.WithContext(appConfig.Context),
-model.WithModel(backendConfig.Model),
-model.WithLoadGRPCLoadModelOpts(gRPCOpts),
-})
+opts := ModelOptions(backendConfig, appConfig, []model.Option{})
inferenceModel, err := loader.BackendLoader(
opts...,

View File

@@ -31,24 +31,13 @@ type TokenUsage struct {
Completion int
}
-func ModelInference(ctx context.Context, s string, messages []schema.Message, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
+func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
modelFile := c.Model
-threads := c.Threads
-if *threads == 0 && o.Threads != 0 {
-threads = &o.Threads
-}
-grpcOpts := gRPCModelOpts(c)
var inferenceModel grpc.Backend
var err error
-opts := modelOpts(c, o, []model.Option{
-model.WithLoadGRPCLoadModelOpts(grpcOpts),
-model.WithThreads(uint32(*threads)), // some models uses this to allocate threads during startup
-model.WithAssetDir(o.AssetsDestination),
-model.WithModel(modelFile),
-model.WithContext(o.Context),
-})
+opts := ModelOptions(c, o, []model.Option{})
if c.Backend != "" {
opts = append(opts, model.WithBackendString(c.Backend))
@@ -101,6 +90,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
opts.Messages = protoMessages
opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
opts.Images = images
opts.Videos = videos
opts.Audios = audios
tokenUsage := TokenUsage{}

View File

@@ -11,32 +11,65 @@ import (
"github.com/rs/zerolog/log"
)
-func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
+func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
name := c.Name
if name == "" {
name = c.Model
}
defOpts := []model.Option{
model.WithBackendString(c.Backend),
model.WithModel(c.Model),
model.WithAssetDir(so.AssetsDestination),
model.WithContext(so.Context),
model.WithModelID(name),
}
threads := 1
if c.Threads != nil {
threads = *c.Threads
}
if so.Threads != 0 {
threads = so.Threads
}
c.Threads = &threads
grpcOpts := grpcModelOpts(c)
defOpts = append(defOpts, model.WithLoadGRPCLoadModelOpts(grpcOpts))
if so.SingleBackend {
-opts = append(opts, model.WithSingleActiveBackend())
+defOpts = append(defOpts, model.WithSingleActiveBackend())
}
if so.ParallelBackendRequests {
-opts = append(opts, model.EnableParallelRequests)
+defOpts = append(defOpts, model.EnableParallelRequests)
}
if c.GRPC.Attempts != 0 {
-opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts))
+defOpts = append(defOpts, model.WithGRPCAttempts(c.GRPC.Attempts))
}
if c.GRPC.AttemptsSleepTime != 0 {
-opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
+defOpts = append(defOpts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
}
for k, v := range so.ExternalGRPCBackends {
-opts = append(opts, model.WithExternalBackend(k, v))
+defOpts = append(defOpts, model.WithExternalBackend(k, v))
}
-return opts
+return append(defOpts, opts...)
}
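With this refactor, the per-backend defaults (backend string, model, asset directory, context, gRPC model options and thread handling) come from ModelOptions itself, and call sites only append the options they want to add or override. A hypothetical caller, sketched under the assumption that a populated BackendConfig and ApplicationConfig are already at hand (the helper name is illustrative, not part of the change):

```go
// Hypothetical usage sketch of the refactored ModelOptions.
package backend

import (
	"github.com/mudler/LocalAI/core/config"
	model "github.com/mudler/LocalAI/pkg/model"
)

func loadForInference(loader *model.ModelLoader, c config.BackendConfig, app *config.ApplicationConfig) error {
	// Defaults (model, asset dir, context, gRPC opts, threads) are filled in by
	// ModelOptions; only explicit overrides need to be passed here.
	opts := ModelOptions(c, app, []model.Option{
		model.WithModel(c.Model), // redundant with the defaults, shown for illustration
	})
	_, err := loader.BackendLoader(opts...)
	return err
}
```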
func getSeed(c config.BackendConfig) int32 {
-seed := int32(*c.Seed)
+var seed int32 = config.RAND_SEED
+if c.Seed != nil {
+seed = int32(*c.Seed)
+}
if seed == config.RAND_SEED {
seed = rand.Int31()
}
@@ -44,11 +77,47 @@ func getSeed(c config.BackendConfig) int32 {
return seed
}
-func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
+func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
b := 512
if c.Batch != 0 {
b = c.Batch
}
f16 := false
if c.F16 != nil {
f16 = *c.F16
}
embeddings := false
if c.Embeddings != nil {
embeddings = *c.Embeddings
}
lowVRAM := false
if c.LowVRAM != nil {
lowVRAM = *c.LowVRAM
}
mmap := false
if c.MMap != nil {
mmap = *c.MMap
}
ctxSize := 1024
if c.ContextSize != nil {
ctxSize = *c.ContextSize
}
mmlock := false
if c.MMlock != nil {
mmlock = *c.MMlock
}
nGPULayers := 9999999
if c.NGPULayers != nil {
nGPULayers = *c.NGPULayers
}
return &pb.ModelOptions{
CUDA: c.CUDA || c.Diffusers.CUDA,
SchedulerType: c.Diffusers.SchedulerType,
@@ -56,14 +125,14 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
CFGScale: c.Diffusers.CFGScale,
LoraAdapter: c.LoraAdapter,
LoraScale: c.LoraScale,
-F16Memory: *c.F16,
+F16Memory: f16,
LoraBase: c.LoraBase,
IMG2IMG: c.Diffusers.IMG2IMG,
CLIPModel: c.Diffusers.ClipModel,
CLIPSubfolder: c.Diffusers.ClipSubFolder,
CLIPSkip: int32(c.Diffusers.ClipSkip),
ControlNet: c.Diffusers.ControlNet,
-ContextSize: int32(*c.ContextSize),
+ContextSize: int32(ctxSize),
Seed: getSeed(c),
NBatch: int32(b),
NoMulMatQ: c.NoMulMatQ,
@@ -85,16 +154,16 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
YarnBetaSlow: c.YarnBetaSlow,
NGQA: c.NGQA,
RMSNormEps: c.RMSNormEps,
-MLock: *c.MMlock,
+MLock: mmlock,
RopeFreqBase: c.RopeFreqBase,
RopeScaling: c.RopeScaling,
Type: c.ModelType,
RopeFreqScale: c.RopeFreqScale,
NUMA: c.NUMA,
-Embeddings: *c.Embeddings,
-LowVRAM: *c.LowVRAM,
-NGPULayers: int32(*c.NGPULayers),
-MMap: *c.MMap,
+Embeddings: embeddings,
+LowVRAM: lowVRAM,
+NGPULayers: int32(nGPULayers),
+MMap: mmap,
MainGPU: c.MainGPU,
Threads: int32(*c.Threads),
TensorSplit: c.TensorSplit,

View File

@@ -9,21 +9,9 @@ import (
model "github.com/mudler/LocalAI/pkg/model"
)
-func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
-bb := backend
-if bb == "" {
-return nil, fmt.Errorf("backend is required")
-}
+func Rerank(modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
-grpcOpts := gRPCModelOpts(backendConfig)
-opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
-model.WithBackendString(bb),
-model.WithModel(modelFile),
-model.WithContext(appConfig.Context),
-model.WithAssetDir(appConfig.AssetsDestination),
-model.WithLoadGRPCLoadModelOpts(grpcOpts),
-})
+opts := ModelOptions(backendConfig, appConfig, []model.Option{model.WithModel(modelFile)})
rerankModel, err := loader.BackendLoader(opts...)
if err != nil {
return nil, err

View File

@@ -13,7 +13,6 @@ import (
)
func SoundGeneration(
-backend string,
modelFile string,
text string,
duration *float32,
@@ -25,18 +24,8 @@ func SoundGeneration(
appConfig *config.ApplicationConfig,
backendConfig config.BackendConfig,
) (string, *proto.Result, error) {
-if backend == "" {
-return "", nil, fmt.Errorf("backend is a required parameter")
-}
-grpcOpts := gRPCModelOpts(backendConfig)
-opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
-model.WithBackendString(backend),
-model.WithModel(modelFile),
-model.WithContext(appConfig.Context),
-model.WithAssetDir(appConfig.AssetsDestination),
-model.WithLoadGRPCLoadModelOpts(grpcOpts),
-})
+opts := ModelOptions(backendConfig, appConfig, []model.Option{model.WithModel(modelFile)})
soundGenModel, err := loader.BackendLoader(opts...)
if err != nil {

View File

@@ -0,0 +1,33 @@
package backend
import (
"context"
"fmt"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/grpc/proto"
model "github.com/mudler/LocalAI/pkg/model"
)
func TokenMetrics(
modelFile string,
loader *model.ModelLoader,
appConfig *config.ApplicationConfig,
backendConfig config.BackendConfig) (*proto.MetricsResponse, error) {
opts := ModelOptions(backendConfig, appConfig, []model.Option{
model.WithModel(modelFile),
})
model, err := loader.BackendLoader(opts...)
if err != nil {
return nil, err
}
if model == nil {
return nil, fmt.Errorf("could not load model")
}
res, err := model.GetTokenMetrics(context.Background(), &proto.MetricsRequest{})
return res, err
}
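TokenMetrics forwards the new GetTokenMetrics gRPC call to whichever backend is serving the model. A hypothetical consumer might surface the figures like this; the getter names are assumed to follow the usual protoc-gen-go mapping of the fields set by the C++ backend earlier in this diff:

```go
// Hypothetical consumer of TokenMetrics; not part of the change.
package backend

import (
	"fmt"

	"github.com/mudler/LocalAI/core/config"
	model "github.com/mudler/LocalAI/pkg/model"
)

func logTokenMetrics(modelFile string, loader *model.ModelLoader, app *config.ApplicationConfig, c config.BackendConfig) error {
	res, err := TokenMetrics(modelFile, loader, app, c)
	if err != nil {
		return err
	}
	// Getter names assumed from the proto fields (slot_id, tokens_per_second, ...).
	fmt.Printf("slot=%d tok/s=%.2f generated=%d\n",
		res.GetSlotId(), res.GetTokensPerSecond(), res.GetTokensGenerated())
	return nil
}
```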

core/backend/tokenize.go (new file, 44 lines)
View File

@@ -0,0 +1,44 @@
package backend
import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc"
model "github.com/mudler/LocalAI/pkg/model"
)
func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {
modelFile := backendConfig.Model
var inferenceModel grpc.Backend
var err error
opts := ModelOptions(backendConfig, appConfig, []model.Option{
model.WithModel(modelFile),
})
if backendConfig.Backend == "" {
inferenceModel, err = loader.GreedyLoader(opts...)
} else {
opts = append(opts, model.WithBackendString(backendConfig.Backend))
inferenceModel, err = loader.BackendLoader(opts...)
}
if err != nil {
return schema.TokenizeResponse{}, err
}
predictOptions := gRPCPredictOpts(backendConfig, loader.ModelPath)
predictOptions.Prompt = s
// tokenize the string
resp, err := inferenceModel.TokenizeString(appConfig.Context, predictOptions)
if err != nil {
return schema.TokenizeResponse{}, err
}
return schema.TokenizeResponse{
Tokens: resp.Tokens,
}, nil
}

View File

@@ -14,13 +14,11 @@ import (
func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
-opts := modelOpts(backendConfig, appConfig, []model.Option{
-model.WithBackendString(model.WhisperBackend),
-model.WithModel(backendConfig.Model),
-model.WithContext(appConfig.Context),
-model.WithThreads(uint32(*backendConfig.Threads)),
-model.WithAssetDir(appConfig.AssetsDestination),
-})
+if backendConfig.Backend == "" {
+backendConfig.Backend = model.WhisperBackend
+}
+opts := ModelOptions(backendConfig, appConfig, []model.Option{})
transcriptionModel, err := ml.BackendLoader(opts...)
if err != nil {

View File

@@ -28,14 +28,9 @@ func ModelTTS(
bb = model.PiperBackend
}
-grpcOpts := gRPCModelOpts(backendConfig)
-opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
+opts := ModelOptions(config.BackendConfig{}, appConfig, []model.Option{
model.WithBackendString(bb),
model.WithModel(modelFile),
-model.WithContext(appConfig.Context),
-model.WithAssetDir(appConfig.AssetsDestination),
-model.WithLoadGRPCLoadModelOpts(grpcOpts),
})
ttsModel, err := loader.BackendLoader(opts...)
if err != nil {

View File

@@ -41,31 +41,35 @@ type RunCMD struct {
Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"`
ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"`
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
LibraryPath string `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"`
CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"`
Peer2PeerOTPInterval int `env:"LOCALAI_P2P_OTP_INTERVAL,P2P_OTP_INTERVAL" default:"9000" name:"p2p-otp-interval" help:"Interval for OTP refresh (used during token generation)" group:"p2p"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`
ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
EnableWatchdogIdle bool `env:"LOCALAI_WATCHDOG_IDLE,WATCHDOG_IDLE" default:"false" help:"Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout" group:"backends"`
WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"`
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
LibraryPath string `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"`
CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
UseSubtleKeyComparison bool `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliancy against timing attacks." group:"hardening"`
DisableApiKeyRequirementForHttpGet bool `env:"LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET" default:"false" help:"If true, a valid API key is not required to issue GET requests to portions of the web ui. This should only be enabled in secure testing environments" group:"hardening"`
HttpGetExemptedEndpoints []string `env:"LOCALAI_HTTP_GET_EXEMPTED_ENDPOINTS" default:"^/$,^/browse/?$,^/talk/?$,^/p2p/?$,^/chat/?$,^/text2image/?$,^/tts/?$,^/static/.*$,^/swagger.*$" help:"If LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET is overriden to true, this is the list of endpoints to exempt. Only adjust this in case of a security incident or as a result of a personal security posture review" group:"hardening"`
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"`
Peer2PeerOTPInterval int `env:"LOCALAI_P2P_OTP_INTERVAL,P2P_OTP_INTERVAL" default:"9000" name:"p2p-otp-interval" help:"Interval for OTP refresh (used during token generation)" group:"p2p"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`
ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
EnableWatchdogIdle bool `env:"LOCALAI_WATCHDOG_IDLE,WATCHDOG_IDLE" default:"false" help:"Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout" group:"backends"`
WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"`
LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"`
}
func (r *RunCMD) Run(ctx *cliContext.Context) error {
@@ -97,7 +101,11 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithModelsURL(append(r.Models, r.ModelArgs...)...),
config.WithOpaqueErrors(r.OpaqueErrors),
config.WithEnforcedPredownloadScans(!r.DisablePredownloadScan),
config.WithSubtleKeyComparison(r.UseSubtleKeyComparison),
config.WithDisableApiKeyRequirementForHttpGet(r.DisableApiKeyRequirementForHttpGet),
config.WithHttpGetExemptedEndpoints(r.HttpGetExemptedEndpoints),
config.WithP2PNetworkID(r.Peer2PeerNetworkID),
config.WithLoadToMemory(r.LoadToMemory),
}
token := ""

View File

@@ -85,13 +85,14 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
options := config.BackendConfig{}
options.SetDefaults()
options.Backend = t.Backend
var inputFile *string
if t.InputFile != "" {
inputFile = &t.InputFile
}
-filePath, _, err := backend.SoundGeneration(t.Backend, t.Model, text,
+filePath, _, err := backend.SoundGeneration(t.Model, text,
parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)

View File

@@ -15,8 +15,9 @@ import (
)
type UtilCMD struct {
-GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"`
-HFScan HFScanCMD `cmd:"" name:"hf-scan" help:"Checks installed models for known security issues. WARNING: this is a best-effort feature and may not catch everything!"`
+GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"`
+HFScan HFScanCMD `cmd:"" name:"hf-scan" help:"Checks installed models for known security issues. WARNING: this is a best-effort feature and may not catch everything!"`
+UsecaseHeuristic UsecaseHeuristicCMD `cmd:"" name:"usecase-heuristic" help:"Checks a specific model config and prints what usecase LocalAI will offer for it."`
}
type GGUFInfoCMD struct {
@@ -30,6 +31,11 @@ type HFScanCMD struct {
ToScan []string `arg:""`
}
type UsecaseHeuristicCMD struct {
ConfigName string `name:"The config file to check"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
}
func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
if u.Args == nil || len(u.Args) == 0 {
return fmt.Errorf("no GGUF file provided")
@@ -99,3 +105,31 @@ func (hfscmd *HFScanCMD) Run(ctx *cliContext.Context) error {
return nil
}
}
func (uhcmd *UsecaseHeuristicCMD) Run(ctx *cliContext.Context) error {
if len(uhcmd.ConfigName) == 0 {
log.Error().Msg("ConfigName is a required parameter")
return fmt.Errorf("config name is a required parameter")
}
if len(uhcmd.ModelsPath) == 0 {
log.Error().Msg("ModelsPath is a required parameter")
return fmt.Errorf("model path is a required parameter")
}
bcl := config.NewBackendConfigLoader(uhcmd.ModelsPath)
err := bcl.LoadBackendConfig(uhcmd.ConfigName)
if err != nil {
log.Error().Err(err).Str("ConfigName", uhcmd.ConfigName).Msg("error while loading backend")
return err
}
bc, exists := bcl.GetBackendConfig(uhcmd.ConfigName)
if !exists {
log.Error().Str("ConfigName", uhcmd.ConfigName).Msg("ConfigName not found")
}
for name, uc := range config.GetAllBackendConfigUsecases() {
if bc.HasUsecases(uc) {
log.Info().Str("Usecase", name).Msg("")
}
}
log.Info().Msg("---")
return nil
}

View File

@@ -4,6 +4,7 @@ import (
"context"
"embed"
"encoding/json"
"regexp"
"time"
"github.com/mudler/LocalAI/pkg/xsysinfo"
@@ -16,7 +17,6 @@ type ApplicationConfig struct {
ModelPath string
LibPath string
UploadLimitMB, Threads, ContextSize int
DisableWebUI bool
F16 bool
Debug bool
ImageDir string
@@ -31,11 +31,18 @@ type ApplicationConfig struct {
PreloadModelsFromPath string
CORSAllowOrigins string
ApiKeys []string
EnforcePredownloadScans bool
OpaqueErrors bool
P2PToken string
P2PNetworkID string
DisableWebUI bool
EnforcePredownloadScans bool
OpaqueErrors bool
UseSubtleKeyComparison bool
DisableApiKeyRequirementForHttpGet bool
HttpGetExemptedEndpoints []*regexp.Regexp
DisableGalleryEndpoint bool
LoadToMemory []string
ModelLibraryURL string
Galleries []Gallery
@@ -57,8 +64,6 @@ type ApplicationConfig struct {
ModelsURL []string
WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration
DisableGalleryEndpoint bool
}
type AppOption func(*ApplicationConfig)
@@ -327,6 +332,38 @@ func WithOpaqueErrors(opaque bool) AppOption {
}
}
func WithLoadToMemory(models []string) AppOption {
return func(o *ApplicationConfig) {
o.LoadToMemory = models
}
}
func WithSubtleKeyComparison(subtle bool) AppOption {
return func(o *ApplicationConfig) {
o.UseSubtleKeyComparison = subtle
}
}
func WithDisableApiKeyRequirementForHttpGet(required bool) AppOption {
return func(o *ApplicationConfig) {
o.DisableApiKeyRequirementForHttpGet = required
}
}
func WithHttpGetExemptedEndpoints(endpoints []string) AppOption {
return func(o *ApplicationConfig) {
o.HttpGetExemptedEndpoints = []*regexp.Regexp{}
for _, epr := range endpoints {
r, err := regexp.Compile(epr)
if err == nil && r != nil {
o.HttpGetExemptedEndpoints = append(o.HttpGetExemptedEndpoints, r)
} else {
log.Warn().Err(err).Str("regex", epr).Msg("Error while compiling HTTP Get Exemption regex, skipping this entry.")
}
}
}
}
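
As a hedged illustration (not part of this changeset): the new options can be combined when building an ApplicationConfig. The sketch below assumes the package's NewApplicationConfig constructor and shows that invalid patterns are skipped with a warning while valid ones end up compiled in HttpGetExemptedEndpoints.

```go
// Hypothetical usage sketch, not part of this changeset.
package main

import (
	"fmt"

	"github.com/mudler/LocalAI/core/config"
)

func main() {
	// Assumes config.NewApplicationConfig applies the AppOption values in order.
	appConfig := config.NewApplicationConfig(
		config.WithDisableApiKeyRequirementForHttpGet(true),
		// The last pattern is invalid and gets skipped with a warning.
		config.WithHttpGetExemptedEndpoints([]string{`^/static/.*`, `^/health$`, `(`}),
	)

	for _, re := range appConfig.HttpGetExemptedEndpoints {
		fmt.Println("exempted:", re.String(), "matches /health:", re.MatchString("/health"))
	}
}
```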
// ToConfigLoaderOptions returns a slice of ConfigLoader Option.
// Some options defined at the application level are going to be passed as defaults for
// all the configuration for the models.

View File

@@ -3,11 +3,13 @@ package config
import (
"os"
"regexp"
"slices"
"strings"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/functions"
"gopkg.in/yaml.v3"
)
const (
@@ -27,13 +29,15 @@ type BackendConfig struct {
schema.PredictionOptions `yaml:"parameters"`
Name string `yaml:"name"`
F16 *bool `yaml:"f16"`
Threads *int `yaml:"threads"`
Debug *bool `yaml:"debug"`
Roles map[string]string `yaml:"roles"`
Embeddings *bool `yaml:"embeddings"`
Backend string `yaml:"backend"`
TemplateConfig TemplateConfig `yaml:"template"`
F16 *bool `yaml:"f16"`
Threads *int `yaml:"threads"`
Debug *bool `yaml:"debug"`
Roles map[string]string `yaml:"roles"`
Embeddings *bool `yaml:"embeddings"`
Backend string `yaml:"backend"`
TemplateConfig TemplateConfig `yaml:"template"`
KnownUsecaseStrings []string `yaml:"known_usecases"`
KnownUsecases *BackendConfigUsecases `yaml:"-"`
PromptStrings, InputStrings []string `yaml:"-"`
InputToken [][]int `yaml:"-"`
@@ -192,6 +196,21 @@ type TemplateConfig struct {
// JoinChatMessagesByCharacter is a string that will be used to join chat messages together.
// It defaults to \n
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`
Video string `yaml:"video"`
Image string `yaml:"image"`
Audio string `yaml:"audio"`
}
func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {
type BCAlias BackendConfig
var aux BCAlias
if err := value.Decode(&aux); err != nil {
return err
}
*c = BackendConfig(aux)
c.KnownUsecases = GetUsecasesFromYAML(c.KnownUsecaseStrings)
return nil
}
func (c *BackendConfig) SetFunctionCallString(s string) {
@@ -410,3 +429,121 @@ func (c *BackendConfig) Validate() bool {
func (c *BackendConfig) HasTemplate() bool {
return c.TemplateConfig.Completion != "" || c.TemplateConfig.Edit != "" || c.TemplateConfig.Chat != "" || c.TemplateConfig.ChatMessage != ""
}
type BackendConfigUsecases int
const (
FLAG_ANY BackendConfigUsecases = 0b000000000
FLAG_CHAT BackendConfigUsecases = 0b000000001
FLAG_COMPLETION BackendConfigUsecases = 0b000000010
FLAG_EDIT BackendConfigUsecases = 0b000000100
FLAG_EMBEDDINGS BackendConfigUsecases = 0b000001000
FLAG_RERANK BackendConfigUsecases = 0b000010000
FLAG_IMAGE BackendConfigUsecases = 0b000100000
FLAG_TRANSCRIPT BackendConfigUsecases = 0b001000000
FLAG_TTS BackendConfigUsecases = 0b010000000
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b100000000
// Common Subsets
FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
)
func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
return map[string]BackendConfigUsecases{
"FLAG_ANY": FLAG_ANY,
"FLAG_CHAT": FLAG_CHAT,
"FLAG_COMPLETION": FLAG_COMPLETION,
"FLAG_EDIT": FLAG_EDIT,
"FLAG_EMBEDDINGS": FLAG_EMBEDDINGS,
"FLAG_RERANK": FLAG_RERANK,
"FLAG_IMAGE": FLAG_IMAGE,
"FLAG_TRANSCRIPT": FLAG_TRANSCRIPT,
"FLAG_TTS": FLAG_TTS,
"FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION,
"FLAG_LLM": FLAG_LLM,
}
}
func GetUsecasesFromYAML(input []string) *BackendConfigUsecases {
if len(input) == 0 {
return nil
}
result := FLAG_ANY
flags := GetAllBackendConfigUsecases()
for _, str := range input {
flag, exists := flags["FLAG_"+strings.ToUpper(str)]
if exists {
result |= flag
}
}
return &result
}
// HasUsecases examines a BackendConfig and determines which endpoints have a chance of success.
func (c *BackendConfig) HasUsecases(u BackendConfigUsecases) bool {
if (c.KnownUsecases != nil) && ((u & *c.KnownUsecases) == u) {
return true
}
return c.GuessUsecases(u)
}
// GuessUsecases is a **heuristic-based** function: the backend in question may not be loaded yet, and the config may not record what it is useful for.
// Ideally it would rely on config properties such as templates rather than the direct backend-name checks used in the lower half, which would avoid updating that list for every new backend - but for some services name checks are currently the only option.
func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
if (u & FLAG_CHAT) == FLAG_CHAT {
if c.TemplateConfig.Chat == "" && c.TemplateConfig.ChatMessage == "" {
return false
}
}
if (u & FLAG_COMPLETION) == FLAG_COMPLETION {
if c.TemplateConfig.Completion == "" {
return false
}
}
if (u & FLAG_EDIT) == FLAG_EDIT {
if c.TemplateConfig.Edit == "" {
return false
}
}
if (u & FLAG_EMBEDDINGS) == FLAG_EMBEDDINGS {
if c.Embeddings == nil || !*c.Embeddings {
return false
}
}
if (u & FLAG_IMAGE) == FLAG_IMAGE {
imageBackends := []string{"diffusers", "tinydream", "stablediffusion"}
if !slices.Contains(imageBackends, c.Backend) {
return false
}
if c.Backend == "diffusers" && c.Diffusers.PipelineType == "" {
return false
}
}
if (u & FLAG_RERANK) == FLAG_RERANK {
if c.Backend != "rerankers" {
return false
}
}
if (u & FLAG_TRANSCRIPT) == FLAG_TRANSCRIPT {
if c.Backend != "whisper" {
return false
}
}
if (u & FLAG_TTS) == FLAG_TTS {
ttsBackends := []string{"piper", "transformers-musicgen", "parler-tts"}
if !slices.Contains(ttsBackends, c.Backend) {
return false
}
}
if (u & FLAG_SOUND_GENERATION) == FLAG_SOUND_GENERATION {
if c.Backend != "transformers-musicgen" {
return false
}
}
return true
}
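
A minimal sketch (not from the diff) of how the usecase bitmask is meant to compose: GetUsecasesFromYAML ORs the parsed flags together, and HasUsecases reports true only when every requested bit is covered, falling back to GuessUsecases when no known_usecases were declared.

```go
package main

import (
	"fmt"

	"github.com/mudler/LocalAI/core/config"
)

func main() {
	// Strings from YAML are matched case-insensitively against the FLAG_ names.
	known := config.GetUsecasesFromYAML([]string{"chat", "COMPLETION"})
	fmt.Printf("known_usecases bitmask: %09b\n", *known)

	// Without known usecases, GuessUsecases infers capabilities from the config itself.
	cfg := config.BackendConfig{
		Name:    "example", // hypothetical model name
		Backend: "llama-cpp",
		TemplateConfig: config.TemplateConfig{
			Chat:       "chat",
			Completion: "completion",
		},
	}
	fmt.Println(cfg.HasUsecases(config.FLAG_CHAT | config.FLAG_COMPLETION)) // true: both templates are set
	fmt.Println(cfg.HasUsecases(config.FLAG_IMAGE))                         // false: not an image backend
}
```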

View File

@@ -0,0 +1,35 @@
package config
import "regexp"
type BackendConfigFilterFn func(string, *BackendConfig) bool
func NoFilterFn(_ string, _ *BackendConfig) bool { return true }
func BuildNameFilterFn(filter string) (BackendConfigFilterFn, error) {
if filter == "" {
return NoFilterFn, nil
}
rxp, err := regexp.Compile(filter)
if err != nil {
return nil, err
}
return func(name string, config *BackendConfig) bool {
if config != nil {
return rxp.MatchString(config.Name)
}
return rxp.MatchString(name)
}, nil
}
func BuildUsecaseFilterFn(usecases BackendConfigUsecases) BackendConfigFilterFn {
if usecases == FLAG_ANY {
return NoFilterFn
}
return func(name string, config *BackendConfig) bool {
if config == nil {
return false // TODO: Potentially make this a param, for now, no known usecase to include
}
return config.HasUsecases(usecases)
}
}
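
A hedged usage sketch (not part of the diff) tying the new filter constructors to GetBackendConfigsByFilter on a loader; the models directory is a placeholder, and in practice the configs would be loaded from that path first.

```go
package main

import (
	"fmt"

	"github.com/mudler/LocalAI/core/config"
)

func main() {
	bcl := config.NewBackendConfigLoader("/path/to/models") // hypothetical models directory

	// Name filters are regular expressions matched against the config name.
	byName, err := config.BuildNameFilterFn("^llama")
	if err != nil {
		panic(err)
	}
	for _, bc := range bcl.GetBackendConfigsByFilter(byName) {
		fmt.Println("name match:", bc.Name)
	}

	// Usecase filters reuse HasUsecases; FLAG_ANY returns the unfiltered list.
	chatCapable := config.BuildUsecaseFilterFn(config.FLAG_CHAT)
	for _, bc := range bcl.GetBackendConfigsByFilter(chatCapable) {
		fmt.Println("chat-capable:", bc.Name)
	}
}
```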

View File

@@ -201,6 +201,26 @@ func (bcl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
return res
}
func (bcl *BackendConfigLoader) GetBackendConfigsByFilter(filter BackendConfigFilterFn) []BackendConfig {
bcl.Lock()
defer bcl.Unlock()
var res []BackendConfig
if filter == nil {
filter = NoFilterFn
}
for n, v := range bcl.configs {
if filter(n, &v) {
res = append(res, v)
}
}
// TODO: I don't think this one needs to Sort on name... but we'll see what breaks.
return res
}
func (bcl *BackendConfigLoader) RemoveBackendConfig(m string) {
bcl.Lock()
defer bcl.Unlock()

View File

@@ -19,12 +19,17 @@ var _ = Describe("Test cases for config related functions", func() {
`backend: "../foo-bar"
name: "foo"
parameters:
model: "foo-bar"`)
model: "foo-bar"
known_usecases:
- chat
- COMPLETION
`)
Expect(err).ToNot(HaveOccurred())
config, err := readBackendConfigFromFile(tmp.Name())
Expect(err).To(BeNil())
Expect(config).ToNot(BeNil())
Expect(config.Validate()).To(BeFalse())
Expect(config.KnownUsecases).ToNot(BeNil())
})
It("Test Validate", func() {
tmp, err := os.CreateTemp("", "config.yaml")
@@ -61,4 +66,99 @@ parameters:
Expect(config.Validate()).To(BeTrue())
})
})
It("Properly handles backend usecase matching", func() {
a := BackendConfig{
Name: "a",
}
Expect(a.HasUsecases(FLAG_ANY)).To(BeTrue()) // FLAG_ANY just means the config _exists_ essentially.
b := BackendConfig{
Name: "b",
Backend: "stablediffusion",
}
Expect(b.HasUsecases(FLAG_ANY)).To(BeTrue())
Expect(b.HasUsecases(FLAG_IMAGE)).To(BeTrue())
Expect(b.HasUsecases(FLAG_CHAT)).To(BeFalse())
c := BackendConfig{
Name: "c",
Backend: "llama-cpp",
TemplateConfig: TemplateConfig{
Chat: "chat",
},
}
Expect(c.HasUsecases(FLAG_ANY)).To(BeTrue())
Expect(c.HasUsecases(FLAG_IMAGE)).To(BeFalse())
Expect(c.HasUsecases(FLAG_COMPLETION)).To(BeFalse())
Expect(c.HasUsecases(FLAG_CHAT)).To(BeTrue())
d := BackendConfig{
Name: "d",
Backend: "llama-cpp",
TemplateConfig: TemplateConfig{
Chat: "chat",
Completion: "completion",
},
}
Expect(d.HasUsecases(FLAG_ANY)).To(BeTrue())
Expect(d.HasUsecases(FLAG_IMAGE)).To(BeFalse())
Expect(d.HasUsecases(FLAG_COMPLETION)).To(BeTrue())
Expect(d.HasUsecases(FLAG_CHAT)).To(BeTrue())
trueValue := true
e := BackendConfig{
Name: "e",
Backend: "llama-cpp",
TemplateConfig: TemplateConfig{
Completion: "completion",
},
Embeddings: &trueValue,
}
Expect(e.HasUsecases(FLAG_ANY)).To(BeTrue())
Expect(e.HasUsecases(FLAG_IMAGE)).To(BeFalse())
Expect(e.HasUsecases(FLAG_COMPLETION)).To(BeTrue())
Expect(e.HasUsecases(FLAG_CHAT)).To(BeFalse())
Expect(e.HasUsecases(FLAG_EMBEDDINGS)).To(BeTrue())
f := BackendConfig{
Name: "f",
Backend: "piper",
}
Expect(f.HasUsecases(FLAG_ANY)).To(BeTrue())
Expect(f.HasUsecases(FLAG_TTS)).To(BeTrue())
Expect(f.HasUsecases(FLAG_CHAT)).To(BeFalse())
g := BackendConfig{
Name: "g",
Backend: "whisper",
}
Expect(g.HasUsecases(FLAG_ANY)).To(BeTrue())
Expect(g.HasUsecases(FLAG_TRANSCRIPT)).To(BeTrue())
Expect(g.HasUsecases(FLAG_TTS)).To(BeFalse())
h := BackendConfig{
Name: "h",
Backend: "transformers-musicgen",
}
Expect(h.HasUsecases(FLAG_ANY)).To(BeTrue())
Expect(h.HasUsecases(FLAG_TRANSCRIPT)).To(BeFalse())
Expect(h.HasUsecases(FLAG_TTS)).To(BeTrue())
Expect(h.HasUsecases(FLAG_SOUND_GENERATION)).To(BeTrue())
knownUsecases := FLAG_CHAT | FLAG_COMPLETION
i := BackendConfig{
Name: "i",
Backend: "whisper",
// Earlier test checks parsing, this just needs to set final values
KnownUsecases: &knownUsecases,
}
Expect(i.HasUsecases(FLAG_ANY)).To(BeTrue())
Expect(i.HasUsecases(FLAG_TRANSCRIPT)).To(BeTrue())
Expect(i.HasUsecases(FLAG_TTS)).To(BeFalse())
Expect(i.HasUsecases(FLAG_COMPLETION)).To(BeTrue())
Expect(i.HasUsecases(FLAG_CHAT)).To(BeTrue())
})
})

View File

@@ -132,7 +132,7 @@ func AvailableGalleryModels(galleries []config.Gallery, basePath string) ([]*Gal
func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) {
var refFile string
uri := downloader.URI(url)
err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error {
err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error {
refFile = string(d)
if len(refFile) == 0 {
return fmt.Errorf("invalid reference file at url %s: %s", url, d)
@@ -156,7 +156,7 @@ func getGalleryModels(gallery config.Gallery, basePath string) ([]*GalleryModel,
}
uri := downloader.URI(gallery.URL)
err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error {
err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error {
return yaml.Unmarshal(d, &models)
})
if err != nil {

View File

@@ -69,7 +69,7 @@ type PromptTemplate struct {
func GetGalleryConfigFromURL(url string, basePath string) (Config, error) {
var config Config
uri := downloader.URI(url)
err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error {
err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error {
return yaml.Unmarshal(d, &config)
})
if err != nil {

View File

@@ -3,13 +3,15 @@ package http
import (
"embed"
"errors"
"fmt"
"net/http"
"strings"
"github.com/dave-gray101/v2keyauth"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/http/endpoints/openai"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/http/routes"
"github.com/mudler/LocalAI/core/config"
@@ -29,24 +31,6 @@ import (
"github.com/rs/zerolog/log"
)
func readAuthHeader(c *fiber.Ctx) string {
authHeader := c.Get("Authorization")
// elevenlabs
xApiKey := c.Get("xi-api-key")
if xApiKey != "" {
authHeader = "Bearer " + xApiKey
}
// anthropic
xApiKey = c.Get("x-api-key")
if xApiKey != "" {
authHeader = "Bearer " + xApiKey
}
return authHeader
}
// Embed a directory
//
//go:embed static/*
@@ -137,37 +121,17 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
})
}
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
auth := func(c *fiber.Ctx) error {
if len(appConfig.ApiKeys) == 0 {
return c.Next()
}
// Health Checks should always be exempt from auth, so register these first
routes.HealthRoutes(app)
if len(appConfig.ApiKeys) == 0 {
return c.Next()
}
authHeader := readAuthHeader(c)
if authHeader == "" {
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
}
// If it's a bearer token
authHeaderParts := strings.Split(authHeader, " ")
if len(authHeaderParts) != 2 || authHeaderParts[0] != "Bearer" {
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid Authorization header format"})
}
apiKey := authHeaderParts[1]
for _, key := range appConfig.ApiKeys {
if apiKey == key {
return c.Next()
}
}
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
kaConfig, err := middleware.GetKeyAuthConfig(appConfig)
if err != nil || kaConfig == nil {
return nil, fmt.Errorf("failed to create key auth config: %w", err)
}
// Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration
app.Use(v2keyauth.New(*kaConfig))
if appConfig.CORS {
var c func(ctx *fiber.Ctx) error
if appConfig.CORSAllowOrigins == "" {
@@ -192,13 +156,13 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
galleryService := services.NewGalleryService(appConfig)
galleryService.Start(appConfig.Context, cl)
routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig, auth)
routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService, auth)
routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth)
routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig)
routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService)
routes.RegisterOpenAIRoutes(app, cl, ml, appConfig)
if !appConfig.DisableWebUI {
routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth)
routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService)
}
routes.RegisterJINARoutes(app, cl, ml, appConfig, auth)
routes.RegisterJINARoutes(app, cl, ml, appConfig)
httpFS := http.FS(embedDirStatic)
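
The readAuthHeader logic and endpoint exemptions now live behind middleware.GetKeyAuthConfig. The sketch below is hypothetical (not the actual implementation) and only illustrates the bypass decision implied by the new options: GET requests can skip the key check when the operator enabled the exemption and the path matches one of the configured patterns.

```go
// Hypothetical sketch, not the actual middleware.GetKeyAuthConfig implementation.
package keyauthsketch

import (
	"github.com/gofiber/fiber/v2"
	"github.com/mudler/LocalAI/core/config"
)

// shouldBypassKeyAuth mirrors the kind of exemption the Filter property of the
// key-auth configuration is described as providing.
func shouldBypassKeyAuth(c *fiber.Ctx, appConfig *config.ApplicationConfig) bool {
	if !appConfig.DisableApiKeyRequirementForHttpGet || c.Method() != fiber.MethodGet {
		return false
	}
	for _, re := range appConfig.HttpGetExemptedEndpoints {
		if re.MatchString(c.Path()) {
			return true
		}
	}
	return false
}
```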

View File

@@ -12,6 +12,7 @@ import (
"os"
"path/filepath"
"runtime"
"strings"
"github.com/mudler/LocalAI/core/config"
. "github.com/mudler/LocalAI/core/http"
@@ -31,6 +32,9 @@ import (
"github.com/sashabaranov/go-openai/jsonschema"
)
const apiKey = "joshua"
const bearerKey = "Bearer " + apiKey
const testPrompt = `### System:
You are an AI assistant that follows instruction extremely well. Help as much as you can.
@@ -50,11 +54,19 @@ type modelApplyRequest struct {
func getModelStatus(url string) (response map[string]interface{}) {
// Create the HTTP request
resp, err := http.Get(url)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
fmt.Println("Error creating request:", err)
return
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", bearerKey)
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
fmt.Println("Error sending request:", err)
return
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
@@ -72,14 +84,15 @@ func getModelStatus(url string) (response map[string]interface{}) {
return
}
func getModels(url string) (response []gallery.GalleryModel) {
func getModels(url string) ([]gallery.GalleryModel, error) {
response := []gallery.GalleryModel{}
uri := downloader.URI(url)
// TODO: No tests currently seem to exercise file:// urls. Fix?
uri.DownloadAndUnmarshal("", func(url string, i []byte) error {
err := uri.DownloadWithAuthorizationAndCallback("", bearerKey, func(url string, i []byte) error {
// Unmarshal YAML data into a struct
return json.Unmarshal(i, &response)
})
return
return response, err
}
func postModelApplyRequest(url string, request modelApplyRequest) (response map[string]interface{}) {
@@ -101,6 +114,7 @@ func postModelApplyRequest(url string, request modelApplyRequest) (response map[
return
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", bearerKey)
// Make the request
client := &http.Client{}
@@ -140,6 +154,7 @@ func postRequestJSON[B any](url string, bodyJson *B) error {
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", bearerKey)
client := &http.Client{}
resp, err := client.Do(req)
@@ -175,6 +190,7 @@ func postRequestResponseJSON[B1 any, B2 any](url string, reqJson *B1, respJson *
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", bearerKey)
client := &http.Client{}
resp, err := client.Do(req)
@@ -195,6 +211,35 @@ func postRequestResponseJSON[B1 any, B2 any](url string, reqJson *B1, respJson *
return json.Unmarshal(body, respJson)
}
func postInvalidRequest(url string) (error, int) {
req, err := http.NewRequest("POST", url, bytes.NewBufferString("invalid request"))
if err != nil {
return err, -1
}
req.Header.Set("Content-Type", "application/json")
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
return err, -1
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return err, -1
}
if resp.StatusCode < 200 || resp.StatusCode >= 400 {
return fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body)), resp.StatusCode
}
return nil, resp.StatusCode
}
//go:embed backend-assets/*
var backendAssets embed.FS
@@ -260,6 +305,7 @@ var _ = Describe("API test", func() {
config.WithContext(c),
config.WithGalleries(galleries),
config.WithModelPath(modelDir),
config.WithApiKeys([]string{apiKey}),
config.WithBackendAssets(backendAssets),
config.WithBackendAssetsOutput(backendAssetsDir))...)
Expect(err).ToNot(HaveOccurred())
@@ -269,7 +315,7 @@ var _ = Describe("API test", func() {
go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig("")
defaultConfig := openai.DefaultConfig(apiKey)
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
client2 = openaigo.NewClient("")
@@ -295,10 +341,19 @@ var _ = Describe("API test", func() {
Expect(err).To(HaveOccurred())
})
Context("Auth Tests", func() {
It("Should fail if the api key is missing", func() {
err, sc := postInvalidRequest("http://127.0.0.1:9090/models/available")
Expect(err).ToNot(BeNil())
Expect(sc).To(Equal(403))
})
})
Context("Applying models", func() {
It("applies models from a gallery", func() {
models := getModels("http://127.0.0.1:9090/models/available")
models, err := getModels("http://127.0.0.1:9090/models/available")
Expect(err).To(BeNil())
Expect(len(models)).To(Equal(2), fmt.Sprint(models))
Expect(models[0].Installed).To(BeFalse(), fmt.Sprint(models))
Expect(models[1].Installed).To(BeFalse(), fmt.Sprint(models))
@@ -331,7 +386,8 @@ var _ = Describe("API test", func() {
Expect(content["backend"]).To(Equal("bert-embeddings"))
Expect(content["foo"]).To(Equal("bar"))
models = getModels("http://127.0.0.1:9090/models/available")
models, err = getModels("http://127.0.0.1:9090/models/available")
Expect(err).To(BeNil())
Expect(len(models)).To(Equal(2), fmt.Sprint(models))
Expect(models[0].Name).To(Or(Equal("bert"), Equal("bert2")))
Expect(models[1].Name).To(Or(Equal("bert"), Equal("bert2")))
@@ -895,7 +951,7 @@ var _ = Describe("API test", func() {
openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices) > 0).To(BeTrue())
Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("Sure"), ContainSubstring("five")))
Expect(strings.ToLower(resp.Choices[0].Message.Content)).To(Or(ContainSubstring("sure"), ContainSubstring("five")))
stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
Expect(err).ToNot(HaveOccurred())
@@ -914,7 +970,7 @@ var _ = Describe("API test", func() {
tokens++
}
Expect(text).ToNot(BeEmpty())
Expect(text).To(Or(ContainSubstring("Sure"), ContainSubstring("five")))
Expect(strings.ToLower(text)).To(Or(ContainSubstring("sure"), ContainSubstring("five")))
Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
})

View File

@@ -19,14 +19,16 @@ func ModelFromContext(ctx *fiber.Ctx, cl *config.BackendConfigLoader, loader *mo
if ctx.Params("model") != "" {
modelInput = ctx.Params("model")
}
if ctx.Query("model") != "" {
modelInput = ctx.Query("model")
}
// Set model from bearer token, if available
bearer := strings.TrimLeft(ctx.Get("authorization"), "Bearer ")
bearer := strings.TrimLeft(ctx.Get("authorization"), "Bear ") // strings.TrimLeft takes a cutset, so "Bear " already covers every character of "Bearer "
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
// If no model was specified, take the first available
if modelInput == "" && !bearerExists && firstModel {
models, _ := services.ListModels(cl, loader, "", true)
models, _ := services.ListModels(cl, loader, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
if len(models) > 0 {
modelInput = models[0]
log.Debug().Msgf("No model specified, using: %s", modelInput)

View File

@@ -6,6 +6,7 @@ import (
"github.com/chasefleming/elem-go"
"github.com/chasefleming/elem-go/attrs"
"github.com/microcosm-cc/bluemonday"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
@@ -41,7 +42,7 @@ func DoneProgress(galleryID, text string, showDelete bool) string {
"tabindex": "-1",
"autofocus": "",
},
elem.Text(text),
elem.Text(bluemonday.StrictPolicy().Sanitize(text)),
),
elem.If(showDelete, deleteButton(galleryID, modelName), reInstallButton(galleryID)),
).Render()
@@ -57,7 +58,7 @@ func ErrorProgress(err, galleryName string) string {
"tabindex": "-1",
"autofocus": "",
},
elem.Text("Error "+err),
elem.Text("Error "+bluemonday.StrictPolicy().Sanitize(err)),
),
installButton(galleryName),
).Render()
@@ -170,7 +171,7 @@ func P2PNodeBoxes(nodes []p2p.NodeData) string {
attrs.Props{
"class": "text-gray-200 font-semibold ml-2 mr-1",
},
elem.Text(n.ID),
elem.Text(bluemonday.StrictPolicy().Sanitize(n.ID)),
),
elem.Text("Status: "),
elem.If(
@@ -227,7 +228,7 @@ func StartProgressBar(uid, progress, text string) string {
"tabindex": "-1",
"autofocus": "",
},
elem.Text(text),
elem.Text(bluemonday.StrictPolicy().Sanitize(text)), // Perhaps overly defensive
elem.Div(attrs.Props{
"hx-get": "/browse/job/progress/" + uid,
"hx-trigger": "every 600ms",
@@ -249,9 +250,7 @@ func cardSpan(text, icon string) elem.Node {
"class": icon + " pr-2",
}),
elem.Text(text),
//elem.Text(text),
elem.Text(bluemonday.StrictPolicy().Sanitize(text)),
)
}
@@ -285,11 +284,9 @@ func searchableElement(text, icon string) elem.Node {
elem.I(attrs.Props{
"class": icon + " pr-2",
}),
elem.Text(text),
elem.Text(bluemonday.StrictPolicy().Sanitize(text)),
),
),
//elem.Text(text),
)
}
@@ -303,7 +300,7 @@ func link(text, url string) elem.Node {
elem.I(attrs.Props{
"class": "fas fa-link pr-2",
}),
elem.Text(text),
elem.Text(bluemonday.StrictPolicy().Sanitize(text)),
)
}
func installButton(galleryName string) elem.Node {
@@ -387,13 +384,13 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g
attrs.Props{
"class": "mb-2 text-xl font-bold leading-tight",
},
elem.Text(m.Name),
elem.Text(bluemonday.StrictPolicy().Sanitize(m.Name)),
),
elem.P(
attrs.Props{
"class": "mb-4 text-sm [&:not(:hover)]:truncate text-base",
},
elem.Text(m.Description),
elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)),
),
)
}

View File

@@ -55,7 +55,7 @@ func SoundGenerationEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
}
// TODO: Support uploading files?
filePath, _, err := backend.SoundGeneration(cfg.Backend, modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
filePath, _, err := backend.SoundGeneration(modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
if err != nil {
return err
}

View File

@@ -45,13 +45,13 @@ func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
if err != nil {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
} else {
modelFile = cfg.Model
}
log.Debug().Msgf("Request for model: %s", modelFile)
if input.Backend != "" {
@@ -64,7 +64,7 @@ func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
Documents: req.Documents,
}
results, err := backend.Rerank(cfg.Backend, modelFile, request, ml, appConfig, *cfg)
results, err := backend.Rerank(modelFile, request, ml, appConfig, *cfg)
if err != nil {
return err
}

View File

@@ -0,0 +1,60 @@
package localai
import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
"github.com/mudler/LocalAI/core/schema"
"github.com/rs/zerolog/log"
"github.com/mudler/LocalAI/pkg/model"
)
// TokenMetricsEndpoint returns the tokens-processed-per-second metrics for the active slot.
//
// @Summary Get TokenMetrics for Active Slot.
// @Accept json
// @Produce json
// @Success 200 {string} string "Token metrics for the active slot (JSON)"
// @Router /v1/tokenMetrics [get]
// @Router /tokenMetrics [get]
func TokenMetricsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.TokenMetricsRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
if err != nil {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
}
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
config.LoadOptionDebug(appConfig.Debug),
config.LoadOptionThreads(appConfig.Threads),
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
if err != nil {
log.Err(err).Msg("failed to load backend config")
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
} else {
modelFile = cfg.Model
}
log.Debug().Msgf("Token Metrics for model: %s", modelFile)
response, err := backend.TokenMetrics(modelFile, ml, appConfig, *cfg)
if err != nil {
return err
}
return c.JSON(response)
}
}

View File

@@ -17,12 +17,14 @@ func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConf
if err != nil {
return err
}
loadedModels := ml.ListModels()
for b := range appConfig.ExternalGRPCBackends {
availableBackends = append(availableBackends, b)
}
return c.JSON(
schema.SystemInformationResponse{
Backends: availableBackends,
Models: loadedModels,
},
)
}

View File

@@ -0,0 +1,58 @@
package localai
import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
)
// TokenizeEndpoint exposes a REST API to tokenize the content
// @Summary Tokenize the input.
// @Success 200 {object} schema.TokenizeResponse "Response"
// @Router /v1/tokenize [post]
func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.TokenizeRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
if err != nil {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
}
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
config.LoadOptionDebug(appConfig.Debug),
config.LoadOptionThreads(appConfig.Threads),
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
if err != nil {
log.Err(err).Msg("failed to load backend config")
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
} else {
modelFile = cfg.Model
}
log.Debug().Msgf("Request for model: %s", modelFile)
tokenResponse, err := backend.ModelTokenize(input.Content, ml, *cfg, appConfig)
if err != nil {
return err
}
c.JSON(tokenResponse)
return nil
}
}
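
A hedged client-side sketch (not part of the diff) of calling the new tokenize route. The JSON field names are assumptions based on the handler reading input.Model and input.Content; the address and model name are placeholders.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Field names assumed from how the handler uses schema.TokenizeRequest.
	payload, _ := json.Marshal(map[string]string{
		"model":   "my-model", // hypothetical model name
		"content": "Hello, LocalAI!",
	})
	resp, err := http.Post("http://127.0.0.1:8080/v1/tokenize", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body)) // expected: a schema.TokenizeResponse payload with the token IDs
}
```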

View File

@@ -13,15 +13,10 @@ import (
func WelcomeEndpoint(appConfig *config.ApplicationConfig,
cl *config.BackendConfigLoader, ml *model.ModelLoader, modelStatus func() (map[string]string, map[string]string)) func(*fiber.Ctx) error {
return func(c *fiber.Ctx) error {
models, _ := services.ListModels(cl, ml, "", true)
backendConfigs := cl.GetAllBackendConfigs()
galleryConfigs := map[string]*gallery.Config{}
modelsWithBackendConfig := map[string]interface{}{}
for _, m := range backendConfigs {
modelsWithBackendConfig[m.Name] = nil
cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name)
if err != nil {
continue
@@ -29,17 +24,11 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
galleryConfigs[m.Name] = cfg
}
modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY)
// Get model statuses to display in the UI the operation in progress
processingModels, taskTypes := modelStatus()
modelsWithoutConfig := []string{}
for _, m := range models {
if _, ok := modelsWithBackendConfig[m]; !ok {
modelsWithoutConfig = append(modelsWithoutConfig, m)
}
}
summary := fiber.Map{
"Title": "LocalAI API - " + internal.PrintableVersion(),
"Version": internal.PrintableVersion(),

View File

@@ -10,6 +10,7 @@ import (
"time"
"github.com/gofiber/fiber/v2"
"github.com/microcosm-cc/bluemonday"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
@@ -83,7 +84,7 @@ func CreateAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
if !modelExists(cl, ml, request.Model) {
log.Warn().Msgf("Model: %s was not found in list of models.", request.Model)
return c.Status(fiber.StatusBadRequest).SendString("Model " + request.Model + " not found")
return c.Status(fiber.StatusBadRequest).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Model %q not found", request.Model)))
}
if request.Tools == nil {
@@ -147,7 +148,7 @@ func ListAssistantsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoade
// Convert string limit to integer
limit, err := strconv.Atoi(limitQuery)
if err != nil {
return c.Status(http.StatusBadRequest).SendString(fmt.Sprintf("Invalid limit query value: %s", limitQuery))
return c.Status(http.StatusBadRequest).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Invalid limit query value: %s", limitQuery)))
}
// Sort assistants
@@ -225,7 +226,7 @@ func filterAssistantsAfterID(assistants []Assistant, id string) []Assistant {
func modelExists(cl *config.BackendConfigLoader, ml *model.ModelLoader, modelName string) (found bool) {
found = false
models, err := services.ListModels(cl, ml, "", true)
models, err := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
if err != nil {
return
}
@@ -288,7 +289,7 @@ func GetAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader,
}
}
return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant with id: %s", assistantID))
return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find assistant with id: %s", assistantID)))
}
}
@@ -337,11 +338,11 @@ func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
}
}
return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find file_id: %s", request.FileID))
return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find file_id: %s", request.FileID)))
}
}
return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find %q", assistantID))
return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find %q", assistantID)))
}
}
@@ -442,7 +443,7 @@ func ModifyAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
return c.Status(fiber.StatusOK).JSON(newAssistant)
}
}
return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant with id: %s", assistantID))
return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find assistant with id: %s", assistantID)))
}
}
@@ -513,9 +514,9 @@ func GetAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoa
if assistantFile.ID == fileId {
return c.Status(fiber.StatusOK).JSON(assistantFile)
}
return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant file with file_id: %s", fileId))
return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find assistant file with file_id: %s", fileId)))
}
}
return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistant file with assistant_id: %s", assistantID))
return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find assistant file with assistant_id: %s", assistantID)))
}
}

View File

@@ -161,6 +161,12 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
textContentToReturn = ""
id = uuid.New().String()
created = int(time.Now().Unix())
// Set CorrelationID
correlationID := c.Get("X-Correlation-ID")
if len(strings.TrimSpace(correlationID)) == 0 {
correlationID = id
}
c.Set("X-Correlation-ID", correlationID)
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
if err != nil {
@@ -444,6 +450,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
c.Set("Cache-Control", "no-cache")
c.Set("Connection", "keep-alive")
c.Set("Transfer-Encoding", "chunked")
c.Set("X-Correlation-ID", id)
responses := make(chan schema.OpenAIResponse)
@@ -640,8 +647,16 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m
for _, m := range input.Messages {
images = append(images, m.StringImages...)
}
videos := []string{}
for _, m := range input.Messages {
videos = append(videos, m.StringVideos...)
}
audios := []string{}
for _, m := range input.Messages {
audios = append(audios, m.StringAudios...)
}
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, ml, *config, o, nil)
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, *config, o, nil)
if err != nil {
log.Error().Err(err).Msg("model inference failed")
return "", err

View File

@@ -57,6 +57,8 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
}
return func(c *fiber.Ctx) error {
// Add Correlation
c.Set("X-Correlation-ID", id)
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)

Some files were not shown because too many files have changed in this diff.