Compare commits

1051 Commits

Author SHA1 Message Date
LocalAI [bot]
abd678e147 ⬆️ Update ggerganov/llama.cpp (#1655)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-28 09:24:44 +01:00
Ettore Di Giacinto
6ac5d814fb feat(startup): fetch model definition remotely (#1654) 2024-01-28 00:14:16 +01:00
LocalAI [bot]
f928899338 ⬆️ Update ggerganov/llama.cpp (#1652)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-27 00:13:38 +01:00
Ettore Di Giacinto
5a6fd98839 fix(paths): automatically create paths (#1650)
Especially useful when running inside a container.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-01-27 00:13:19 +01:00
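Commit 5a6fd98839 above makes LocalAI create its working directories at startup instead of failing when they are missing, which matters for fresh containers. A minimal sketch of the idea (the directory names are illustrative, not LocalAI's actual paths):

```python
import os

def ensure_paths(*paths: str) -> None:
    """Create each directory (and parents) if it does not exist yet."""
    for path in paths:
        # exist_ok=True makes this a no-op when the directory already exists,
        # which is what you want across repeated container starts.
        os.makedirs(path, exist_ok=True)

# Illustrative directories; fresh containers typically start without them.
ensure_paths("models", "config", "images")
```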
Ettore Di Giacinto
072f71dfb7 Update codellama-7b.yaml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-26 18:35:33 +01:00
Ettore Di Giacinto
670cee8274 Update transformers-tinyllama.yaml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-26 18:29:38 +01:00
Ettore Di Giacinto
9f1be45552 Update quickstart.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-26 17:55:20 +01:00
Ettore Di Giacinto
f1846ae5ac Update phi-2.yaml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-26 16:22:54 +01:00
LocalAI [bot]
ac19998e5e ⬆️ Update ggerganov/llama.cpp (#1644)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-26 00:13:39 +01:00
Ettore Di Giacinto
cb7512734d transformers: correctly load automodels (#1643)
* backends(transformers): use AutoModel with LLM types

* examples: animagine-xl

* Add codellama examples
2024-01-26 00:13:21 +01:00
LocalAI [bot]
3733250b3c ⬆️ Update ggerganov/llama.cpp (#1642)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-24 22:51:59 +01:00
LocalAI [bot]
da3cd8993d ⬆️ Update docs version mudler/LocalAI (#1631)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-24 19:50:33 +01:00
LocalAI [bot]
7690caf020 ⬆️ Update ggerganov/llama.cpp (#1632)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-23 23:07:51 +01:00
Ettore Di Giacinto
5e335eaead feat(transformers): support also text generation (#1630)
* feat(transformers): support also text generation

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* embedded: set seed -1

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-01-23 23:07:31 +01:00
coyzeng
d5d82ba344 feat(grpc): backend SPI pluggable in embedding mode (#1621)
* run server

* grpc backend embedded support

* backend providable
2024-01-23 08:56:36 +01:00
LocalAI [bot]
efe2883c5d ⬆️ Update ggerganov/llama.cpp (#1626)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-22 23:22:01 +01:00
LocalAI [bot]
47237c7c3c ⬆️ Update ggerganov/llama.cpp (#1623)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-22 08:54:06 +01:00
Ettore Di Giacinto
697c769b64 fix(llama.cpp): enable cont batching when parallel is set (#1622)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-01-21 14:59:48 +01:00
Ettore Di Giacinto
94261b1717 Update gpt-vision.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-21 10:07:30 +01:00
Sebastian
eaf85a30f9 fix(llama.cpp): Enable parallel requests (#1616)
integrate changes from llama.cpp

Signed-off-by: Sebastian <tauven@gmail.com>
2024-01-21 09:56:14 +01:00
LocalAI [bot]
6a88b030ea ⬆️ Update ggerganov/llama.cpp (#1620)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-20 23:34:46 +01:00
LocalAI [bot]
f538416fb3 ⬆️ Update docs version mudler/LocalAI (#1619)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-20 21:37:02 +00:00
Ettore Di Giacinto
06cd9ef98d feat(extra-backends): Improvements, adding mamba example (#1618)
* feat(extra-backends): Improvements

vllm: add max_tokens, wire up stream event
mamba: fixups, adding examples for mamba-chat

* examples(mamba-chat): add

* docs: update
2024-01-20 17:56:08 +01:00
James Braza
f3d71f8819 Modernized LlamaIndex integration (#1613)
Updated LlamaIndex example
2024-01-20 10:06:32 +01:00
James Braza
b7127c2dc9 Expanded and interlinked Docker documentation (#1614)
* Corrected dockerhub to Docker Hub

* Consolidated two Docker examples

* Linked Container Images in Manual Images
2024-01-20 10:05:14 +01:00
LocalAI [bot]
b2dc5fbd7e ⬆️ Update ggerganov/llama.cpp (#1612)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-20 00:38:14 +01:00
Ettore Di Giacinto
9e653d6abe feat: 🐍 add mamba support (#1589)
feat(mamba): Initial import

This is a first iteration of the mamba backend, loosely based on
mamba-chat (https://github.com/havenhq/mamba-chat).
2024-01-19 23:42:50 +01:00
Ettore Di Giacinto
52c9a7f45d Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-19 19:30:29 +01:00
Ettore Di Giacinto
ee42c9bfe6 docs: re-use original permalinks (#1610)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-01-19 19:23:58 +01:00
Ettore Di Giacinto
e6c3e483a1 Update build.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-19 19:09:35 +01:00
Ettore Di Giacinto
3a253c6cd7 Makefile: allow to build without GRPC_BACKENDS (#1607) 2024-01-19 15:38:43 +01:00
Luna Midori
e9c3bbc6d7 Update README.md (#1601)
Signed-off-by: Luna Midori <118759930+lunamidori5@users.noreply.github.com>
2024-01-19 08:55:37 +01:00
LocalAI [bot]
23d64ac53a ⬆️ Update ggerganov/llama.cpp (#1604)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-18 21:20:50 +00:00
Ettore Di Giacinto
34f9f20ff4 Update quickstart.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-18 20:49:04 +01:00
Ettore Di Giacinto
a4a72a79ae Update integrations.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-18 19:53:41 +01:00
Ettore Di Giacinto
6ca4d38a01 docs/examples: enhancements (#1572)
* docs: re-order sections

* fix references

* Add mixtral-instruct, tinyllama-chat, dolphin-2.5-mixtral-8x7b

* Fix link

* Minor corrections

* fix: models is a StringSlice, not a String

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* WIP: switch docs theme

* content

* Fix GH link

* enhancements

* enhancements

* Fixed how to link

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* fixups

* logo fix

* more fixups

* final touches

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
Co-authored-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
2024-01-18 19:41:08 +01:00
LocalAI [bot]
b5c93f176a ⬆️ Update ggerganov/llama.cpp (#1599)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-18 14:39:30 +01:00
LocalAI [bot]
1aaf88098d ⬆️ Update ggerganov/llama.cpp (#1597)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-17 09:27:02 +01:00
Dionysius
6f447e613d docs: missing golang requirement for local build for debian (#1596)
docs: fix missing golang requirement for local build for debian
2024-01-17 09:26:43 +01:00
LocalAI [bot]
dfb7c3b1aa ⬆️ Update ggerganov/llama.cpp (#1594)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-16 14:47:57 +01:00
Dionysius
b41eb5e1f3 prepend built binaries in PATH for BUILD_GRPC_FOR_BACKEND_LLAMA (#1593)
prepend built binaries in PATH
2024-01-16 14:47:47 +01:00
LocalAI [bot]
9c2d264979 ⬆️ Update ggerganov/llama.cpp (#1590)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-15 09:01:07 +01:00
LocalAI [bot]
b996c3198c ⬆️ Update ggerganov/llama.cpp (#1587)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-14 09:46:47 +00:00
Ettore Di Giacinto
f879c07c86 Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-14 10:00:46 +01:00
Dionysius
441e2965ff move BUILD_GRPC_FOR_BACKEND_LLAMA logic to makefile: errors in this section now immediately fail the build (#1576)
* move BUILD_GRPC_FOR_BACKEND_LLAMA option to makefile

* review: oversight, fixup cmake_args

Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Signed-off-by: Dionysius <1341084+dionysius@users.noreply.github.com>

---------

Signed-off-by: Dionysius <1341084+dionysius@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-13 10:08:26 +01:00
LocalAI [bot]
cbe9a03e3c ⬆️ Update ggerganov/llama.cpp (#1583)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-12 23:04:04 +01:00
LocalAI [bot]
4ee7e73d00 ⬆️ Update ggerganov/llama.cpp (#1578)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-12 16:04:33 +01:00
lunamidori5
1cca449726 Moving the how tos to self hosted (#1574)
* Update _index.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Delete docs/content/howtos/easy-setup-sd.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Delete docs/content/howtos/easy-setup-full.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Delete docs/content/howtos/easy-setup-embeddings.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Delete docs/content/howtos/easy-setup-docker.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Delete docs/content/howtos/easy-request.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Delete docs/content/howtos/easy-model.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update _index.en.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update README.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Delete docs/content/howtos directory

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

---------

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
2024-01-11 09:25:18 +01:00
LocalAI [bot]
faf7c1c325 ⬆️ Update ggerganov/llama.cpp (#1573)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-11 08:41:32 +01:00
LocalAI [bot]
58288494d6 ⬆️ Update ggerganov/llama.cpp (#1568)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-10 10:18:57 +01:00
Dionysius
72283dc744 minor: replace shell pwd in Makefile with CURDIR for better windows compatibility (#1571)
replace shell pwd in Makefile with CURDIR
2024-01-10 08:39:50 +00:00
LocalAI [bot]
b8240b4c18 ⬆️ Update docs version mudler/LocalAI (#1567)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-09 21:56:12 +01:00
Ettore Di Giacinto
5309da40b7 Update Dockerfile
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-09 08:55:43 +01:00
Ettore Di Giacinto
08b90b4720 Update _index.en.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-09 08:50:19 +01:00
LocalAI [bot]
2e890b3838 ⬆️ Update ggerganov/llama.cpp (#1563)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-09 08:48:40 +01:00
LocalAI [bot]
06656fc057 ⬆️ Update docs version mudler/LocalAI (#1562)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-09 08:48:24 +01:00
LocalAI [bot]
574fa67bdc ⬆️ Update ggerganov/llama.cpp (#1558)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-08 00:38:03 +01:00
Ettore Di Giacinto
e19d7226f8 feat: more embedded models, coqui fixes, add model usage and description (#1556)
* feat: add model descriptions and usage

* remove default model gallery

* models: add embeddings and tts

* docs: update table

* docs: updates

* images: cleanup pip cache after install

* images: always run apt-get clean

* ux: improve gRPC connection errors

* ux: improve some messages

* fix: fix coqui when no AudioPath is passed by

* embedded: add more models

* Add usage

* Reorder table
2024-01-08 00:37:02 +01:00
LocalAI [bot]
0843fe6c65 ⬆️ Update docs version mudler/LocalAI (#1557)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-07 09:36:21 +01:00
Ettore Di Giacinto
62a02cd1fe deps(conda): use transformers environment with autogptq (#1555) 2024-01-06 15:30:53 +01:00
Ettore Di Giacinto
949da7792d deps(conda): use transformers-env with vllm,exllama(2) (#1554)
* deps(conda): use transformers with vllm

* join vllm, exllama, exllama2, split petals
2024-01-06 13:32:28 +01:00
Ettore Di Giacinto
ce724a7e55 docs: improve getting started (#1553)
* docs: improve getting started

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

* cleanups

* Use dockerhub links

* Shrink command to minimum

---------

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-06 01:04:14 +01:00
LocalAI [bot]
0a06c80801 ⬆️ Update ggerganov/llama.cpp (#1547)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-05 23:27:51 +01:00
LocalAI [bot]
edc55ade61 ⬆️ Update docs version mudler/LocalAI (#1546)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
Co-authored-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
2024-01-05 23:27:30 +01:00
Ettore Di Giacinto
09e5d9007b feat: embedded model configurations, add popular model examples, refactoring (#1532)
* move downloader out

* separate startup functions for preloading configuration files

* docs: add popular model examples

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* shorteners

* Add llava

* Add mistral-openorca

* Better link to build section

* docs: update

* fixup

* Drop code dups

* Minor fixups

* Apply suggestions from code review

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

* ci: try to cache gRPC build during tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* ci: do not build all images for tests, just necessary

* ci: cache gRPC also in release pipeline

* fixes

* Update model_preload_test.go

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-05 23:16:33 +01:00
Ettore Di Giacinto
db926896bd Revert "[Refactor]: Core/API Split" (#1550)
Revert "[Refactor]: Core/API Split (#1506)"

This reverts commit ab7b4d5ee9.
2024-01-05 18:04:46 +01:00
Dave
ab7b4d5ee9 [Refactor]: Core/API Split (#1506)
Refactors api folder to core, creates firm split between backend code and api frontend.
2024-01-05 15:34:56 +01:00
Ettore Di Giacinto
bcf02449b3 ci(dockerhub): push images also to dockerhub (#1542)
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-04 08:32:29 +01:00
LocalAI [bot]
d48faf35ab ⬆️ Update ggerganov/llama.cpp (#1544)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-04 00:08:03 +01:00
Ettore Di Giacinto
583bd28a5c fix(diffusers): add omegaconf dependency (#1540)
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-04 00:06:41 +01:00
LocalAI [bot]
7e1d8c489b ⬆️ Update ggerganov/llama.cpp (#1533)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-03 08:43:35 +01:00
LocalAI [bot]
de28867374 ⬆️ Update ggerganov/llama.cpp (#1531)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-02 00:28:22 +00:00
Ettore Di Giacinto
a1aa6cb7c2 fix(entrypoint): cd to backend dir before start (#1530)
Certain backends, such as vall-e-x, are not meant to be used as a library, so
we want to start the process in the same folder where the backend and
all its assets are (see the sketch below). Fixes #1394
2024-01-01 22:02:48 +01:00
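Commit a1aa6cb7c2 starts each backend process from its own directory so that backends like vall-e-x can resolve their assets via relative paths. A rough sketch of the pattern, with hypothetical paths:

```python
import subprocess

# Hypothetical paths: the backend script and the directory holding its assets.
backend_dir = "/build/backend/python/vall-e-x"
cmd = ["python", "backend.py", "--addr", "localhost:50051"]

# Setting cwd means relative asset paths inside the backend resolve against
# the backend's own folder rather than the server's working directory.
process = subprocess.Popen(cmd, cwd=backend_dir)
```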
Ettore Di Giacinto
85e2767dca feat: add trimsuffix (#1528) 2024-01-01 14:39:42 +01:00
Ettore Di Giacinto
fd48cb6506 deps(llama.cpp): update and sync grpc server (#1527)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-01-01 14:39:31 +01:00
Ettore Di Giacinto
522659eb59 feat(prepare): allow to specify additional files to download (#1526) 2024-01-01 14:39:13 +01:00
Ettore Di Giacinto
f068efe509 docs(phi-2): add example (#1525) 2024-01-01 10:51:47 +01:00
Ettore Di Giacinto
726fe416bb docs: update hot topics
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-01 10:41:39 +01:00
Ettore Di Giacinto
66fa4f1767 feat: share models by url (#1522)
* feat: allow to pass by models via args

* expose it also as an env/arg

* docs: enhancements to build/requirements

* do not display status always

* print download status

* not all messages are debug
2024-01-01 10:31:03 +01:00
Ettore Di Giacinto
d6565f3b99 Update _index.en.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-31 10:58:22 +01:00
LocalAI [bot]
27686ff20b ⬆️ Update ggerganov/llama.cpp (#1518)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-31 00:19:08 +00:00
LocalAI [bot]
a8b865022f ⬆️ Update docs version mudler/LocalAI (#1517)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-30 23:50:24 +00:00
Ettore Di Giacinto
c1888a8062 feat(preload): prepare models in galleries (#1515)
Previously, when applying models from the gallery API, we didn't actually
allow remote URLs as models, as nothing was actually downloading the
models referenced in the configuration file. Now we call Preload after
we have all the models loaded in memory (see the sketch below).
2023-12-30 18:55:18 +01:00
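The flow commit c1888a8062 describes amounts to a download pass over the loaded configurations. A simplified sketch (function and field names are hypothetical, not LocalAI's Go implementation):

```python
import os
import urllib.request

def preload(configs: list[dict], model_path: str) -> None:
    """Download any model referenced by a remote URL before serving."""
    for config in configs:
        uri = config.get("model", "")
        if not uri.startswith(("http://", "https://")):
            continue  # local file, nothing to fetch
        target = os.path.join(model_path, os.path.basename(uri))
        if os.path.exists(target):
            continue  # already downloaded on a previous run
        urllib.request.urlretrieve(uri, target)

# Usage: preload(loaded_configs, "models") once all configs are in memory.
```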
Ettore Di Giacinto
a95bb0521d fix(download): correctly check for not found error (#1514) 2023-12-30 15:36:46 +01:00
Chris Natale
e2311a145c Fix: Set proper Homebrew install location for x86 Macs (#1510)
* set proper Homebrew install location for x86 Macs

* fix: remove prior conditional that my logic replaces
2023-12-30 12:37:26 +01:00
lunamidori5
d4e0bab6be Update version.json (2.3.0) (#1511)
Update version.json

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
2023-12-30 10:19:46 +01:00
LocalAI [bot]
5b0dc20e4c ⬆️ Update ggerganov/llama.cpp (#1509)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-30 09:19:07 +00:00
Ettore Di Giacinto
9723c3c21d Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-28 23:06:40 +01:00
Ettore Di Giacinto
9dc32275ad Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-28 23:03:44 +01:00
Ettore Di Giacinto
611c11f57b Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-28 23:03:10 +01:00
Ettore Di Giacinto
763d1f524a Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-28 23:01:52 +01:00
LocalAI [bot]
6428003c3b ⬆️ Update ggerganov/llama.cpp (#1503)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-28 22:44:50 +01:00
LocalAI [bot]
2eac4f93bb ⬆️ Update ggerganov/llama.cpp (#1501)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-28 00:51:29 +00:00
JZacharie
24adf9cbcb remove default to stablediffusion (#1500) 2023-12-27 23:16:49 +00:00
LocalAI [bot]
c45f581c47 ⬆️ Update ggerganov/llama.cpp (#1496)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-26 19:15:58 -05:00
Ettore Di Giacinto
ae0c48e6bd ci(apple): speedups (#1471)
* ci(apple): install grpc from brew

* ci(apple): use brew deps also on release

* ci(linux): install grpc from package manager

* ci: set concurrency

* Revert "ci(linux): install grpc from package manager"

This reverts commit 004e3e308e.
2023-12-26 19:19:37 +01:00
LocalAI [bot]
4ca649154d ⬆️ Update ggerganov/llama.cpp (#1495)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-26 17:53:59 +00:00
Ettore Di Giacinto
66dd387858 Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-25 09:04:35 +01:00
LocalAI [bot]
9789f5a96a ⬆️ Update ggerganov/llama.cpp (#1492)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-25 02:43:35 -05:00
Gianluca Boiano
cae7b197ec feat: add tiny dream stable diffusion support (#1283)
Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
2023-12-24 19:27:24 +00:00
l
f7621b2c6c feat: partial download (#1486)
* add .partial download

* fix Stat check

* review partial download
2023-12-24 19:39:33 +01:00
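Commit f7621b2c6c downloads to a `.partial` file first, so an interrupted transfer never leaves a truncated model behind under its final name. A minimal sketch of the idea:

```python
import os
import urllib.request

def download(url: str, dest: str) -> None:
    """Fetch url into dest via a .partial temp file and an atomic rename."""
    partial = dest + ".partial"
    urllib.request.urlretrieve(url, partial)
    # The final name only appears once the download completed successfully;
    # a leftover .partial from a crash can simply be re-downloaded.
    os.replace(partial, dest)
```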
Ettore Di Giacinto
95eb72bfd3 feat: add 🐸 coqui (#1489)
* feat: add coqui

* docs: update news
2023-12-24 19:38:54 +01:00
BobMaster
7e2d101a46 fix: guidance_scale not work in sd (#1488)
Signed-off-by: hibobmaster <32976627+hibobmaster@users.noreply.github.com>
2023-12-24 19:24:52 +01:00
Sertaç Özercan
6597881854 fix: exllama2 backend (#1484)
Signed-off-by: Sertac Ozercan <sozercan@gmail.com>
2023-12-24 08:32:12 +00:00
LocalAI [bot]
eaa899df63 ⬆️ Update ggerganov/whisper.cpp (#1483)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-24 02:53:29 -05:00
LocalAI [bot]
16ed0bd0c5 ⬆️ Update ggerganov/llama.cpp (#1482)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-24 02:53:12 -05:00
Ettore Di Giacinto
939187a129 env(conda): use transformers for vall-e-x (#1481) 2023-12-23 14:31:34 -05:00
Ettore Di Giacinto
4b520c3343 docs: add langchain4j integration (#1476)
* docs: add langchain4j integration

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

* Update docs/content/integrations/langchain4j.md

Co-authored-by: LangChain4j <langchain4j@gmail.com>
Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update langchain4j.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
Co-authored-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
Co-authored-by: LangChain4j <langchain4j@gmail.com>
2023-12-23 09:13:56 +00:00
LocalAI [bot]
51215d480a ⬆️ Update ggerganov/whisper.cpp (#1480)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-23 09:11:40 +00:00
LocalAI [bot]
987f0041d3 ⬆️ Update ggerganov/llama.cpp (#1469)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-23 00:05:56 +00:00
LocalAI [bot]
a29de9bf50 ⬆️ Update donomii/go-rwkv.cpp (#1478)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-22 15:02:32 +01:00
LocalAI [bot]
9bd5831fda ⬆️ Update ggerganov/whisper.cpp (#1479)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-22 08:26:39 +01:00
LocalAI [bot]
59f0f2f0fd ⬆️ Update docs version mudler/LocalAI (#1477)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-22 00:28:42 +00:00
Ettore Di Giacinto
9ae47d37e9 pin go-rwkv
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-21 08:42:40 +01:00
Ettore Di Giacinto
2b3ad7f41c Revert "⬆️ Update donomii/go-rwkv.cpp" (#1474)
Revert "⬆️ Update donomii/go-rwkv.cpp (#1470)"

This reverts commit 51db10b18f.
2023-12-21 08:38:50 +01:00
LocalAI [bot]
51db10b18f ⬆️ Update donomii/go-rwkv.cpp (#1470)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-21 08:35:31 +01:00
Ettore Di Giacinto
b4b21a446b feat(conda): share envs with transformer-based backends (#1465)
* feat(conda): share env between diffusers and bark

* Detect if env already exists

* share diffusers and petals

* tests: add petals

* Use smaller model for tests with petals

* test only model load on petals

* tests(petals): run only load model tests

* Revert "test only model load on petals"

This reverts commit 111cfa97f1.

* move transformers and sentencetransformers to common env

* Share also transformers-musicgen
2023-12-21 08:35:15 +01:00
LocalAI [bot]
23eced1644 ⬆️ Update ggerganov/llama.cpp (#1461)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-20 18:02:52 +01:00
LocalAI [bot]
7741a6e75d ⬆️ Update ggerganov/whisper.cpp (#1462)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-20 00:21:49 +00:00
LocalAI [bot]
d4210db0c9 ⬆️ Update ggerganov/llama.cpp (#1457)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-19 00:42:19 +01:00
lunamidori5
17dde75107 How To (Updates and Fixes) (#1456)
* Update easy-setup-embeddings.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-cpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-gpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update and rename easy-setup-docker-cpu.md to easy-setup-docker.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update _index.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Delete docs/content/howtos/easy-setup-docker-gpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update _index.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-sd.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-sd.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

---------

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
2023-12-18 18:59:08 +01:00
Ettore Di Giacinto
1fc3a375df feat: inline templates and accept URLs in models (#1452)
* feat: Allow inline templates

* feat: Allow to specify url in model config files

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

* feat: support 'huggingface://' format

* style: reuse-code from gallery

---------

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-18 18:58:44 +01:00
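Commit 1fc3a375df lets a model config point at a remote file, including a `huggingface://` shorthand. One plausible way to resolve such a URI to a plain HTTPS download (the exact mapping here is an assumption, not LocalAI's implementation):

```python
def resolve_model_uri(uri: str) -> str:
    """Map huggingface://owner/repo/file to an HTTPS download URL."""
    prefix = "huggingface://"
    if not uri.startswith(prefix):
        return uri  # plain URL or local path: use as-is
    owner, repo, file_path = uri[len(prefix):].split("/", 2)
    # Hugging Face serves raw files under /resolve/<revision>/<path>.
    return f"https://huggingface.co/{owner}/{repo}/resolve/main/{file_path}"

print(resolve_model_uri("huggingface://TheBloke/some-repo/model.gguf"))
```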
LocalAI [bot]
64a8471dd5 ⬆️ Update ggerganov/llama.cpp (#1455)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-18 08:55:29 +01:00
LocalAI [bot]
86a8df1c8b ⬆️ Update ggerganov/llama.cpp (#1450)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-17 19:02:28 +01:00
Ettore Di Giacinto
2eeed2287b docs: automatically track latest versions (#1451) 2023-12-17 19:02:13 +01:00
Ettore Di Giacinto
3d83128f16 feat(alias): alias llama to llama-cpp, update docs (#1448)
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-16 18:22:45 +01:00
Ettore Di Giacinto
1c286c3c2f docs(mixtral): add mixtral example (#1449) 2023-12-16 17:44:43 +01:00
LocalAI [bot]
2f7beb6744 ⬆️ Update ggerganov/whisper.cpp (#1434)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-16 09:22:28 +01:00
LocalAI [bot]
ab0370a0b9 ⬆️ Update ggerganov/llama.cpp (#1429)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-16 09:22:13 +01:00
LocalAI [bot]
3f9a41684a ⬆️ Update mudler/go-piper (#1441)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-16 09:21:56 +01:00
Ettore Di Giacinto
dd982acf2c feat(img2vid,txt2vid): Initial support for img2vid,txt2vid (#1442)
* feat(img2vid): Initial support for img2vid

* doc(SD): fix SDXL Example

* Minor fixups for img2vid

* docs(img2img): fix example curl call

* feat(txt2vid): initial support

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

* diffusers: be retro-compatible with CUDA settings

* docs(img2vid, txt2vid): examples

* Add notice on docs

---------

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-15 18:06:20 -05:00
Ettore Di Giacinto
fb6a5bc620 update(llama.cpp): update server, correctly propagate LLAMA_VERSION (#1440)
* fix(Makefile): correctly propagate LLAMA_VERSION

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

* update grpc-server.cpp

---------

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-15 08:26:48 +01:00
Ettore Di Giacinto
7641f92cde feat(diffusers): update, add autopipeline, controlnet (#1432)
* feat(diffusers): update, add autopipeline, controlnet

* tests with AutoPipeline

* simplify logic
2023-12-13 19:20:22 +01:00
LocalAI [bot]
72325fd0a3 ⬆️ Update ggerganov/whisper.cpp (#1430)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-13 08:37:02 +01:00
Sertaç Özercan
1b7ed5e2e6 docs: add aikit to integrations (#1412)
* docs: add aikit to integrations

Signed-off-by: Sertac Ozercan <sozercan@gmail.com>

* docs: add to readme

Signed-off-by: Sertac Ozercan <sozercan@gmail.com>

---------

Signed-off-by: Sertac Ozercan <sozercan@gmail.com>
Co-authored-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
2023-12-12 18:58:57 +01:00
LocalAI [bot]
86fac272d8 ⬆️ Update ggerganov/llama.cpp (#1391)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-12 18:22:48 +01:00
Samuel Walker
865e523ff1 Documentation for Hipblas (#1425)
hipblas arch
2023-12-12 15:05:01 +01:00
Ettore Di Giacinto
9aa2a7ca13 extras: add vllm,bark,vall-e-x tests, bump diffusers (#1422)
* tests: add vllm

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

* tests: Add vall-e-x tests

* Add bark tests

* bump diffusers

---------

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-12 00:39:26 +01:00
Ettore Di Giacinto
e80cbca6b0 Update _index.en.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-12 00:37:01 +01:00
Ettore Di Giacinto
718a5d4a9e fix(transformers*): add sentence-transformers and transformers-musicgen tests, fix musicgen wrapper (#1420)
* tests: add sentence-transformers and transformers-musicgen

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

* fix: transformers-musicgen conda env

Initialize correctly the environment for the transformers-musicgen backend.

* fix(tests): transformer-musicgen tests fixups

---------

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-11 19:26:02 +01:00
lunamidori5
9222bec8b1 How To Updates / Model Used Switched / Removed "docker-compose" (RIP) (#1417)
* Update _index.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-model.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-cpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-gpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update _index.en.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-cpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-gpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-cpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-cpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-gpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-model.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-cpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-gpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-cpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update _index.en.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-gpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

---------

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
2023-12-11 14:27:29 +00:00
LocalAI [bot]
4a965e1b0e ⬆️ Update ggerganov/whisper.cpp (#1418)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-11 08:24:48 +01:00
Ettore Di Giacinto
48e5380e45 tests: add diffusers tests (#1419) 2023-12-11 08:20:34 +01:00
LocalAI [bot]
831418612b ⬆️ Update mudler/go-piper (#1400)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-10 08:50:26 +01:00
LocalAI [bot]
89ff12309d ⬆️ Update ggerganov/whisper.cpp (#1390)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-09 09:23:40 +01:00
Ettore Di Giacinto
3a4fb6fa4b feat(entrypoint): optionally prepare extra endpoints (#1405)
entrypoint: optionally prepare extra endpoints

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-12-08 20:04:13 +01:00
Ettore Di Giacinto
b181503c30 docs: update v2.0.0 notes
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-08 16:06:24 +01:00
Ettore Di Giacinto
887b3dff04 feat: cuda transformers (#1401)
* Use cuda in transformers if available

tensorflow probably needs a different check.

Signed-off-by: Erich Schubert <kno10@users.noreply.github.com>

* feat: expose CUDA at top level

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* tests: add to tests and create workflow for py extra backends

* doc: update note on how to use core images

---------

Signed-off-by: Erich Schubert <kno10@users.noreply.github.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Erich Schubert <kno10@users.noreply.github.com>
2023-12-08 15:45:04 +01:00
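Commit 887b3dff04 makes the transformers backend place models on the GPU when one is available. The core check is a standard PyTorch pattern:

```python
import torch
from transformers import AutoModelForCausalLM

# Pick CUDA when a GPU is visible, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Illustrative model name; any causal LM from the Hub works the same way.
model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)
```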
Ettore Di Giacinto
3822bd2369 docs: updates
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-08 15:11:44 +01:00
Ettore Di Giacinto
4de2c6a421 docs: update news
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-08 14:59:25 +01:00
Ettore Di Giacinto
6c4231fd35 docs: 2.0 updates
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-08 14:58:53 +01:00
lunamidori5
adfa7aa1fa docs: site update fixing old image text / How To update updating GPU and CPU docker pages (#1399)
* Update _index.en.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-cpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-gpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

---------

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
2023-12-08 10:27:21 +01:00
Dave
8b6e601405 Feat: new backend: transformers-musicgen (#1387)
Transformers-MusicGen
---------

Signed-off-by: Dave <dave@gray101.com>
2023-12-08 10:01:02 +01:00
Ettore Di Giacinto
6011911746 fix(piper): pin petals, phonemize and espeak (#1393)
* fix: pin phonemize and espeak

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: pin petals deps

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-12-07 22:58:41 +01:00
LocalAI [bot]
997119c27a ⬆️ Update ggerganov/llama.cpp (#1385)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-05 15:44:24 +01:00
Dave
2eb6865a27 Fix: API Key / JSON Fast Follow #1 (#1388)
fast follow fix #1 - imports, final loop, one last chance to skip

Co-authored-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
2023-12-05 10:35:27 +00:00
Ettore Di Giacinto
2b2d6673ff exllama(v2): fix exllamav1, add exllamav2 (#1384)
* fix(exllama): fix exllama deps with anaconda

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(exllamav2): add exllamav2 backend

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-12-05 08:15:37 +01:00
lunamidori5
563c5b7ea0 Added Check API KEYs file to API.go (#1381)
Added API KEYs file

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
2023-12-04 22:06:45 -05:00
LocalAI [bot]
67966b623c ⬆️ Update ggerganov/llama.cpp (#1379)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-04 18:36:34 +01:00
LocalAI [bot]
9fc3fd04be ⬆️ Update ggerganov/whisper.cpp (#1378)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-04 18:36:22 +01:00
Ettore Di Giacinto
238fec244a fix(vall-e-x): correctly install reqs in environment (#1377) 2023-12-03 21:16:36 +01:00
LocalAI [bot]
3d71bc9b64 ⬆️ Update ggerganov/whisper.cpp (#1227)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-03 01:16:07 +01:00
Felix Erkinger
3923024d84 update whisper_cpp with CUBLAS, HIPBLAS, METAL, OPENBLAS, CLBLAST support (#1302)
update whisper_cpp to 1.5.1 with OPENBLAS, METAL, HIPBLAS, CUBLAS, CLBLAST support
2023-12-02 10:10:18 +00:00
Ettore Di Giacinto
710b195be1 Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-02 08:55:26 +01:00
Ettore Di Giacinto
6e408137ee Update fine-tuning.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-02 08:54:21 +01:00
Ettore Di Giacinto
9b205cfcfc Update fine-tuning.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-02 08:52:00 +01:00
LocalAI [bot]
42a80d1b8b ⬆️ Update ggerganov/llama.cpp (#1375)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-02 00:09:48 +00:00
Ettore Di Giacinto
d6073ac18e Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-01 20:05:58 +01:00
Ettore Di Giacinto
1c450d46cf Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-01 20:01:07 +01:00
lunamidori5
6b312a8522 Site Clean up - How to Clean up (#1342)
* Create easy-request.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-request.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-request.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-request.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-request.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-request.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-request-curl.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-request-openai-v0.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-request-openai-v1.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-request.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Delete docs/content/howtos/easy-request-openai-v1.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Delete docs/content/howtos/easy-request-openai-v0.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Delete docs/content/howtos/easy-request-curl.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update and rename easy-model-import-downloaded.md to easy-model.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update _index.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-cpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-gpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-gpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-cpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Delete docs/content/howtos/autogen-setup.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update _index.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Delete docs/content/howtos/easy-request-autogen.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-model.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update _index.en.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update _index.en.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update _index.en.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update _index.en.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update _index.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

---------

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
2023-12-01 19:12:21 +01:00
Ettore Di Giacinto
2b2007ae9e docs: add fine-tuning example (#1374)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-12-01 19:11:45 +01:00
Dave
e94a34be8c fix: OSX Build Fix Part 1: Metal (#1365)
* Make Metal the default on OSX, simplify osx-specific code, and fix the file copy error.

* fix endif / comment
2023-11-30 19:50:50 +01:00
Ettore Di Giacinto
c3fb4b1d8e ci: rename workflow
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-11-30 19:25:33 +01:00
Ettore Di Giacinto
e3ca1a7dbe ci: split into reusable workflows (#1366)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-30 19:24:37 +01:00
B4ckslash
2d64d8b444 fix/docs: Python backend dependencies (#1360)
* Update docs for new requirements.txt path

Signed-off-by: Marcus Köhler <khler.marcus@gmail.com>

* Fix typo (.PONY -> .PHONY) in python backend makefiles

Signed-off-by: Marcus Köhler <khler.marcus@gmail.com>

---------

Signed-off-by: Marcus Köhler <khler.marcus@gmail.com>
2023-11-30 17:46:55 +01:00
Ettore Di Giacinto
9b98be160a ci: limit concurrent jobs (#1364)
* ci: limit concurrent image push

* docs: mention core images

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-30 17:45:20 +01:00
LocalAI [bot]
9f708ff318 ⬆️ Update ggerganov/llama.cpp (#1363)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-30 00:06:28 +01:00
Ettore Di Giacinto
4e0ad33d92 docs: Update getting started and GPU section (#1362) 2023-11-29 18:51:57 +01:00
LocalAI [bot]
519285bf38 ⬆️ Update ggerganov/llama.cpp (#1351)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-29 08:29:03 +01:00
Ettore Di Giacinto
fd1b7b3f22 docs: Add docker instructions, add community projects section in README (#1359)
docs: Add docker instructions
2023-11-28 23:14:16 +01:00
Gianluca Boiano
687730a7f5 fix: go-piper add libucd at linking time (#1357)
Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
2023-11-28 19:55:09 +00:00
Ettore Di Giacinto
b7821361c3 feat(petals): add backend (#1350)
* feat(petals): add backend

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixups

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-28 09:01:46 +01:00
LocalAI [bot]
63e1f8fffd ⬆️ Update ggerganov/llama.cpp (#1345)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-27 09:02:19 +01:00
Ettore Di Giacinto
824612f1b4 feat: initial watchdog implementation (#1341)
* feat: initial watchdog implementation

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

* fixups

* Add more output

* wip: idletime checker

* wire idle watchdog checks

* enlarge watchdog time window

* small fixes

* Use stopmodel

* Always delete process

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-26 18:36:23 +01:00
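Commit 824612f1b4 adds a watchdog that tracks when each model was last used and stops backends that stay idle beyond a time window. A stripped-down sketch of the idle check (interval and names are illustrative, not the actual Go implementation):

```python
import time

IDLE_TIMEOUT = 15 * 60  # stop models unused for 15 minutes (illustrative)

last_used: dict[str, float] = {}  # model name -> timestamp of last request

def mark_used(model: str) -> None:
    last_used[model] = time.time()

def watchdog_pass(stop_model) -> None:
    """One sweep: stop every model idle for longer than the window."""
    now = time.time()
    for model, seen in list(last_used.items()):
        if now - seen > IDLE_TIMEOUT:
            stop_model(model)  # free the backend process
            del last_used[model]

# A real watchdog would run watchdog_pass periodically, e.g. from a timer thread.
```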
LocalAI [bot]
9482acfdfc ⬆️ Update ggerganov/llama.cpp (#1340)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-26 09:27:42 +01:00
Ettore Di Giacinto
c75bdd99e4 fix: rename transformers.py to avoid circular import (#1337)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-26 08:49:43 +01:00
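Commit c75bdd99e4 fixes a classic Python pitfall: a backend script named `transformers.py` shadows the `transformers` library, so the module ends up importing itself. Illustration:

```python
# In a file named transformers.py, this line finds the local file first
# (shadowing the installed package), producing a circular-import failure:
import transformers

# Renaming the script (e.g. to transformers_backend.py) restores the
# normal lookup of the installed library.
```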
Ettore Di Giacinto
6f34e8f044 fix: propagate CMAKE_ARGS when building grpc (#1334)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-25 13:53:51 +01:00
Ettore Di Giacinto
6d187af643 fix: handle grpc and llama-cpp with REBUILD=true (#1328)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-25 08:48:24 +01:00
LocalAI [bot]
97e9598c79 ⬆️ Update ggerganov/llama.cpp (#1330)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-24 23:45:05 +01:00
B4ckslash
5a6a6de3d7 docs: Update Features->Embeddings page to reflect backend restructuring (#1325)
* Update path to sentencetransformers backend for local execution

Signed-off-by: Marcus Köhler <khler.marcus@gmail.com>

* Rename huggingface-embeddings -> sentencetransformers in embeddings.md for consistency with the backend structure

The Dockerfile still knows the "huggingface-embeddings"
backend (I assume for compatibility reasons) but uses the
sentencetransformers backend under the hood anyway.

I figured it would be good to update the docs to use the new naming to
make it less confusing moving forward. As the docker container knows
both the "huggingface-embeddings" and the "sentencetransformers"
backend, this should not break anything.

Signed-off-by: Marcus Köhler <khler.marcus@gmail.com>

---------

Signed-off-by: Marcus Köhler <khler.marcus@gmail.com>
2023-11-24 18:21:04 +01:00
LocalAI [bot]
b1a20effde ⬆️ Update ggerganov/llama.cpp (#1323)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-24 08:32:36 +01:00
Ettore Di Giacinto
ba5ab26f2e docs: Add llava, update hot topics (#1322)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-23 18:54:55 +01:00
Dave
69f53211a1 Feat: OSX Local Codesigning (#1319)
* stage makefile

* OSX local code signing and entitlements file to fix incoming connections prompt
2023-11-23 15:22:54 +01:00
B4ckslash
9dddd1134d fix: move python header comments below shebang in some backends (#1321)
* Fix python header comments for some extra gRPC backends

When a Python script is to be executed directly via exec(3), either the platform knows how to execute
the file itself (i.e. special configuration is necessary) or the first line
contains a shebang (#!) specifying the interpreter to run it (similar to
shell scripts).

The shebang MUST be on the first line for the script to work on all platforms,
so any header comments need to be in the lines following it. Otherwise
executing these scripts as extra backends will yield an "exec format
error" message.

Changes:
* Move introductory comments below the shebang line
* Change header comment in transformers.py to refer to the correct
  python module

Signed-off-by: Marcus Köhler <khler.marcus@gmail.com>

* Make header comment in ttsbark.py more specific

Signed-off-by: Marcus Köhler <khler.marcus@gmail.com>

---------

Signed-off-by: Marcus Köhler <khler.marcus@gmail.com>
2023-11-23 15:22:37 +01:00
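Commit 9dddd1134d's point in miniature: for a script executed directly, the loader only honors a shebang on the very first line, so header comments must come after it.

```python
#!/usr/bin/env python3
# Header comments belong *below* the shebang: if anything precedes the
# "#!" line, executing the script directly fails with "exec format error".
print("started via ./script.py thanks to the first-line shebang")
```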
Ettore Di Giacinto
c5c77d2b0d docs: Initial import from localai-website (#1312)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-22 18:13:50 +01:00
LocalAI [bot]
763f94ca80 ⬆️ Update ggerganov/llama.cpp (#1313)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-22 08:37:11 +01:00
ok2sh
20d637e7b7 fix: ExLlama Backend Context Size & Rope Scaling (#1311)
* fix: context_size not propagated to exllama backend

* fix: exllama rope scaling
2023-11-21 19:26:39 +01:00
LocalAI [bot]
480b14c8dc ⬆️ Update ggerganov/llama.cpp (#1310)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-21 00:20:37 +01:00
Ettore Di Giacinto
999db4301a ci(core): add -core images without python deps (#1309)
* ci(core): add -core images without python deps

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* ci(core): use public runners

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-20 23:01:31 +01:00
Ettore Di Giacinto
92cbc4d516 feat(transformers): add embeddings with Automodel (#1308)
* Update huggingface.py

Switch SentenceTransformer for AutoModel in order to set trust_remote_code, which is needed to use the encode method with embedding models like jinai-v2 (see the sketch below)

Signed-off-by: Lucas Hänke de Cansino <lhc@next-boss.eu>

* feat(transformers): split in separate backend

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Lucas Hänke de Cansino <lhc@next-boss.eu>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Lucas Hänke de Cansino <lhc@next-boss.eu>
2023-11-20 21:21:17 +01:00
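As commit 92cbc4d516 notes, models such as Jina's v2 embeddings ship their custom `encode` method as repository code, so they must be loaded with `trust_remote_code=True`. Roughly (the model name is an illustrative example):

```python
from transformers import AutoModel

# trust_remote_code lets the model's own repository code (including its
# custom encode() method) be loaded alongside the weights.
model = AutoModel.from_pretrained(
    "jinaai/jina-embeddings-v2-base-en",  # illustrative embedding model
    trust_remote_code=True,
)
embeddings = model.encode(["LocalAI computes embeddings locally"])
```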
LocalAI [bot]
ff9afdb0fe ⬆️ Update ggerganov/llama.cpp (#1306)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-20 08:16:00 +01:00
LocalAI [bot]
3e35b20a02 ⬆️ Update mudler/go-piper (#1305)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-19 09:01:40 +01:00
LocalAI [bot]
9ea371d6cd ⬆️ Update ggerganov/llama.cpp (#1304)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-19 08:49:05 +01:00
Ettore Di Giacinto
7a0f9767da docs: fix heading
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-11-18 15:04:00 +01:00
Ettore Di Giacinto
9d7363f2a7 docs: update configuration readme
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-11-18 15:03:15 +01:00
Ettore Di Giacinto
8ee5cf38fd Delete examples/configurations/llava/README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-11-18 15:01:39 +01:00
Ettore Di Giacinto
a6b788d220 docs: update LLaVa instructions
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-11-18 15:01:16 +01:00
lunamidori5
ccd87cd9f0 llava.yaml (yaml format standardization) (#1303)
Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
2023-11-18 14:48:54 +01:00
LocalAI [bot]
b5af87fc6c ⬆️ Update ggerganov/llama.cpp (#1300)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-18 08:19:10 +01:00
Ettore Di Giacinto
3c9544b023 refactor: rename llama-stable to llama-ggml (#1287)
* refactor: rename llama-stable to llama-ggml

* Makefile: get sources in sources/

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixup path

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixup sources

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixups sd

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* update SD

* fixup

* fixup: create piper libdir also when not built

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix make target on linux test

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-18 08:18:43 +01:00
Mathias
2f65671070 fix(api/config): allow YAML config with .yml (#1299)
This commit allows using both `.yml` and `.yaml` extensions for YAML configuration files, as
is usually expected (see the sketch below).
2023-11-17 22:47:30 +01:00
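The check behind commit 2f65671070 amounts to accepting both spellings of the extension (a sketch, not the Go code LocalAI actually uses):

```python
from pathlib import Path

def is_yaml_config(path: str) -> bool:
    """Accept both conventional spellings of the YAML extension."""
    return Path(path).suffix in (".yaml", ".yml")

assert is_yaml_config("models/phi-2.yaml") and is_yaml_config("models/phi-2.yml")
```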
LocalAI [bot]
8c5436cbed ⬆️ Update ggerganov/llama.cpp (#1297)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-17 08:45:22 +01:00
Ettore Di Giacinto
548959b50f feat: queue up requests if not running parallel requests (#1296)
Return a gRPC backend wrapper which takes a lock in case requests are not meant to be
parallel (see the sketch below).

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-16 22:20:16 +01:00
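The pattern commit 548959b50f describes: when parallel requests are disabled, each backend call is wrapped in a lock so concurrent requests queue up instead of hitting the model at once. A minimal sketch with illustrative names:

```python
import threading

class SerializedBackend:
    """Wrap a backend so only one request runs at a time."""

    def __init__(self, backend):
        self._backend = backend
        self._lock = threading.Lock()

    def predict(self, prompt: str) -> str:
        # Callers block here until the previous request finishes.
        with self._lock:
            return self._backend.predict(prompt)
```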
LocalAI [bot]
2addb9f99a ⬆️ Update ggerganov/llama.cpp (#1291)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-16 08:20:26 +01:00
Ettore Di Giacinto
fdd95d1d86 feat: allow to run parallel requests (#1290)
* feat: allow to run parallel requests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixup

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-16 08:20:05 +01:00
Ettore Di Giacinto
66a558ff41 fix: respect OpenAI spec for response format (#1289)
fix: properly respect OpenAI spec for response format

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-15 19:36:23 +01:00
LocalAI [bot]
733b612eb2 ⬆️ Update ggerganov/llama.cpp (#1288)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-15 18:41:09 +01:00
LocalAI [bot]
991ecce004 ⬆️ Update ggerganov/llama.cpp (#1285)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-14 18:23:09 +01:00
Ettore Di Giacinto
ad0e30bca5 refactor: move backends into the backends directory (#1279)
* refactor: move backends into the backends directory

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* refactor: move main close to implementation for every backend

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-13 22:40:16 +01:00
LocalAI [bot]
55461188a4 ⬆️ Update ggerganov/llama.cpp (#1282)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-13 00:48:26 +00:00
LocalAI [bot]
5d2405fdef ⬆️ Update ggerganov/llama.cpp (#1280)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-11 23:26:54 +00:00
LocalAI [bot]
e9f1268225 ⬆️ Update ggerganov/llama.cpp (#1272)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-11 20:00:28 +00:00
Ettore Di Giacinto
803a0ac02a feat(llama.cpp): support lora with scale and yarn (#1277)
* feat(llama.cpp): support lora with scale

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(llama.cpp): support yarn

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-11 18:40:48 +01:00
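The new LoRA scale and YaRN options surface as model-configuration fields. A rough Go sketch with assumed YAML keys; the exact field names in LocalAI's config may differ:

```go
package config

// ModelOptions is an illustrative subset of a model config; the
// yaml keys below are assumptions, not LocalAI's exact names.
type ModelOptions struct {
	LoraAdapter    string  `yaml:"lora_adapter"`     // path to the LoRA file
	LoraScale      float32 `yaml:"lora_scale"`       // blending factor for the adapter
	YarnExtFactor  float32 `yaml:"yarn_ext_factor"`  // YaRN extrapolation mix
	YarnAttnFactor float32 `yaml:"yarn_attn_factor"` // YaRN attention scaling
}
```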
Gianluca Boiano
bde87d00b9 deps(go-piper): update to 2023.11.6-3 (#1257)
Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
2023-11-11 18:40:26 +01:00
Ettore Di Giacinto
0eae727366 🔥 add LaVA support and GPT vision API, Multiple requests for llama.cpp, return JSON types (#1254)
* wip

* wip

* Make it functional

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* wip

* Small fixups

* do not inject space on role encoding, encode img at beginning of messages

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add examples/config defaults

* Add include dir of current source dir

* cleanup

* fixes

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixups

* Revert "fixups"

This reverts commit f1a4731cca.

* fixes

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-11 13:14:59 +01:00
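A sketch of exercising the new vision endpoint from Go, assuming the endpoint mirrors OpenAI's chat API with an `image_url` content part; the host, model name, and image URL are placeholders:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	body, _ := json.Marshal(map[string]any{
		"model": "llava",
		"messages": []map[string]any{{
			"role": "user",
			"content": []map[string]any{
				{"type": "text", "text": "What is in this image?"},
				{"type": "image_url", "image_url": map[string]string{
					"url": "https://example.com/cat.png",
				}},
			},
		}},
	})
	resp, err := http.Post("http://localhost:8080/v1/chat/completions",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}
```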
LocalAI [bot]
3b4c5d54d8 ⬆️ Update ggerganov/llama.cpp (#1265)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-10 08:50:42 +01:00
LocalAI [bot]
4e16bc2f13 ⬆️ Update ggerganov/llama.cpp (#1256)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-08 08:21:12 +01:00
LocalAI [bot]
562ac62f59 ⬆️ Update ggerganov/llama.cpp (#1242)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-07 08:37:55 +01:00
Diego
e7fa2e06f8 Fixes the bug 1196 (#1232)
* Current state of the branch.

* Now gRPC is built only when the BUILD_GRPC_FOR_BACKEND_LLAMA variable is defined.

* The local compilation of gRPC now runs only when BUILD_GRPC_FOR_BACKEND_LLAMA is set.

* Revised the Makefile.

* Removed replace directives in go.mod.

---------

Signed-off-by: Diego <38375572+diego-minguzzi@users.noreply.github.com>
Co-authored-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-11-06 19:07:46 +01:00
Ettore Di Giacinto
8123f009d0 dockerfile: fixup duplicate
This should have been "exllama"

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-05 14:09:31 +01:00
Ettore Di Giacinto
622aaa9f7d dockerfile: avoid pushing a big layer
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-05 10:31:33 +01:00
Ettore Di Giacinto
7b1ee203ce tests: re-add flake-attempts
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-11-05 09:01:03 +01:00
Ettore Di Giacinto
f347e51927 feat(conda): conda environments (#1144)
* feat(autogptq): add a separate conda environment for autogptq (#1137)

**Description**

This PR related to #1117

**Notes for Reviewers**

Here we lock down the versions of the dependencies, making sure the
backend keeps working even if the dependencies are upgraded upstream.

I changed the order of the imports according to pylint, without changing
the logic of the code, so it should be fine.

I will investigate writing test cases for every backend. I can run the
service in my environment, but there is currently no way to test it, so
I am not confident about it.

Add a README.md in the `grpc` root with the common commands for creating
the `conda` environment; it can be used as a reference when documenting
extra gRPC backends.

Signed-off-by: GitHub <noreply@github.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* [Extra backend] Add separate environment for ttsbark (#1141)

**Description**

This PR relates to #1117

**Notes for Reviewers**

Same as the latest PR:
* The code is also changed, but only the order of the import package
parts; some code comments are added as well.
* Add a configuration for the `conda` environment
* Add a simple test case checking that the service can start up in the
current `conda` environment. It succeeds in VSCode, but it does not work
out of the box in the terminal, so it is hard to say how useful the test
case really is.

Signed-off-by: GitHub <noreply@github.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(conda): add make target and entrypoints for the dockerfile

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(conda): Add separate conda env for diffusers (#1145)

**Description**

This PR relates to  #1117

**Notes for Reviewers**

* Add `conda` env `diffusers.yml`
* Add Makefile to create it automatically
* Add `run.sh` to support running as an extra backend
  * Also adding it to the main Dockerfile
* Add make command in the root Makefile
* Testing the server, it can start up under the env

Signed-off-by: GitHub <noreply@github.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(conda): Add separate env for vllm (#1148)

**Description**

This PR is related to #1117

**Notes for Reviewers**

* The gRPC server can be started as normal
* The test case can be triggered in VSCode
* As in the other PRs of this kind, add `vllm.yml` and a Makefile, add
`run.sh` to the main Dockerfile, and add a command to the main Makefile

Signed-off-by: GitHub <noreply@github.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(conda): Add separate env for huggingface (#1146)

**Description**

This PR is related to  #1117

**Notes for Reviewers**

* Add conda env `huggingface.yml`
* Change the import order and remove the unused packages
* Add `run.sh` and a `make` command to the main Dockerfile and Makefile
* Add test cases for it. They can be triggered and succeed under the
VSCode Python extension, but hang when run with `python -m unittest
test_huggingface.py` in the terminal

```
Running tests (unittest): /workspaces/LocalAI/extra/grpc/huggingface
Running tests: /workspaces/LocalAI/extra/grpc/huggingface/test_huggingface.py::TestBackendServicer::test_embedding
/workspaces/LocalAI/extra/grpc/huggingface/test_huggingface.py::TestBackendServicer::test_load_model
/workspaces/LocalAI/extra/grpc/huggingface/test_huggingface.py::TestBackendServicer::test_server_startup
./test_huggingface.py::TestBackendServicer::test_embedding Passed

./test_huggingface.py::TestBackendServicer::test_load_model Passed

./test_huggingface.py::TestBackendServicer::test_server_startup Passed

Total number of tests expected to run: 3
Total number of tests run: 3
Total number of tests passed: 3
Total number of tests failed: 0
Total number of tests failed with errors: 0
Total number of tests skipped: 0

Finished running tests!
```

Signed-off-by: GitHub <noreply@github.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(conda): Add the separate conda env for VALL-E X (#1147)

**Description**

This PR is related  to #1117

**Notes for Reviewers**

* The gRPC server cannot start up

```
(ttsvalle) @Aisuko ➜ /workspaces/LocalAI (feat/vall-e-x) $ /opt/conda/envs/ttsvalle/bin/python /workspaces/LocalAI/extra/grpc/vall-e-x/ttsvalle.py
Traceback (most recent call last):
  File "/workspaces/LocalAI/extra/grpc/vall-e-x/ttsvalle.py", line 14, in <module>
    from utils.generation import SAMPLE_RATE, generate_audio, preload_models
ModuleNotFoundError: No module named 'utils'
```

The installation steps follow
https://github.com/Plachtaa/VALL-E-X#-installation below:

* Under the `ttsvalle` conda env

```
git clone https://github.com/Plachtaa/VALL-E-X.git
cd VALL-E-X
pip install -r requirements.txt
```

Signed-off-by: GitHub <noreply@github.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: set image type

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(conda): Add separate conda env for exllama (#1149)

Add separate env for exllama

Signed-off-by: Aisuko <urakiny@gmail.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Setup conda

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Set image_type arg

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* ci: prepare only conda env in tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Dockerfile: comment manual pip calls

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* conda: add conda to PATH

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixes

* add shebang

* Fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* file perms

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* debug

* Install new conda in the worker

* Disable GPU tests for now until the worker is back

* Rename workflows

* debug

* Fixup conda install

* fixup(wrapper): pass args

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: GitHub <noreply@github.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Aisuko <urakiny@gmail.com>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: Aisuko <urakiny@gmail.com>
2023-11-04 15:30:32 +01:00
LocalAI [bot]
9b17af18b3 ⬆️ Update ggerganov/llama.cpp (#1236)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-03 19:23:53 +01:00
Samuel Walker
23c7fbfe6b chainlit example (#1238) 2023-11-02 22:56:46 +01:00
Samuel Walker
035fea676a llama index example (#1237) 2023-11-02 13:35:06 -07:00
Vitor Oliveira
6e1a234d15 feat(certificates): add support for custom CA certificates (#880)
This change facilitates users working behind corporate firewalls or proxies. By allowing the integration of custom CA certificates, users can handle SSL connections that are intercepted by company infrastructure.
2023-11-01 20:10:14 +01:00
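A minimal Go sketch of the underlying technique, appending a corporate CA to the system roots of an outbound HTTP client; the certificate path is a placeholder, and this is not LocalAI's actual implementation:

```go
package main

import (
	"crypto/tls"
	"crypto/x509"
	"fmt"
	"net/http"
	"os"
)

func main() {
	pem, err := os.ReadFile("/etc/ssl/certs/corp-ca.pem") // placeholder path
	if err != nil {
		panic(err)
	}
	pool, _ := x509.SystemCertPool()
	if pool == nil {
		pool = x509.NewCertPool()
	}
	pool.AppendCertsFromPEM(pem) // trust the custom CA alongside the system roots
	client := &http.Client{
		Transport: &http.Transport{
			TLSClientConfig: &tls.Config{RootCAs: pool},
		},
	}
	resp, err := client.Get("https://huggingface.co")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}
```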
LocalAI [bot]
5b596ea605 ⬆️ Update ggerganov/llama.cpp (#1231)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-11-01 12:44:34 +00:00
Dave
6bd56460de Update .gitignore for backend/llama.cpp (#1235)
Signed-off-by: Dave <dave@gray101.com>
2023-11-01 09:52:02 +01:00
LocalAI [bot]
6ef7ea2635 ⬆️ Update ggerganov/llama.cpp (#1207)
Signed-off-by: GitHub <noreply@github.com>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-10-30 08:00:36 +00:00
Ettore Di Giacinto
f8c00fbaf1 ci: enlarge download timeout window
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-10-29 22:09:35 +01:00
Ettore Di Giacinto
d9a42cc4c5 ci: run only cublas on selfhosted (#1224)
* ci: run only cublas on selfhosted

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* debug

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* update git

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* change testing embeddings model link

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-10-29 22:04:43 +01:00
Ettore Di Giacinto
fc0bc32814 ci: use self-hosted to build container images (#1206)
ci: use self-hosted

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-10-26 21:13:40 +02:00
Ettore Di Giacinto
c62504ac92 cleanup: drop bloomz and ggllm as now supported by llama.cpp (#1217)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-10-26 07:43:31 +02:00
Ettore Di Giacinto
f227e918f9 feat(llama.cpp): Bump llama.cpp, adapt grpc server (#1211)
* feat(llama.cpp): Bump llama.cpp, adapt grpc server

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* ci: fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-10-25 20:56:25 +02:00
Ettore Di Giacinto
c132dbadce docs(examples): Add mistral example (#1214)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-10-25 20:56:12 +02:00
Dave
b839eb80a1 Fix backend/cpp/llama CMakeList.txt on OSX (#1212)
* Fix backend/cpp/llama CMakeList.txt on OSX - detect OSX and use homebrew libraries

* sneak a logging fix in too for gallery debugging

* additional logging
2023-10-25 20:53:26 +02:00
renovate[bot]
23b03a7f03 fix(deps): update module github.com/onsi/gomega to v1.28.1 (#1205)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-24 09:16:02 +02:00
LocalAI [bot]
9196583651 ⬆️ Update ggerganov/llama.cpp (#1204)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-10-23 19:06:39 +02:00
Ettore Di Giacinto
fd28252e55 fix(Dockerfile): try to save some space
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-10-22 17:13:39 +02:00
renovate[bot]
94f20e2eb7 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to c25dc51 (#1191)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-22 16:58:45 +02:00
Ettore Di Giacinto
5ced99a8e7 ci: more cleanup for workers
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-10-22 12:27:04 +02:00
LocalAI [bot]
c377e61ff0 ⬆️ Update go-skynet/go-llama.cpp (#1156)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-10-22 08:55:44 +02:00
Ettore Di Giacinto
a6fe0a020a feat(llama.cpp): update (#1200)
**Description**

This PR updates llama.cpp to
465219b914

Supersedes #1195
2023-10-21 18:44:37 +02:00
Ettore Di Giacinto
bf2ed3d752 fix(Dockerfile): piper phonemize is required during build
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-10-21 16:40:41 +02:00
Ettore Di Giacinto
d17a92eef3 example(bruno): add image generation
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-10-21 11:38:23 +02:00
Ettore Di Giacinto
1a7be035d3 fix(Makefile): build all backends if none is specified
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-10-21 11:34:59 +02:00
Ettore Di Giacinto
004baaa30f feat(llama.cpp): update
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-10-21 11:04:03 +02:00
renovate[bot]
ef19268418 chore(deps): update actions/checkout action to v4 (#1006)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-21 08:55:44 +02:00
renovate[bot]
e82470341f fix(deps): update module google.golang.org/grpc to v1.59.0 (#1189)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-20 17:04:14 +02:00
renovate[bot]
88fa42de75 fix(deps): update github.com/tmc/langchaingo digest to c636b3d (#1188)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-20 17:03:01 +02:00
Ettore Di Giacinto
432513c3ba ci: add GPU tests (#1095)
* ci: test GPU

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* ci: show logs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Debug

* debug

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* split extra/core images

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* split extra/core images

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* consider runner host dir

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-10-19 13:50:40 +02:00
renovate[bot]
45370c212b fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 9a19c74 (#1179)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-17 18:37:27 +02:00
Jesús Espino
e91f660eb1 feat(metrics): Adding initial support for prometheus metrics (#1176)
* feat(metrics): Adding initial support for prometheus metrics

* Fixing CI

* run go mod tidy
2023-10-17 18:22:53 +02:00
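A small Go sketch of the kind of wiring that initial Prometheus support implies, a counter plus the standard `/metrics` handler; the metric and endpoint names are illustrative, not necessarily the ones this PR adds:

```go
package main

import (
	"net/http"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

// apiCalls counts requests per endpoint; the name is made up for this sketch.
var apiCalls = promauto.NewCounterVec(prometheus.CounterOpts{
	Name: "localai_api_calls_total",
	Help: "Number of API calls, by endpoint.",
}, []string{"endpoint"})

func main() {
	http.HandleFunc("/v1/chat/completions", func(w http.ResponseWriter, r *http.Request) {
		apiCalls.WithLabelValues("chat").Inc()
		w.WriteHeader(http.StatusOK)
	})
	http.Handle("/metrics", promhttp.Handler()) // scraped by Prometheus
	http.ListenAndServe(":8080", nil)
}
```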
renovate[bot]
3f3162e57c fix(deps): update module github.com/gofiber/fiber/v2 to v2.50.0 (#1177)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-16 21:47:44 +02:00
renovate[bot]
208d1fce58 fix(deps): update github.com/tmc/langchaingo digest to a02d4fd (#1175)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-16 21:46:53 +02:00
Ettore Di Giacinto
128694213f feat: llama.cpp gRPC C++ backend (#1170)
* wip: llama.cpp c++ gRPC server

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* make it work, attach it to the build process

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* update deps

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: add protobuf dep

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* try fix protobuf on cmake

* cmake: workarounds

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* add packages

* cmake: use fixed version of grpc

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* cmake(grpc): install locally

* install grpc

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* install required deps for grpc on debian bullseye

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* debug

* debug

* Fixups

* no need to install cmake manually

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* ci: fixup macOS

* use brew whenever possible

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* macOS fixups

* debug

* fix container build

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* workaround

* try mac

https://stackoverflow.com/questions/23905661/on-mac-g-clang-fails-to-search-usr-local-include-and-usr-local-lib-by-def

* Disable temp. arm64 docker image builds

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-10-16 21:46:29 +02:00
Jesús Espino
8034ed3473 Adding transcript subcommand (#1171)
Adding the transcript subcommand to the localai binary

This PR is related to #816
2023-10-15 09:17:41 +02:00
renovate[bot]
d22069c59e fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 22de3c5 (#1172)
This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/nomic-ai/gpt4all/gpt4all-bindings/golang](https://togithub.com/nomic-ai/gpt4all) | require | digest | `10f9b49` -> `22de3c5` |

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-14 12:29:22 +02:00
renovate[bot]
5a04d32b39 fix(deps): update module github.com/sashabaranov/go-openai to v1.16.0 (#1159)
This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/sashabaranov/go-openai](https://togithub.com/sashabaranov/go-openai) | require | minor | `v1.15.4` -> `v1.16.0` |

### Release Notes

sashabaranov/go-openai [`v1.16.0`](https://togithub.com/sashabaranov/go-openai/releases/tag/v1.16.0):

- Add DotProduct Method and README Example for Embedding Similarity Search by [@ealvar3z](https://togithub.com/ealvar3z) in [sashabaranov/go-openai#492](https://togithub.com/sashabaranov/go-openai/pull/492)
- fix: use any for n_epochs by [@henomis](https://togithub.com/henomis) in [sashabaranov/go-openai#499](https://togithub.com/sashabaranov/go-openai/pull/499)
- Feat Add headers to openai responses by [@henomis](https://togithub.com/henomis) in [sashabaranov/go-openai#506](https://togithub.com/sashabaranov/go-openai/pull/506)
- Support get http header and x-ratelimit-* headers by [@liushuangls](https://togithub.com/liushuangls) in [sashabaranov/go-openai#507](https://togithub.com/sashabaranov/go-openai/pull/507)

**Full Changelog**: https://github.com/sashabaranov/go-openai/compare/v1.15.4...v1.16.0

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-14 12:28:58 +02:00
Jesús Espino
ab65f3a17d Adding the tts command line subcommand (#1169)
This PR adds the tts (Text to Speech) command to the localai binary.

This PR is related to the issue #816
2023-10-14 12:27:35 +02:00
renovate[bot]
4e23cbebcf fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 10f9b49 (#1158)
This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/nomic-ai/gpt4all/gpt4all-bindings/golang](https://togithub.com/nomic-ai/gpt4all) | require | digest | `56c0d28` -> `10f9b49` |

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-13 18:31:13 +02:00
Ettore Di Giacinto
63418c1afc ci: cleanup worker (#1166)
**Description**

Tries to make CI green again

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-10-12 18:09:56 +02:00
Jesús Espino
8ca671761a feat(cli): Adding models subcommand with list and install subcommands (#1165)
Adding subcommands to do certain actions directly from the command line.
I'm starting with the models subcommand allowing you to list models from
your galleries and install them.

This PR partially fixes #816

My intention is to keep adding other subcommands, but I think this is a
good start, and I think this already provides value.

Also, I added a new dependency to render the progress bar in the command
line; it is not strictly needed, but I think it is a nice-to-have that
makes the interface cooler.

Here is a screenshot:

![imagen](https://github.com/go-skynet/LocalAI/assets/290303/8d8c1bf0-5340-46ce-9362-812694f914cd)
2023-10-12 10:45:34 +02:00
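An illustrative, stdlib-only Go sketch of the subcommand shape described above; this is not LocalAI's actual CLI code, and the messages are placeholders:

```go
package main

import (
	"fmt"
	"os"
)

func main() {
	if len(os.Args) < 3 || os.Args[1] != "models" {
		fmt.Println("usage: local-ai models [list|install <name>]")
		os.Exit(1)
	}
	switch os.Args[2] {
	case "list":
		fmt.Println("listing models from configured galleries...")
	case "install":
		if len(os.Args) < 4 {
			fmt.Println("usage: local-ai models install <name>")
			os.Exit(1)
		}
		// a real implementation would download here and drive a progress bar
		fmt.Printf("installing %s...\n", os.Args[3])
	default:
		fmt.Println("unknown models subcommand:", os.Args[2])
		os.Exit(1)
	}
}
```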
Jesús Espino
81a5ed9f31 fix(openai): Populate ID and Created fields in OpenAI compatible responses (#1164)
Adding the extra ID and Created fields to every response from the OpenAI-compatible
API to improve compatibility.

This PR fixes #1103
2023-10-12 02:00:08 +00:00
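A minimal Go sketch of populating the two envelope fields; the ID format shown is an assumption modeled on OpenAI's `chatcmpl-` prefix, and the struct is illustrative:

```go
package main

import (
	"fmt"
	"math/rand"
	"time"
)

type ChatCompletion struct {
	ID      string `json:"id"`
	Object  string `json:"object"`
	Created int64  `json:"created"`
	Model   string `json:"model"`
}

func newCompletion(model string) ChatCompletion {
	return ChatCompletion{
		ID:      fmt.Sprintf("chatcmpl-%d", rand.Int63()), // spec expects a unique id
		Object:  "chat.completion",
		Created: time.Now().Unix(), // unix seconds, per the OpenAI schema
		Model:   model,
	}
}

func main() {
	fmt.Printf("%+v\n", newCompletion("gpt-4"))
}
```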
renovate[bot]
528b9d9206 fix(deps): update github.com/go-skynet/go-llama.cpp digest to aeba71e (#1155)
This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/go-skynet/go-llama.cpp](https://togithub.com/go-skynet/go-llama.cpp) | require | digest | `1676dcd` -> `aeba71e` |

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-11 18:19:13 +02:00
renovate[bot]
1a4c57fac2 fix(deps): update module google.golang.org/grpc to v1.58.3 (#1160)
This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [google.golang.org/grpc](https://togithub.com/grpc/grpc-go) | require | patch | `v1.58.2` -> `v1.58.3` |

### Release Notes

grpc/grpc-go [`v1.58.3`](https://togithub.com/grpc/grpc-go/releases/tag/v1.58.3):

Security

- server: prohibit more than MaxConcurrentStreams handlers from running
at once (CVE-2023-44487)

In addition to this change, applications should ensure they do not leave
running tasks behind related to the RPC before returning from method
handlers, or should enforce appropriate limits on any such work.

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-11 18:18:32 +02:00
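For context on the CVE-2023-44487 note above, a minimal grpc-go server that caps concurrent streams per connection; the limit value is illustrative:

```go
package main

import (
	"log"
	"net"

	"google.golang.org/grpc"
)

func main() {
	lis, err := net.Listen("tcp", ":50051")
	if err != nil {
		log.Fatal(err)
	}
	s := grpc.NewServer(
		grpc.MaxConcurrentStreams(100), // bound handler concurrency per connection
	)
	log.Fatal(s.Serve(lis))
}
```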
Dave
44a7045732 Feats: bruno example, gallery improvements for new scraper (#1161)
This PR bundles together two unrelated features:

1. Model Gallery improvements - specifically, the ability to follow
".ref" gallery links (which I made up for this specific application) to
an actual gallery yaml file (in order to have stable URLs) and the
ability to load self-contained configurations, rather than always using
a base.yaml + overrides. This is groundwork for my python-based
huggingface scraper.

2. A while ago I introduced some Insomnia request templates for people
to use. Unfortunately, Insomnia has decided to tank their product... So
I've personally switched to using
[bruno](https://github.com/usebruno/bruno/). Corresponding equivalent
files that I use for my testing have been added. Just open the folder
from bruno and everything will work. No import process required.

---------

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-10-11 18:18:12 +02:00
renovate[bot]
8ac7186185 fix(deps): update module github.com/onsi/ginkgo/v2 to v2.13.0 (#1152)
This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/onsi/ginkgo/v2](https://togithub.com/onsi/ginkgo) | require | minor | `v2.12.1` -> `v2.13.0` |

### Release Notes

onsi/ginkgo [`v2.13.0`](https://togithub.com/onsi/ginkgo/releases/tag/v2.13.0):

Features
- Add PreviewSpect() to enable programmatic preview access to the suite
report (fixes [#1225](https://togithub.com/onsi/ginkgo/issues/1225))

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-10 09:27:41 +02:00
renovate[bot]
975387f7ae fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 56c0d28 (#1140)
This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/nomic-ai/gpt4all/gpt4all-bindings/golang](https://togithub.com/nomic-ai/gpt4all) | require | digest | `6711bdd` -> `56c0d28` |

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-07 11:20:40 +02:00
David
d793b5af5e fix: update docker-compose.yaml (#1131)
fix issue #803
2023-10-05 22:13:18 +02:00
renovate[bot]
5188776224 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 1676dcd (#1135)
This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/go-skynet/go-llama.cpp](https://togithub.com/go-skynet/go-llama.cpp) | require | digest | `6018c9d` -> `1676dcd` |

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-05 21:14:47 +02:00
LocalAI [bot]
07249c0446 ⬆️ Update go-skynet/go-llama.cpp (#1136)
Bump of go-skynet/go-llama.cpp version

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-10-05 17:35:21 +02:00
renovate[bot]
188301f403 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 6018c9d (#1129)
This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/go-skynet/go-llama.cpp](https://togithub.com/go-skynet/go-llama.cpp) | require | digest | `79f9587` -> `6018c9d` |

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-04 18:03:15 +02:00
LocalAI [bot]
e660721a0c ⬆️ Update go-skynet/go-llama.cpp (#1130)
Bump of go-skynet/go-llama.cpp version

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-10-04 16:54:20 +02:00
renovate[bot]
e029cc66bc fix(deps): update module github.com/rs/zerolog to v1.31.0 (#1102)
This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/rs/zerolog](https://togithub.com/rs/zerolog) | require | minor | `v1.30.0` -> `v1.31.0` |

Release notes: [v1.30.0...v1.31.0](https://togithub.com/rs/zerolog/compare/v1.30.0...v1.31.0)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-02 18:14:33 +02:00
James Braza
e34b5f0119 Cleaning up examples/ models and starter .env files (#1124)
Closes https://github.com/go-skynet/LocalAI/issues/1066 and
https://github.com/go-skynet/LocalAI/issues/1065

Standardizes all `examples/`:
- Models in one place (other than `rwkv`, which was one-offy)
- Env files as `.env.example` with `cp`
    - Also standardizes comments and links docs
2023-10-02 18:14:10 +02:00
renovate[bot]
c223364816 fix(deps): update module github.com/sashabaranov/go-openai to v1.15.4 (#1122)
This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/sashabaranov/go-openai](https://togithub.com/sashabaranov/go-openai) | require | patch | `v1.15.3` -> `v1.15.4` |

### Release Notes

sashabaranov/go-openai [`v1.15.4`](https://togithub.com/sashabaranov/go-openai/releases/tag/v1.15.4):

- added delete fine tune model endpoint by [@BrendanMartin](https://togithub.com/BrendanMartin) in [sashabaranov/go-openai#497](https://togithub.com/sashabaranov/go-openai/pull/497)

**Full Changelog**: https://github.com/sashabaranov/go-openai/compare/v1.15.3...v1.15.4

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-01 19:45:26 +02:00
renovate[bot]
74fd5844ca fix(deps): update module github.com/shirou/gopsutil/v3 to v3.23.9 (#1120)
This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/shirou/gopsutil/v3](https://togithub.com/shirou/gopsutil) | require | patch | `v3.23.8` -> `v3.23.9` |

Release notes: [v3.23.8...v3.23.9](https://togithub.com/shirou/gopsutil/compare/v3.23.8...v3.23.9)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-10-01 09:18:39 +00:00
renovate[bot]
4ebc86df84 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 79f9587 (#1085)
This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/go-skynet/go-llama.cpp](https://togithub.com/go-skynet/go-llama.cpp) | require | digest | `d84f03c` -> `79f9587` |

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-30 14:15:54 +02:00
renovate[bot]
8cd03eff58 fix(deps): update github.com/tmc/langchaingo digest to e16b777 (#1101)
This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/tmc/langchaingo](https://togithub.com/tmc/langchaingo) | require | digest | `2c309cf` -> `e16b777` |

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-30 08:13:13 +02:00
LocalAI [bot]
46660a16a0 ⬆️ Update go-skynet/go-llama.cpp (#1106)
Bump of go-skynet/go-llama.cpp version

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-09-29 23:55:12 +00:00
renovate[bot]
27b097309e fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 6711bdd (#1079)
This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/nomic-ai/gpt4all/gpt4all-bindings/golang](https://togithub.com/nomic-ai/gpt4all) | require | digest | `e86c637` -> `6711bdd` |

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-29 19:18:04 +02:00
renovate[bot]
d0fa1f8e94 fix(deps): update module github.com/onsi/gomega to v1.28.0 (#1113)
This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/onsi/gomega](https://togithub.com/onsi/gomega) | require | minor | `v1.27.10` -> `v1.28.0` |

### Release Notes

onsi/gomega [`v1.28.0`](https://togithub.com/onsi/gomega/releases/tag/v1.28.0):

Features
- Add VerifyHost handler to ghttp ([#698](https://togithub.com/onsi/gomega/issues/698)) [`0b03b36`](https://togithub.com/onsi/gomega/commit/0b03b36)

Fixes
- Read Body for Newer Responses in HaveHTTPBodyMatcher ([#686](https://togithub.com/onsi/gomega/issues/686)) [`18d6673`](https://togithub.com/onsi/gomega/commit/18d6673)

Maintenance
- Bump github.com/onsi/ginkgo/v2 from 2.11.0 to 2.12.0 ([#693](https://togithub.com/onsi/gomega/issues/693)) [`55a33f3`](https://togithub.com/onsi/gomega/commit/55a33f3)
- Typo in matchers.go ([#691](https://togithub.com/onsi/gomega/issues/691)) [`de68e8f`](https://togithub.com/onsi/gomega/commit/de68e8f)
- Bump commonmarker from 0.23.9 to 0.23.10 in /docs ([#690](https://togithub.com/onsi/gomega/issues/690)) [`ab17f5e`](https://togithub.com/onsi/gomega/commit/ab17f5e)
- chore: update test matrix for Go 1.21 ([#689](https://togithub.com/onsi/gomega/issues/689)) [`5069017`](https://togithub.com/onsi/gomega/commit/5069017)
- Bump golang.org/x/net from 0.12.0 to 0.14.0 ([#688](https://togithub.com/onsi/gomega/issues/688)) [`babe25f`](https://togithub.com/onsi/gomega/commit/babe25f)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-29 19:14:36 +02:00
65a
55e38fea0e feat(llama.cpp): enable ROCm/HIPBLAS support (#1100)
**Description**

This PR fixes the lack of HIPBLAS support in LocalAI.

**Notes for Reviewers**
This PR builds on https://github.com/go-skynet/go-llama.cpp/pull/235 to
enable ROCm/HIPBLAS support for gguf models running under the llama.cpp
backend (not the stable ggml one). It can be enabled with
BUILD_TYPE=hipblas. This was tested on a gfx1100 card, but should work
for gfx900, gfx1030, and other cards. Card support can be set with the
AMDGPU_TARGETS environment variable.

---------

Signed-off-by: 65a <65a@63bit.net>
2023-09-28 21:42:20 +02:00
renovate[bot]
274ace2898 fix(deps): update github.com/tmc/langchaingo digest to 2c309cf (#1097)
This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/tmc/langchaingo](https://togithub.com/tmc/langchaingo) | require | digest | `9c8845b` -> `2c309cf` |

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-24 14:55:17 +02:00
Aisuko
a8cc3709c6 Add the CONTRIBUTING.md (#1098)
**Description**

This PR is related to #105
---------

Signed-off-by: GitHub <noreply@github.com>
Signed-off-by: Aisuko <urakiny@gmail.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-09-24 14:54:55 +02:00
Ettore Di Giacinto
a28ab18987 feat(vllm): Allow to set quantization (#1094)
This is particularly useful for setting AWQ quantization.

**Description**

Follow up of #1015 

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-09-22 15:52:38 +02:00
lunamidori5
048b81373d Requested Changes from GPT4ALL to Luna-AI-Llama2 (#1092)
**Description**

This PR fixes #na

---------

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
2023-09-22 11:22:17 +02:00
renovate[bot]
aea1d62ae6 fix(deps): update module google.golang.org/grpc to v1.58.2 (#1090)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [google.golang.org/grpc](https://togithub.com/grpc/grpc-go) | require | patch | `v1.58.1` -> `v1.58.2` |

---

### Release Notes

<details>
<summary>grpc/grpc-go (google.golang.org/grpc)</summary>

### [`v1.58.2`](https://togithub.com/grpc/grpc-go/releases/tag/v1.58.2):
Release 1.58.2

[Compare
Source](https://togithub.com/grpc/grpc-go/compare/v1.58.1...v1.58.2)

### Bug Fixes

-   balancer/weighted_round_robin: fix ticker leak on update

A new ticker is created every time there is an update of addresses or
configuration, but was not properly stopped. This change stops the
ticker when it is no longer needed.

</details>

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-22 08:44:45 +02:00
Ettore Di Giacinto
601e54000d fix(llama.cpp): update, run go mod tidy (#1088)
**Description**

This PR supersedes #1086

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-09-22 00:45:02 +02:00
ci-robbot [bot]
7bdf707dd3 ⬆️ Update go-skynet/go-llama.cpp (#1084)
Bump of go-skynet/go-llama.cpp version

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-09-20 19:48:38 +02:00
Ettore Di Giacinto
4a7e7e9fdb fix(vall-e-x): copy vall-e-x next to the local-ai binary in the container image (#1082)
**Description**

This PR fixes vall-e-x in the container image

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-09-19 21:30:51 +02:00
Ettore Di Giacinto
bdf3f95346 feat(python-grpc): allow to set max workers with PYTHON_GRPC_MAX_WORKERS (#1081)
**Description**

This allows customizing the maximum number of gRPC workers for the
Python backends.
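
For example (a sketch: the environment variable name comes from this
PR's title, while the binary invocation and worker count are
illustrative):

```bash
# Cap each Python backend's gRPC server at 4 worker threads
PYTHON_GRPC_MAX_WORKERS=4 ./local-ai
```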

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-09-19 21:30:39 +02:00
Ettore Di Giacinto
453e9c5da9 fix(vllm): set default top_p with vllm (#1078)
**Description**

This PR fixes vllm when called with a request with an empty top_p
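
A request like the following (a sketch: model name and port assumed)
previously hit the backend with an unset `top_p`; with this fix a sane
default is applied instead:

```bash
# No top_p in the payload: the backend now falls back to a default value
curl http://localhost:8080/v1/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "my-vllm-model", "prompt": "Hello"}'
```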

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-09-19 18:10:23 +02:00
Ettore Di Giacinto
3a69bd3ef5 Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-09-19 11:23:20 +02:00
renovate[bot]
a69c0f765e fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to e86c637 (#1059)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/nomic-ai/gpt4all/gpt4all-bindings/golang](https://togithub.com/nomic-ai/gpt4all) | require | digest | `cf4eb53` -> `e86c637` |

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-18 17:10:23 +02:00
renovate[bot]
97d1367764 fix(deps): update github.com/go-skynet/go-llama.cpp digest to b471eb7 (#1050)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/go-skynet/go-llama.cpp](https://togithub.com/go-skynet/go-llama.cpp) | require | digest | `cc8a123` -> `b471eb7` |

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-18 17:09:51 +02:00
renovate[bot]
880e21288e fix(deps): update module github.com/valyala/fasthttp to v1.50.0 (#1060)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/valyala/fasthttp](https://togithub.com/valyala/fasthttp) | require | minor | `v1.49.0` -> `v1.50.0` |

---

### Release Notes

<details>
<summary>valyala/fasthttp (github.com/valyala/fasthttp)</summary>

###
[`v1.50.0`](https://togithub.com/valyala/fasthttp/releases/tag/v1.50.0)

[Compare
Source](https://togithub.com/valyala/fasthttp/compare/v1.49.0...v1.50.0)

- [`8cc5539`](https://togithub.com/valyala/fasthttp/commit/8cc5539) Fix
various request timeout issues (Erik Dubbelboer)
- [`34e7da1`](https://togithub.com/valyala/fasthttp/commit/34e7da1)
Allow connection close for custom streams
([#&#8203;1603](https://togithub.com/valyala/fasthttp/issues/1603))
(Armin Becher)
- [`8236f8d`](https://togithub.com/valyala/fasthttp/commit/8236f8d)
fasthttpproxy: fix doc examples (Oleksandr Redko)
- [`4ec5c5a`](https://togithub.com/valyala/fasthttp/commit/4ec5c5a)
docs: fix typos in comments and tests (Oleksandr Redko)
- [`9aa666e`](https://togithub.com/valyala/fasthttp/commit/9aa666e)
Enable gocritic linter; fix lint issues
([#&#8203;1612](https://togithub.com/valyala/fasthttp/issues/1612))
(Oleksandr Redko)

</details>

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-18 16:43:24 +02:00
James Braza
2ba9762255 Cleaned up chatbot-ui READMEs (#1075)
This PR cleans up the `chatbot-ui`/`-manual` examples:
- Fixes `Dockerfile` vs `docker-compose` confusion
- Makes it clear where to view the web UI in `## Run` sections

---------

Signed-off-by: James Braza <jamesbraza@gmail.com>
2023-09-18 16:43:06 +02:00
renovate[bot]
30f120ee6a fix(deps): update module github.com/gofiber/fiber/v2 to v2.49.2 (#1049)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/gofiber/fiber/v2](https://togithub.com/gofiber/fiber) | require | patch | `v2.49.1` -> `v2.49.2` |

---

### Release Notes

<details>
<summary>gofiber/fiber (github.com/gofiber/fiber/v2)</summary>

### [`v2.49.2`](https://togithub.com/gofiber/fiber/releases/tag/v2.49.2)

[Compare
Source](https://togithub.com/gofiber/fiber/compare/v2.49.1...v2.49.2)

#### 🧹 Updates

- Middleware/logger: Enabling color changes padding for some fields
[#&#8203;2604](https://togithub.com/gofiber/fiber/issues/2604)
([#&#8203;2616](https://togithub.com/gofiber/fiber/issues/2616))
- Bump actions/checkout from 3 to 4
([#&#8203;2618](https://togithub.com/gofiber/fiber/issues/2618))
- Bump golang.org/x/sys from 0.11.0 to 0.12.0
([#&#8203;2617](https://togithub.com/gofiber/fiber/issues/2617))

#### 🐛 Fixes

-   Vulnerability in Ctx.IsFromLocal()

#### 📚 Documentation

- Replaced double quotes with backticks in all route parameter strings
([#&#8203;2591](https://togithub.com/gofiber/fiber/issues/2591))

**Full Changelog**:
https://github.com/gofiber/fiber/compare/v2.49.1...v2.49.2

Thank you [@&#8203;11-aryan](https://togithub.com/11-aryan) and
[@&#8203;AKARSHITJOSHI](https://togithub.com/AKARSHITJOSHI) for making
this update possible.

</details>

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-17 08:39:06 +02:00
renovate[bot]
28a36e20aa fix(deps): update module google.golang.org/grpc to v1.58.1 (#1020)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [google.golang.org/grpc](https://togithub.com/grpc/grpc-go) | require | minor | `v1.57.0` -> `v1.58.1` |

---

### Release Notes

<details>
<summary>grpc/grpc-go (google.golang.org/grpc)</summary>

### [`v1.58.1`](https://togithub.com/grpc/grpc-go/releases/tag/v1.58.1):
Release 1.58.1

[Compare
Source](https://togithub.com/grpc/grpc-go/compare/v1.58.0...v1.58.1)

### Bug Fixes

- grpc: fix a bug that was decrementing active RPC count too early for
streaming RPCs; leading to channel moving to IDLE even though it had
open streams
- grpc: fix a bug where transports were not being closed upon channel
entering IDLE

### [`v1.58.0`](https://togithub.com/grpc/grpc-go/releases/tag/v1.58.0):
Release 1.58.0

[Compare
Source](https://togithub.com/grpc/grpc-go/compare/v1.57.0...v1.58.0)

### API Changes

See [#&#8203;6472](https://togithub.com/grpc/grpc-go/issues/6472) for
details about these changes.

- balancer: add `StateListener` to `NewSubConnOptions` for `SubConn`
state updates and deprecate `Balancer.UpdateSubConnState`
([#&#8203;6481](https://togithub.com/grpc/grpc-go/issues/6481))
    -   `UpdateSubConnState` will be deleted in the future.
- balancer: add `SubConn.Shutdown` and deprecate
`Balancer.RemoveSubConn`
([#&#8203;6493](https://togithub.com/grpc/grpc-go/issues/6493))
    -   `RemoveSubConn` will be deleted in the future.
- resolver: remove deprecated `AddressType`
([#&#8203;6451](https://togithub.com/grpc/grpc-go/issues/6451))
- This was previously used as a signal to enable the "grpclb" load
balancing policy, and to pass LB addresses to the policy. Instead,
`balancer/grpclb/state.Set()` should be used to add these addresses to
the name resolver's output. The built-in "dns" name resolver already
does this.
- resolver: add new field `Endpoints` to `State` and deprecate
`Addresses`
([#&#8203;6471](https://togithub.com/grpc/grpc-go/issues/6471))
    -   `Addresses` will be deleted in the future.

### New Features

- balancer/leastrequest: Add experimental support for least request LB
policy and least request configured as a custom xDS policy
([#&#8203;6510](https://togithub.com/grpc/grpc-go/issues/6510),
[#&#8203;6517](https://togithub.com/grpc/grpc-go/issues/6517))
    -   Set `GRPC_EXPERIMENTAL_ENABLE_LEAST_REQUEST=true` to enable
- stats: Add an RPC event for blocking caused by the LB policy's picker
([#&#8203;6422](https://togithub.com/grpc/grpc-go/issues/6422))

### Bug Fixes

- clusterresolver: fix deadlock when dns resolver responds inline with
update or error at build time
([#&#8203;6563](https://togithub.com/grpc/grpc-go/issues/6563))
- grpc: fix a bug where the channel could erroneously report
`TRANSIENT_FAILURE` when actually moving to `IDLE`
([#&#8203;6497](https://togithub.com/grpc/grpc-go/issues/6497))
- balancergroup: do not cache closed sub-balancers by default; affects
`rls`, `weightedtarget` and `clustermanager` LB policies
([#&#8203;6523](https://togithub.com/grpc/grpc-go/issues/6523))
- client: fix a bug that prevented detection of RPC status in
trailers-only RPC responses when using `ClientStream.Header()`, and
prevented retry of the RPC
([#&#8203;6557](https://togithub.com/grpc/grpc-go/issues/6557))

### Performance Improvements

- client & server: Add experimental `[With]SharedWriteBuffer` to improve
performance by reducing allocations when sending RPC messages. (Disabled
by default.)
([#&#8203;6309](https://togithub.com/grpc/grpc-go/issues/6309))
- Special Thanks:
[@&#8203;s-matyukevich](https://togithub.com/s-matyukevich)

</details>

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-17 08:38:52 +02:00
ci-robbot [bot]
a8fb4d23f8 ⬆️ Update go-skynet/go-llama.cpp (#1062)
Bump of go-skynet/go-llama.cpp version

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-09-17 08:38:28 +02:00
Manohar Joshi
f37a4ec9c8 1038 - Streamlit bot with LocalAI (#1072)
**Description**

This PR fixes #1038

Added a Streamlit example and also updated the README for the examples.


**[Signed
commits](../CONTRIBUTING.md#signing-off-on-commits-developer-certificate-of-origin)**
- [X] Yes, I signed my commits.
 

2023-09-17 08:33:23 +02:00
Ettore Di Giacinto
31ed13094b Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-09-16 23:00:42 +02:00
Ettore Di Giacinto
8ccf5b2044 feat(speculative-sampling): allow to specify a draft model in the model config (#1052)
**Description**

This PR fixes #1013.

It adds `draft_model` and `n_draft` to the model YAML config in order to
load models with speculative sampling. This should be compatible as well
with grammars.

example:

```yaml
backend: llama
context_size: 1024
name: my-model-name
parameters:
  model: foo-bar
n_draft: 16
draft_model: model-name
```
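
With such a config loaded, a regular completion request against the
model name uses speculative sampling transparently (a sketch: host and
port assumed):

```bash
# Nothing changes on the client side; drafting happens server-side
curl http://localhost:8080/v1/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "my-model-name", "prompt": "A long story:", "max_tokens": 64}'
```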

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-09-14 17:44:16 +02:00
renovate[bot]
247d85b523 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to cf4eb53 (#1047)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/nomic-ai/gpt4all/gpt4all-bindings/golang](https://togithub.com/nomic-ai/gpt4all) | require | digest | `f0735ef` -> `cf4eb53` |

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-14 10:41:07 +02:00
renovate[bot]
54688db994 chore(deps): update docker/metadata-action action to v5 (#1045)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [docker/metadata-action](https://togithub.com/docker/metadata-action) | action | major | `v4` -> `v5` |

---

### Release Notes

<details>
<summary>docker/metadata-action (docker/metadata-action)</summary>

### [`v5`](https://togithub.com/docker/metadata-action/compare/v4...v5)

[Compare
Source](https://togithub.com/docker/metadata-action/compare/v4...v5)

</details>

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-14 10:40:51 +02:00
ci-robbot [bot]
8590f5a599 ⬆️ Update go-skynet/go-llama.cpp (#1048)
Bump of go-skynet/go-llama.cpp version

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-09-14 10:40:36 +02:00
renovate[bot]
289d51c049 fix(deps): update github.com/go-skynet/go-llama.cpp digest to cc8a123 (#1041)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/go-skynet/go-llama.cpp](https://togithub.com/go-skynet/go-llama.cpp) | require | digest | `4145bd5` -> `cc8a123` |

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-13 10:49:40 +00:00
renovate[bot]
813eaa867c chore(deps): update docker/login-action action to v3 (#1040)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [docker/login-action](https://togithub.com/docker/login-action) | action | major | `v2` -> `v3` |

---

### Release Notes

<details>
<summary>docker/login-action (docker/login-action)</summary>

### [`v3`](https://togithub.com/docker/login-action/compare/v2...v3)

[Compare
Source](https://togithub.com/docker/login-action/compare/v2...v3)

</details>

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-13 09:17:50 +02:00
renovate[bot]
abffb16292 chore(deps): update docker/build-push-action action to v5 (#1039)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [docker/build-push-action](https://togithub.com/docker/build-push-action) | action | major | `v4` -> `v5` |

---

### Release Notes

<details>
<summary>docker/build-push-action (docker/build-push-action)</summary>

###
[`v5`](https://togithub.com/docker/build-push-action/compare/v4...v5)

[Compare
Source](https://togithub.com/docker/build-push-action/compare/v4...v5)

</details>

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-13 09:17:28 +02:00
renovate[bot]
50e439f633 fix(deps): update module github.com/sashabaranov/go-openai to v1.15.3 (#1035)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/sashabaranov/go-openai](https://togithub.com/sashabaranov/go-openai) | require | patch | `v1.15.2` -> `v1.15.3` |

---

### Release Notes

<details>
<summary>sashabaranov/go-openai
(github.com/sashabaranov/go-openai)</summary>

###
[`v1.15.3`](https://togithub.com/sashabaranov/go-openai/releases/tag/v1.15.3)

[Compare
Source](https://togithub.com/sashabaranov/go-openai/compare/v1.15.2...v1.15.3)

#### What's Changed

- Chore Support base64 embedding format by
[@&#8203;henomis](https://togithub.com/henomis) in
[https://github.com/sashabaranov/go-openai/pull/485](https://togithub.com/sashabaranov/go-openai/pull/485)

**Full Changelog**:
https://github.com/sashabaranov/go-openai/compare/v1.15.2...v1.15.3

</details>

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-13 09:17:11 +02:00
renovate[bot]
25eb1415df fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to f0735ef (#1034)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/nomic-ai/gpt4all/gpt4all-bindings/golang](https://togithub.com/nomic-ai/gpt4all) | require | digest | `b6e38d6` -> `f0735ef` |

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-13 09:16:52 +02:00
ci-robbot [bot]
0b28220f2b ⬆️ Update go-skynet/go-llama.cpp (#1043)
Bump of go-skynet/go-llama.cpp version

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-09-13 09:16:33 +02:00
renovate[bot]
5661740990 fix(deps): update github.com/tmc/langchaingo digest to 9c8845b (#1029)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/tmc/langchaingo](https://togithub.com/tmc/langchaingo) | require | digest | `c85d396` -> `9c8845b` |

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-11 09:43:11 +02:00
ci-robbot [bot]
255c31bddf ⬆️ Update go-skynet/go-llama.cpp (#1027)
Bump of go-skynet/go-llama.cpp version

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-09-11 09:42:54 +02:00
Ettore Di Giacinto
7888fefeea docs: Update README 2023-09-10 09:21:47 +02:00
renovate[bot]
0937835802 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 4145bd5 (#1025)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/go-skynet/go-llama.cpp](https://togithub.com/go-skynet/go-llama.cpp) | require | digest | `05dc4b6` -> `4145bd5` |

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-10 09:19:03 +02:00
renovate[bot]
ea806b37ac fix(deps): update module github.com/sashabaranov/go-openai to v1.15.2 (#1022)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/sashabaranov/go-openai](https://togithub.com/sashabaranov/go-openai) | require | patch | `v1.15.1` -> `v1.15.2` |

---

### Release Notes

<details>
<summary>sashabaranov/go-openai
(github.com/sashabaranov/go-openai)</summary>

###
[`v1.15.2`](https://togithub.com/sashabaranov/go-openai/releases/tag/v1.15.2)

[Compare
Source](https://togithub.com/sashabaranov/go-openai/compare/v1.15.1...v1.15.2)

#### What's Changed

- Update OpenAPI file return struct by
[@&#8203;NullpointerW](https://togithub.com/NullpointerW) in
[https://github.com/sashabaranov/go-openai/pull/486](https://togithub.com/sashabaranov/go-openai/pull/486)

**Full Changelog**:
https://github.com/sashabaranov/go-openai/compare/v1.15.1...v1.15.2

</details>

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-10 09:18:28 +02:00
Ettore Di Giacinto
d6614f3149 feat(vllm): Initial vllm backend implementation (#1026)
Related to: https://github.com/go-skynet/LocalAI/issues/1015
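
A minimal model definition exercising the new backend might look like
this (a sketch: the model name is illustrative, not part of this PR):

```yaml
# Sketch of a vllm-backed model config
name: vllm-model
backend: vllm
parameters:
  model: facebook/opt-125m
```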
2023-09-10 09:17:55 +02:00
Ettore Di Giacinto
9a50a39848 doc(README): update 2023-09-09 19:28:07 +02:00
Ettore Di Giacinto
2793e8f327 doc(citation): Add citation block
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-09-09 19:17:19 +02:00
Ettore Di Giacinto
c0bb5c4bf6 feat(vllm): Initial vllm backend implementation
Related to: https://github.com/go-skynet/LocalAI/issues/1015

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-09-09 17:03:23 +02:00
Ettore Di Giacinto
cc74fc93b4 feat(llama.cpp): update (#1024)
**Description**

This PR fixes #

**Notes for Reviewers**


**[Signed
commits](../CONTRIBUTING.md#signing-off-on-commits-developer-certificate-of-origin)**
- [ ] Yes, I signed my commits.
 


---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-09-08 18:38:22 +02:00
renovate[bot]
44b39195d6 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 05dc4b6 (#1004)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/go-skynet/go-llama.cpp](https://togithub.com/go-skynet/go-llama.cpp) | require | digest | `d8c8547` -> `05dc4b6` |

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-08 12:39:17 +02:00
Robert Deaton
2454110d81 Update README to reflect changes in Continue's config file (#1014)
**Description**

OpenAIServerInfo no longer exists, and api_base has been moved up.
Changes were made in commit 8967e2d53f.

Signed-off-by: Robert Deaton <rdeaton@platipy.org>
2023-09-07 16:29:07 +02:00
Ettore Di Giacinto
ee59e7d45f fix(vall-e-x): make audiopath relative to models (#1012)
**Description**

This PR fixes #

**Notes for Reviewers**


**[Signed
commits](../CONTRIBUTING.md#signing-off-on-commits-developer-certificate-of-origin)**
- [ ] Yes, I signed my commits.
 


Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-09-05 19:33:36 +02:00
Ettore Di Giacinto
605c319157 feat(diffusers): don't set seed in params and respect device (#1010)
**Description**

Follow up of #998 - respect the device used to load the model and do not
specify a seed in the parameters, but rather just configure the
generator as described in
https://huggingface.co/docs/diffusers/using-diffusers/reusing_seeds

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-09-04 19:38:38 +02:00
Ettore Di Giacinto
dc307a1cc0 feat: add vall-e-x (#1007)
**Description**

This PR fixes #985 
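
A hedged usage sketch via the TTS endpoint (the request shape and field
names here are assumptions, not confirmed by this PR; consult the docs
for the exact schema):

```bash
# Assumed payload: backend and input text; adjust to your setup
curl http://localhost:8080/tts \
    -H "Content-Type: application/json" \
    -d '{"backend": "vall-e-x", "input": "Hello from LocalAI"}'
```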

**Notes for Reviewers**


**[Signed
commits](../CONTRIBUTING.md#signing-off-on-commits-developer-certificate-of-origin)**
- [ ] Yes, I signed my commits.
 


Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-09-04 19:25:23 +02:00
quoing
e7981152b2 [query_data example] max_chunk_overlap in PromptHelper must be in 0..1 range (#1000)
**Description**

Simple fix: the percentage value is expected to be a float in the 0..1 range.

**Notes for Reviewers**


**[Signed
commits](../CONTRIBUTING.md#signing-off-on-commits-developer-certificate-of-origin)**
- [x] Yes, I signed my commits.
 

2023-09-04 19:12:53 +02:00
ci-robbot [bot]
b3eb5c860b ⬆️ Update go-skynet/go-llama.cpp (#1005)
Bump of go-skynet/go-llama.cpp version

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-09-04 19:11:41 +02:00
Bo-Yi Wu
1c2f7409e3 chore(deps): remove unused package (#1003)
**Description**

Just remove an unused Go package and update the formatting in the Makefile.

Signed-off-by: appleboy <appleboy.tw@gmail.com>
2023-09-04 19:11:28 +02:00
renovate[bot]
57d41a3f94 fix(deps): update module github.com/gofiber/fiber/v2 to v2.49.1 (#1001)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/gofiber/fiber/v2](https://togithub.com/gofiber/fiber) | require | patch | `v2.49.0` -> `v2.49.1` |

---

### Release Notes

<details>
<summary>gofiber/fiber (github.com/gofiber/fiber/v2)</summary>

### [`v2.49.1`](https://togithub.com/gofiber/fiber/releases/tag/v2.49.1)

[Compare
Source](https://togithub.com/gofiber/fiber/compare/v2.49.0...v2.49.1)

#### 🧹 Updates

- Bump github.com/valyala/fasthttp from 1.48.0 to 1.49.0
([#&#8203;2615](https://togithub.com/gofiber/fiber/issues/2615))

#### 🐛 Fixes

- Rollback changes to go.mod file
([#&#8203;2614](https://togithub.com/gofiber/fiber/issues/2614))

#### 📚 Documentation

- Add Polish translation - README_pl.md
([#&#8203;2613](https://togithub.com/gofiber/fiber/issues/2613))
- Update README_ko.md
([#&#8203;2605](https://togithub.com/gofiber/fiber/issues/2605))

**Full Changelog**:
https://github.com/gofiber/fiber/compare/v2.49.0...v2.49.1

Thank you [@&#8203;KompocikDot](https://togithub.com/KompocikDot),
[@&#8203;LimJiAn](https://togithub.com/LimJiAn) and
[@&#8203;gaby](https://togithub.com/gaby) for making this update
possible.

</details>

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-04 19:11:09 +02:00
Max Cohen
f9d2bd24eb Allow to manually set the seed for the SD pipeline (#998)
**Description**

Enable setting the seed for the stable diffusion pipeline. This is done
through an additional `seed` parameter in the request, such as:

```bash
curl http://localhost:8080/v1/images/generations \
    -H "Content-Type: application/json" \
    -d '{"model": "stablediffusion", "prompt": "prompt", "n": 1, "step": 51, "size": "512x512", "seed": 3}'
```

**Notes for Reviewers**
When the `seed` parameter is not sent, `request.seed` defaults to `0`,
making it difficult to detect an actual seed of `0`. Is there a way to
change the default to `-1` for instance ?

**[Signed
commits](../CONTRIBUTING.md#signing-off-on-commits-developer-certificate-of-origin)**
- [x] Yes, I signed my commits.
 

2023-09-04 19:10:55 +02:00
ci-robbot [bot]
0e7e8eec53 ⬆️ Update go-skynet/go-llama.cpp (#1002)
Bump of go-skynet/go-llama.cpp version

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-09-03 10:00:01 +02:00
renovate[bot]
9a30a246d8 fix(deps): update github.com/go-skynet/go-llama.cpp digest to d8c8547 (#997)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/go-skynet/go-llama.cpp](https://togithub.com/go-skynet/go-llama.cpp) | require | digest | `c5622a8` -> `d8c8547` |

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-02 12:31:12 +00:00
ci-robbot [bot]
c332499252 ⬆️ Update go-skynet/go-llama.cpp (#996)
Bump of go-skynet/go-llama.cpp version

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-09-02 09:54:50 +02:00
Dave
005f289632 feat: Model Gallery Endpoint Refactor / Mutable Galleries Endpoints (#991)
Refactor for the model gallery endpoints: bundle the resources into a
struct and make galleries mutable via a few CRUD endpoints. This is
groundwork required for making efficient use of the new scraper; while
that PR isn't _quite_ ready yet, the goal is to have more, individually
smaller gallery files. Therefore, rather than requiring a full LocalAI
service restart, these new endpoints have been added to make life
easier.

- Adds endpoints to add, list and remove model galleries at runtime (see the sketch below)
- Adds these endpoints to the Insomnia config
- Minor fix: loading file urls follows symbolic links now
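
A hedged sketch of the new runtime gallery management (endpoint paths
and payload shape are assumed from the description above, not quoted
from the PR):

```bash
# List configured galleries
curl http://localhost:8080/models/galleries
# Add a gallery at runtime (payload shape illustrative)
curl -X POST http://localhost:8080/models/galleries \
    -H "Content-Type: application/json" \
    -d '{"name": "my-gallery", "url": "https://example.com/gallery-index.yaml"}'
# Remove it again
curl -X DELETE http://localhost:8080/models/galleries \
    -H "Content-Type: application/json" \
    -d '{"name": "my-gallery"}'
```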
2023-09-02 09:00:44 +02:00
renovate[bot]
3d7553317f fix(deps): update github.com/go-skynet/go-llama.cpp digest to c5622a8 (#992)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/go-skynet/go-llama.cpp](https://togithub.com/go-skynet/go-llama.cpp) | require | digest | `bf3f946` -> `c5622a8` |

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-02 08:58:16 +02:00
renovate[bot]
8e4f6b2ee5 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to b6e38d6 (#988)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/nomic-ai/gpt4all/gpt4all-bindings/golang](https://togithub.com/nomic-ai/gpt4all) | require | digest | `27a8b02` -> `b6e38d6` |

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-02 08:56:43 +02:00
renovate[bot]
d5cad7d3ae fix(deps): update module github.com/shirou/gopsutil/v3 to v3.23.8 (#989)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/shirou/gopsutil/v3](https://togithub.com/shirou/gopsutil) | require | patch | `v3.23.7` -> `v3.23.8` |

---

### Release Notes

<details>
<summary>shirou/gopsutil (github.com/shirou/gopsutil/v3)</summary>

###
[`v3.23.8`](https://togithub.com/shirou/gopsutil/releases/tag/v3.23.8)

[Compare
Source](https://togithub.com/shirou/gopsutil/compare/v3.23.7...v3.23.8)

<!-- Release notes generated using configuration in .github/release.yml
at v3.23.8 -->

#### What's Changed

[#&#8203;1514](https://togithub.com/shirou/gopsutil/issues/1514)
improves `Processes()` performance 6% or more. Thank you
[@&#8203;atoulme](https://togithub.com/atoulme) !

##### cpu

- Enable setting of vendor and related information for all Power
versions by [@&#8203;kishen-v](https://togithub.com/kishen-v) in
[https://github.com/shirou/gopsutil/pull/1495](https://togithub.com/shirou/gopsutil/pull/1495)
- chore: change CIRCLECI environment variable to CI. by
[@&#8203;shirou](https://togithub.com/shirou) in
[https://github.com/shirou/gopsutil/pull/1518](https://togithub.com/shirou/gopsutil/pull/1518)

##### disk

- fix: fixed windows disk package leaks by
[@&#8203;ozanh](https://togithub.com/ozanh) in
[https://github.com/shirou/gopsutil/pull/1501](https://togithub.com/shirou/gopsutil/pull/1501)
- fix IOCounters() SerialNumber enumeration by
[@&#8203;gdvalle](https://togithub.com/gdvalle) in
[https://github.com/shirou/gopsutil/pull/1508](https://togithub.com/shirou/gopsutil/pull/1508)

##### host

- \[host]\[linux]: remove double quote from lsb release info by
[@&#8203;shirou](https://togithub.com/shirou) in
[https://github.com/shirou/gopsutil/pull/1504](https://togithub.com/shirou/gopsutil/pull/1504)

##### mem

- mem: linux: fix vmstat field names by
[@&#8203;chouquette](https://togithub.com/chouquette) in
[https://github.com/shirou/gopsutil/pull/1498](https://togithub.com/shirou/gopsutil/pull/1498)

##### process

- Fix Processes() calls with many cores by
[@&#8203;atoulme](https://togithub.com/atoulme) in
[https://github.com/shirou/gopsutil/pull/1514](https://togithub.com/shirou/gopsutil/pull/1514)

#### New Contributors

- [@&#8203;kishen-v](https://togithub.com/kishen-v) made their first
contribution in
[https://github.com/shirou/gopsutil/pull/1495](https://togithub.com/shirou/gopsutil/pull/1495)
- [@&#8203;chouquette](https://togithub.com/chouquette) made their first
contribution in
[https://github.com/shirou/gopsutil/pull/1498](https://togithub.com/shirou/gopsutil/pull/1498)
- [@&#8203;ozanh](https://togithub.com/ozanh) made their first
contribution in
[https://github.com/shirou/gopsutil/pull/1501](https://togithub.com/shirou/gopsutil/pull/1501)
- [@&#8203;gdvalle](https://togithub.com/gdvalle) made their first
contribution in
[https://github.com/shirou/gopsutil/pull/1508](https://togithub.com/shirou/gopsutil/pull/1508)

**Full Changelog**:
https://github.com/shirou/gopsutil/compare/v3.23.7...v3.23.8

</details>

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-09-01 13:22:53 -04:00
Jirubizu
355e9d4fb5 [API] expose all the jobs via /models/jobs endpoint (#983)
**Description**

This PR fixes #
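
A minimal usage sketch of the endpoint named in the title (host and
port assumed):

```bash
# List all model-install jobs known to the API
curl http://localhost:8080/models/jobs
```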


**Notes for Reviewers**


**[Signed
commits](../CONTRIBUTING.md#signing-off-on-commits-developer-certificate-of-origin)**
- [ ] Yes, I signed my commits.
 


Co-authored-by: Jirubizu <jirubizu@jirubizu.cc>
2023-08-31 15:03:03 +00:00
renovate[bot]
629185e10a fix(deps): update module github.com/sashabaranov/go-openai to v1.15.1 (#984)
[![Mend
Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/sashabaranov/go-openai](https://togithub.com/sashabaranov/go-openai) | require | minor | `v1.14.2` -> `v1.15.1` |

---

### Release Notes

<details>
<summary>sashabaranov/go-openai
(github.com/sashabaranov/go-openai)</summary>

###
[`v1.15.1`](https://togithub.com/sashabaranov/go-openai/releases/tag/v1.15.1)

[Compare
Source](https://togithub.com/sashabaranov/go-openai/compare/v1.14.2...v1.15.1)

#### What's Changed

- Chore Deprecate legacy fine tunes API by
[@&#8203;henomis](https://togithub.com/henomis) in
[https://github.com/sashabaranov/go-openai/pull/484](https://togithub.com/sashabaranov/go-openai/pull/484)

**Full Changelog**:
https://github.com/sashabaranov/go-openai/compare/v1.15...v1.15.1

</details>

---

### Configuration

📅 **Schedule**: Branch creation - At any time (no schedule defined),
Automerge - At any time (no schedule defined).

🚦 **Automerge**: Disabled by config. Please merge this manually once you
are satisfied.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Mend
Renovate](https://www.mend.io/free-developer-tools/renovate/). View
repository job log
[here](https://developer.mend.io/github/go-skynet/LocalAI).


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-31 14:35:13 +00:00
Samuel Maynard
deeef5fc24 fix(utf8): prevent multi-byte utf8 characters from being mangled (#981)
**Description**

This PR fixes #677 using [suggested
solution](https://github.com/go-skynet/LocalAI/issues/677#issuecomment-1695939097)
from @yantoz

before:
```
❯ curl -N http://localhost:57541/v1/completions -H "Content-Type: application/json" -d '{
     "model": "ggml-model-q4_0.bin",
     "prompt": "",
     "max_tokens": 32,
     "temperature": 0.7,
     "stream": true
   }'
data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"text":"\ufffd"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}

data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"text":"\ufffd"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}

data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"text":"\ufffd"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}

data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"text":"\ufffd"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}

data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"text":" |"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}

data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"text":" I"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}

data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"text":"'"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}

data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"text":"m"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
```

now:
```
❯ curl -N http://localhost:57541/v1/completions -H "Content-Type: application/json" -d '{
   "model": "ggml-model-q4_0.bin",
   "prompt": "",
   "max_tokens": 32,
   "temperature": 0.7,
   "stream": true
 }'
data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"index":0,"text":"😂"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}

data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"index":0,"text":" "}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}

data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"index":0,"text":"|"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}

data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"index":0,"text":" "}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}

data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"index":0,"text":"I"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}

data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"index":0,"text":"'"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}

data: {"object":"text_completion","model":"ggml-model-q4_0.bin","choices":[{"index":0,"text":"m"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
```
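
The gist of the fix: buffer streamed bytes until they form complete UTF-8 runes before emitting a chunk, so a multi-byte emoji can never be split across SSE messages. A minimal Go sketch of that buffering strategy (the `flushComplete` helper is illustrative, not LocalAI's actual code):

```go
package main

import (
	"fmt"
	"unicode/utf8"
)

// flushComplete returns the longest prefix of buf that ends on a
// complete UTF-8 rune, plus the leftover bytes of any partial rune.
func flushComplete(buf []byte) (complete, rest []byte) {
	n := len(buf)
	// Walk back at most UTFMax-1 bytes looking for the start of a
	// rune that has not been fully received yet.
	for i := n - 1; i >= 0 && n-i < utf8.UTFMax; i-- {
		if utf8.RuneStart(buf[i]) {
			if !utf8.FullRune(buf[i:]) {
				return buf[:i], buf[i:]
			}
			break
		}
	}
	return buf, nil
}

func main() {
	// "😂" is four bytes; simulate it arriving split across two chunks.
	chunks := [][]byte{{0xF0, 0x9F}, {0x98, 0x82, '!'}}
	var pending []byte
	for _, c := range chunks {
		pending = append(pending, c...)
		out, rest := flushComplete(pending)
		pending = rest
		if len(out) > 0 {
			fmt.Printf("emit: %q\n", string(out)) // never emits U+FFFD
		}
	}
}
```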


Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-08-30 23:56:59 +00:00
renovate[bot]
b905c07650 fix(deps): update github.com/go-skynet/go-llama.cpp digest to bf3f946 (#979)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
|
[github.com/go-skynet/go-llama.cpp](https://togithub.com/go-skynet/go-llama.cpp)
| require | digest | `9072315` -> `bf3f946` |


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-30 23:02:19 +02:00
Ettore Di Giacinto
1ff30034e8 fix(deps): update go-llama.cpp (#980)
**Description**

This PR bumps llama.cpp (adding support to gguf v2) and changes the
default test model

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-30 23:01:55 +02:00
renovate[bot]
c64b59c80c fix(deps): update module github.com/valyala/fasthttp to v1.49.0 (#971)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/valyala/fasthttp](https://togithub.com/valyala/fasthttp) |
require | minor | `v1.48.0` -> `v1.49.0` |

---

### Release Notes

<details>
<summary>valyala/fasthttp (github.com/valyala/fasthttp)</summary>

###
[`v1.49.0`](https://togithub.com/valyala/fasthttp/releases/tag/v1.49.0)

[Compare
Source](https://togithub.com/valyala/fasthttp/compare/v1.48.0...v1.49.0)

- [`0e99e64`](https://togithub.com/valyala/fasthttp/commit/0e99e64)
Update golangci-lint and gosec
([#&#8203;1609](https://togithub.com/valyala/fasthttp/issues/1609))
(Erik Dubbelboer)
- [`6aea1e0`](https://togithub.com/valyala/fasthttp/commit/6aea1e0) fix
round2\_32, split round2 tests because they depend on sizeof int at
compile time
([#&#8203;1607](https://togithub.com/valyala/fasthttp/issues/1607))
(Duncan Overbruck)
- [`4b0e6c7`](https://togithub.com/valyala/fasthttp/commit/4b0e6c7)
Update ErrNoMultipartForm (Erik Dubbelboer)
- [`727021a`](https://togithub.com/valyala/fasthttp/commit/727021a)
Update security policy (Erik Dubbelboer)
- [`54fdc7a`](https://togithub.com/valyala/fasthttp/commit/54fdc7a)
Abstracts the RoundTripper interface and provides a default implement
([#&#8203;1602](https://togithub.com/valyala/fasthttp/issues/1602))
(Tim)
- [`e181af1`](https://togithub.com/valyala/fasthttp/commit/e181af1)
fasthttpproxy support ipv6
([#&#8203;1597](https://togithub.com/valyala/fasthttp/issues/1597))
(Pluto)
- [`6eb2249`](https://togithub.com/valyala/fasthttp/commit/6eb2249)
fix:fasthttp server with tlsConfig
([#&#8203;1595](https://togithub.com/valyala/fasthttp/issues/1595))
(Zhang Xiaopei)
- [`1c85d43`](https://togithub.com/valyala/fasthttp/commit/1c85d43) Fix
round2 (Erik Dubbelboer)
- [`064124e`](https://togithub.com/valyala/fasthttp/commit/064124e)
Avoid nolint:errcheck in header tests
([#&#8203;1589](https://togithub.com/valyala/fasthttp/issues/1589))
(Oleksandr Redko)
- [`0d0bbfe`](https://togithub.com/valyala/fasthttp/commit/0d0bbfe) Auto
add 'Vary' header after compression
([#&#8203;1585](https://togithub.com/valyala/fasthttp/issues/1585))
(AutumnSun)
- [`d229959`](https://togithub.com/valyala/fasthttp/commit/d229959)
Remove unnecessary indent blocks
([#&#8203;1586](https://togithub.com/valyala/fasthttp/issues/1586))
(Oleksandr Redko)
- [`6b68042`](https://togithub.com/valyala/fasthttp/commit/6b68042) Use
timeout in TCPDialer to resolveTCPAddrs
([#&#8203;1582](https://togithub.com/valyala/fasthttp/issues/1582))
(un000)

</details>


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-29 22:16:10 +02:00
renovate[bot]
9a869bbaf6 fix(deps): update github.com/tmc/langchaingo digest to c85d396 (#962)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/tmc/langchaingo](https://togithub.com/tmc/langchaingo) |
require | digest | `1e2a401` -> `c85d396` |


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-29 22:15:43 +02:00
renovate[bot]
fe1b54b713 fix(deps): update module github.com/gofiber/fiber/v2 to v2.49.0 (#966)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/gofiber/fiber/v2](https://togithub.com/gofiber/fiber) |
require | minor | `v2.48.0` -> `v2.49.0` |

---

### Release Notes

<details>
<summary>gofiber/fiber (github.com/gofiber/fiber/v2)</summary>

### [`v2.49.0`](https://togithub.com/gofiber/fiber/releases/tag/v2.49.0)

[Compare
Source](https://togithub.com/gofiber/fiber/compare/v2.48.0...v2.49.0)

####  Breaking Changes

- Add config to enable splitting by comma in parsers
([#&#8203;2560](https://togithub.com/gofiber/fiber/issues/2560))
    https://docs.gofiber.io/api/fiber#config

> EnableSplittingOnParsers splits the query/body/header parameters by
comma when it's true (default: false).
>
> For example, you can use it to parse multiple values from a query
parameter like this:
> /api?foo=bar,baz == foo\[]=bar\&foo\[]=baz
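
As a quick illustration, a hypothetical Fiber handler using the new flag might look like this (not code from the PR):

```go
package main

import (
	"log"

	"github.com/gofiber/fiber/v2"
)

type filter struct {
	Foo []string `query:"foo"`
}

func main() {
	// EnableSplittingOnParsers is assumed available from fiber v2.49.0 on.
	app := fiber.New(fiber.Config{EnableSplittingOnParsers: true})

	app.Get("/api", func(c *fiber.Ctx) error {
		var f filter
		if err := c.QueryParser(&f); err != nil {
			return err
		}
		// GET /api?foo=bar,baz now yields f.Foo == []string{"bar", "baz"}.
		return c.JSON(f)
	})

	log.Fatal(app.Listen(":3000"))
}
```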

#### 🚀 New

- Add custom data property to favicon middleware config
([#&#8203;2579](https://togithub.com/gofiber/fiber/issues/2579))
    https://docs.gofiber.io/api/middleware/favicon#config

> This allows the user to use //go:embed flags to load favicon data
during build-time, and supply it to the middleware instead of reading
the file every time the application starts.
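
A sketch of that pattern, assuming the new config field is named `Data` (hypothetical app code, not from the PR):

```go
package main

import (
	_ "embed"
	"log"

	"github.com/gofiber/fiber/v2"
	"github.com/gofiber/fiber/v2/middleware/favicon"
)

//go:embed favicon.ico
var faviconData []byte

func main() {
	app := fiber.New()
	// Serve the bytes embedded at build time instead of reading
	// favicon.ico from disk on every application start.
	app.Use(favicon.New(favicon.Config{Data: faviconData}))
	log.Fatal(app.Listen(":3000"))
}
```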

#### 🧹 Updates

- Middleware/logger: Latency match gin-gonic/gin formatter
([#&#8203;2569](https://togithub.com/gofiber/fiber/issues/2569))
- Middleware/filesystem: Refactor: use `errors.Is` instead of
`os.IsNotExist`
([#&#8203;2558](https://togithub.com/gofiber/fiber/issues/2558))
- Use Global vars instead of local vars for isLocalHost
([#&#8203;2595](https://togithub.com/gofiber/fiber/issues/2595))
- Remove redundant nil check
([#&#8203;2584](https://togithub.com/gofiber/fiber/issues/2584))
- Bump github.com/mattn/go-runewidth from 0.0.14 to 0.0.15
([#&#8203;2551](https://togithub.com/gofiber/fiber/issues/2551))
- Bump github.com/google/uuid from 1.3.0 to 1.3.1
([#&#8203;2592](https://togithub.com/gofiber/fiber/issues/2592))
- Bump golang.org/x/sys from 0.10.0 to 0.11.0
([#&#8203;2563](https://togithub.com/gofiber/fiber/issues/2563))
- Add go 1.21 to ci and readmes
([#&#8203;2588](https://togithub.com/gofiber/fiber/issues/2588))

#### 🐛 Fixes

- Middleware/logger: Default latency output format
([#&#8203;2580](https://togithub.com/gofiber/fiber/issues/2580))
- Decompress request body when multi Content-Encoding sent on request
headers ([#&#8203;2555](https://togithub.com/gofiber/fiber/issues/2555))

#### 📚 Documentation

- Fix wrong JSON docs
([#&#8203;2554](https://togithub.com/gofiber/fiber/issues/2554))
- Update io/ioutil package to io package
([#&#8203;2589](https://togithub.com/gofiber/fiber/issues/2589))
- Replace EG flag with the proper and smaller SVG
([#&#8203;2585](https://togithub.com/gofiber/fiber/issues/2585))
- Added Egyptian Arabic readme file
([#&#8203;2565](https://togithub.com/gofiber/fiber/issues/2565))
- Translate README to Portuguese
([#&#8203;2567](https://togithub.com/gofiber/fiber/issues/2567))
- Improve \*fiber.Client section
([#&#8203;2553](https://togithub.com/gofiber/fiber/issues/2553))
- Improved the config section of the middleware readme´s
([#&#8203;2552](https://togithub.com/gofiber/fiber/issues/2552))
- Added documentation about ctx Fresh
([#&#8203;2549](https://togithub.com/gofiber/fiber/issues/2549))
- Update intro.md
([#&#8203;2550](https://togithub.com/gofiber/fiber/issues/2550))
- Fixed link to slim template engine
([#&#8203;2547](https://togithub.com/gofiber/fiber/issues/2547))

**Full Changelog**:
https://github.com/gofiber/fiber/compare/v2.48.0...v2.49.0

Thank you [@&#8203;Jictyvoo](https://togithub.com/Jictyvoo),
[@&#8203;Juneezee](https://togithub.com/Juneezee),
[@&#8203;Kirari04](https://togithub.com/Kirari04),
[@&#8203;LimJiAn](https://togithub.com/LimJiAn),
[@&#8203;PassTheMayo](https://togithub.com/PassTheMayo),
[@&#8203;andersonmiranda-com](https://togithub.com/andersonmiranda-com),
[@&#8203;bigpreshy](https://togithub.com/bigpreshy),
[@&#8203;efectn](https://togithub.com/efectn),
[@&#8203;renanbastos93](https://togithub.com/renanbastos93),
[@&#8203;scandar](https://togithub.com/scandar),
[@&#8203;sixcolors](https://togithub.com/sixcolors) and
[@&#8203;stefanb](https://togithub.com/stefanb) for making this update
possible.

</details>


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-28 08:24:13 +02:00
ci-robbot [bot]
cc84dfd50f ⬆️ Update go-skynet/go-llama.cpp (#968)
Bump of go-skynet/go-llama.cpp version

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-08-28 08:23:51 +02:00
Ettore Di Giacinto
158c7867e7 fix(diffusers): correctly check alpha (#967)
**Description**

Loras that have no alpha would crash otherwise

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-27 15:35:59 +02:00
renovate[bot]
997c39ccd5 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 9072315 (#963)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
|
[github.com/go-skynet/go-llama.cpp](https://togithub.com/go-skynet/go-llama.cpp)
| require | digest | `bf63302` -> `9072315` |


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-27 10:11:45 +02:00
Ettore Di Giacinto
3bab307904 fix(llama): resolve lora adapters correctly from the model file (#964)
**Description**

We were otherwise expecting absolute paths; this makes the path relative
to the model file (as someone would expect).
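
In spirit, the resolution behaves like this small sketch (an illustrative helper, not the actual LocalAI code):

```go
package main

import (
	"fmt"
	"path/filepath"
)

// resolveLoraPath keeps absolute adapter paths as-is and resolves
// relative ones against the directory of the model file.
func resolveLoraPath(modelFile, loraAdapter string) string {
	if loraAdapter == "" || filepath.IsAbs(loraAdapter) {
		return loraAdapter
	}
	return filepath.Join(filepath.Dir(modelFile), loraAdapter)
}

func main() {
	fmt.Println(resolveLoraPath("/models/llama-2-7b.gguf", "adapters/sql-lora.bin"))
	// Output: /models/adapters/sql-lora.bin
}
```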

2023-08-27 10:11:32 +02:00
Ettore Di Giacinto
02704e38d3 feat(diffusers): Add lora (#965)
**Description**

This PR fixes #914 

Now diffusers respects the `lora_adapter` configuration parameter.

---------

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-08-27 10:11:16 +02:00
renovate[bot]
9e5fb29965 fix(deps): update module github.com/otiai10/openaigo to v1.6.0 (#960)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/otiai10/openaigo](https://togithub.com/otiai10/openaigo) |
require | minor | `v1.5.2` -> `v1.6.0` |

---

### Release Notes

<details>
<summary>otiai10/openaigo (github.com/otiai10/openaigo)</summary>

###
[`v1.6.0`](https://togithub.com/otiai10/openaigo/compare/v1.5.2...v1.6.0)

[Compare
Source](https://togithub.com/otiai10/openaigo/compare/v1.5.2...v1.6.0)

</details>


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-26 14:18:06 +02:00
renovate[bot]
7dba131d5f fix(deps): update github.com/tmc/langchaingo digest to 1e2a401 (#948)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/tmc/langchaingo](https://togithub.com/tmc/langchaingo) |
require | digest | `fef0821` -> `1e2a401` |


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-26 14:17:48 +02:00
renovate[bot]
ce0b771217 fix(deps): update github.com/go-skynet/go-llama.cpp digest to bf63302 (#930)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
|
[github.com/go-skynet/go-llama.cpp](https://togithub.com/go-skynet/go-llama.cpp)
| require | digest | `f03869d` -> `bf63302` |


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-26 14:17:22 +02:00
Ettore Di Giacinto
44bc7aa3d0 feat: Allow loading lora adapters for llama.cpp (#955)
**Description**

This PR fixes #


Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-25 21:58:46 +02:00
ci-robbot [bot]
7f0c88ed3e ⬆️ Update go-skynet/go-llama.cpp (#954)
Bump of go-skynet/go-llama.cpp version

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-08-25 18:45:40 +02:00
ci-robbot [bot]
d15508f52c ⬆️ Update nomic-ai/gpt4all (#953)
Bump of nomic-ai/gpt4all version

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-08-25 01:19:48 +02:00
renovate[bot]
b111423b9c fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 27a8b02 (#947)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
|
[github.com/nomic-ai/gpt4all/gpt4all-bindings/golang](https://togithub.com/nomic-ai/gpt4all)
| require | digest | `36f7fb5` -> `27a8b02` |


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-24 23:52:49 +02:00
renovate[bot]
215a51c4c1 fix(deps): update module github.com/onsi/ginkgo/v2 to v2.12.0 (#949)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/onsi/ginkgo/v2](https://togithub.com/onsi/ginkgo) |
require | minor | `v2.11.0` -> `v2.12.0` |

---

### Release Notes

<details>
<summary>onsi/ginkgo (github.com/onsi/ginkgo/v2)</summary>

### [`v2.12.0`](https://togithub.com/onsi/ginkgo/releases/tag/v2.12.0)

[Compare
Source](https://togithub.com/onsi/ginkgo/compare/v2.11.0...v2.12.0)

#### 2.12.0

##### Features

- feat: allow MustPassRepeatedly decorator to be set at suite level
([#&#8203;1266](https://togithub.com/onsi/ginkgo/issues/1266))
\[[`05de518`](https://togithub.com/onsi/ginkgo/commit/05de518)]

##### Fixes

- fix-errors-in-readme
([#&#8203;1244](https://togithub.com/onsi/ginkgo/issues/1244))
\[[`27c2f5d`](https://togithub.com/onsi/ginkgo/commit/27c2f5d)]

##### Maintenance

Various chores/dependency bumps.

</details>


Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-24 18:58:06 +02:00
Ettore Di Giacinto
1120847f72 feat: bump llama.cpp, add gguf support (#943)
**Description**

This PR syncs up the `llama` backend to use `gguf`
(https://github.com/go-skynet/go-llama.cpp/pull/180). It also adds
`llama-stable` to the targets so we can still load ggml. It adapts the
current tests to use the `llama-backend` for ggml and uses a `gguf`
model to run tests on the new backend.

In order to consume the new version of go-llama.cpp, it also bumps Go to
1.21 (images, pipelines, etc.).

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-24 01:18:58 +02:00
Dave
704323b805 initial draft of an importable Insomnia profile for developers (#942)
This is a starting point for developers to easily import a collection of
requests to hit LocalAI. Insomnia was chosen as it's open source, has a
graphical user interface for users desiring that, and has the ability to
easily export requests as cURL commands for our documentation site.
2023-08-23 18:39:27 +02:00
Dave
10b0e13882 feat: backend monitor shutdown endpoint, process based (#938)
This PR adds a new endpoint to the backend monitor section
`/backend/shutdown` which terminates the grpc process for the related
model.
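
A minimal client sketch for the new endpoint; the `model` JSON field name is an assumption for illustration, so check the actual request schema before relying on it:

```go
package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	// Assumed request shape: a JSON body naming the model whose gRPC
	// backend process should be terminated.
	body := bytes.NewBufferString(`{"model": "ggml-model-q4_0.bin"}`)
	resp, err := http.Post("http://localhost:8080/backend/shutdown",
		"application/json", body)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}
```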
2023-08-23 18:38:37 +02:00
Dave
901f0709c5 Feat: rwkv improvements: (#937) 2023-08-22 18:48:06 +02:00
Gruber
0d6165e481 Example: Continue (dev) (#940) 2023-08-22 18:46:45 +02:00
renovate[bot]
6583eed6b2 fix(deps): update module github.com/google/uuid to v1.3.1 (#936)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-22 10:51:04 +02:00
Dave
a9ca70ad4a infra: add setup-go@4, test against 1.20.x (go.mod) and stable (1.21) (#935) 2023-08-21 22:16:47 +02:00
Ettore Di Giacinto
ab5b75eb01 feat: add llama-stable backend (#932)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-20 16:35:42 +02:00
Ettore Di Giacinto
cc060a283d fix: drop racy code, refactor and group API schema (#931)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-20 14:04:45 +02:00
Ettore Di Giacinto
28db83e17b fix: disable usage by default (still experimental) (#929)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-19 16:15:22 +02:00
ci-robbot [bot]
dbb1f86455 ⬆️ Update nomic-ai/gpt4all (#911)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-08-19 10:17:41 +02:00
renovate[bot]
02f7c555af fix(deps): update github.com/tmc/langchaingo digest to fef0821 (#922)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-19 01:50:04 +02:00
renovate[bot]
d982b38f76 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 36f7fb5 (#908)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-19 01:49:51 +02:00
renovate[bot]
bc2e4b952e fix(deps): update module github.com/shirou/gopsutil/v3 to v3.23.7 (#924)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-19 01:49:43 +02:00
Ettore Di Giacinto
afdc0ebfd7 feat: add --single-active-backend to allow only one backend active at a time (#925)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-19 01:49:33 +02:00
Ettore Di Giacinto
1079b18ff7 feat(diffusers): be consistent with pipelines, support also depthimg2img (#926)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-18 22:06:24 +02:00
Dave
8cb1061c11 Usage Features (#863) 2023-08-18 21:23:14 +02:00
Ettore Di Giacinto
2bacd0180d feat(diffusers): add img2img and clip_skip, support more kernels schedulers (#906)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-17 23:38:59 +02:00
renovate[bot]
ddf9bc2335 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to a630935 (#898)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-16 22:25:28 +02:00
renovate[bot]
a1afd940e3 fix(deps): update github.com/go-skynet/go-llama.cpp digest to f03869d (#901)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-16 22:25:14 +02:00
renovate[bot]
8bb76201c0 fix(deps): update github.com/tmc/langchaingo digest to eb0cbd3 (#902)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-16 22:25:02 +02:00
Ettore Di Giacinto
ede71d398c feat(diffusers): overcome prompt limit (#904)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-16 22:24:52 +02:00
ci-robbot [bot]
0c73a637f1 ⬆️ Update nomic-ai/gpt4all (#899)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-08-16 01:11:54 +02:00
Ettore Di Giacinto
37700f2d98 feat(diffusers): add DPMSolverMultistepScheduler++, DPMSolverMultistepSchedulerSDE++, guidance_scale (#903)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-16 01:11:42 +02:00
Ettore Di Giacinto
0ec695f9e4 feat: make initializer accept gRPC delay times (#900)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-16 01:11:32 +02:00
renovate[bot]
7ffd21dbc8 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 18f25c2 (#894)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-15 09:25:40 +02:00
renovate[bot]
48b3920656 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 4e55940 (#893)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-15 09:25:27 +02:00
ci-robbot [bot]
63d91af555 ⬆️ Update nomic-ai/gpt4all (#878)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-08-15 09:25:10 +02:00
Ettore Di Giacinto
a96c3bc885 feat(diffusers): various enhancements (#895) 2023-08-14 23:12:00 +02:00
Ettore Di Giacinto
77e1ae3d70 feat(Makefile): allow to restrict backend builds (#890)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-13 20:04:08 +02:00
renovate[bot]
9cc8d90865 fix(deps): update module github.com/sashabaranov/go-openai to v1.14.2 (#884)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-12 22:41:12 +02:00
Ettore Di Giacinto
a6c621ef7f feat: pre-configure LocalAI galleries (#886)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-12 11:25:17 +02:00
renovate[bot]
328289099a fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 4d855af (#875)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-12 08:58:55 +02:00
renovate[bot]
22ffd5f490 fix(deps): update github.com/tmc/langchaingo digest to fd8b7f0 (#882)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-12 08:56:15 +02:00
Ettore Di Giacinto
81708bb1e6 fix: workaround exllama import error (#885)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-12 08:56:01 +02:00
Ettore Di Giacinto
c81e9d8d1f fix: add exllama to protogen 2023-08-11 01:02:31 +02:00
Ettore Di Giacinto
ff3ab5fcca feat: Add exllama (#881)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-11 00:49:40 +02:00
Michael Nesbitt
1d1cae8e4d feat: add API_KEY list support (#877)
Co-authored-by: Harold Sun <sunhua@amazon.com>
2023-08-10 00:06:21 +02:00
Ettore Di Giacinto
8c781a6a44 feat: Add Diffusers (#874)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-09 08:38:51 +02:00
Ettore Di Giacinto
93a4bec06b fix: upgrade pip (#872)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-08 23:20:03 +02:00
renovate[bot]
c93f57efd6 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 0f2bb50 (#869)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-08 21:57:16 +02:00
ci-robbot [bot]
0e4f93c5cf ⬆️ Update nomic-ai/gpt4all (#870)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-08-08 21:57:01 +02:00
Ettore Di Giacinto
5b3fedebfe feat: add bark and AutoGPTQ (#871) 2023-08-08 20:41:49 +02:00
Ettore Di Giacinto
219751bb21 fix: cut prompt from AutoGPTQ answers
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-08 01:27:38 +02:00
Ettore Di Giacinto
bb7772a364 fix: byte utf-8 encode results from autogptq
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-08 01:20:07 +02:00
Ettore Di Giacinto
3c8fc37c56 feat: Add UseFastTokenizer
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-08 01:10:05 +02:00
Ettore Di Giacinto
39805b09e5 fix: pass by env in managed services
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-08 00:58:38 +02:00
Ettore Di Giacinto
63b01199fe fix: match lowercase of the input, not of the model 2023-08-08 00:46:22 +02:00
Ettore Di Giacinto
b09bae3443 fix: autogptq requirements
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-08 00:22:15 +02:00
Ettore Di Giacinto
de6fb98bed feat: register autogptq and bark in the container image 2023-08-07 22:53:28 +02:00
Ettore Di Giacinto
433605e282 feat: add initial Bark backend implementation
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-07 22:53:28 +02:00
Ettore Di Giacinto
a843e64fc2 feat: add initial AutoGPTQ backend implementation 2023-08-07 22:53:28 +02:00
scott4290
71611d2dec docs: base-Update comments in .env for cublas, openblas, clblas (#867) 2023-08-07 08:22:42 +00:00
Ettore Di Giacinto
abf48e8a5d readme: link to hot topics in the website 2023-08-07 00:31:46 +02:00
Ettore Di Giacinto
ac5ea0cd4d readme: link usage to docs 2023-08-07 00:04:28 +02:00
Ettore Di Giacinto
a46fcacedd readme: simplify, remove dups with website 2023-08-07 00:01:01 +02:00
Ettore Di Giacinto
df947fc933 examples: Update README 2023-08-06 23:07:06 +02:00
Ettore Di Giacinto
91d49cfe9f Update README.md 2023-08-06 11:57:28 +02:00
Ettore Di Giacinto
19d15f83db Update README.md 2023-08-06 00:04:06 +02:00
Ettore Di Giacinto
cde61cc518 Update README.md 2023-08-05 23:14:09 +02:00
Ettore Di Giacinto
acd829a7a0 fix: do not break on newlines on function returns (#864)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-04 21:46:36 +02:00
Ettore Di Giacinto
4aa5dac768 feat: update integer, number and string rules - allow primitives as root types (#862)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-03 23:32:30 +02:00
renovate[bot]
08b59b5cc5 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 50cee77 (#861)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-03 19:08:04 +02:00
ci-robbot [bot]
6b900e28cd ⬆️ Update nomic-ai/gpt4all (#859)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-08-03 19:07:53 +02:00
Ettore Di Giacinto
5ca21ee398 feat: add ngqa and RMSNormEps parameters (#860)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-03 00:51:08 +02:00
renovate[bot]
953e30814a fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to c449b71 (#858)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-03 00:20:45 +02:00
renovate[bot]
a65344cf25 fix(deps): update github.com/tmc/langchaingo digest to 271e9bd (#857)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-03 00:20:22 +02:00
Dave
7fb8b4191f feat: "simple" chat/edit/completion template system prompt from config (#856) 2023-08-03 00:19:55 +02:00
Tyler Harpool
fc8aec7324 Update to working k8sgpt + localai example in documentation (#852) 2023-08-01 22:31:36 +02:00
Ettore Di Giacinto
c309aac8f5 fix(gallery): use inline YAML (#851)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-08-01 19:09:32 +02:00
Ettore Di Giacinto
1e37ec727d Revert "⬆️ Update go-skynet/go-llama.cpp" (#850) 2023-08-01 19:09:18 +02:00
ci-robbot [bot]
ae36bae59d ⬆️ Update nomic-ai/gpt4all (#847)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-08-01 00:48:10 +02:00
renovate[bot]
e663beebf0 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to cbdcde8 (#833)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-08-01 00:47:06 +02:00
Ettore Di Giacinto
9d0292e9e1 Update README.md 2023-08-01 00:42:56 +02:00
Ettore Di Giacinto
fe27bb7982 feat: Update logo (#849) 2023-08-01 00:31:40 +02:00
Ettore Di Giacinto
d603a9cbb5 fix(gallery): preload from file should by in YAML format (#846)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-31 21:13:16 +02:00
Ettore Di Giacinto
c1fc22e746 fix(examples): use pinned versions in the k8sgpt example (#845)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-31 19:15:57 +02:00
renovate[bot]
85d3710924 fix(deps): update github.com/tmc/langchaingo digest to 8f10160 (#843)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-31 19:15:13 +02:00
ci-robbot [bot]
a0324245f1 ⬆️ Update nomic-ai/gpt4all (#841)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-07-31 19:14:56 +02:00
Dave
ce8e9dc690 feature: model list :: filter query string parameter (#830) 2023-07-31 19:14:32 +02:00
Andrew Zigler
32ca7efbeb 📝 Add OpenOps to README's project list (#832)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-07-30 15:30:14 +02:00
longy2k
27520eb169 Added "BMO Chatbot" to "Projects already using LocalAI to run local models" section (#828) 2023-07-30 15:29:23 +02:00
Andrew
9843adb4f1 Create .gitattributes to force git clone to keep the LF line endings on .sh files (#838) 2023-07-30 15:27:43 +02:00
Dave
8e8d474ae8 refactor: Remove remaining uses of deprecated package io/ioutil (#837) 2023-07-30 11:23:43 +00:00
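For reference, this kind of migration is mechanical, since Go 1.16 moved the `io/ioutil` helpers into `os` and `io`; a typical replacement looks like:

```go
package main

import (
	"fmt"
	"os"
)

func main() {
	// ioutil.ReadFile  -> os.ReadFile
	// ioutil.WriteFile -> os.WriteFile
	// ioutil.ReadAll   -> io.ReadAll
	// ioutil.TempDir   -> os.MkdirTemp
	data, err := os.ReadFile("config.yaml")
	if err != nil {
		fmt.Println("read error:", err)
		return
	}
	fmt.Printf("read %d bytes\n", len(data))
}
```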
renovate[bot]
6151ea1c4d fix(deps): update github.com/tmc/langchaingo digest to 7df4fe5 (#826)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-30 09:49:12 +02:00
renovate[bot]
d969025f87 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 8c51308 (#822)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-30 09:48:52 +02:00
ci-robbot [bot]
18e1cb9c92 ⬆️ Update nomic-ai/gpt4all (#825)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-07-30 09:48:30 +02:00
ci-robbot [bot]
e7ceb9e8f5 ⬆️ Update go-skynet/go-llama.cpp (#824)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-07-30 09:48:10 +02:00
renovate[bot]
3a4675c8c3 fix(deps): update module github.com/rs/zerolog to v1.30.0 (#836)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-30 09:47:49 +02:00
Dave
5ce0f216cf Fix: Model Gallery Downloads (#835) 2023-07-30 09:47:22 +02:00
Ettore Di Giacinto
688f150463 fix: symlink libphonemize in the container (#831) 2023-07-29 12:47:34 +02:00
Ettore Di Giacinto
00ccb8d4f1 fix: set default rope freq base to 10000 during model load
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-29 10:40:56 +02:00
Ettore Di Giacinto
e70b91aaef tests: set a small context_size
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-29 10:29:47 +02:00
Dave
8b90ac2b1a 1000 -> 10,000 for ropeFreqBase?
The error message talks about a default of 10k, so this sets it to 10k instead of 1k experimentally.
2023-07-29 02:37:24 -04:00
Ettore Di Giacinto
f085baa77d fix: set default rope if not specified
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-29 01:07:16 +02:00
Ettore Di Giacinto
fa4de05c14 fix: symlink libphonemize in the container
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-28 19:40:21 +02:00
Ettore Di Giacinto
dde12b492b fix: select function calls if 'name' is set in the request (#827)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-28 01:17:11 +02:00
Ettore Di Giacinto
096d98c3d9 fix: add rope settings during model load, fix CUDA (#821)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-27 21:56:05 +02:00
renovate[bot]
147cae9ed8 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 39acbc8 (#817)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-27 18:56:59 +02:00
renovate[bot]
c63709014b fix(deps): update github.com/go-skynet/go-llama.cpp digest to 6ba16de (#820)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-27 18:56:39 +02:00
Wendy Liga
9b307799ce fix missing openai_api_base on langchain-chroma example (#818) 2023-07-27 18:41:53 +02:00
renovate[bot]
78e36779cf fix(deps): update module google.golang.org/grpc to v1.57.0 (#815)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-27 18:41:29 +02:00
ci-robbot [bot]
90ae35e2e4 ⬆️ Update nomic-ai/gpt4all (#814)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-07-27 18:41:15 +02:00
Ettore Di Giacinto
b96e30e66c fix: use bytes in gRPC proto instead of strings (#813)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-27 18:41:04 +02:00
renovate[bot]
0af0df7423 fix(deps): update module github.com/sashabaranov/go-openai to v1.14.1 (#783)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-27 18:40:50 +02:00
renovate[bot]
0883d324d9 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 562d2b5 (#766)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-26 22:06:05 +02:00
renovate[bot]
77597e6a16 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 9100b2e (#753)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-26 22:05:55 +02:00
renovate[bot]
eae6b36d03 fix(deps): update github.com/donomii/go-rwkv.cpp digest to c898cd0 (#748)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-26 22:05:42 +02:00
renovate[bot]
c4bc7c41b1 fix(deps): update github.com/tmc/langchaingo digest to 7d5f9fd (#768)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-26 22:05:32 +02:00
ci-robbot [bot]
c79ddd6fc4 ⬆️ Update nomic-ai/gpt4all (#807)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-07-25 23:03:02 +02:00
Dave
ae58fb8821 fix: update gitignore and make clean (#798)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-07-25 23:02:46 +02:00
Ettore Di Giacinto
569c1d1163 feat: add rope settings and negative prompt, drop grammar backend (#797)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-25 19:05:27 +02:00
Aman Gupta Karmani
12fe0932c4 feat: cancel stream generation if client disappears (#792) 2023-07-24 23:10:54 +02:00
finger42
72e3e236de Added CPU information to entrypoint.sh (#794) 2023-07-23 19:27:55 +00:00
Ettore Di Giacinto
ab59b238b3 fix: update README 2023-07-23 18:58:24 +02:00
ci-robbot [bot]
bed9570e48 ⬆️ Update nomic-ai/gpt4all (#785)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-07-23 09:51:42 +02:00
Dave
c6bf67f446 feat(llama2): add template for chat messages (#782)
Co-authored-by: Aman Karmani <aman@tmm1.net>

Lays some of the groundwork for LLAMA2 compatibility, as well as for other future models with complex prompting schemes.

- Starts a small refactoring in pkg/model/loader.go regarding template loading. It is currently still a part of ModelLoader, but this should make it easy to add template loading for situations other than overall prompt templates and the new chat-specific per-message templates.
- Adds support for the new chat-endpoint-specific, per-message templates as an alternative to the existing Role: XYZ sprintf method (see the sketch below).
- Includes a temporary prompt template as an example, since I have a few questions before we merge in the model-gallery side changes (see )
- Minor debug logging changes.
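
A rough sketch of the per-message idea using Go's text/template; the struct fields and the template text are invented for illustration and are not LocalAI's actual schema:

```go
package main

import (
	"os"
	"text/template"
)

// Each chat message is rendered through its own template instead of a
// fixed "Role: Content" sprintf.
type ChatMessage struct {
	Role    string
	Content string
}

func main() {
	perMessage := template.Must(template.New("msg").Parse(
		"<|{{.Role}}|>\n{{.Content}}\n"))
	msgs := []ChatMessage{
		{Role: "system", Content: "You are a helpful assistant."},
		{Role: "user", Content: "Hello!"},
	}
	for _, m := range msgs {
		if err := perMessage.Execute(os.Stdout, m); err != nil {
			panic(err)
		}
	}
}
```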
2023-07-22 11:31:39 -04:00
ci-robbot [bot]
5ee186b8e5 ⬆️ Update go-skynet/go-llama.cpp (#723)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-07-22 00:55:33 +02:00
Ettore Di Giacinto
94817b557c fix: make completions endpoint closer to the OpenAI specification (#790)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-22 00:53:52 +02:00
Ettore Di Giacinto
26e1496075 Update README.md 2023-07-21 23:10:02 +02:00
Ettore Di Giacinto
92fca8ae74 ci: release space before build
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-21 22:56:43 +02:00
Stepan
7fa5b8401d [Telegram-bot example] Fix lint for command docker-compose (#787)
Co-authored-by: Stepan Zhashkov <steven.z@spectral-team.com>
2023-07-21 20:56:04 +02:00
Ettore Di Giacinto
0eac0402e1 feat: backends improvements (#778) 2023-07-21 20:55:49 +02:00
Ettore Di Giacinto
c71c729bc2 debug 2023-07-21 10:53:26 +02:00
Ettore Di Giacinto
e459f114cd fix: fix tests, small refactors
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-20 23:52:04 +02:00
Ettore Di Giacinto
982a7e86a8 feat: add huggingface embeddings backend
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-20 22:10:42 +02:00
Ettore Di Giacinto
94916749c5 feat: add external grpc and model autoloading 2023-07-20 22:10:12 +02:00
Ettore Di Giacinto
5ce5f87a26 fix: move metal file to grpcs assets (#777)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-20 22:00:07 +02:00
Ettore Di Giacinto
1d2ae46ddc tests: clean up logs
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-20 01:36:34 +02:00
ci-robbot [bot]
71ac331f90 ⬆️ Update nomic-ai/gpt4all (#775)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-07-20 01:22:44 +02:00
Ettore Di Giacinto
47cc95fc9f feat: add all backends to autoload
Now that gRPC backends no longer crash the main thread, we can just greedily
attempt all the backends we have available.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-20 00:40:28 +02:00
Ettore Di Giacinto
3feb632eb4 refactor: rename "llama-master" and "llama" (#776)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-20 00:36:16 +02:00
Ettore Di Giacinto
236497e331 feat: resolve JSONSchema refs (planners) (#774) 2023-07-19 22:56:13 +02:00
ci-robbot [bot]
a38dc497b2 ⬆️ Update go-skynet/go-llama.cpp (#770)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-07-19 19:44:33 +02:00
ci-robbot [bot]
28ed52fa94 ⬆️ Update nomic-ai/gpt4all (#769)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-07-19 19:44:21 +02:00
Enzo Einhorn
e995b95c94 [build] pass build type to cmake on libtransformers.a build (#741)
Co-authored-by: Enzo Einhorn <enzo.einhorn@hiventive.com>
2023-07-18 19:04:19 +02:00
Ettore Di Giacinto
8379cce209 example(functions): Add OpenAI functions example (#767) 2023-07-18 00:04:21 +02:00
ci-robbot [bot]
3c6b798522 ⬆️ Update nomic-ai/gpt4all (#759)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-07-17 23:58:40 +02:00
ci-robbot [bot]
c18770a61a ⬆️ Update go-skynet/go-bert.cpp (#758)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-07-17 23:58:25 +02:00
Ettore Di Giacinto
6352448b72 feat: add llama-master backend (#752)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-17 23:58:15 +02:00
renovate[bot]
fb6cce487f fix(deps): update module github.com/gofiber/fiber/v2 to v2.48.0 (#757)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-16 22:02:30 +02:00
renovate[bot]
3079cc4167 fix(deps): update github.com/go-skynet/go-bert.cpp digest to 6abe312 (#756)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-16 22:01:53 +02:00
ci-robbot [bot]
27ef8b1eb7 ⬆️ Update go-skynet/go-ggml-transformers.cpp (#711)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-07-16 09:57:16 +02:00
ci-robbot [bot]
c00435d72b ⬆️ Update nomic-ai/gpt4all (#735)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-07-16 09:57:00 +02:00
Ettore Di Giacinto
d0e67cce75 fix: make the last stream message send empty content
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-16 00:09:28 +02:00
renovate[bot]
6ec315e540 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 6c97625 (#733)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-15 22:53:41 +02:00
renovate[bot]
cf4e6f909c fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to cfd70b6 (#734)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-15 22:53:28 +02:00
renovate[bot]
b3a99166fd fix(deps): update github.com/tmc/langchaingo digest to dcf7ecd (#736)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-15 22:53:18 +02:00
renovate[bot]
107008331e fix(deps): update github.com/mudler/go-ggllm.cpp digest to 862477d (#745)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-15 22:53:05 +02:00
ci-robbot [bot]
accd9f9044 ⬆️ Update donomii/go-rwkv.cpp (#750)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-07-15 22:52:45 +02:00
Ettore Di Giacinto
17294ae5e5 fix: make the first stream message send empty content (#751)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-15 22:50:52 +02:00
renovate[bot]
3c3a9b765a fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to ffb09d7 (#744)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-15 16:16:13 +02:00
renovate[bot]
526c5bcdad fix(deps): update module gopkg.in/yaml.v2 to v3 (#299)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-15 16:15:40 +02:00
renovate[bot]
a1bbe75d43 fix(deps): update module github.com/sashabaranov/go-openai to v1.14.0 (#739)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-15 16:15:17 +02:00
renovate[bot]
572a311639 fix(deps): update module google.golang.org/protobuf to v1.31.0 (#746)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-15 16:14:49 +02:00
Ettore Di Giacinto
cb5d6f6e3a ci: track updates for new deps
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-15 10:04:09 +02:00
Ettore Di Giacinto
e3cabb555d feat: gRPC-based backends (#743) 2023-07-15 09:50:43 +02:00
Ettore Di Giacinto
f193f56564 fix: fix copy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-15 01:19:43 +02:00
Ettore Di Giacinto
c0a91ab548 fix: fix LDFLAGS for rwkv.cpp
Previously these libs were only linked by chance, because other deps
happened to make the linker pull them in.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-15 01:19:43 +02:00
Ettore Di Giacinto
26e510bf28 fix: Makefile
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-15 01:19:43 +02:00
Ettore Di Giacinto
98e73ed67a fix: CI fixes
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-15 01:19:43 +02:00
Ettore Di Giacinto
7f3de3ca4a fix: fix makefile error
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-15 01:19:43 +02:00
Ettore Di Giacinto
189cb3a7be feat: run all tests
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-15 01:19:43 +02:00
Ettore Di Giacinto
1d0ed95a54 feat: move other backends to grpc
This finally makes everything more consistent

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-15 01:19:43 +02:00
Ettore Di Giacinto
5dcfdbe51d feat: various refactorings
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-15 01:19:43 +02:00
Ettore Di Giacinto
f2f1d7fe72 feat: use gRPC for transformers
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-15 01:19:43 +02:00
Ettore Di Giacinto
ae533cadef feat: move gpt4all to a grpc service
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-15 01:19:43 +02:00
Ettore Di Giacinto
58f6aab637 feat: move llama to a grpc
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-15 01:19:43 +02:00
Ettore Di Giacinto
b816009db0 feat: add falcon ggllm via grpc client
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2023-07-15 01:19:43 +02:00
ci-robbot [bot]
a84dee1be1 ⬆️ Update nomic-ai/gpt4all (#705)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-07-09 16:55:56 +02:00
renovate[bot]
30e4ddbf10 fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to 3fec197 (#706)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-09 16:52:26 +02:00
Ettore Di Giacinto
296a5b6707 Update README with sponsors section (#732) 2023-07-09 14:14:54 +02:00
renovate[bot]
b0520dcb59 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to d611d10 (#699)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-09 13:40:23 +02:00
renovate[bot]
f42967ed86 fix(deps): update github.com/go-skynet/go-llama.cpp digest to ffa57fb (#707)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-09 13:40:14 +02:00
renovate[bot]
966675c8e3 fix(deps): update github.com/tmc/langchaingo digest to a875e6b (#700)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-09 13:40:02 +02:00
renovate[bot]
f68df1624b fix(deps): update module github.com/otiai10/openaigo to v1.5.2 (#731)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-09 13:39:51 +02:00
renovate[bot]
42cade808b fix(deps): update module github.com/sashabaranov/go-openai to v1.13.0 (#667)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-07-09 13:39:41 +02:00
Ettore Di Giacinto
d59211982b Create FUNDING.yml (#725) 2023-07-09 13:39:00 +02:00
Ettore Di Giacinto
7aaa10680d feat: LocalAI functions (#726) 2023-07-09 13:38:46 +02:00
mudler
dcf35dd25f Fixup custom role encoding
Signed-off-by: mudler <mudler@localai.io>
2023-07-09 11:13:19 +02:00
mudler
e70322676c Allow customizing the no-action behavior
Signed-off-by: mudler <mudler@localai.io>
2023-07-09 10:53:46 +02:00
mudler
b3f43ab938 Add a way to disable default action 2023-07-09 10:02:21 +02:00
mudler
bbc4468908 Make functions more compatible with OpenAI specs 2023-07-09 10:02:09 +02:00
mudler
4de7f55f2f Make REBUILD=false default behavior
Add notice to documentation

Signed-off-by: mudler <mudler@localai.io>
2023-07-07 00:29:14 +02:00
mudler
def23e4ee2 Remove .git from .dockerignore
Signed-off-by: mudler <mudler@localai.io>
2023-07-06 21:25:10 +02:00
mudler
55befe396a Add grammar_json to the request parameters to facilitate JSON generation 2023-07-06 19:08:04 +02:00
mudler
483fddccf9 minor fixups 2023-07-06 11:55:19 +02:00
mudler
c4495ad8f2 invoke go mod clean before rebuilds 2023-07-05 18:24:55 +02:00
mudler
05aed255db Customize function call in templates 2023-07-05 18:24:44 +02:00
mudler
0f1326b2bd fixups 2023-07-04 23:40:22 +02:00
mudler
1668489b00 Add comments 2023-07-04 19:02:02 +02:00
mudler
7dd292cbb3 feat: add a way to test grammar from forks 2023-07-04 18:58:19 +02:00
mudler
c0578031b5 Add tests
Signed-off-by: mudler <mudler@localai.io>
2023-07-04 18:58:19 +02:00
mudler
a5b64b6a41 wip: test go-llama.cpp version
It also needs a llama.cpp with the grammar branch, rebased on current master
2023-07-04 18:58:19 +02:00
mudler
b722e7eb7e feat: cleanups, small enhancements
Signed-off-by: mudler <mudler@localai.io>
2023-07-04 18:58:19 +02:00
mudler
6d19a8bdb5 fix: copy git to correctly display version in /version 2023-07-04 18:58:19 +02:00
mudler
f09ddd2983 feat: add grammar and function call support 2023-07-04 18:58:19 +02:00
Luis López
a6839fd238 feat: [whisper] Partial support for verbose_json format in transcribe endpoint (#721) 2023-07-04 14:31:31 +02:00
Ettore Di Giacinto
f3063f98d3 Update README.md 2023-07-03 00:52:26 +02:00
Ettore Di Giacinto
70674d3c58 fix(deps): bump go-llama.cpp (#719)
Signed-off-by: mudler <mudler@localai.io>
2023-07-03 00:17:48 +02:00
ci-robbot [bot]
3829aba869 ⬆️ Update nomic-ai/gpt4all (#704)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-30 10:30:39 +02:00
Ettore Di Giacinto
92614b91d7 fix: split build threads from running threads (#703)
Signed-off-by: mudler <mudler@localai.io>
2023-06-29 11:57:09 +02:00
Ettore Di Giacinto
bf5acf646e fix: adapt whisper to bindings updates (#702)
Signed-off-by: mudler <mudler@localai.io>
2023-06-29 11:26:07 +02:00
renovate[bot]
0780be022c fix(deps): update github.com/go-skynet/go-llama.cpp digest to 42ba448 (#698)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-28 23:44:04 +02:00
renovate[bot]
c756b5d054 fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to 85ed71a (#669)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-28 23:43:50 +02:00
ci-robbot [bot]
e3db6496d7 ⬆️ Update go-skynet/go-llama.cpp (#697)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-28 23:43:29 +02:00
renovate[bot]
1f1c95c618 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to a67f813 (#653)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-28 23:43:19 +02:00
renovate[bot]
5ea032cf81 fix(deps): update github.com/tmc/langchaingo digest to e510561 (#695)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-28 23:43:07 +02:00
ci-robbot [bot]
1e6542a5ca ⬆️ Update ggerganov/whisper.cpp (#696)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-28 23:42:57 +02:00
ci-robbot [bot]
218e7bc8df ⬆️ Update nomic-ai/gpt4all (#691)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-28 23:42:46 +02:00
Ettore Di Giacinto
a06e467a1a Update README.md (#694) 2023-06-28 19:38:35 +02:00
mudler
730645b3c6 Update README 2023-06-28 19:27:56 +02:00
mudler
3dd632fd5a Update .env 2023-06-28 18:28:53 +02:00
renovate[bot]
365d4d3756 fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to 8e31841 (#685)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-28 11:46:36 +02:00
renovate[bot]
d22053a5e6 fix(deps): update github.com/tmc/langchaingo digest to 633853b (#689)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-28 08:46:51 +02:00
renovate[bot]
e3ac561d30 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 5827536 (#693)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-28 08:45:05 +02:00
ci-robbot [bot]
69367a7948 ⬆️ Update go-skynet/go-ggml-transformers.cpp (#692)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-27 23:54:51 +02:00
ci-robbot [bot]
85a38a8122 ⬆️ Update go-skynet/go-llama.cpp (#690)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-27 23:48:52 +02:00
Ettore Di Giacinto
d2cf1954fc fix: detect TARGETARCH if not present (#688)
Signed-off-by: mudler <mudler@localai.io>
2023-06-27 16:31:02 +02:00
renovate[bot]
70712e3445 fix(deps): update github.com/go-skynet/go-llama.cpp digest to f104111 (#630)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-27 09:01:25 +02:00
ci-robbot [bot]
85eea1189e ⬆️ Update ggerganov/whisper.cpp (#682)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-27 09:01:09 +02:00
ci-robbot [bot]
ed2344ab9b ⬆️ Update nomic-ai/gpt4all (#681)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-27 09:00:57 +02:00
Samuel Maynard
935bd51510 Dockerfile: adds a warning if $TARGETARCH or $TARGETVARIANT missing (#686) 2023-06-27 09:00:43 +02:00
Ettore Di Giacinto
3593cb0c87 feat: update llama, enable NUMA (#684) 2023-06-27 09:00:10 +02:00
Samuel Maynard
e130b208ab Docker preserve sources (#658) 2023-06-26 22:34:03 +02:00
Ettore Di Giacinto
02136531a3 fix: return index and delta in stream token (#680)
Signed-off-by: mudler <mudler@localai.io>
2023-06-26 18:49:36 +02:00
Ettore Di Giacinto
d3a486a4f8 feat: Add '/version' endpoint and display it in the CLI (#679) 2023-06-26 15:12:43 +02:00
Ettore Di Giacinto
2b957df56c fix: rename /models/list to /models/available (#678) 2023-06-26 15:12:26 +02:00
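
The two commits above can be exercised directly; a minimal sketch:

    curl http://localhost:8080/version            # reports the build shown by the CLI
    curl http://localhost:8080/models/available   # renamed from /models/list
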
Matthew Koski
c2dec387aa Update entrypoint.sh to use $THREADS for parallel compilation on start if present (#673) 2023-06-26 14:48:24 +02:00
ci-robbot [bot]
a1ed6fbd96 ⬆️ Update ggerganov/whisper.cpp (#672)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-26 12:26:02 +02:00
renovate[bot]
ad81e37672 fix(deps): update github.com/tmc/langchaingo digest to 7ea7345 (#675)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-26 12:25:50 +02:00
Ettore Di Giacinto
78f3c3da48 refactor: consolidate usage of GetURI (#674)
Signed-off-by: mudler <mudler@localai.io>
2023-06-26 12:25:38 +02:00
mudler
d18f85df46 fix: add tags
Signed-off-by: mudler <mudler@localai.io>
2023-06-25 23:03:58 +02:00
Ettore Di Giacinto
6213da330a fix: add omitempty where needed (#671) 2023-06-25 22:51:02 +02:00
renovate[bot]
53f8d73101 Update github.com/tmc/langchaingo digest to 4d9d55d (#665)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-25 16:35:59 +02:00
renovate[bot]
2cfc9a2706 Update github.com/go-skynet/go-ggml-transformers.cpp digest to a459d27 (#629)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-25 12:33:53 +02:00
ci-robbot [bot]
0ba94bf33f ⬆️ Update nomic-ai/gpt4all (#668)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-25 09:26:17 +02:00
renovate[bot]
06570d1e41 fix(deps): update module github.com/otiai10/openaigo to v1.4.0 (#666)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-24 20:48:54 +02:00
ci-robbot [bot]
be1667c387 ⬆️ Update nomic-ai/gpt4all (#657)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-24 08:33:52 +02:00
ci-robbot [bot]
eb39d908d0 ⬆️ Update go-skynet/go-llama.cpp (#634)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-24 08:33:40 +02:00
Ettore Di Giacinto
60db5957d3 Gallery repository (#663)
Signed-off-by: mudler <mudler@localai.io>
2023-06-24 08:18:17 +02:00
Ettore Di Giacinto
2a45a99737 example(slack-qa-bot): Add slack QA bot example (#654)
Signed-off-by: mudler <mudler@localai.io>
2023-06-22 18:07:15 +02:00
renovate[bot]
91a67d5ee0 fix(deps): update module github.com/sashabaranov/go-openai to v1.11.3 (#606)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-22 17:54:05 +02:00
ci-robbot [bot]
55cf9d5792 ⬆️ Update nomic-ai/gpt4all (#650)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-22 17:53:32 +02:00
Ettore Di Giacinto
a7bb029d23 feat: add tts with go-piper (#649)
Signed-off-by: mudler <mudler@localai.io>
2023-06-22 17:53:10 +02:00
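
A minimal sketch of the /tts endpoint added with go-piper; the voice model file name is an illustrative placeholder:

    curl http://localhost:8080/tts \
      -H "Content-Type: application/json" \
      -d '{"model": "en-us-kathleen-low.onnx", "input": "Hello from LocalAI"}' \
      --output hello.wav
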
ci-robbot [bot]
cc31c58235 ⬆️ Update go-skynet/go-ggml-transformers.cpp (#644)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-21 08:58:20 +02:00
renovate[bot]
4e831307a8 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 09ae04c (#632)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-21 08:58:08 +02:00
ci-robbot [bot]
445067f6ad ⬆️ Update donomii/go-rwkv.cpp (#600)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-21 08:57:15 +02:00
ci-robbot [bot]
11bfd0de76 ⬆️ Update nomic-ai/gpt4all (#635)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-21 08:56:41 +02:00
mudler
dc7b8ad23b Update README 2023-06-20 23:32:45 +02:00
Ettore Di Giacinto
2f5feb4841 Add LowVRAM option parameter (#642) 2023-06-20 20:33:47 +02:00
renovate[bot]
4e3c319e83 fix(deps): update module github.com/valyala/fasthttp to v1.48.0 (#639)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-20 16:24:29 +02:00
ci-robbot [bot]
d0025a7483 ⬆️ Update go-skynet/go-ggml-transformers.cpp (#633)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-20 08:47:21 +02:00
ci-robbot [bot]
db0b29be51 ⬆️ Update nomic-ai/gpt4all (#628)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-20 00:12:24 +02:00
Ettore Di Giacinto
7da07e8af9 example(k8sgpt): Add k8sgpt example (#631)
Signed-off-by: mudler <mudler@localai.io>
2023-06-20 00:11:01 +02:00
renovate[bot]
6da892758b fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 2b6cc99 (#627)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-19 23:46:03 +02:00
renovate[bot]
5e88930475 fix(deps): update github.com/tmc/langchaingo digest to 1b3da44 (#583)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-19 23:45:49 +02:00
renovate[bot]
97b02f9765 fix(deps): update github.com/donomii/go-rwkv.cpp digest to f5a8c45 (#609)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-19 19:20:19 +02:00
renovate[bot]
7ee1b10dfb fix(deps): update module github.com/urfave/cli/v2 to v2.25.7 (#577)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-19 19:20:06 +02:00
renovate[bot]
3932c15823 fix(deps): update module github.com/gofiber/fiber/v2 to v2.47.0 (#625)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-19 17:16:03 +02:00
renovate[bot]
618fd1d417 fix(deps): update module github.com/otiai10/openaigo to v1.2.0 (#624)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-19 14:32:26 +02:00
renovate[bot]
151a6cf4c2 fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to 32b9223 (#617)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-19 00:09:44 +02:00
ci-robbot [bot]
1766de814c ⬆️ Update go-skynet/go-ggml-transformers.cpp (#619)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-18 23:49:38 +02:00
ci-robbot [bot]
0b351d6da2 ⬆️ Update nomic-ai/gpt4all (#613)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-18 23:48:07 +02:00
renovate[bot]
6623ce9942 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to fd419ca (#623)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-18 22:50:58 +02:00
renovate[bot]
1dbc190fa6 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 7ad833b (#616)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-18 21:46:03 +02:00
renovate[bot]
46b9445fa6 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 42e8049 (#594)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-18 14:42:46 +02:00
Ettore Di Giacinto
d3d3187e51 feat: fix CUDA images and update go-llama to use full GPU offloading (#618)
Signed-off-by: mudler <mudler@localai.io>
Co-authored-by: mudler <mudler@localai.io>
2023-06-18 08:27:29 +02:00
Ettore Di Giacinto
6c94f3cd67 Revert "Docker preserve sources" (#620) 2023-06-17 23:22:04 +02:00
Ettore Di Giacinto
295f3030a9 feat: add typical_p to model parameters (#598)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-06-14 19:33:20 +02:00
renovate[bot]
1ba88258a9 fix(deps): update github.com/donomii/go-rwkv.cpp digest to a57bca3 (#582)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-14 19:14:50 +02:00
Ettore Di Giacinto
10ddd72b58 fix: set default batch size (#597) 2023-06-14 19:09:27 +02:00
Ettore Di Giacinto
1b7990d5d9 deps: switch back to nomic-ai/gpt4all (#595) 2023-06-14 18:07:05 +02:00
renovate[bot]
9f50b8024d fix(deps): update github.com/go-skynet/go-llama.cpp digest to a796025 (#593)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-14 16:28:13 +02:00
Samuel Maynard
7b9dcb05d4 Docker preserve sources (#590) 2023-06-14 13:26:27 +02:00
Ettore Di Giacinto
e37361985c deps: update gpt4all bindings, fix search path on new versions (#592) 2023-06-14 13:24:53 +02:00
ci-robbot [bot]
467e88d305 ⬆️ Update donomii/go-rwkv.cpp (#527)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-14 12:56:20 +02:00
renovate[bot]
fe4a8fbc74 fix(deps): update module github.com/otiai10/openaigo to v1.1.1 (#591)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-14 12:14:47 +02:00
renovate[bot]
2328bbaea1 fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to 01b8436 (#558)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-14 09:34:14 +02:00
renovate[bot]
4cc834adcd fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 8953b7f (#573)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-14 07:06:20 +02:00
renovate[bot]
5e49ff5072 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 5f16204 (#563)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-14 07:06:09 +02:00
ci-robbot [bot]
f98680a18a ⬆️ Update go-skynet/go-llama.cpp (#584)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-13 23:05:03 +02:00
Ettore Di Giacinto
2880221bb3 ci: drop macOS dependencies (#581)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-06-13 09:31:11 +02:00
Samuel Maynard
27887c74d8 Dockerfile: unify duplicated requirements into single step (#580) 2023-06-13 08:39:38 +02:00
ci-robbot [bot]
6306885fe7 ⬆️ Update go-skynet/go-llama.cpp (#561)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-11 15:44:06 +02:00
Ettore Di Giacinto
2a11f16c0f fix: copy metal file from build (#564) 2023-06-11 01:07:39 +02:00
Ettore Di Giacinto
2297504fb3 example(telegram): drop mongodb (#565) 2023-06-11 01:07:19 +02:00
ci-robbot [bot]
897ac6e4e5 ⬆️ Update go-skynet/go-ggml-transformers.cpp (#562)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-11 01:01:46 +02:00
renovate[bot]
f20c12a1c0 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to a9c2f47 (#560)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-10 22:22:31 +02:00
renovate[bot]
5dea31385c fix(deps): update github.com/go-skynet/go-llama.cpp digest to a12ce51 (#555)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-10 10:17:32 +02:00
renovate[bot]
58f0f63926 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to d3ba129 (#548)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-10 10:17:11 +02:00
renovate[bot]
ed2bf48a6d fix(deps): update github.com/tmc/langchaingo digest to 06cb7b5 (#525)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-10 10:16:50 +02:00
ci-robbot [bot]
e6c8ebb65c ⬆️ Update go-skynet/go-llama.cpp (#554)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-10 01:35:58 +02:00
renovate[bot]
119733892e fix(deps): update github.com/go-skynet/go-bert.cpp digest to 6069103 (#534)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-10 00:09:38 +02:00
ci-robbot [bot]
437f563128 ⬆️ Update go-skynet/go-bert.cpp (#540)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-10 00:09:14 +02:00
renovate[bot]
ecad2261c8 fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to dabd6cd (#535)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-09 21:01:09 +02:00
renovate[bot]
182323a7fb fix(deps): update github.com/donomii/go-rwkv.cpp digest to d2b25a4 (#552)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-09 19:16:53 +02:00
renovate[bot]
30d06f9b12 fix(deps): update github.com/go-skynet/go-llama.cpp digest to a921173 (#551)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-09 18:15:24 +02:00
ci-robbot [bot]
6bb562272d ⬆️ Update go-skynet/go-llama.cpp (#546)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-09 01:13:15 +02:00
Ettore Di Giacinto
3b3164b039 examples(telegram): add (#547) 2023-06-09 00:45:44 +02:00
renovate[bot]
6f0bdbd01c fix(deps): update github.com/donomii/go-rwkv.cpp digest to fb8b955 (#533)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-09 00:23:19 +02:00
renovate[bot]
ce2a1799ab fix(deps): update github.com/go-skynet/go-llama.cpp digest to 672fb05 (#529)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-09 00:22:41 +02:00
renovate[bot]
d088bd3034 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 47fbc0e (#528)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-09 00:20:15 +02:00
ci-robbot [bot]
806e4c3a63 ⬆️ Update go-skynet/go-ggml-transformers.cpp (#539)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-09 00:19:58 +02:00
renovate[bot]
8532ce2002 fix(deps): update module github.com/sashabaranov/go-openai to v1.10.1 (#544)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-08 21:33:32 +02:00
Ettore Di Giacinto
84946e9275 feat: display download progress when installing models (#543) 2023-06-08 21:33:18 +02:00
Ettore Di Giacinto
c9bbba4872 tests: add llama tests with openllama (#538)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-06-08 00:36:11 +02:00
Ettore Di Giacinto
ea9a651573 examples: simplify langchain-python (#541) 2023-06-08 00:05:17 +02:00
Ettore Di Giacinto
5abbb134d9 feat: extend model configuration for llama.cpp (#536) 2023-06-07 21:46:19 +02:00
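
A sketch of the model definition YAML that #536 extends, using field names from LocalAI's config format; the values are illustrative:

    cat > models/gpt-3.5-turbo.yaml <<'EOF'
    name: gpt-3.5-turbo
    parameters:
      model: ggml-openllama-3b.bin
      temperature: 0.7
      top_p: 0.9
    context_size: 1024
    threads: 4
    f16: true
    EOF
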
renovate[bot]
694dd4ad9e fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to bd765bb (#518)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-07 09:08:51 +02:00
renovate[bot]
4af48e548a fix(deps): update github.com/donomii/go-rwkv.cpp digest to d5f48f6 (#526)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-07 09:08:35 +02:00
Ettore Di Giacinto
079dc197c7 Update README.md 2023-06-06 23:44:23 +02:00
renovate[bot]
77613169da fix(deps): update github.com/go-skynet/go-llama.cpp digest to 37ef81d (#523)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-06 20:54:55 +02:00
ci-robbot [bot]
2630e251ce ⬆️ Update ggerganov/whisper.cpp (#520)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-06 19:16:42 +02:00
ci-robbot [bot]
0909a0637e feat: update llama.cpp to support k-quants (#521)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-06 18:15:17 +02:00
Ettore Di Giacinto
d62aef2016 feat: add experimental support for falcon-7b (#516)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-06-06 17:23:19 +02:00
ci-robbot [bot]
25e9483add ⬆️ Update donomii/go-rwkv.cpp (#511)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-06 16:02:09 +02:00
renovate[bot]
c1be2bdeeb fix(deps): update github.com/donomii/go-rwkv.cpp digest to 1e18b24 (#489)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-06 15:34:18 +02:00
renovate[bot]
49a2b30350 fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to 57543c1 (#514)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-06 11:07:13 +02:00
renovate[bot]
472cd0fc2f fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 266f13a (#500)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-06 10:04:44 +02:00
renovate[bot]
dc9c43b6dd fix(deps): update github.com/go-skynet/go-llama.cpp digest to cca84ed (#513)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-06 10:04:17 +02:00
renovate[bot]
e1e23a6302 fix(deps): update github.com/mudler/go-stable-diffusion digest to d89260f (#506)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-06 00:35:16 +02:00
ci-robbot [bot]
2e916abe15 ⬆️ Update go-skynet/go-llama.cpp (#512)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-06 00:35:01 +02:00
renovate[bot]
3ebdb9b67e fix(deps): update module github.com/sashabaranov/go-openai to v1.10.0 (#510)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-05 21:24:04 +02:00
renovate[bot]
01f5046caf fix(deps): update github.com/tmc/langchaingo digest to 4afed6d (#508)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-05 17:22:21 +02:00
renovate[bot]
ac17d544e0 fix(deps): update github.com/go-skynet/go-llama.cpp digest to b1a4256 (#505)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-05 17:21:57 +02:00
Ettore Di Giacinto
b447a2a719 feat: support upscaled image generation with esrgan (#509) 2023-06-05 17:21:38 +02:00
Ettore Di Giacinto
ec4fd1d219 fix gpt4all, add metal GPU support (#507) 2023-06-05 14:26:20 +02:00
Ettore Di Giacinto
b503725dc7 fix: downgrade gpt4all (#503)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-06-05 09:42:50 +02:00
ci-robbot [bot]
e873fc7b71 ⬆️ Update go-skynet/go-ggml-transformers.cpp (#501)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-05 00:07:48 +02:00
renovate[bot]
3070e9503a fix(deps): update github.com/go-skynet/bloomz.cpp digest to 1834e77 (#414)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-04 22:53:09 +02:00
Ettore Di Giacinto
d9130def39 fix: correctly assign ffmpeg image tag (#499) 2023-06-04 21:07:12 +02:00
renovate[bot]
cdf0a6e766 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to bbe195e (#497)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-04 19:37:08 +02:00
renovate[bot]
a0e0ac887f fix(deps): update github.com/go-skynet/go-bert.cpp digest to 0548994 (#451)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-04 19:36:56 +02:00
Ettore Di Giacinto
4ddc956462 deps: update rwkv, switch back to upstream (#494) 2023-06-04 17:25:35 +02:00
renovate[bot]
203fd7b2e8 fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to 6fb862c (#490)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-04 14:16:09 +02:00
Ettore Di Giacinto
1bb85377e4 feat: add ffmpeg images (#492)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-06-04 14:00:21 +02:00
renovate[bot]
3892fafc2d fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to bc624f5 (#486)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-04 01:57:20 +02:00
renovate[bot]
8a34679a13 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 3f10005 (#485)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-04 01:57:10 +02:00
ci-robbot [bot]
b64c1d8ac1 ⬆️ Update nomic-ai/gpt4all (#488)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-04 01:56:59 +02:00
Ettore Di Giacinto
8fb86c13bc feat: Enable static builds for Linux binaries (#487)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-06-03 23:46:07 +02:00
ci-robbot [bot]
05edf59c91 ⬆️ Update nomic-ai/gpt4all (#483)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-03 18:30:30 +02:00
ci-robbot [bot]
b9f1f85433 ⬆️ Update go-skynet/go-llama.cpp (#482)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-03 18:30:18 +02:00
renovate[bot]
f8e2e76698 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 25ee51e (#478)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-03 14:25:55 +02:00
ci-robbot [bot]
29856f7527 ⬆️ Update nomic-ai/gpt4all (#479)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-03 14:25:42 +02:00
Sébastien Prud'homme
aa6cdf16c8 fix: display help with correct default values (#481)
Signed-off-by: Sébastien Prud'homme <sebastien.prudhomme@gmail.com>
2023-06-03 14:25:30 +02:00
Samuel Maynard
96794851b3 feat: add support for Stream: true to completionEndpoint (#465) 2023-06-03 00:27:03 +02:00
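
A minimal sketch of streaming from the completion endpoint per #465; the model name is a placeholder, and tokens arrive as OpenAI-style server-sent events:

    curl http://localhost:8080/v1/completions \
      -H "Content-Type: application/json" \
      -d '{"model": "gpt-3.5-turbo", "prompt": "Once upon a time", "stream": true}'
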
renovate[bot]
51a1a721b3 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to be9f6ad (#477)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-02 19:31:21 +02:00
renovate[bot]
695f3e5758 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 031d714 (#464)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-02 13:32:27 +02:00
Ettore Di Giacinto
e875c1f64a fix: fix the make run target (#476)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-06-02 09:57:01 +02:00
Ettore Di Giacinto
19f92d7d55 fix: Bump and fix rwkv build (#475) 2023-06-02 08:53:57 +02:00
Ettore Di Giacinto
5a8dd40918 feat: Enable stablediffusion by default in container images (#474)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-06-02 08:53:45 +02:00
renovate[bot]
1b766ab89c fix(deps): update module github.com/urfave/cli/v2 to v2.25.5 (#399)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-01 23:44:17 +02:00
ci-robbot [bot]
a63d6f6364 ⬆️ Update ggerganov/whisper.cpp (#473)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-01 23:44:05 +02:00
ci-robbot [bot]
4422ca2235 ⬆️ Update go-skynet/go-ggml-transformers.cpp (#459)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-01 23:43:15 +02:00
Ettore Di Giacinto
78ad4813df feat: Update gpt4all, support multiple implementations in runtime (#472)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-06-01 23:38:52 +02:00
renovate[bot]
42d753846e fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to 5b9e59b (#469)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-01 16:23:34 +02:00
ci-robbot [bot]
5c018c0437 ⬆️ Update ggerganov/whisper.cpp (#468)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-01 16:23:16 +02:00
renovate[bot]
07cee3f6ef fix(deps): update github.com/donomii/go-rwkv.cpp digest to 3b28b09 (#467)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-01 16:21:32 +02:00
ci-robbot [bot]
c5cb2ff268 ⬆️ Update go-skynet/go-bert.cpp (#463)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-06-01 16:21:13 +02:00
Aisuko
c8a4a4f4e9 feat: Add new test cases for LoadConfigs (#447)
Signed-off-by: Aisuko <urakiny@gmail.com>
2023-06-01 16:20:45 +02:00
Pavel Zloi
3ba07a5928 feat: add LangChainGo Huggingface backend (#446)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-06-01 12:00:06 +02:00
renovate[bot]
7282668da1 fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to 3f7436e (#466)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-01 09:59:11 +02:00
renovate[bot]
451e803444 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 10caf37 (#455)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-06-01 01:05:24 +02:00
Ettore Di Giacinto
d70c55231b docs: Update README with model gallery url 2023-06-01 01:04:07 +02:00
ci-robbot [bot]
275c124701 ⬆️ Update go-skynet/go-llama.cpp (#458)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-31 22:59:02 +02:00
ci-robbot [bot]
87a6bbd251 ⬆️ Update ggerganov/whisper.cpp (#462)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-31 22:58:44 +02:00
renovate[bot]
8fd4c7afcc fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to ce6f747 (#450)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-31 19:55:46 +02:00
Sébastien Prud'homme
eee3f83d98 ci: build Docker image variants (#456)
Signed-off-by: Sébastien Prud'homme <sebastien.prudhomme@gmail.com>
2023-05-31 19:51:02 +02:00
renovate[bot]
28ee180283 fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to 17b0655 (#454)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-31 14:59:47 +02:00
renovate[bot]
432b0223f1 fix(deps): update github.com/donomii/go-rwkv.cpp digest to c43cdf5 (#453)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-31 14:59:36 +02:00
renovate[bot]
16050a32c7 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 5f94020 (#435)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-31 12:00:01 +02:00
renovate[bot]
898ca62b55 fix(deps): update module github.com/onsi/ginkgo/v2 to v2.9.7 (#445)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-31 08:49:47 +02:00
ci-robbot [bot]
5623a7c331 ⬆️ Update go-skynet/go-bert.cpp (#418)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-31 00:45:07 +02:00
ci-robbot [bot]
9e3ca6d1a3 ⬆️ Update nomic-ai/gpt4all (#422)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-31 00:44:52 +02:00
ci-robbot [bot]
fa58965bbc ⬆️ Update ggerganov/whisper.cpp (#419)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-30 23:04:53 +02:00
renovate[bot]
b8ef9028f1 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 62b6c07 (#441)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-30 23:04:34 +02:00
ci-robbot [bot]
f711d35377 ⬆️ Update go-skynet/go-ggml-transformers.cpp (#442)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-30 23:04:10 +02:00
ci-robbot [bot]
abd3c62194 ⬆️ Update go-skynet/go-llama.cpp (#443)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-30 23:03:48 +02:00
Ettore Di Giacinto
2f3c3b1867 examples: keep old example around (#439) 2023-05-30 18:34:43 +02:00
Ettore Di Giacinto
11af09faf3 examples: use gallery in chatbot-ui, add flowise (#438)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-30 18:29:28 +02:00
Ettore Di Giacinto
577d36b596 images: cleanup, drop .dev Dockerfile (#437)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-30 15:58:10 +02:00
Ettore Di Giacinto
6d71dd7d98 fix: do not build from the same container (#434)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-30 15:53:37 +02:00
Aisuko
49ce24984c feat: Add more test-cases and remove dev container (#433)
Signed-off-by: Aisuko <urakiny@gmail.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-05-30 13:01:55 +02:00
Ettore Di Giacinto
f401181cb5 fix: switch back to upstream for rwkv bindings (#432) 2023-05-30 12:35:32 +02:00
renovate[bot]
ff8295a97c fix(deps): update github.com/go-skynet/go-llama.cpp digest to 4afcaf2 (#428)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-30 12:22:25 +02:00
Ettore Di Giacinto
aacb96df7a fix: correctly handle errors from App constructor (#430)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-30 12:00:30 +02:00
renovate[bot]
ca9115d6d0 fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to 13ccc22 (#427)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-30 11:34:13 +02:00
Ettore Di Giacinto
2c91837865 docs: fix link (#426) 2023-05-29 23:13:42 +02:00
Sébastien Prud'homme
2272324fd6 feat: add CuBLAS support in Docker images (#403)
Signed-off-by: Sébastien Prud'homme <sebastien.prudhomme@gmail.com>
2023-05-29 23:12:27 +02:00
Sébastien Prud'homme
171b50bb1c ci: fix typo in variable name (#424)
Signed-off-by: Sébastien Prud'homme <sebastien.prudhomme@gmail.com>
2023-05-29 23:11:29 +02:00
ci-robbot [bot]
04d6bd7922 ⬆️ Update go-skynet/go-ggml-transformers.cpp (#421)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-29 23:10:43 +02:00
ci-robbot [bot]
2abdac7003 ⬆️ Update go-skynet/bloomz.cpp (#417)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-29 23:09:42 +02:00
Ettore Di Giacinto
190f01dbe3 docs: update docs/license(clarification) and point to new website (#415)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-29 23:09:19 +02:00
renovate[bot]
18a701355c fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to 695f97b (#410)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-29 16:46:36 +02:00
renovate[bot]
3911957d34 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 4bd3910 (#393)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-29 15:35:33 +02:00
Ettore Di Giacinto
f5146bde18 feat: add clblast support (#412)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-29 15:17:38 +02:00
renovate[bot]
b57ea10c94 fix(deps): update github.com/go-skynet/go-bert.cpp digest to 771b4a0 (#408)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-29 14:42:21 +02:00
renovate[bot]
821cfed6c0 fix(deps): update github.com/donomii/go-rwkv.cpp digest to ccb05c3 (#407)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-29 14:42:08 +02:00
renovate[bot]
728f297bb8 fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to d7c936b (#405)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-29 09:16:53 +02:00
renovate[bot]
4c0013fd79 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 9eb81cb (#390)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-29 09:16:32 +02:00
Ettore Di Giacinto
65d06285d8 Bump rwkv (#402) 2023-05-28 22:59:25 +02:00
renovate[bot]
e0d1a8995d fix(deps): update module github.com/sashabaranov/go-openai to v1.9.5 (#400)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-28 08:40:24 +02:00
ci-robbot [bot]
425beea6c5 ⬆️ Update ggerganov/whisper.cpp (#398)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-27 22:30:24 +02:00
ci-robbot [bot]
cdfb930a69 ⬆️ Update go-skynet/go-ggml-transformers.cpp (#385)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-27 22:30:11 +02:00
renovate[bot]
09641b9790 fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to 9b92684 (#392)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-27 19:42:51 +02:00
renovate[bot]
aac9a57500 fix(deps): update module github.com/imdario/mergo to v0.3.16 (#394)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-27 19:42:38 +02:00
Ettore Di Giacinto
59f7953249 docs(examples): add AutoGPT (#397) 2023-05-27 19:42:24 +02:00
Ettore Di Giacinto
217dbb448e feat: allow setting a prompt cache path and enable saving state (#395)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-27 14:29:11 +02:00
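
A sketch of enabling the prompt cache from #395 in a model YAML; the field names follow LocalAI's later-documented config and are assumptions here:

    cat >> models/gpt-3.5-turbo.yaml <<'EOF'
    prompt_cache_path: "cache/gpt-3.5-turbo"
    prompt_cache_all: true
    EOF
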
Ettore Di Giacinto
76c881043e feat: allow preloading models before startup via env var or configs (#391) 2023-05-27 09:26:33 +02:00
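
A sketch of preloading a model at startup per #391, assuming the PRELOAD_MODELS variable and gallery-style URLs documented for this period:

    docker run -p 8080:8080 \
      -e PRELOAD_MODELS='[{"url": "github:go-skynet/model-gallery/gpt4all-j.yaml"}]' \
      -v $PWD/models:/models \
      quay.io/go-skynet/local-ai:latest
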
ci-robbot [bot]
835a20610b ⬆️ Update ggerganov/whisper.cpp (#372)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-26 22:43:11 +02:00
ci-robbot [bot]
74e808b8c3 ⬆️ Update nomic-ai/gpt4all (#389)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-26 22:28:14 +02:00
Ettore Di Giacinto
53c83f2fae image: add HEALTHCHECK (#388)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-26 18:34:02 +02:00
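
With the HEALTHCHECK from #388 baked into the image, Docker tracks a health state; a minimal sketch of observing it (the container name is arbitrary):

    docker run -d --name local-ai -p 8080:8080 quay.io/go-skynet/local-ai:latest
    docker inspect -f '{{.State.Health.Status}}' local-ai   # starting / healthy / unhealthy
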
renovate[bot]
62365fa31d fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to a6f3e94 (#387)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-26 18:32:00 +02:00
Ettore Di Giacinto
a44c8e9b4e ci: set flakeAttempts (#386) 2023-05-26 15:28:26 +02:00
ci-robbot [bot]
320e430c7f ⬆️ Update nomic-ai/gpt4all (#384)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-26 09:57:03 +02:00
renovate[bot]
8615646827 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to d1ff713 (#383)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-26 00:43:48 +02:00
renovate[bot]
925d7c3057 fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to 4f18e5e (#381)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-26 00:43:31 +02:00
renovate[bot]
e350924ac1 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to afe3870 (#382)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-26 00:00:03 +02:00
ci-robbot [bot]
e891a46740 ⬆️ Update nomic-ai/gpt4all (#362)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-25 22:46:44 +02:00
renovate[bot]
cd9285bbe6 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 63f5763 (#378)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-25 22:46:34 +02:00
Ettore Di Giacinto
917ff13c86 docs: Update README 2023-05-25 22:43:25 +02:00
Ettore Di Giacinto
2a40f44023 docs: Add Mods to projects list (#377) 2023-05-25 18:18:02 +02:00
renovate[bot]
c22d06c780 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 6e7e69a (#371)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-25 13:35:32 +02:00
ci-robbot [bot]
babbd23744 ⬆️ Update go-skynet/go-ggml-transformers.cpp (#363)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-25 00:37:36 +02:00
ci-robbot [bot]
eee41cbe2b ⬆️ Update go-skynet/go-llama.cpp (#373)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-25 00:36:57 +02:00
Ettore Di Giacinto
bf54b78270 feat: add /healthz and /readyz endpoints for kubernetes (#374) 2023-05-24 22:19:13 +02:00
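
The endpoints from #374 are plain GET probes, suitable as httpGet targets for Kubernetes liveness and readiness checks; a minimal sketch:

    curl -f http://localhost:8080/healthz   # liveness
    curl -f http://localhost:8080/readyz    # readiness
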
renovate[bot]
589dfae89f fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to 5e2b340 (#368)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-24 21:40:09 +02:00
Ettore Di Giacinto
c8cc197ddd feat: add static builds (#370) 2023-05-24 16:42:24 +02:00
Robert Gracey
76c561a908 chore: update README to include new Helm values (#369) 2023-05-24 16:27:54 +02:00
renovate[bot]
04797a80e1 fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to c4c581f (#367)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-24 12:49:51 +02:00
renovate[bot]
29583a5ea5 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to b36a520 (#352)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-24 11:40:36 +02:00
renovate[bot]
d12c1f7a4a fix(deps): update github.com/go-skynet/go-llama.cpp digest to dcf8da6 (#357)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-24 11:40:16 +02:00
Al
505572dae8 Add autogpt4all LocalAI usage example to links (#259) 2023-05-24 11:39:56 +02:00
renovate[bot]
3ddea794e1 fix(deps): update github.com/go-skynet/go-ggml-transformers.cpp digest to f89d7c2 (#361)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-24 10:08:36 +02:00
renovate[bot]
10e03bde35 fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to 77eab3f (#356)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-23 23:10:27 +02:00
ci-robbot [bot]
e969604d75 ⬆️ Update go-skynet/go-llama.cpp (#365)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-23 23:10:06 +02:00
ci-robbot [bot]
c822e18f0d ⬆️ Update ggerganov/whisper.cpp (#364)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-23 23:09:48 +02:00
Ettore Di Giacinto
891af1c524 docs: update README 2023-05-23 22:09:51 +02:00
Ettore Di Giacinto
5807d0b766 docs: update README 2023-05-23 22:07:49 +02:00
Ettore Di Giacinto
9decd0813c feat: update go-gpt2 (#359)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-23 21:47:47 +02:00
Ettore Di Giacinto
43d3fb3eba ci: add binary releases pipelines (#358) 2023-05-23 17:12:48 +02:00
Ettore Di Giacinto
f5f8c687be examples: add privateGPT example (#355) 2023-05-23 10:32:34 +02:00
ci-robbot [bot]
9e5cd0f10b ⬆️ Update nomic-ai/gpt4all (#348)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-23 09:16:56 +02:00
renovate[bot]
231a3e7c02 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 2ce2220 (#351)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-23 00:59:48 +02:00
renovate[bot]
57172e2e30 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 5ca8767 (#350)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-23 00:06:29 +02:00
Ettore Di Giacinto
043399dd07 fix: re-enable start API message (#349)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-23 00:06:13 +02:00
renovate[bot]
6b19356740 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to c8c95ab (#344)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-22 19:04:21 +02:00
ci-robbot [bot]
1cbe6a7067 ⬆️ Update nomic-ai/gpt4all (#345)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-22 19:02:56 +02:00
Ettore Di Giacinto
2912f9870f Update README.md 2023-05-22 00:13:16 +02:00
Ettore Di Giacinto
9630be56e1 fix: make sure ca-certificates is present in the container images (#342) 2023-05-21 15:24:22 +02:00
Robert Hambrock
4aa78843c0 fix: spec compliant instantiation and termination of streams (#341) 2023-05-21 15:24:04 +02:00
renovate[bot]
b36d9f3776 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to aba1147 (#333)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-21 14:38:52 +02:00
Ettore Di Giacinto
6f54cab3f0 feat: allow setting CORS (#339) 2023-05-21 14:38:25 +02:00
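
A sketch of enabling CORS per #339, assuming the environment variable names later documented by LocalAI:

    docker run -p 8080:8080 \
      -e CORS=true -e CORS_ALLOW_ORIGINS="*" \
      quay.io/go-skynet/local-ai:latest
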
Ettore Di Giacinto
ed5df1e68e examples: remove threads from example models (#337) 2023-05-21 12:25:24 +02:00
mudler
3c07e11e73 docs: update README 2023-05-21 00:45:24 +02:00
mudler
91bdad1d12 docs: fix typo 2023-05-21 00:41:11 +02:00
ci-robbot [bot]
482a83886e ⬆️ Update ggerganov/whisper.cpp (#332)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-21 00:40:17 +02:00
renovate[bot]
b8f52d67e1 fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to 041be06 (#331)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-20 22:21:07 +02:00
renovate[bot]
9ed82199c5 fix(deps): update github.com/go-skynet/go-llama.cpp digest to ccf23ad (#330)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-20 20:42:59 +02:00
ci-robbot [bot]
864aaf8c4d ⬆️ Update go-skynet/go-llama.cpp (#327)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-20 20:42:29 +02:00
renovate[bot]
c7056756d5 fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to 429b978 (#329)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-20 19:50:28 +02:00
ci-robbot [bot]
93cc8569c3 ⬆️ Update ggerganov/whisper.cpp (#326)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-20 19:50:01 +02:00
Ettore Di Giacinto
05a3d569b0 feat: allow overriding model config (#323) 2023-05-20 17:03:53 +02:00
renovate[bot]
7bc08797f9 fix(deps): update module github.com/gofiber/fiber/v2 to v2.46.0 (#308)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-20 22:50:17 +10:00
renovate[bot]
5b22704799 fix(deps): update github.com/go-skynet/go-llama.cpp digest to a298043 (#322)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-20 10:55:22 +02:00
ci-robbot [bot]
9609e4392b ⬆️ Update go-skynet/go-llama.cpp (#321)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-20 10:53:22 +02:00
Aisuko
d0c033d09b feat: add PR template and stale configuration (#316)
Signed-off-by: Aisuko <urakiny@gmail.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-05-20 09:10:20 +02:00
Ettore Di Giacinto
4e381cbe92 feat: support shorter urls for github repositories (#314) 2023-05-20 09:06:30 +02:00
renovate[bot]
ffaf3b1d36 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 3ee537e (#313)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-20 00:30:49 +02:00
ci-robbot [bot]
465a3b755d ⬆️ Update nomic-ai/gpt4all (#312)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-20 00:30:36 +02:00
ci-robbot [bot]
91fc52bfb7 ⬆️ Update go-skynet/go-llama.cpp (#296)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-20 00:27:13 +02:00
mudler
b425954b9e docs: Update README 2023-05-19 19:42:40 +02:00
Ettore Di Giacinto
2e64ed6255 docs: Update README (#311) 2023-05-19 19:33:53 +02:00
Ettore Di Giacinto
bf3d936aea fix: add LLAMA_CUBLAS on BUILD_TYPE=cublas (#310) 2023-05-19 17:11:28 +02:00
Aisuko
19deea986a fix: missing model path in launch.json (#309)
Signed-off-by: Aisuko <urakiny@gmail.com>
2023-05-19 16:39:48 +02:00
Ettore Di Giacinto
aa7a18f131 github: add ISSUE_TEMPLATE (#307) 2023-05-19 11:46:53 +02:00
ci-robbot [bot]
837ce2cb31 ⬆️ Update nomic-ai/gpt4all (#295)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-19 10:37:12 +02:00
renovate[bot]
cadce540f9 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 914519e (#302)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-19 10:00:07 +02:00
Ettore Di Giacinto
1fade53a61 feat: minor enhancements to /models/apply (#297) 2023-05-19 08:31:11 +02:00
Tyler Gillson
207ce81e4a fix: Dockerfile.build missing cmake in rwkv example (#301)
Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com>
2023-05-19 01:08:20 +02:00
renovate[bot]
fc59f74849 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 94f4018 (#294)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-18 22:51:02 +02:00
renovate[bot]
9d3c5ead93 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 33f8c2d (#293)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-18 22:50:45 +02:00
Tyler Gillson
549a01b62e docs: fix langchain-chroma example (#298)
Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com>
2023-05-18 22:50:21 +02:00
renovate[bot]
5a6d9d4e5b fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 546600f (#276)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-18 21:12:42 +02:00
Sébastien Prud'homme
1a7587ee48 feat: add an environment variable to manage rebuild in Docker image (#290)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-05-18 19:18:32 +02:00
Ettore Di Giacinto
cc9aa9eb3f feat: add /models/apply endpoint to prepare models (#286) 2023-05-18 15:59:03 +02:00
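
A minimal sketch of the /models/apply endpoint from #286, assuming gallery-style URLs and the job-polling path documented later; <uuid> stands for the job id returned by the first call:

    curl http://localhost:8080/models/apply \
      -H "Content-Type: application/json" \
      -d '{"url": "github:go-skynet/model-gallery/gpt4all-j.yaml"}'
    curl http://localhost:8080/models/jobs/<uuid>
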
ci-robbot [bot]
5617e50ebc ⬆️ Update go-skynet/go-llama.cpp (#256)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-18 09:52:48 +02:00
ci-robbot [bot]
b83e8b950d ⬆️ Update nomic-ai/gpt4all (#252)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-18 09:52:35 +02:00
Ettore Di Giacinto
d15fc5371a docs: update README 2023-05-17 22:33:02 +02:00
Ettore Di Giacinto
3f739575d8 Minor fixes (#285) 2023-05-17 21:01:46 +02:00
ci-robbot [bot]
7e4616646f ⬆️ Update go-skynet/go-bert.cpp (#274)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-17 11:56:32 +02:00
renovate[bot]
44ffaf86ad fix(deps): update github.com/go-skynet/go-llama.cpp digest to b7bbefb (#243)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-17 11:52:54 +02:00
renovate[bot]
d096644c67 fix(deps): update github.com/go-skynet/go-gpt2.cpp digest to 7bff56f (#217)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-17 11:52:37 +02:00
renovate[bot]
1428600de4 fix(deps): update github.com/go-skynet/go-bert.cpp digest to cea1ed7 (#242)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-17 09:47:22 +02:00
renovate[bot]
17b18df600 fix(deps): update github.com/donomii/go-rwkv.cpp digest to 6fdd0c3 (#240)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-17 08:54:52 +02:00
renovate[bot]
cd81dbae1c fix(deps): update github.com/go-skynet/bloomz.cpp digest to e9366e8 (#227)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-17 08:17:05 +02:00
ci-robbot [bot]
76be06ed56 ⬆️ Update go-skynet/go-gpt2.cpp (#253)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-17 01:47:31 +02:00
renovate[bot]
c2026e01c0 fix(deps): update github.com/mudler/go-stable-diffusion digest to c0748ec (#275)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-17 01:44:14 +02:00
Ettore Di Giacinto
cdca286be1 docker: add openblas and opencv to images (#277) 2023-05-17 01:30:30 +02:00
ci-robbot [bot]
41de6efca9 ⬆️ Update ggerganov/whisper.cpp (#265)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-17 01:04:14 +02:00
renovate[bot]
63a4ccebdc fix(deps): update module github.com/onsi/ginkgo/v2 to v2.9.5 (#264)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-17 01:03:43 +02:00
renovate[bot]
9237c1e91d fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to 95b02d7 (#254)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-16 22:34:36 +02:00
Ettore Di Giacinto
9d051c5d4f feat: add image generation with ncnn-stablediffusion (#272) 2023-05-16 19:32:53 +02:00
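
A minimal sketch of the OpenAI-compatible image endpoint that #272 backs with ncnn-stablediffusion; the prompt and size are illustrative:

    curl http://localhost:8080/v1/images/generations \
      -H "Content-Type: application/json" \
      -d '{"prompt": "a cute baby sea otter", "size": "256x256"}'
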
Ettore Di Giacinto
acd03d15f2 feat: add support for cublas/openblas in the llama.cpp backend (#258) 2023-05-16 16:26:25 +02:00
Ettore Di Giacinto
a035de2fdd tests: add rwkv (#261) 2023-05-15 08:15:01 +02:00
Ettore Di Giacinto
76a1267799 bump: update whisper.cpp (#260) 2023-05-15 01:00:16 +02:00
Ettore Di Giacinto
e533b008d4 docs: update README 2023-05-14 18:09:26 +02:00
Ettore Di Giacinto
a4380228e3 docs: update README 2023-05-14 18:08:42 +02:00
Ettore Di Giacinto
2a9d7474ce fix(rwkv): load tokenizer file from model path (#255) 2023-05-14 17:49:10 +02:00
Ettore Di Giacinto
850a690290 docs: update README 2023-05-14 11:14:09 +02:00
Ettore Di Giacinto
39edd9ff73 docs: update README short-term roadmap 2023-05-14 11:12:29 +02:00
ci-robbot [bot]
b82bbbfc6b ⬆️ Update ggerganov/whisper.cpp (#218)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-14 10:03:55 +02:00
mudler
023c065812 docs: Fix typo 2023-05-13 22:14:35 +02:00
mudler
a627a6c4e2 docs: Update README 2023-05-13 22:14:09 +02:00
ci-robbot [bot]
6c9ddff8e9 ⬆️ Update go-skynet/go-llama.cpp (#245)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-13 22:07:43 +02:00
ci-robbot [bot]
c5318587b8 ⬆️ Update go-skynet/go-bert.cpp (#247)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-13 14:36:01 +02:00
mudler
c3622299ce docs: cleanup langchain-chroma example 2023-05-13 11:16:56 +02:00
Ettore Di Giacinto
de36a48861 Update gpt4all to fix thread counts (#249) 2023-05-13 09:37:46 +02:00
mudler
961ca93219 docs: Update README 2023-05-13 00:46:48 +02:00
Ettore Di Giacinto
557ccc5ad8 examples: add langchain-chroma example (#248) 2023-05-12 22:20:07 +02:00
Ettore Di Giacinto
2488c445b6 feat: bert.cpp token embeddings (#241) 2023-05-12 17:16:49 +02:00
Ettore Di Giacinto
b4241d0a0d tests: enable whisper (#239) 2023-05-12 14:10:18 +02:00
Ettore Di Giacinto
8250391e49 Add support for gptneox/replit (#238) 2023-05-12 11:36:35 +02:00
Ettore Di Giacinto
fd1df4e971 whisper: add tests and allow to set upload size (#237) 2023-05-12 10:04:20 +02:00
ci-robbot [bot]
5115b2faa3 ⬆️ Update go-skynet/go-llama.cpp (#219)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-11 23:43:55 +02:00
ci-robbot [bot]
93e82a8bf4 ⬆️ Update go-skynet/go-gpt2.cpp (#220)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-11 23:43:44 +02:00
Ettore Di Giacinto
4413defca5 feat: add starcoder (#236) 2023-05-11 20:20:07 +02:00
Ettore Di Giacinto
f359e1c6c4 fix: dolly/rp (#235) 2023-05-11 19:38:27 +02:00
renovate[bot]
1bc87d582d fix(deps): update module github.com/sashabaranov/go-openai to v1.9.4 (#230)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-11 18:02:19 +02:00
renovate[bot]
a86a383357 fix(deps): update github.com/donomii/go-rwkv.cpp digest to 07166da (#224)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-11 18:01:52 +02:00
renovate[bot]
16f02c7b30 fix(deps): update github.com/go-skynet/go-bert.cpp digest to ec771ec (#223)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-11 18:01:35 +02:00
Ettore Di Giacinto
fe2706890c Update README.md 2023-05-11 17:32:13 +02:00
Ettore Di Giacinto
85f0f8227d refactor: drop code dups (#234) 2023-05-11 16:34:16 +02:00
Ettore Di Giacinto
59e3c02002 make use of new bindings for gpt4all (#232) 2023-05-11 14:31:19 +02:00
Matthew Campbell
032dee256f Keep whisper models in memory (#233) 2023-05-11 14:05:07 +02:00
Matthew Campbell
6b5e2b2bf5 Upload transcription API wasn't reading the data from the post (#229) 2023-05-11 10:43:05 +02:00
renovate[bot]
6fc303de87 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 70593fc (#221)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-11 10:34:34 +02:00
renovate[bot]
6ad6e4873d fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to 1d17cd5 (#216)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-11 01:14:34 +02:00
ci-robbot [bot]
d6d7391da8 ⬆️ Update donomii/go-rwkv.cpp (#225)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-11 01:13:28 +02:00
Ettore Di Giacinto
11675932ac feat: add dolly/redpajama/bloomz models support (#214) 2023-05-11 01:12:58 +02:00
Ettore Di Giacinto
f02202e1e1 update README 2023-05-10 15:51:16 +02:00
Ettore Di Giacinto
f8ee20991c feat: add bert.cpp embeddings (#222) 2023-05-10 15:20:21 +02:00
Cedrik Boudreau
e6db14e2f1 Added spark in projects (#215) 2023-05-10 14:05:44 +02:00
Dave
d00886abea Tiny .gitignore suggestion (#213) 2023-05-09 20:03:29 +02:00
renovate[bot]
4873d2bfa1 fix(deps): update github.com/go-skynet/go-llama.cpp digest to f4d26f4 (#212)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-09 12:17:15 +02:00
Ettore Di Giacinto
9f426578cf feat: add transcript endpoint (#211) 2023-05-09 11:43:50 +02:00
ci-robbot [bot]
9d01b695a8 ⬆️ Update go-skynet/go-llama.cpp (#209)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-08 22:37:16 +02:00
Ettore Di Giacinto
93829ab228 docs: update news 2023-05-08 22:34:12 +02:00
renovate[bot]
dd234f86d5 fix(deps): update github.com/go-skynet/go-llama.cpp digest to c03e8ad (#208)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-08 20:30:52 +02:00
mudler
3daff6f1aa doc: update README with embeddings docs 2023-05-08 20:02:59 +02:00
Ettore Di Giacinto
89dfa0f5fc feat: add experimental support for embeddings as arrays (#207) 2023-05-08 19:31:18 +02:00
renovate[bot]
bc03c492a0 fix(deps): update module github.com/gofiber/fiber/v2 to v2.45.0 (#206)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-07 17:46:31 +02:00
Fabian Hachenberg
f50a4c1454 Added runpod.io template for LocalAI to examples (#203) 2023-05-07 10:58:15 +02:00
mudler
d13d4d95ce examples: fix default parameter 2023-05-07 10:13:57 +02:00
renovate[bot]
428790ec06 fix(deps): update github.com/go-skynet/go-llama.cpp digest to cf9b522 (#202)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-07 09:14:00 +02:00
mudler
4f551ce414 examples: add update index example, update README 2023-05-07 09:05:24 +02:00
mudler
6ed7b10273 examples: add langchain agent example 2023-05-07 08:14:01 +02:00
mudler
02979566ee examples: better defaults 2023-05-07 00:58:30 +02:00
ci-robbot [bot]
cbdcc839f3 ⬆️ Update go-skynet/go-llama.cpp (#201)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-06 22:49:44 +02:00
mudler
e1c8f087f4 Update README 2023-05-06 19:18:03 +02:00
mudler
3a90ea44a5 Update readme and examples 2023-05-06 19:15:22 +02:00
renovate[bot]
e55492475d fix(deps): update github.com/go-skynet/go-llama.cpp digest to 691d479 (#189)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-06 00:42:31 +02:00
Dave
07ec2e441d mini fix - OpenAI documentation url (#200) 2023-05-06 00:42:08 +02:00
ci-robbot [bot]
38d7e0b43c ⬆️ Update go-skynet/go-llama.cpp (#198)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-06 00:21:48 +02:00
Dave
3411bfd00d Langchain Example Updates (#199) 2023-05-06 00:21:06 +02:00
Ettore Di Giacinto
7e5fe35ae4 Mixed enhancements (#196) 2023-05-06 00:00:58 +02:00
mudler
8c8cf38d4d tests: use 1 core 2023-05-05 23:29:34 +02:00
mudler
75b25297fd tests: run with ginkgo 2023-05-05 22:51:30 +02:00
mudler
009ee47fe2 Don't allow 0 as thread count 2023-05-05 22:51:20 +02:00
mudler
ec2adc2c03 tests: use 3 cores 2023-05-05 22:07:01 +02:00
mudler
ad301e6ed7 example(add): document query example 2023-05-05 21:56:31 +02:00
mudler
d094381e5d ci: lower fixtures spec 2023-05-05 21:28:38 +02:00
mudler
3ff9bbd217 examples: add rwkv script folder 2023-05-05 19:04:52 +02:00
mudler
e62ee2bc06 fix: remove trailing 0s from embeddings
This happens when no max_tokens are set, so by default go-llama
allocates more space for the slice and padding happens.
2023-05-05 18:35:03 +02:00
mudler
b49721cdd1 fix: respect config from file for backends settings 2023-05-05 18:05:10 +02:00
mudler
64c0a7967f fix: pass prediction options when using the model 2023-05-05 15:56:02 +02:00
mudler
e96eadab40 feat: support deprecated embeddings API 2023-05-05 15:55:19 +02:00
mudler
e73283121b feat: support arrays for prompt and input
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-05 15:54:59 +02:00
mudler
857d13e8d6 debug: wire up go-fiber debugger 2023-05-05 15:53:57 +02:00
ci-robbot [bot]
91db3d4d5c ⬆️ Update go-skynet/go-llama.cpp (#194)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-05 13:45:50 +02:00
Ettore Di Giacinto
961cf29217 feat: expose mirostat to config (#193) 2023-05-05 13:45:37 +02:00
Ettore Di Giacinto
c839b334eb feat: add embeddings for go-llama.cpp backend (#190) 2023-05-05 11:20:06 +02:00
Ettore Di Giacinto
714bfcd45b fix: missing returning error and free callback stream (#187) 2023-05-04 19:49:43 +02:00
renovate[bot]
77ce8b953e fix(deps): update github.com/donomii/go-rwkv.cpp digest to af62fcc (#171)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 18:30:48 +02:00
renovate[bot]
01ada95941 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 2e6ae12 (#172)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 18:30:11 +02:00
ci-robbot [bot]
eabdc5042a ⬆️ Update go-skynet/go-llama.cpp (#184)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-04 18:28:49 +02:00
Dhruv Gera
96267d9437 localai: Include the WebUI project example (#130)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-05-04 18:27:58 +02:00
Ettore Di Giacinto
9497a24127 fix: hardcode default number of cores to '4' (#186) 2023-05-04 18:14:58 +02:00
Ettore Di Giacinto
fdf75c6d0e rwkv fixes and examples (#185) 2023-05-04 17:32:23 +02:00
mudler
6352308882 ci: minor fixups 2023-05-04 15:08:20 +02:00
mudler
a8172a0f4e ci: fix typo 2023-05-04 15:04:41 +02:00
mudler
ebcd10d66f ci: manually update deps 2023-05-04 15:01:29 +02:00
mudler
885642915f ci: add renovate suffix 2023-05-04 12:26:59 +02:00
mudler
2e424491c0 ci: lookupNameTemplate -> depNameTemplate 2023-05-04 12:23:05 +02:00
mudler
aa6faef8f7 ci: versioning -> versioningTemplate 2023-05-04 12:07:29 +02:00
mudler
b3254baf60 ci: add versioning 2023-05-04 12:05:39 +02:00
mudler
0a43d27f0e ci: update renovate 2023-05-04 12:02:19 +02:00
Ettore Di Giacinto
3fe11fe24d ci: attempt to configure renovate with custom regexes (#178) 2023-05-04 11:55:14 +02:00
renovate[bot]
af18fdc749 fix(deps): update module github.com/sashabaranov/go-openai to v1.9.3 (#174)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 08:44:02 +02:00
renovate[bot]
32b5eddd7d fix(deps): update module github.com/onsi/ginkgo/v2 to v2.9.4 (#173)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 08:41:51 +02:00
Dave
07c3aa1869 Dockerized Langchain / PY example (#175) 2023-05-04 08:41:13 +02:00
renovate[bot]
e59bad89e7 fix(deps): update module github.com/sashabaranov/go-openai to v1.9.2 (#164)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-03 23:05:50 +02:00
Jeremy Price
b971807980 Looks for models in $CWD/models/ dir by default (#169) 2023-05-03 23:03:31 +02:00
Ettore Di Giacinto
c974dad799 Return usage in the API responses (#166) 2023-05-03 17:29:18 +02:00
Ettore Di Giacinto
4eae570ef5 Update docs (#163) 2023-05-03 15:51:54 +02:00
Ettore Di Giacinto
67992a7d99 feat: support slices or strings in the prompt completion endpoint (#162)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-03 13:13:31 +02:00
renovate[bot]
0a4899f366 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 8ceb616 (#150)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-03 11:48:06 +02:00
renovate[bot]
1eb02f6c91 fix(deps): update module github.com/onsi/ginkgo/v2 to v2.9.3 (#161)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-03 11:47:54 +02:00
mudler
575874e4fb readme: minor update 2023-05-03 11:46:29 +02:00
Ettore Di Giacinto
751b7eca62 feat: add rwkv support (#158)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-03 11:45:22 +02:00
Ettore Di Giacinto
1ae7150810 feat: allow to specify default backend for model (#156)
Signed-off-by: mudler <mudler@c3os.io>
2023-05-03 00:31:28 +02:00
Ettore Di Giacinto
70caf9bf8c feat: support stopwords both string and arrays (#154) 2023-05-02 23:30:00 +02:00
Dave
0b226ac027 Stop parameter of OpenAIRequest changed to String Array (#153) 2023-05-02 22:02:45 +02:00
Ettore Di Giacinto
220d6fd59b feat: add stream events (#152) 2023-05-02 20:03:35 +02:00
antongisli
0a00a4b58e adding mac build and example (#151)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-05-02 19:24:45 +02:00
Ettore Di Giacinto
156e15a4fa Bump llama.cpp, downgrade gpt4all-j (#149) 2023-05-02 16:07:18 +02:00
renovate[bot]
271d3f6673 fix(deps): update module github.com/valyala/fasthttp to v1.47.0 (#143)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-01 23:36:58 +02:00
Ettore Di Giacinto
fec4ab93c5 docs: Add langchain to the example index (#147)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-01 23:21:07 +02:00
renovate[bot]
38a7a7a54d fix(deps): update github.com/go-skynet/go-gpt4all-j.cpp digest to 77bf8c1 (#141)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-05-01 23:18:41 +02:00
Ettore Di Giacinto
0db0704e2c docs: Add slack-bot example (#145)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-01 23:18:24 +02:00
Dave
88f472e5d2 Add LangchainJS Examples (#146) 2023-05-01 23:18:14 +02:00
Ettore Di Giacinto
92452d46da feat: add new gpt4all-j binding (#142) 2023-05-01 20:00:15 +02:00
Ettore Di Giacinto
ac70252d70 drop: remove helm charts, now in separate repo (#134)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-01 18:07:41 +02:00
renovate[bot]
f6451d2518 fix(deps): update module github.com/urfave/cli/v2 to v2.25.3 (#140)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-01 18:07:29 +02:00
Ettore Di Giacinto
2473f9d19b docs: add discord-bot preview (#137) 2023-05-01 11:03:34 +02:00
renovate[bot]
bc583385a9 fix(deps): update module github.com/urfave/cli/v2 to v2.25.2 (#136)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-01 07:53:48 +02:00
renovate[bot]
8286bfbab7 fix(deps): update module github.com/sashabaranov/go-openai to v1.9.1 (#135)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-01 07:52:20 +02:00
Ettore Di Giacinto
d129fabe3b docs: enhancements (#133) 2023-04-30 23:27:02 +02:00
renovate[bot]
2539867247 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 377fd24 (#129)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-30 11:09:48 +02:00
renovate[bot]
69fedb92d9 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 361b9f8 (#127)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-30 08:47:27 +02:00
Ettore Di Giacinto
54b5eadcc4 docs: add discord-bot example (#126) 2023-04-30 00:31:28 +02:00
Ettore Di Giacinto
16773e2a35 feat: make images to build sources on start (#124)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-29 20:38:37 +02:00
renovate[bot]
78503c62b7 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 9bf702f (#125)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-29 16:53:39 +02:00
Ettore Di Giacinto
a330c9cee5 update: bump llama.cpp to 7f15c5c (#122)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-29 15:20:50 +02:00
Ettore Di Giacinto
ff0867996e tests: increase timeout (#121) 2023-04-29 14:56:00 +02:00
Ettore Di Giacinto
1bf8f996d1 docs: clarify GPT4ALL-J licensing (#120) 2023-04-29 14:50:22 +02:00
Ettore Di Giacinto
52f4d993c1 feat: add /edit endpoint (#119) 2023-04-29 09:22:09 +02:00
renovate[bot]
d0ceebc5d7 fix(deps): update module github.com/valyala/fasthttp to v1.46.0 (#118)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-28 22:44:29 +02:00
renovate[bot]
9122af3ae1 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 3d084e4 (#108)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-28 19:24:49 +02:00
Ettore Di Giacinto
b8533428bc bump: update llama.cpp (#117)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-28 19:24:28 +02:00
Ettore Di Giacinto
677905334c docs: reorder section (#116) 2023-04-28 13:55:23 +02:00
Mauro Morales
d1d55d29a0 Add Kairos LocalAI example to the links (#115) 2023-04-28 13:52:17 +02:00
Ettore Di Giacinto
e07dba7ad6 docs: Add contributors (#113)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-28 10:54:39 +02:00
Matthieu Talbot
062f832510 Add EXPOSE to Dockerfile (#107) 2023-04-27 16:45:24 +00:00
Ettore Di Giacinto
d0330bb64b docs: update example README.md (#104) 2023-04-27 17:46:14 +02:00
antongisli
91a23ec6ec Anton readme (#99)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-04-27 17:17:03 +02:00
Ron Evans
0b000dd043 examples: correct typo in README (#103)
Signed-off-by: deadprogram <ron@hybridgroup.com>
2023-04-27 17:14:38 +02:00
Ettore Di Giacinto
c73ba91a66 docs: update README 2023-04-27 15:39:48 +02:00
Ettore Di Giacinto
dfc00f8bc1 docs: update README.md (#98) 2023-04-27 15:06:55 +02:00
Ettore Di Giacinto
a18ff9c9b3 docs: move api docs (#96) 2023-04-27 10:42:50 +02:00
Ettore Di Giacinto
d0199279ad docs: update, add config docs (#94) 2023-04-27 10:39:01 +02:00
Ettore Di Giacinto
9ede1e12d8 few typos and clarity changes (#91) (#92)
Co-authored-by: antongisli <anton@huge.geek.nz>
2023-04-27 07:47:39 +02:00
Ettore Di Giacinto
c806eae0de feat: config files and SSE (#83)
Signed-off-by: mudler <mudler@mocaccino.org>
Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com>
Co-authored-by: Tyler Gillson <tyler.gillson@gmail.com>
2023-04-26 21:18:18 -07:00
renovate[bot]
4e2061636e chore(deps): update actions/checkout action to v3 (#82)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-25 07:46:29 +02:00
renovate[bot]
e3ef171968 fix(deps): update module github.com/gofiber/fiber/v2 to v2.44.0 (#81)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-25 07:46:14 +02:00
Ettore Di Giacinto
12d83a4184 feat: Return OpenAI errors and update docs (#80)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-24 23:42:03 +02:00
renovate[bot]
045412e8dd fix(deps): update module github.com/urfave/cli/v2 to v2.25.1 (#78)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 18:16:23 +02:00
renovate[bot]
9896a9a58b fix(deps): update github.com/go-skynet/go-llama.cpp digest to e45cebe (#77)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 18:16:10 +02:00
Ettore Di Giacinto
b9011bda59 feat: automatic updates with renovate, docs updates (#76) 2023-04-24 18:10:58 +02:00
Ettore Di Giacinto
2b2f5fa36a feat: update llama.cpp (#72) 2023-04-24 14:15:49 +02:00
renovate[bot]
43c557dc5c fix(deps): update github.com/go-skynet/go-gpt4all-j.cpp digest to 1f7bff5 (#74)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 14:14:21 +02:00
renovate[bot]
7abb2c9bd7 fix(deps): update github.com/go-skynet/go-gpt2.cpp digest to 245a5bf (#73)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 14:13:04 +02:00
renovate[bot]
7a9ea4480a Configure Renovate (#71)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 14:11:39 +02:00
Vladimir Malyutin
31bcc558de Update README.md (#62) 2023-04-22 14:42:30 +02:00
Ettore Di Giacinto
676e15f785 fix: make MacOS builds work (#61) 2023-04-22 11:05:23 +02:00
Marc R Kellerman
3e71c90949 feature: add devcontainer for live debugging (#60) 2023-04-22 01:20:03 +02:00
Ettore Di Giacinto
550ae9c968 docs: add Discord channel link (#59) 2023-04-22 00:46:17 +02:00
Ettore Di Giacinto
1c872ec326 feat: add CI/tests (#58)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-22 00:44:52 +02:00
Marc R Kellerman
05f35b182c fix(makefile): fix go-gpt2 folder and add verification before git clone (#51)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-04-22 00:29:32 +02:00
Ettore Di Giacinto
79791438fe Use the first available model if not specified (#55)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-21 22:54:43 +02:00
Tyler Gillson
bf20cc34f6 feat: Add helm chart (#56) 2023-04-21 13:22:03 -07:00
Ettore Di Giacinto
5cba71de70 Add stopwords, debug mode, and other API enhancements (#54)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-21 19:46:59 +02:00
Ettore Di Giacinto
4b7e83056d Update .env 2023-04-21 01:47:35 +02:00
Ettore Di Giacinto
ed954d66c3 Do not take all CPU by default (#50) 2023-04-21 00:55:19 +02:00
Ettore Di Giacinto
f816dfae65 Add support for stablelm (#48)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-21 00:06:55 +02:00
Ettore Di Giacinto
142bcd66ca Cleanup makefile, fix dep versions (#46)
Signed-off-by: mudler <mudler@c3os.io>
2023-04-20 19:49:06 +02:00
Ettore Di Giacinto
1c4fbaae20 Add support for cerebras (#45)
Signed-off-by: mudler <mudler@c3os.io>
2023-04-20 19:33:36 +02:00
512 changed files with 66601 additions and 919 deletions

.gitignore

@@ -1 +1,5 @@
.idea
models
examples/chatbot-ui/models
examples/rwkv/models
examples/**/models

91
.env

@@ -1,4 +1,89 @@
THREADS=14
CONTEXT_SIZE=512
## Set number of threads.
## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably.
# THREADS=14
## Specify a different bind address (defaults to ":8080")
# ADDRESS=127.0.0.1:8080
## Default models context size
# CONTEXT_SIZE=512
#
## Define galleries.
## Models to install will be visible in `/models/available`
# GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}]
## CORS settings
# CORS=true
# CORS_ALLOW_ORIGINS=*
## Default path for models
#
MODELS_PATH=/models
## Enable debug mode
# DEBUG=true
## Disables COMPEL (Diffusers)
# COMPEL=0
## Enable/Disable single backend (useful if only one GPU is available)
# SINGLE_ACTIVE_BACKEND=true
## Specify a build type. Available: cublas, openblas, clblas.
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
## OpenBLAS: This is an open-source implementation of the BLAS library that aims to provide highly optimized code for various platforms. It includes support for multi-threading and can be compiled to use hardware-specific features for additional performance. OpenBLAS can run on many kinds of hardware, including CPUs from Intel, AMD, and ARM.
## clBLAS: This is an open-source implementation of the BLAS library that uses OpenCL, a framework for writing programs that execute across heterogeneous platforms consisting of CPUs, GPUs, and other processors. clBLAS is designed to take advantage of the parallel computing power of GPUs but can also run on any hardware that supports OpenCL. This includes hardware from different vendors like Nvidia, AMD, and Intel.
# BUILD_TYPE=openblas
## Uncomment and set to true to enable rebuilding from source
# REBUILD=true
## Enable go tags, available: stablediffusion, tts
## stablediffusion: image generation with stablediffusion
## tts: enables text-to-speech with go-piper
## (requires REBUILD=true)
#
# GO_TAGS=stablediffusion
## Path where to store generated images
# IMAGE_PATH=/tmp
## Specify a default upload limit in MB (whisper)
# UPLOAD_LIMIT
## List of external gRPC backends (note: on the container image this variable is already set to use the extra backends available in extra/)
# EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py
### Advanced settings ###
### Those are not really used by LocalAI, but from components in the stack ###
##
### Preload libraries
# LD_PRELOAD=
### Huggingface cache for models
# HUGGINGFACE_HUB_CACHE=/usr/local/huggingface
### Python backends GRPC max workers
### Default number of workers for GRPC Python backends.
### This actually controls whether a backend can process multiple requests or not.
# PYTHON_GRPC_MAX_WORKERS=1
### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
# LLAMACPP_PARALLEL=1
### Enable to run parallel requests
# PARALLEL_REQUESTS=true
### Watchdog settings
###
# Enables the watchdog to kill backends that have been inactive for too long
# WATCHDOG_IDLE=true
#
# Enables the watchdog to kill backends that have been busy for too long
# WATCHDOG_BUSY=true
#
# Time in duration format (e.g. 1h30m) after which a backend is considered idle
# WATCHDOG_IDLE_TIMEOUT=5m
#
# Time in duration format (e.g. 1h30m) after which a backend is considered busy
# WATCHDOG_BUSY_TIMEOUT=5m
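
As a concrete illustration of how a few of these settings combine, here is a minimal sketch of starting the container with them (the chosen values and the mounted models directory are assumptions for illustration; the image name is the one published by the CI workflows below):

# Illustrative only: pin threads, enable debug logging, configure a gallery,
# and mount a local models directory at the default MODELS_PATH
docker run -p 8080:8080 \
  -e THREADS=8 \
  -e CONTEXT_SIZE=1024 \
  -e DEBUG=true \
  -e GALLERIES='[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}]' \
  -v $PWD/models:/models \
  quay.io/go-skynet/local-ai:latest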

1
.gitattributes vendored Normal file

@@ -0,0 +1 @@
*.sh text eol=lf

5
.github/FUNDING.yml vendored Normal file

@@ -0,0 +1,5 @@
# These are supported funding model platforms
github: [mudler]
custom:
- https://www.buymeacoffee.com/mudler

29
.github/ISSUE_TEMPLATE/bug_report.md vendored Normal file

@@ -0,0 +1,29 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: bug, unconfirmed, up-for-grabs
---
<!-- Thanks for helping us to improve LocalAI! We welcome all bug reports. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. -->
**LocalAI version:**
<!-- Container Image or LocalAI tag/commit -->
**Environment, CPU architecture, OS, and Version:**
<!-- Provide the output from "uname -a", HW specs, if it's a VM -->
**Describe the bug**
<!-- A clear and concise description of what the bug is. -->
**To Reproduce**
<!-- Steps to reproduce the behavior, including the LocalAI command used, if any -->
**Expected behavior**
<!-- A clear and concise description of what you expected to happen. -->
**Logs**
<!-- If applicable, add logs while running LocalAI in debug mode (`--debug` or `DEBUG=true`) to help explain your problem. -->
**Additional context**
<!-- Add any other context about the problem here. -->

8
.github/ISSUE_TEMPLATE/config.yml vendored Normal file

@@ -0,0 +1,8 @@
blank_issues_enabled: false
contact_links:
- name: Community Support
url: https://github.com/go-skynet/LocalAI/discussions
about: Please ask and answer questions here.
- name: Discord
url: https://discord.gg/uJAeKSAGDy
about: Join our community on Discord!

20
.github/ISSUE_TEMPLATE/feature_request.md vendored Normal file

@@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: enhancement, up-for-grabs
---
<!-- Thanks for helping us to improve LocalAI! We welcome all feature requests. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. -->
**Is your feature request related to a problem? Please describe.**
<!-- A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] -->
**Describe the solution you'd like**
<!-- A clear and concise description of what you want to happen. -->
**Describe alternatives you've considered**
<!-- A clear and concise description of any alternative solutions or features you've considered. -->
**Additional context**
<!-- Add any other context or screenshots about the feature request here. -->

31
.github/PULL_REQUEST_TEMPLATE.md vendored Normal file

@@ -0,0 +1,31 @@
**Description**
This PR fixes #
**Notes for Reviewers**
**[Signed commits](../CONTRIBUTING.md#signing-off-on-commits-developer-certificate-of-origin)**
- [ ] Yes, I signed my commits.
<!--
Thank you for contributing to LocalAI!
Contributing Conventions
-------------------------
The draft above helps to give a quick overview of your PR.
Remember to remove this comment and to at least:
1. Include descriptive PR titles with [<component-name>] prepended. We use [conventional commits](https://www.conventionalcommits.org/en/v1.0.0/).
2. Build and test your changes before submitting a PR (`make build`).
3. Sign your commits
4. **Tag maintainer:** for a quicker response, tag the relevant maintainer (see below).
5. **X/Twitter handle:** we announce bigger features on X/Twitter. If your PR gets announced, and you'd like a mention, we'll gladly shout you out!
By following the community's contribution conventions upfront, the review process will
be accelerated and your PR merged more quickly.
If no one reviews your PR within a few days, please @-mention @mudler.
-->

9
.github/bump_deps.sh vendored Executable file

@@ -0,0 +1,9 @@
#!/bin/bash
set -xe
REPO=$1    # GitHub repository to track, e.g. go-skynet/go-llama.cpp
BRANCH=$2  # branch whose latest commit to pin
VAR=$3     # Makefile variable to update
# Ask the GitHub API for the latest commit SHA on the branch
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
# Pin the Makefile variable to that commit SHA
sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
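
As a usage sketch, the bump workflow below invokes this script once per tracked dependency; for the go-llama.cpp entry of its matrix the equivalent manual call is:

bash .github/bump_deps.sh go-skynet/go-llama.cpp master GOLLAMA_VERSION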

7
.github/bump_docs.sh vendored Executable file

@@ -0,0 +1,7 @@
#!/bin/bash
set -xe
REPO=$1  # GitHub repository whose latest release to read, e.g. mudler/LocalAI
# Fetch the name of the latest release from the GitHub API
LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.name')
# Rewrite docs/data/version.json with that release name
cat <<< $(jq ".version = \"$LATEST_TAG\"" docs/data/version.json) > docs/data/version.json
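
Likewise, the docs bump workflow below drives this script with its single matrix entry; the equivalent manual call is:

bash .github/bump_docs.sh mudler/LocalAI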

24
.github/release.yml vendored Normal file

@@ -0,0 +1,24 @@
# .github/release.yml
changelog:
exclude:
labels:
- ignore-for-release
categories:
- title: Breaking Changes 🛠
labels:
- Semver-Major
- breaking-change
- title: "Bug fixes :bug:"
labels:
- bug
- title: Exciting New Features 🎉
labels:
- Semver-Minor
- enhancement
- title: 👒 Dependencies
labels:
- dependencies
- title: Other Changes
labels:
- "*"

18
.github/stale.yml vendored Normal file

@@ -0,0 +1,18 @@
# Number of days of inactivity before an issue becomes stale
daysUntilStale: 45
# Number of days of inactivity before a stale issue is closed
daysUntilClose: 10
# Issues with these labels will never be considered stale
exemptLabels:
- issue/willfix
# Label to use when marking an issue as stale
staleLabel: issue/stale
# Comment to post when marking an issue as stale. Set to `false` to disable
markComment: >
This issue has been automatically marked as stale because it has not had
recent activity. It will be closed if no further activity occurs. Thank you
for your contributions.
# Comment to post when closing a stale issue. Set to `false` to disable
closeComment: >
This issue is being automatically closed due to inactivity.
However, you may choose to reopen this issue.

63
.github/workflows/bump_deps.yaml vendored Normal file

@@ -0,0 +1,63 @@
name: Bump dependencies
on:
schedule:
- cron: 0 20 * * *
workflow_dispatch:
jobs:
bump:
strategy:
fail-fast: false
matrix:
include:
- repository: "go-skynet/go-llama.cpp"
variable: "GOLLAMA_VERSION"
branch: "master"
- repository: "ggerganov/llama.cpp"
variable: "CPPLLAMA_VERSION"
branch: "master"
- repository: "go-skynet/go-ggml-transformers.cpp"
variable: "GOGGMLTRANSFORMERS_VERSION"
branch: "master"
- repository: "donomii/go-rwkv.cpp"
variable: "RWKV_VERSION"
branch: "main"
- repository: "ggerganov/whisper.cpp"
variable: "WHISPER_CPP_VERSION"
branch: "master"
- repository: "go-skynet/go-bert.cpp"
variable: "BERT_VERSION"
branch: "master"
- repository: "go-skynet/bloomz.cpp"
variable: "BLOOMZ_VERSION"
branch: "main"
- repository: "nomic-ai/gpt4all"
variable: "GPT4ALL_VERSION"
branch: "main"
- repository: "mudler/go-ggllm.cpp"
variable: "GOGGLLM_VERSION"
branch: "master"
- repository: "mudler/go-stable-diffusion"
variable: "STABLEDIFFUSION_VERSION"
branch: "master"
- repository: "mudler/go-piper"
variable: "PIPER_VERSION"
branch: "master"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Bump dependencies 🔧
run: |
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
- name: Create Pull Request
uses: peter-evans/create-pull-request@v5
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
title: ':arrow_up: Update ${{ matrix.repository }}'
branch: "update/${{ matrix.variable }}"
body: Bump of ${{ matrix.repository }} version
signoff: true

31
.github/workflows/bump_docs.yaml vendored Normal file

@@ -0,0 +1,31 @@
name: Bump dependencies
on:
schedule:
- cron: 0 20 * * *
workflow_dispatch:
jobs:
bump:
strategy:
fail-fast: false
matrix:
include:
- repository: "mudler/LocalAI"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Bump dependencies 🔧
run: |
bash .github/bump_docs.sh ${{ matrix.repository }}
- name: Create Pull Request
uses: peter-evans/create-pull-request@v5
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}'
title: ':arrow_up: Update docs version ${{ matrix.repository }}'
branch: "update/docs"
body: Bump of ${{ matrix.repository }} version inside docs
signoff: true

63
.github/workflows/disabled/test-gpu.yml vendored Normal file

@@ -0,0 +1,63 @@
---
name: 'GPU tests'
on:
pull_request:
push:
branches:
- master
tags:
- '*'
concurrency:
group: ci-gpu-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
ubuntu-latest:
runs-on: gpu
strategy:
matrix:
go-version: ['1.21.x']
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}
uses: actions/setup-go@v4
with:
go-version: ${{ matrix.go-version }}
# You can test your matrix by printing the current Go version
- name: Display Go version
run: go version
- name: Dependencies
run: |
sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y make wget
- name: Build
run: |
if [ ! -e /run/systemd/system ]; then
sudo mkdir /run/systemd/system
fi
sudo mkdir -p /host/tests/${{ github.head_ref || github.ref }}
sudo chmod -R 777 /host/tests/${{ github.head_ref || github.ref }}
make \
TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
BUILD_TYPE=cublas \
prepare-e2e run-e2e-image test-e2e
- name: Release space from worker ♻
if: always()
run: |
sudo rm -rf build || true
sudo rm -rf bin || true
sudo rm -rf dist || true
sudo docker logs $(sudo docker ps -q --filter ancestor=localai-tests) > logs.txt
sudo cat logs.txt || true
sudo rm -rf logs.txt
make clean || true
make \
TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
teardown-e2e || true
sudo rm -rf /host/tests/${{ github.head_ref || github.ref }} || true
docker system prune -f -a --volumes || true

86
.github/workflows/image-pr.yml vendored Normal file

@@ -0,0 +1,86 @@
---
name: 'build container images tests'
on:
pull_request:
concurrency:
group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
extras-image-build:
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
image-type: ${{ matrix.image-type }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
# Pushing with all jobs in parallel
# eats the bandwidth of all the nodes
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
matrix:
include:
- build-type: ''
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
core-image-build:
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
image-type: ${{ matrix.image-type }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
matrix:
include:
- build-type: ''
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'

.github/workflows/image.yml

@@ -2,77 +2,159 @@
name: 'build container images'
on:
pull_request:
push:
branches:
- master
tags:
- '*'
concurrency:
group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
docker:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Prepare
id: prep
run: |
DOCKER_IMAGE=quay.io/go-skynet/local-ai
VERSION=master
SHORTREF=${GITHUB_SHA::8}
# If this is git tag, use the tag name as a docker tag
if [[ $GITHUB_REF == refs/tags/* ]]; then
VERSION=${GITHUB_REF#refs/tags/}
fi
TAGS="${DOCKER_IMAGE}:${VERSION},${DOCKER_IMAGE}:${SHORTREF}"
# If the VERSION looks like a version number, assume that
# this is the most recent version of the image and also
# tag it 'latest'.
if [[ $VERSION =~ ^v[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
TAGS="$TAGS,${DOCKER_IMAGE}:latest"
fi
# Set output parameters.
echo ::set-output name=tags::${TAGS}
echo ::set-output name=docker_image::${DOCKER_IMAGE}
- name: Set up QEMU
uses: docker/setup-qemu-action@master
with:
platforms: all
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@master
- name: Login to DockerHub
if: github.event_name != 'pull_request'
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.QUAY_USERNAME }}
password: ${{ secrets.QUAY_PASSWORD }}
- name: Build
if: github.event_name != 'pull_request'
uses: docker/build-push-action@v4
with:
builder: ${{ steps.buildx.outputs.name }}
context: .
file: ./Dockerfile
platforms: linux/amd64,linux/arm64
push: true
tags: ${{ steps.prep.outputs.tags }}
- name: Build PRs
if: github.event_name == 'pull_request'
uses: docker/build-push-action@v4
with:
builder: ${{ steps.buildx.outputs.name }}
context: .
file: ./Dockerfile
platforms: linux/amd64
push: false
tags: ${{ steps.prep.outputs.tags }}
extras-image-build:
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
image-type: ${{ matrix.image-type }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
# Pushing with all jobs in parallel
# eats the bandwidth of all the nodes
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
matrix:
include:
- build-type: ''
#platforms: 'linux/amd64,linux/arm64'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: ''
ffmpeg: ''
image-type: 'extras'
runs-on: 'arc-runner-set'
- build-type: ''
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11'
ffmpeg: ''
image-type: 'extras'
runs-on: 'arc-runner-set'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12'
ffmpeg: ''
image-type: 'extras'
runs-on: 'arc-runner-set'
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
core-image-build:
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
image-type: ${{ matrix.image-type }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
matrix:
include:
- build-type: ''
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-core'
ffmpeg: ''
image-type: 'core'
runs-on: 'ubuntu-latest'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-core'
ffmpeg: ''
image-type: 'core'
runs-on: 'ubuntu-latest'
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'

160
.github/workflows/image_build.yml vendored Normal file

@@ -0,0 +1,160 @@
---
name: 'build container images (reusable)'
on:
workflow_call:
inputs:
build-type:
description: 'Build type'
default: ''
type: string
cuda-major-version:
description: 'CUDA major version'
default: "11"
type: string
cuda-minor-version:
description: 'CUDA minor version'
default: "7"
type: string
platforms:
description: 'Platforms'
default: ''
type: string
tag-latest:
description: 'Tag latest'
default: ''
type: string
tag-suffix:
description: 'Tag suffix'
default: ''
type: string
ffmpeg:
description: 'FFMPEG'
default: ''
type: string
image-type:
description: 'Image type'
default: ''
type: string
runs-on:
description: 'Runs on'
required: true
default: ''
type: string
secrets:
dockerUsername:
required: true
dockerPassword:
required: true
quayUsername:
required: true
quayPassword:
required: true
jobs:
reusable_image-build:
runs-on: ${{ inputs.runs-on }}
steps:
- name: Force Install GIT latest
run: |
sudo apt-get update \
&& sudo apt-get install -y software-properties-common \
&& sudo apt-get update \
&& sudo add-apt-repository -y ppa:git-core/ppa \
&& sudo apt-get update \
&& sudo apt-get install -y git
- name: Checkout
uses: actions/checkout@v4
# - name: Release space from worker
# run: |
# echo "Listing top largest packages"
# pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
# head -n 30 <<< "${pkgs}"
# echo
# df -h
# echo
# sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
# sudo apt-get remove --auto-remove android-sdk-platform-tools || true
# sudo apt-get purge --auto-remove android-sdk-platform-tools || true
# sudo rm -rf /usr/local/lib/android
# sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
# sudo rm -rf /usr/share/dotnet
# sudo apt-get remove -y '^mono-.*' || true
# sudo apt-get remove -y '^ghc-.*' || true
# sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
# sudo apt-get remove -y 'php.*' || true
# sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
# sudo apt-get remove -y '^google-.*' || true
# sudo apt-get remove -y azure-cli || true
# sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
# sudo apt-get remove -y '^gfortran-.*' || true
# sudo apt-get remove -y microsoft-edge-stable || true
# sudo apt-get remove -y firefox || true
# sudo apt-get remove -y powershell || true
# sudo apt-get remove -y r-base-core || true
# sudo apt-get autoremove -y
# sudo apt-get clean
# echo
# echo "Listing top largest packages"
# pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
# head -n 30 <<< "${pkgs}"
# echo
# sudo rm -rfv build || true
# df -h
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: |
quay.io/go-skynet/local-ai
localai/localai
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
type=sha
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.tag-suffix }}
- name: Set up QEMU
uses: docker/setup-qemu-action@master
with:
platforms: all
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@master
- name: Login to DockerHub
if: github.event_name != 'pull_request'
uses: docker/login-action@v3
with:
username: ${{ secrets.dockerUsername }}
password: ${{ secrets.dockerPassword }}
- name: Login to DockerHub
if: github.event_name != 'pull_request'
uses: docker/login-action@v3
with:
registry: quay.io
username: ${{ secrets.quayUsername }}
password: ${{ secrets.quayPassword }}
- name: Build and push
uses: docker/build-push-action@v5
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
BUILD_TYPE=${{ inputs.build-type }}
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
FFMPEG=${{ inputs.ffmpeg }}
IMAGE_TYPE=${{ inputs.image-type }}
context: .
file: ./Dockerfile
platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
- name: job summary
run: |
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY

112
.github/workflows/release.yaml vendored Normal file

@@ -0,0 +1,112 @@
name: Build and Release
on: push
permissions:
contents: write
concurrency:
group: ci-releases-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
build-linux:
strategy:
matrix:
include:
- build: 'avx2'
defines: ''
- build: 'avx'
defines: '-DLLAMA_AVX2=OFF'
- build: 'avx512'
defines: '-DLLAMA_AVX512=ON'
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v4
with:
go-version: '>=1.21.0'
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v3
with:
path: grpc
key: ${{ runner.os }}-grpc
- name: Build grpc
if: steps.cache-grpc.outputs.cache-hit != 'true'
run: |
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && sudo make -j12
- name: Install gRPC
run: |
cd grpc && cd cmake/build && sudo make -j12 install
- name: Build
id: build
env:
CMAKE_ARGS: "${{ matrix.defines }}"
BUILD_ID: "${{ matrix.build }}"
run: |
STATIC=true make dist
- uses: actions/upload-artifact@v3
with:
name: ${{ matrix.build }}
path: release/
- name: Release
uses: softprops/action-gh-release@v1
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
build-macOS:
strategy:
matrix:
include:
- build: 'avx2'
defines: ''
- build: 'avx'
defines: '-DLLAMA_AVX2=OFF'
- build: 'avx512'
defines: '-DLLAMA_AVX512=ON'
runs-on: macOS-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v4
with:
go-version: '>=1.21.0'
- name: Dependencies
run: |
brew install protobuf grpc
- name: Build
id: build
env:
CMAKE_ARGS: "${{ matrix.defines }}"
BUILD_ID: "${{ matrix.build }}"
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
make dist
- uses: actions/upload-artifact@v3
with:
name: ${{ matrix.build }}
path: release/
- name: Release
uses: softprops/action-gh-release@v1
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*


@@ -1,26 +0,0 @@
name: goreleaser
on:
push:
tags:
- 'v*'
jobs:
goreleaser:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Set up Go
uses: actions/setup-go@v3
with:
go-version: 1.18
- name: Run GoReleaser
uses: goreleaser/goreleaser-action@v4
with:
version: latest
args: release --clean
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

277
.github/workflows/test-extra.yml vendored Normal file

@@ -0,0 +1,277 @@
---
name: 'Tests extras backends'
on:
pull_request:
push:
branches:
- master
tags:
- '*'
concurrency:
group: ci-tests-extra-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
tests-transformers:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
- name: Test transformers
run: |
export PATH=$PATH:/opt/conda/bin
make -C backend/python/transformers
make -C backend/python/transformers test
tests-sentencetransformers:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
- name: Test sentencetransformers
run: |
export PATH=$PATH:/opt/conda/bin
make -C backend/python/sentencetransformers
make -C backend/python/sentencetransformers test
tests-diffusers:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
- name: Test diffusers
run: |
export PATH=$PATH:/opt/conda/bin
make -C backend/python/diffusers
make -C backend/python/diffusers test
tests-transformers-musicgen:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
- name: Test transformers-musicgen
run: |
export PATH=$PATH:/opt/conda/bin
make -C backend/python/transformers-musicgen
make -C backend/python/transformers-musicgen test
tests-petals:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
- name: Test petals
run: |
export PATH=$PATH:/opt/conda/bin
make -C backend/python/petals
make -C backend/python/petals test
tests-bark:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
- name: Test bark
run: |
export PATH=$PATH:/opt/conda/bin
make -C backend/python/bark
make -C backend/python/bark test
# Below tests needs GPU. Commented out for now
# TODO: Re-enable as soon as we have GPU nodes
# tests-vllm:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# submodules: true
# - name: Dependencies
# run: |
# sudo apt-get update
# sudo apt-get install build-essential ffmpeg
# curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
# sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
# gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
# sudo apt-get update && \
# sudo apt-get install -y conda
# sudo apt-get install -y ca-certificates cmake curl patch
# sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
# sudo rm -rfv /usr/bin/conda || true
# - name: Test vllm
# run: |
# export PATH=$PATH:/opt/conda/bin
# make -C backend/python/vllm
# make -C backend/python/vllm test
tests-vallex:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
- name: Test vall-e-x
run: |
export PATH=$PATH:/opt/conda/bin
make -C backend/python/vall-e-x
make -C backend/python/vall-e-x test
tests-coqui:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng
sudo rm -rfv /usr/bin/conda || true
- name: Test coqui
run: |
export PATH=$PATH:/opt/conda/bin
make -C backend/python/coqui
make -C backend/python/coqui test

133
.github/workflows/test.yml vendored Normal file

@@ -0,0 +1,133 @@
---
name: 'tests'
on:
pull_request:
push:
branches:
- master
tags:
- '*'
concurrency:
group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
tests-linux:
runs-on: ubuntu-latest
strategy:
matrix:
go-version: ['1.21.x']
steps:
- name: Release space from worker
run: |
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
df -h
echo
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
sudo rm -rf /usr/local/lib/android
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
sudo rm -rf /usr/share/dotnet
sudo apt-get remove -y '^mono-.*' || true
sudo apt-get remove -y '^ghc-.*' || true
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
sudo apt-get remove -y 'php.*' || true
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
sudo apt-get remove -y '^google-.*' || true
sudo apt-get remove -y azure-cli || true
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
sudo apt-get remove -y '^gfortran-.*' || true
sudo apt-get autoremove -y
sudo apt-get clean
echo
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
sudo rm -rfv build || true
df -h
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}
uses: actions/setup-go@v4
with:
go-version: ${{ matrix.go-version }}
# You can test your matrix by printing the current Go version
- name: Display Go version
run: go version
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
# Pre-build piper before we start tests in order to have shared libraries in place
make sources/go-piper && \
GO_TAGS="tts" make -C sources/go-piper piper.o && \
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v3
with:
path: grpc
key: ${{ runner.os }}-grpc
- name: Build grpc
if: steps.cache-grpc.outputs.cache-hit != 'true'
run: |
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && sudo make -j12
- name: Install gRPC
run: |
cd grpc && cd cmake/build && sudo make -j12 install
- name: Test
run: |
GO_TAGS="stablediffusion tts" make test
tests-apple:
runs-on: macOS-latest
strategy:
matrix:
go-version: ['1.21.x']
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}
uses: actions/setup-go@v4
with:
go-version: ${{ matrix.go-version }}
# You can test your matrix by printing the current Go version
- name: Display Go version
run: go version
- name: Dependencies
run: |
brew install protobuf grpc
- name: Test
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test

36
.gitignore vendored

@@ -1,11 +1,39 @@
# go-llama build artifacts
go-llama
go-gpt4all-j
/sources/
__pycache__/
*.a
get-sources
prepare-sources
/backend/cpp/llama/grpc-server
/backend/cpp/llama/llama.cpp
go-ggml-transformers
go-gpt2
go-rwkv
whisper.cpp
/bloomz
go-bert
# LocalAI build binary
LocalAI
local-ai
# prevent above rules from omitting the helm chart
!charts/*
# prevent above rules from omitting the api/localai folder
!api/localai
# Ignore models
models/*.bin
models/ggml-*
models/*
test-models/
test-dir/
release/
# just in case
.DS_Store
.idea
# Generated during build
backend-assets/
prepare
/ggml-metal.metal

6
.gitmodules vendored Normal file

@@ -0,0 +1,6 @@
[submodule "docs/themes/hugo-theme-relearn"]
path = docs/themes/hugo-theme-relearn
url = https://github.com/McShelby/hugo-theme-relearn.git
[submodule "docs/themes/lotusdocs"]
path = docs/themes/lotusdocs
url = https://github.com/colinwilson/lotusdocs

.goreleaser.yaml

@@ -1,15 +0,0 @@
# Make sure to check the documentation at http://goreleaser.com
project_name: local-ai
builds:
- ldflags:
- -w -s
env:
- CGO_ENABLED=0
goos:
- linux
- darwin
- windows
goarch:
- amd64
- arm64
binary: '{{ .ProjectName }}'

41
.vscode/launch.json vendored

@@ -1,16 +1,33 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Launch Go",
"type": "go",
"request": "launch",
"mode": "debug",
"program": "${workspaceFolder}/main.go",
"args": [
"api"
]
}
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": false,
"cwd": "${workspaceFolder}/examples/langchain-chroma",
"env": {
"OPENAI_API_BASE": "http://localhost:8080/v1",
"OPENAI_API_KEY": "abc"
}
},
{
"name": "Launch LocalAI API",
"type": "go",
"request": "launch",
"mode": "debug",
"program": "${workspaceFolder}/main.go",
"args": [
"api"
],
"env": {
"C_INCLUDE_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
"LIBRARY_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
"DEBUG": "true"
}
}
]
}
}

72
CONTRIBUTING.md Normal file

@@ -0,0 +1,72 @@
# Contributing to LocalAI
Thank you for your interest in contributing to LocalAI! We appreciate your time and effort in helping to improve our project. Before you get started, please take a moment to review these guidelines.
## Table of Contents
- [Getting Started](#getting-started)
- [Prerequisites](#prerequisites)
- [Setting up the Development Environment](#setting-up-the-development-environment)
- [Contributing](#contributing)
- [Submitting an Issue](#submitting-an-issue)
- [Creating a Pull Request (PR)](#creating-a-pull-request-pr)
- [Coding Guidelines](#coding-guidelines)
- [Testing](#testing)
- [Documentation](#documentation)
- [Community and Communication](#community-and-communication)
## Getting Started
### Prerequisites
- Golang [1.21]
- Git
- macOS/Linux
### Setting up the Development Environment and running LocalAI locally
1. Clone the repository: `git clone https://github.com/go-skynet/LocalAI.git`
2. Navigate to the project directory: `cd LocalAI`
3. Install the required dependencies: `make prepare`
4. Run LocalAI: `make run` (the full sequence is consolidated in the sketch below)
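For convenience, here are the same steps as a single shell session (a minimal sketch; it assumes `git` and GNU `make` are already installed):
```bash
# Clone the repository and enter it
git clone https://github.com/go-skynet/LocalAI.git
cd LocalAI

# Install the required dependencies, then run LocalAI
make prepare
make run
```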
## Contributing
We welcome contributions from everyone! To get started, follow these steps:
### Submitting an Issue
If you find a bug, have a feature request, or encounter any issues, please check the [issue tracker](https://github.com/go-skynet/LocalAI/issues) to see if a similar issue has already been reported. If not, feel free to [create a new issue](https://github.com/go-skynet/LocalAI/issues/new) and provide as much detail as possible.
### Creating a Pull Request (PR)
1. Fork the repository.
2. Create a new branch with a descriptive name: `git checkout -b [branch name]`
3. Make your changes and commit them.
4. Push the changes to your fork: `git push origin [branch name]`
5. Create a new pull request from your branch to the main project's `main` or `master` branch.
6. Provide a clear description of your changes in the pull request.
7. Make any requested changes during the review process.
8. Once your PR is approved, it will be merged into the main project (a typical command sequence for steps 1-4 is sketched below).
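A minimal sketch of steps 1-4, assuming you work from a fork under your own account (the fork URL, branch name, and commit message below are placeholders):
```bash
# Clone your fork and create a descriptively named branch
git clone https://github.com/<your-username>/LocalAI.git
cd LocalAI
git checkout -b my-descriptive-branch

# Commit your changes and push the branch to your fork
git add .
git commit -m "describe your change"
git push origin my-descriptive-branch
```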
## Coding Guidelines
- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like [`golangci-lint`](https://golangci-lint.run) can help you here.
## Testing
`make test` cannot handle all the models yet. Please be sure to add a test case for new features or for the parts you changed.
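As a rough guide, the project Makefile exposes label-filtered test targets, so you can exercise a focused subset while iterating instead of the whole suite:
```bash
# Run the full suite (test models are downloaded on the first run)
make test

# Or run only the subset labelled "llama"
make test-llama
```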
## Documentation
- We welcome contributions to the documentation; please open a new PR in the official documentation repo [localai-website](https://github.com/go-skynet/localai-website)
## Community and Communication
- You can reach out via the GitHub issue tracker.
- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
---

Dockerfile

@@ -1,12 +1,207 @@
ARG GO_VERSION=1.20
ARG DEBIAN_VERSION=11
FROM golang:$GO_VERSION as builder
WORKDIR /build
RUN apt-get update && apt-get install -y cmake
COPY . .
ARG BUILD_TYPE=
RUN make build${BUILD_TYPE}
ARG GO_VERSION=1.21-bullseye
ARG IMAGE_TYPE=extras
# extras or core
FROM debian:$DEBIAN_VERSION
COPY --from=builder /build/local-ai /usr/bin/local-ai
ENTRYPOINT [ "/usr/bin/local-ai" ]
FROM golang:$GO_VERSION as requirements-core
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=11
ARG CUDA_MINOR_VERSION=7
ARG TARGETARCH
ARG TARGETVARIANT
ENV BUILD_TYPE=${BUILD_TYPE}
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh"
ARG GO_TAGS="stablediffusion tinydream tts"
RUN apt-get update && \
apt-get install -y ca-certificates curl patch pip cmake && apt-get clean
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
RUN update-ca-certificates
# Use the variables in subsequent instructions
RUN echo "Target Architecture: $TARGETARCH"
RUN echo "Target Variant: $TARGETVARIANT"
# CuBLAS requirements
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
apt-get install -y software-properties-common && \
apt-add-repository contrib && \
curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \
dpkg -i cuda-keyring_1.0-1_all.deb && \
rm -f cuda-keyring_1.0-1_all.deb && \
apt-get update && \
apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
; fi
ENV PATH /usr/local/cuda/bin:${PATH}
# OpenBLAS requirements and stable diffusion
RUN apt-get install -y \
libopenblas-dev \
libopencv-dev \
&& apt-get clean
# Set up OpenCV
RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
WORKDIR /build
RUN test -n "$TARGETARCH" \
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
# Extras requirements
FROM requirements-core as requirements-extras
RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list && \
apt-get update && \
apt-get install -y conda && apt-get clean
ENV PATH="/root/.cargo/bin:${PATH}"
RUN pip install --upgrade pip
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
RUN apt-get install -y espeak-ng espeak && apt-get clean
###################################
###################################
FROM requirements-${IMAGE_TYPE} as builder
ARG GO_TAGS="stablediffusion tts"
ARG GRPC_BACKENDS
ARG BUILD_GRPC=true
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
ENV GO_TAGS=${GO_TAGS}
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all
WORKDIR /build
COPY . .
COPY .git .
RUN make prepare
# stablediffusion does not tolerate a newer version of abseil, build it first
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
RUN if [ "${BUILD_GRPC}" = "true" ]; then \
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && make -j12 install \
; fi
# Rebuild with defaults backends
RUN make build
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ && \
touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \
; fi
###################################
###################################
FROM requirements-${IMAGE_TYPE}
ARG FFMPEG
ARG BUILD_TYPE
ARG TARGETARCH
ARG IMAGE_TYPE=extras
ENV BUILD_TYPE=${BUILD_TYPE}
ENV REBUILD=false
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
ARG CUDA_MAJOR_VERSION=11
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all
ENV PIP_CACHE_PURGE=true
# Add FFmpeg
RUN if [ "${FFMPEG}" = "true" ]; then \
apt-get install -y ffmpeg && apt-get clean \
; fi
WORKDIR /build
# we start fresh & re-copy all assets because `make build` does not clean up nicely after itself
# so when `entrypoint.sh` runs `make build` again (which it does by default), the build would fail
# see https://github.com/go-skynet/LocalAI/pull/658#discussion_r1241971626 and
# https://github.com/go-skynet/LocalAI/pull/434
COPY . .
COPY --from=builder /build/sources ./sources/
COPY --from=builder /build/grpc ./grpc/
RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf grpc
# Copy the binary
COPY --from=builder /build/local-ai ./
# Copy shared libraries for piper
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
# do not let stablediffusion rebuild (requires an older version of absl)
COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
## Duplicated from Makefile to avoid having a big layer that's hard to push
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/autogptq \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/bark \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/diffusers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/vllm \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/mamba \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/transformers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/vall-e-x \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/exllama \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/exllama2 \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/petals \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/transformers-musicgen \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/coqui \
; fi
# Make sure the models directory exists
RUN mkdir -p /build/models
# Define the health check command
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
EXPOSE 8080
ENTRYPOINT [ "/build/entrypoint.sh" ]

10
Entitlements.plist Normal file

@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>com.apple.security.network.client</key>
<true/>
<key>com.apple.security.network.server</key>
<true/>
</dict>
</plist>

LICENSE

@@ -1,6 +1,6 @@
MIT License
Copyright (c) 2023 go-skynet authors
Copyright (c) 2023-2024 Ettore Di Giacinto (mudler@localai.io)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

571
Makefile

@@ -2,69 +2,392 @@ GOCMD=go
GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
GOLLAMA_VERSION?=llama.cpp-5ecff35
# llama.cpp versions
GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0
GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
CPPLLAMA_VERSION?=6db2b41a76ee78d5efdd5c3cddd5d7ad3f646855
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8
# go-ggml-transformers version
GOGGMLTRANSFORMERS_VERSION?=ffb09d7dd71e2cbc6c5d7d05357d230eea6f369a
# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=633c5a3485c403cb2520693dc0991a25dace9f0f
# whisper.cpp version
WHISPER_CPP_VERSION?=37a709f6558c6d9783199e2b8cbb136e1c41d346
# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
# go-piper version
PIPER_VERSION?=d6b6275ba037dabdba4a8b65dfdf6b2a73a67f07
# stablediffusion version
STABLEDIFFUSION_VERSION?=902db5f066fd137697e3b69d0fa10d4782bd2c2f
# tinydream version
TINYDREAM_VERSION?=772a9c0d9aaf768290e63cca3c904fe69faf677a
export BUILD_TYPE?=
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
export CMAKE_ARGS?=
CGO_LDFLAGS?=
CUDA_LIBPATH?=/usr/local/cuda/lib64/
GO_TAGS?=
BUILD_ID?=git
TEST_DIR=/tmp/test
RANDOM := $(shell bash -c 'echo $$RANDOM')
VERSION?=$(shell git describe --always --tags || echo "dev" )
# go tool nm ./local-ai | grep Commit
LD_FLAGS?=
override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Version=$(VERSION)"
override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
OPTIONAL_TARGETS?=
OS := $(shell uname -s)
ARCH := $(shell uname -m)
GREEN := $(shell tput -Txterm setaf 2)
YELLOW := $(shell tput -Txterm setaf 3)
WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)
# Default Docker bridge IP
E2E_BRIDGE_IP?=172.17.0.1
ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif
ifeq ($(OS),Darwin)
CGO_LDFLAGS += -lcblas -framework Accelerate
ifeq ($(OSX_SIGNING_IDENTITY),)
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
endif
# on OSX, if BUILD_TYPE is blank, we should default to use Metal
ifeq ($(BUILD_TYPE),)
BUILD_TYPE=metal
# disable metal if on Darwin and any other value is explicitly passed.
else ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DLLAMA_METAL=OFF
endif
endif
ifeq ($(BUILD_TYPE),openblas)
CGO_LDFLAGS+=-lopenblas
export WHISPER_OPENBLAS=1
endif
ifeq ($(BUILD_TYPE),cublas)
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
export LLAMA_CUBLAS=1
export WHISPER_CUBLAS=1
endif
ifeq ($(BUILD_TYPE),hipblas)
ROCM_HOME ?= /opt/rocm
export CXX=$(ROCM_HOME)/llvm/bin/clang++
export CC=$(ROCM_HOME)/llvm/bin/clang
# llama-ggml has no hipblas support, so override it here.
export STABLE_BUILD_TYPE=
export WHISPER_HIPBLAS=1
GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link
endif
ifeq ($(BUILD_TYPE),metal)
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
export LLAMA_METAL=1
export WHISPER_METAL=1
endif
ifeq ($(BUILD_TYPE),clblas)
CGO_LDFLAGS+=-lOpenCL -lclblast
export WHISPER_CLBLAST=1
endif
# glibc-static or glibc-devel-static required
ifeq ($(STATIC),true)
LD_FLAGS=-linkmode external -extldflags -static
endif
ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
# OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
OPTIONAL_GRPC+=backend-assets/grpc/stablediffusion
endif
ifeq ($(findstring tinydream,$(GO_TAGS)),tinydream)
# OPTIONAL_TARGETS+=go-tiny-dream/libtinydream.a
OPTIONAL_GRPC+=backend-assets/grpc/tinydream
endif
ifeq ($(findstring tts,$(GO_TAGS)),tts)
# OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
# OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
PIPER_CGO_CXXFLAGS+=-I$(CURDIR)/sources/go-piper/piper/src/cpp -I$(CURDIR)/sources/go-piper/piper/build/fi/include -I$(CURDIR)/sources/go-piper/piper/build/pi/include -I$(CURDIR)/sources/go-piper/piper/build/si/include
PIPER_CGO_LDFLAGS+=-L$(CURDIR)/sources/go-piper/piper/build/fi/lib -L$(CURDIR)/sources/go-piper/piper/build/pi/lib -L$(CURDIR)/sources/go-piper/piper/build/si/lib -lfmt -lspdlog -lucd
OPTIONAL_GRPC+=backend-assets/grpc/piper
endif
ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-ggml backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
# If empty, then we build all
ifeq ($(GRPC_BACKENDS),)
GRPC_BACKENDS=$(ALL_GRPC_BACKENDS)
endif
ifeq ($(BUILD_API_ONLY),true)
GRPC_BACKENDS=
endif
.PHONY: all test build vendor
all: help
## GPT4ALL
sources/gpt4all:
git clone --recurse-submodules $(GPT4ALL_REPO) sources/gpt4all
cd sources/gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
## go-piper
sources/go-piper:
git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper
cd sources/go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
## BERT embeddings
sources/go-bert:
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert
cd sources/go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
## stable diffusion
sources/go-stable-diffusion:
git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion sources/go-stable-diffusion
cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
sources/go-stable-diffusion/libstablediffusion.a:
$(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
## tiny-dream
sources/go-tiny-dream:
git clone --recurse-submodules https://github.com/M0Rf30/go-tiny-dream sources/go-tiny-dream
cd sources/go-tiny-dream && git checkout -b build $(TINYDREAM_VERSION) && git submodule update --init --recursive --depth 1
sources/go-tiny-dream/libtinydream.a:
$(MAKE) -C sources/go-tiny-dream libtinydream.a
## RWKV
sources/go-rwkv:
git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv
cd sources/go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
sources/go-rwkv/librwkv.a: sources/go-rwkv
cd sources/go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
sources/go-bert/libgobert.a: sources/go-bert
$(MAKE) -C sources/go-bert libgobert.a
backend-assets/gpt4all: sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
mkdir -p backend-assets/gpt4all
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
backend-assets/espeak-ng-data: sources/go-piper
mkdir -p backend-assets/espeak-ng-data
$(MAKE) -C sources/go-piper piper.o
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
## CEREBRAS GPT
sources/go-ggml-transformers:
git clone --recurse-submodules https://github.com/go-skynet/go-ggml-transformers.cpp sources/go-ggml-transformers
cd sources/go-ggml-transformers && git checkout -b build $(GOGGMLTRANSFORMERS_VERSION) && git submodule update --init --recursive --depth 1
sources/go-ggml-transformers/libtransformers.a: sources/go-ggml-transformers
$(MAKE) -C sources/go-ggml-transformers BUILD_TYPE=$(BUILD_TYPE) libtransformers.a
sources/whisper.cpp:
git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp
cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && make libwhisper.a
sources/go-llama:
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama
cd sources/go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
sources/go-llama-ggml:
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml
cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
sources/go-llama/libbinding.a: sources/go-llama
$(MAKE) -C sources/go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml
$(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
sources/go-piper/libpiper_binding.a: sources/go-piper
$(MAKE) -C sources/go-piper libpiper_binding.a example/main
backend/cpp/llama/llama.cpp:
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
get-sources: backend/cpp/llama/llama.cpp sources/go-llama sources/go-llama-ggml sources/go-ggml-transformers sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream
touch $@
replace:
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
$(GOCMD) mod edit -replace github.com/go-skynet/go-ggml-transformers.cpp=$(CURDIR)/sources/go-ggml-transformers
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
prepare-sources: get-sources replace
$(GOCMD) mod download
touch $@
## GENERIC
rebuild: ## Rebuilds the project
$(GOCMD) clean -cache
$(MAKE) -C sources/go-llama clean
$(MAKE) -C sources/go-llama-ggml clean
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
$(MAKE) -C sources/go-ggml-transformers clean
$(MAKE) -C sources/go-rwkv clean
$(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-stable-diffusion clean
$(MAKE) -C sources/go-bert clean
$(MAKE) -C sources/go-piper clean
$(MAKE) -C sources/go-tiny-dream clean
$(MAKE) build
prepare: prepare-sources $(OPTIONAL_TARGETS)
touch $@
clean: ## Remove build related file
$(GOCMD) clean -cache
rm -f prepare
rm -rf ./sources
rm -rf $(BINARY_NAME)
rm -rf release/
rm -rf backend-assets
$(MAKE) -C backend/cpp/grpc clean
$(MAKE) -C backend/cpp/llama clean
## Build:
build: prepare ## Build the project
C_INCLUDE_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j LIBRARY_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j $(GOCMD) build -o $(BINARY_NAME) ./
build: backend-assets grpcs prepare ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
buildgeneric: prepare-generic ## Build the project
C_INCLUDE_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j LIBRARY_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j $(GOCMD) build -o $(BINARY_NAME) ./
dist: build
mkdir -p release
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
go-gpt4all-j:
git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
@find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
osx-signed: build
codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
go-gpt4all-j/libgptj.a: go-gpt4all-j
$(MAKE) -C go-gpt4all-j libgptj.a
## Run
run: prepare ## run local-ai
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./
go-gpt4all-j/libgptj.a-generic: go-gpt4all-j
$(MAKE) -C go-gpt4all-j generic-libgptj.a
test-models/testmodel:
mkdir test-models
mkdir test-dir
wget -q https://huggingface.co/nnakasato/ggml-model-test/resolve/main/ggml-model-q4.bin -O test-models/testmodel
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
wget -q https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
wget -q https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
cp tests/models_fixtures/* test-models
go-llama:
git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
$(MAKE) -C go-llama libbinding.a
prepare-test: grpcs
cp -rf backend-assets api
cp tests/models_fixtures/* test-models
go-llama-generic:
git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
$(MAKE) -C go-llama generic-libbinding.a
test: prepare test-models/testmodel grpcs
@echo 'Running tests'
export GO_TAGS="tts stablediffusion"
$(MAKE) prepare-test
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts 5 --fail-fast -v -r ./api ./pkg
$(MAKE) test-gpt4all
$(MAKE) test-llama
$(MAKE) test-llama-gguf
$(MAKE) test-tts
$(MAKE) test-stablediffusion
prepare: go-llama go-gpt4all-j/libgptj.a
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
docker build --build-arg BUILD_GRPC=true --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
prepare-generic: go-llama-generic go-gpt4all-j/libgptj.a-generic
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
clean: ## Remove build related file
rm -fr ./go-llama
rm -rf ./go-gpt4all-j
rm -rf $(BINARY_NAME)
run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
## Run:
run: prepare
$(GOCMD) run ./ api
test-e2e:
@echo 'Running e2e tests'
BUILD_TYPE=$(BUILD_TYPE) \
LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e
## Test:
test: ## Run the tests of the project
$(GOTEST) -v -race ./... $(OUTPUT_OPTIONS)
teardown-e2e:
rm -rf $(TEST_DIR) || true
docker stop $$(docker ps -q --filter ancestor=localai-tests)
test-gpt4all: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r ./api ./pkg
test-llama: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r ./api ./pkg
test-llama-gguf: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r ./api ./pkg
test-tts: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts 1 -v -r ./api ./pkg
test-stablediffusion: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts 1 -v -r ./api ./pkg
test-container:
docker build --target requirements -t local-ai-test-container .
docker run -ti --rm --entrypoint /bin/bash -ti -v $(abspath ./):/build local-ai-test-container
## Help:
help: ## Show this help.
@@ -77,3 +400,171 @@ help: ## Show this help.
if (/^[a-zA-Z_-]+:.*?##.*$$/) {printf " ${YELLOW}%-20s${GREEN}%s${RESET}\n", $$1, $$2} \
else if (/^## .*$$/) {printf " ${CYAN}%s${RESET}\n", substr($$1,4)} \
}' $(MAKEFILE_LIST)
protogen: protogen-go protogen-python
protogen-go:
protoc -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
backend/backend.proto
protogen-python:
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/sentencetransformers/ --grpc_python_out=backend/python/sentencetransformers/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/transformers/ --grpc_python_out=backend/python/transformers/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/transformers-musicgen/ --grpc_python_out=backend/python/transformers-musicgen/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/autogptq/ --grpc_python_out=backend/python/autogptq/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama/ --grpc_python_out=backend/python/exllama/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/bark/ --grpc_python_out=backend/python/bark/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/diffusers/ --grpc_python_out=backend/python/diffusers/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/coqui/ --grpc_python_out=backend/python/coqui/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vall-e-x/ --grpc_python_out=backend/python/vall-e-x/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vllm/ --grpc_python_out=backend/python/vllm/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/petals/ --grpc_python_out=backend/python/petals/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/mamba/ --grpc_python_out=backend/python/mamba/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama2/ --grpc_python_out=backend/python/exllama2/ backend/backend.proto
## GRPC
# Note: it is duplicated in the Dockerfile
prepare-extra-conda-environments:
$(MAKE) -C backend/python/autogptq
$(MAKE) -C backend/python/bark
$(MAKE) -C backend/python/coqui
$(MAKE) -C backend/python/diffusers
$(MAKE) -C backend/python/vllm
$(MAKE) -C backend/python/mamba
$(MAKE) -C backend/python/sentencetransformers
$(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/transformers-musicgen
$(MAKE) -C backend/python/vall-e-x
$(MAKE) -C backend/python/exllama
$(MAKE) -C backend/python/petals
$(MAKE) -C backend/python/exllama2
prepare-test-extra:
$(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/diffusers
test-extra: prepare-test-extra
$(MAKE) -C backend/python/transformers test
$(MAKE) -C backend/python/diffusers test
backend-assets:
mkdir -p backend-assets
ifeq ($(BUILD_API_ONLY),true)
touch backend-assets/keep
endif
backend-assets/grpc:
mkdir -p backend-assets/grpc
backend-assets/grpc/llama: backend-assets/grpc sources/go-llama/libbinding.a
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama LIBRARY_PATH=$(CURDIR)/sources/go-llama \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./backend/go/llm/llama/
# TODO: every binary should have its own folder instead, so can have different implementations
ifeq ($(BUILD_TYPE),metal)
cp backend/cpp/llama/llama.cpp/ggml-metal.metal backend-assets/grpc/
endif
## BACKEND CPP LLAMA START
# Sets the variables in case it has to build the gRPC locally.
INSTALLED_PACKAGES=$(CURDIR)/backend/cpp/grpc/installed_packages
INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
-DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
-Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
-DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
backend/cpp/llama/grpc-server:
ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
$(MAKE) -C backend/cpp/grpc build
export _PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto && \
export _GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin && \
export PATH="${INSTALLED_PACKAGES}/bin:${PATH}" && \
CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
else
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
endif
## BACKEND CPP LLAMA END
##
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
# TODO: every binary should have its own folder instead, so can have different metal implementations
ifeq ($(BUILD_TYPE),metal)
cp backend/cpp/llama/llama.cpp/build/bin/ggml-metal.metal backend-assets/grpc/
endif
backend-assets/grpc/llama-ggml: backend-assets/grpc sources/go-llama-ggml/libbinding.a
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
backend-assets/grpc/dolly: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/dolly ./backend/go/llm/dolly/
backend-assets/grpc/gpt2: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt2 ./backend/go/llm/gpt2/
backend-assets/grpc/gptj: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptj ./backend/go/llm/gptj/
backend-assets/grpc/gptneox: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptneox ./backend/go/llm/gptneox/
backend-assets/grpc/mpt: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/mpt ./backend/go/llm/mpt/
backend-assets/grpc/replit: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/replit ./backend/go/llm/replit/
backend-assets/grpc/falcon-ggml: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon-ggml ./backend/go/llm/falcon-ggml/
backend-assets/grpc/starcoder: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/starcoder ./backend/go/llm/starcoder/
backend-assets/grpc/rwkv: backend-assets/grpc sources/go-rwkv/librwkv.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
backend-assets/grpc/bert-embeddings: backend-assets/grpc sources/go-bert/libgobert.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert LIBRARY_PATH=$(CURDIR)/sources/go-bert \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
backend-assets/grpc/langchain-huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./backend/go/llm/langchain/
backend-assets/grpc/stablediffusion: backend-assets/grpc
if [ ! -f backend-assets/grpc/stablediffusion ]; then \
$(MAKE) sources/go-stable-diffusion/libstablediffusion.a; \
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-stable-diffusion/ LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion; \
fi
backend-assets/grpc/tinydream: backend-assets/grpc sources/go-tiny-dream/libtinydream.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
backend-assets/grpc/piper: backend-assets/grpc backend-assets/espeak-ng-data sources/go-piper/libpiper_binding.a
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
backend-assets/grpc/whisper: backend-assets/grpc sources/whisper.cpp/libwhisper.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
grpcs: prepare $(GRPC_BACKENDS)

303
README.md

@@ -1,219 +1,186 @@
<h1 align="center">
<br>
<img height="300" src="https://user-images.githubusercontent.com/2420543/233147843-88697415-6dbf-4368-a862-ab217f9f7342.jpeg"> <br>
<img height="300" src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"> <br>
LocalAI
<br>
</h1>
> :warning: This project has been renamed from `llama-cli` to `LocalAI` to reflect the fact that we are focusing on a fast drop-in OpenAI API rather than on the CLI interface. We think that there are already many projects that can be used as a CLI interface, for instance [llama.cpp](https://github.com/ggerganov/llama.cpp) and [gpt4all](https://github.com/nomic-ai/gpt4all). If you were using `llama-cli` for CLI interactions and want to keep using it, use older versions or please open up an issue - contributions are welcome!
<p align="center">
<a href="https://github.com/go-skynet/LocalAI/fork" target="blank">
<img src="https://img.shields.io/github/forks/go-skynet/LocalAI?style=for-the-badge" alt="LocalAI forks"/>
</a>
<a href="https://github.com/go-skynet/LocalAI/stargazers" target="blank">
<img src="https://img.shields.io/github/stars/go-skynet/LocalAI?style=for-the-badge" alt="LocalAI stars"/>
</a>
<a href="https://github.com/go-skynet/LocalAI/pulls" target="blank">
<img src="https://img.shields.io/github/issues-pr/go-skynet/LocalAI?style=for-the-badge" alt="LocalAI pull-requests"/>
</a>
<a href='https://github.com/go-skynet/LocalAI/releases'>
<img src='https://img.shields.io/github/release/go-skynet/LocalAI?&label=Latest&style=for-the-badge'>
</a>
</p>
LocalAI is a straightforward, drop-in replacement API compatible with OpenAI for local CPU inferencing, based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all) and [ggml](https://github.com/ggerganov/ggml), including support for GPT4ALL-J, which is Apache 2.0 licensed and can be used for commercial purposes.
[<img src="https://img.shields.io/badge/dockerhub-images-important.svg?logo=Docker">](https://hub.docker.com/r/localai/localai)
[<img src="https://img.shields.io/badge/quay.io-images-important.svg?">](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest)
- OpenAI compatible API
- Supports multiple-models
- Once loaded the first time, it keeps models loaded in memory for faster inference
- Provides a simple command line interface that allows text generation directly from the terminal
- Support for prompt templates
- Doesn't shell-out, but uses C bindings for a faster inference and better performance. Uses [go-llama.cpp](https://github.com/go-skynet/go-llama.cpp) and [go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp).
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
>
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
## Model compatibility
[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)
It is compatible with the models supported by [llama.cpp](https://github.com/ggerganov/llama.cpp) and also [GPT4ALL-J](https://github.com/nomic-ai/gpt4all).
<p align="center">
<a href="https://twitter.com/LocalAI_API" target="blank">
<img src="https://img.shields.io/twitter/follow/LocalAI_API?label=Follow: LocalAI_API&style=social" alt="Follow LocalAI_API"/>
</a>
<a href="https://discord.gg/uJAeKSAGDy" target="blank">
<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
</a>
Note: You might need to convert older models to the new format, see [here](https://github.com/ggerganov/llama.cpp#using-gpt4all) for instance to run `gpt4all`.
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs and generate images and audio (and more) locally or on-prem with consumer-grade hardware, supporting multiple model families. Does not require a GPU.
## 🔥🔥 Hot topics / Roadmap

[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)

- Mamba support: https://github.com/mudler/LocalAI/pull/1589
- Start and share models with config file: https://github.com/mudler/LocalAI/pull/1522
- 🐸 Coqui: https://github.com/mudler/LocalAI/pull/1489
- Inline templates: https://github.com/mudler/LocalAI/pull/1452
- Mixtral: https://github.com/mudler/LocalAI/pull/1449
- Img2vid: https://github.com/mudler/LocalAI/pull/1442
- Musicgen: https://github.com/mudler/LocalAI/pull/1387

Hot topics (looking for contributors):

- Backends v2: https://github.com/mudler/LocalAI/issues/1126
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373

If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22

## 💻 [Getting started](https://localai.io/basics/getting_started/index.html)

For a detailed step-by-step introduction, refer to the [Getting Started](https://localai.io/basics/getting_started/index.html) guide. For those in a hurry, here's a straightforward one-liner to launch a LocalAI instance with [phi-2](https://huggingface.co/microsoft/phi-2) using `docker`:

```
docker run -ti -p 8080:8080 localai/localai:v2.5.1-ffmpeg-core phi-2
```

## Usage

> `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).

The easiest way to run LocalAI is by using `docker-compose`:

```bash
git clone https://github.com/go-skynet/LocalAI
cd LocalAI

# copy your models to models/
cp your-model.bin models/

# (optional) Edit the .env file to set things like context size and threads
# vim .env

# start with docker-compose
docker compose up -d --build

# Now the API is accessible at localhost:8080
curl http://localhost:8080/v1/models
# {"object":"list","data":[{"id":"your-model.bin","object":"model"}]}

curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
  "model": "your-model.bin",
  "prompt": "A long time ago in a galaxy far, far away",
  "temperature": 0.7
}'
```

## Prompt templates

The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the stanford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.

<details>

You can use a default template for every model present in your model path by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibling file, `foo.bin.tmpl`, which will be used as the default prompt. For instance, this can be used with alpaca:

```
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{{.Input}}

### Response:
```

See the [prompt-templates](https://github.com/go-skynet/LocalAI/tree/master/prompt-templates) directory in this repository for templates for most popular models.

</details>
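As an illustration of what the template mechanism does, here is a minimal, self-contained Go sketch of the same substitution using the standard `text/template` package. This is a sketch of the mechanism only, not the exact LocalAI code:

```go
package main

import (
	"os"
	"text/template"
)

func main() {
	// A minimal sketch of prompt templating, assuming an alpaca-style
	// template like the one above, stored next to the model as foo.bin.tmpl.
	tmpl := template.Must(template.New("prompt").Parse(
		"Below is an instruction that describes a task. " +
			"Write a response that appropriately completes the request.\n\n" +
			"### Instruction:\n{{.Input}}\n\n### Response:\n"))

	// The raw user prompt is exposed to the template as .Input.
	_ = tmpl.Execute(os.Stdout, struct{ Input string }{Input: "Say hello"})
}
```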
## 🚀 [Features](https://localai.io/features/)
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
- 🔥 [OpenAI functions](https://localai.io/features/openai-functions/) 🆕
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
- 🖼️ [Download Models directly from Huggingface](https://localai.io/models/)
- 🆕 [Vision API](https://localai.io/features/gpt-vision/)
## 💻 Usage

Check out the [Getting started](https://localai.io/basics/getting_started/index.html) section in our documentation.

## API

`LocalAI` provides an API for running text generation as a service that follows the OpenAI reference and can be used as a drop-in replacement. Once loaded the first time, models are kept in memory for faster inference.
<details>

Example of starting the API with `docker`:

```bash
docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:latest --models-path /path/to/models --context-size 700 --threads 4
```

And you'll see:

```
┌───────────────────────────────────────────────────┐
│                   Fiber v2.42.0                   │
│               http://127.0.0.1:8080               │
│       (bound on host 0.0.0.0 and port 8080)       │
│                                                   │
│ Handlers ............. 1  Processes ........... 1 │
│ Prefork ....... Disabled  PID ................. 1 │
└───────────────────────────────────────────────────┘
```

Note: Models have to end with `.bin` so that they can be listed by the `/models` endpoint.

You can control the API server options with command line arguments:
```
local-api --models-path <model_path> [--address <address>] [--threads <num_threads>]
```
The API takes the following parameters:

| Parameter    | Environment Variable | Default Value | Description                                           |
| ------------ | -------------------- | ------------- | ----------------------------------------------------- |
| models-path  | MODELS_PATH          |               | The path where you have models (ending with `.bin`).  |
| threads      | THREADS              | CPU cores     | The number of threads to use for text generation.     |
| address      | ADDRESS              | :8080         | The address and port to listen on.                    |
| context-size | CONTEXT_SIZE         | 512           | Default token context size.                           |

Once the server is running, you can start making requests to it over HTTP using the OpenAI API.
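Since the API is OpenAI-compatible, any OpenAI client library can be pointed at it. Below is a minimal sketch in Go using the `github.com/sashabaranov/go-openai` client (the same client used by the test suite in this changeset); the address and model name are assumptions based on the examples above:

```go
package main

import (
	"context"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	// Point the client at the local server. The token can stay empty
	// unless API keys have been configured on the server side.
	cfg := openai.DefaultConfig("")
	cfg.BaseURL = "http://localhost:8080/v1"
	client := openai.NewClientWithConfig(cfg)

	resp, err := client.CreateCompletion(context.Background(), openai.CompletionRequest{
		Model:       "your-model.bin", // the file you copied into models/
		Prompt:      "A long time ago in a galaxy far, far away",
		Temperature: 0.7,
	})
	if err != nil {
		panic(err)
	}
	fmt.Println(resp.Choices[0].Text)
}
```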
</details>

### Supported OpenAI API endpoints

You can check out the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create).

The following is the list of supported endpoints/parameters.

#### Chat completions

For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body:
```
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.7
}'
```
Available additional parameters: `top_p`, `top_k`, `max_tokens`

#### Completions

For example, to generate a completion, you can send a POST request to the `/v1/completions` endpoint with the instruction as the request body:
```
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"prompt": "A long time ago in a galaxy far, far away",
"temperature": 0.7
}'
```
Available additional parameters: `top_p`, `top_k`, `max_tokens`
#### List models
You can list all the models available with:
```
curl http://localhost:8080/v1/models
```
## Using other models
gpt4all (https://github.com/nomic-ai/gpt4all) works as well; however, the original model needs to be converted (the same applies to old alpaca models, too):
```bash
wget -O tokenizer.model https://huggingface.co/decapoda-research/llama-30b-hf/resolve/main/tokenizer.model
mkdir models
cp gpt4all.. models/
git clone https://gist.github.com/eiz/828bddec6162a023114ce19146cb2b82
pip install sentencepiece
python 828bddec6162a023114ce19146cb2b82/gistfile1.txt models tokenizer.model
# There will be a new model with the ".tmp" extension, you have to use that one!
```
### Windows compatibility
It should work; however, you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
### Kubernetes
You can run the API in Kubernetes, see an example deployment in [kubernetes](https://github.com/go-skynet/LocalAI/tree/master/kubernetes)
### Build locally
Pre-built images should fit most modern hardware; however, on some setups you can (and might need to) build the images manually.
In order to build the `LocalAI` container image locally you can use `docker`:
```
# build the image
docker build -t localai .
docker run localai
```
Or build the binary with `make`:
```
make build
```
## Short-term roadmap

- [x] Mimic OpenAI API (https://github.com/go-skynet/LocalAI/issues/10)
- [ ] Binary releases (https://github.com/go-skynet/LocalAI/issues/6)
- [ ] Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351)
- [x] Multi-model support
- [ ] Have a webUI!

### 🔗 Community and integrations

Build and deploy custom containers:
- https://github.com/sozercan/aikit

WebUIs:
- https://github.com/Jirubizu/localai-admin
- https://github.com/go-skynet/LocalAI-frontend

Model galleries:
- https://github.com/go-skynet/model-gallery

Auto Docker / Model setup:
- https://io.midori-ai.xyz/howtos/easy-localai-installer/
- https://io.midori-ai.xyz/howtos/easy-model-installer/

Other:
- Helm chart https://github.com/go-skynet/helm-charts
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
- Local Smart assistant https://github.com/mudler/LocalAGI
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation
- Discord bot https://github.com/mudler/LocalAGI/tree/main/examples/discord
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/

### 🔗 Resources

- 🆕 New! [LLM finetuning guide](https://localai.io/advanced/fine-tuning/)
- [How to build locally](https://localai.io/basics/build/index.html)
- [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes)
- [Projects integrating LocalAI](https://localai.io/integrations/)
- [How tos section](https://io.midori-ai.xyz/howtos/) (curated by our community)

## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)

- [Create a slackbot for teams and OSS projects that answer to documentation](https://mudler.pm/posts/smart-slackbot-for-teams/)
- [LocalAI meets k8sgpt](https://www.youtube.com/watch?v=PKrDNuJ_dfE)
- [Question Answering on Documents locally with LangChain, LocalAI, Chroma, and GPT4All](https://mudler.pm/posts/localai-question-answering/)
- [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65)

## Citation

If you utilize this repository or its data in a downstream project, please consider citing it with:

```
@misc{localai,
  author = {Ettore Di Giacinto},
  title = {LocalAI: The free, Open source OpenAI alternative},
  year = {2023},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/go-skynet/LocalAI}},
}
```

## ❤️ Sponsors

> Do you find LocalAI useful?

Support the project by becoming [a backer or sponsor](https://github.com/sponsors/mudler). Your logo will show up here with a link to your website.

A huge thank you to our generous sponsors who support this project:

| ![Spectro Cloud logo_600x600px_transparent bg](https://github.com/go-skynet/LocalAI/assets/2420543/68a6f3cb-8a65-4a4d-99b5-6417a8905512) |
|:-----------------------------------------------:|
| [Spectro Cloud](https://www.spectrocloud.com/)  |
| Spectro Cloud kindly supports LocalAI by providing GPU and computing resources to run tests on Lambda Labs! |

And a huge shout-out to individuals sponsoring the project by donating hardware or backing the project.

- [Sponsor list](https://github.com/sponsors/mudler)
- JDAM00 (donating HW for the CI)

## 🌟 Star history

[![LocalAI Star history Chart](https://api.star-history.com/svg?repos=go-skynet/LocalAI&type=Date)](https://star-history.com/#go-skynet/LocalAI&Date)

## 📖 License

LocalAI is a community-driven project created by [Ettore Di Giacinto](https://github.com/mudler/).

MIT - Author Ettore Di Giacinto

## 🙇 Acknowledgements

LocalAI couldn't have been built without the help of great software already available from the community. Thank you!

- [llama.cpp](https://github.com/ggerganov/llama.cpp)
- https://github.com/tatsu-lab/stanford_alpaca
- https://github.com/cornelk/llama-go for the initial ideas
- https://github.com/antimatter15/alpaca.cpp for the light model version (this is compatible and tested only with that checkpoint model!)
- https://github.com/EdVince/Stable-Diffusion-NCNN
- https://github.com/ggerganov/whisper.cpp
- https://github.com/saharNooby/rwkv.cpp
- https://github.com/rhasspy/piper
- https://github.com/cmp-nct/ggllm.cpp

## 🤗 Contributors

This is a community project, a special thanks to our contributors! 🤗

<a href="https://github.com/go-skynet/LocalAI/graphs/contributors">
  <img src="https://contrib.rocks/image?repo=go-skynet/LocalAI" />
</a>


@@ -4,288 +4,121 @@ import (
"encoding/json"
"errors"
"fmt"
"os"
"strings"
"sync"
model "github.com/go-skynet/LocalAI/pkg/model"
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
llama "github.com/go-skynet/go-llama.cpp"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/localai"
"github.com/go-skynet/LocalAI/api/openai"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/api/schema"
"github.com/go-skynet/LocalAI/internal"
"github.com/go-skynet/LocalAI/metrics"
"github.com/go-skynet/LocalAI/pkg/assets"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/startup"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/cors"
"github.com/gofiber/fiber/v2/middleware/logger"
"github.com/gofiber/fiber/v2/middleware/recover"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
type OpenAIResponse struct {
Created int `json:"created,omitempty"`
Object string `json:"chat.completion,omitempty"`
ID string `json:"id,omitempty"`
Model string `json:"model,omitempty"`
Choices []Choice `json:"choices,omitempty"`
}
func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader, error) {
options := options.NewOptions(opts...)
type Choice struct {
Index int `json:"index,omitempty"`
FinishReason string `json:"finish_reason,omitempty"`
Message *Message `json:"message,omitempty"`
Text string `json:"text,omitempty"`
}
type Message struct {
Role string `json:"role,omitempty"`
Content string `json:"content,omitempty"`
}
type OpenAIModel struct {
ID string `json:"id"`
Object string `json:"object"`
}
type OpenAIRequest struct {
Model string `json:"model"`
// Prompt is read only by completion API calls
Prompt string `json:"prompt"`
// Messages is read only by chat/completion API calls
Messages []Message `json:"messages"`
Echo bool `json:"echo"`
// Common options between all the API calls
TopP float64 `json:"top_p"`
TopK int `json:"top_k"`
Temperature float64 `json:"temperature"`
Maxtokens int `json:"max_tokens"`
N int `json:"n"`
// Custom parameters - not present in the OpenAI API
Batch int `json:"batch"`
F16 bool `json:"f16kv"`
IgnoreEOS bool `json:"ignore_eos"`
Seed int `json:"seed"`
}
// https://platform.openai.com/docs/api-reference/completions
func openAIEndpoint(chat bool, loader *model.ModelLoader, threads, ctx int, f16 bool, mutexMap *sync.Mutex, mutexes map[string]*sync.Mutex) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
var err error
var model *llama.LLama
var gptModel *gptj.GPTJ
input := new(OpenAIRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
}
modelFile := input.Model
received, _ := json.Marshal(input)
log.Debug().Msgf("Request received: %s", string(received))
// Set model from bearer token, if available
bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
if modelFile == "" && !bearerExists {
return fmt.Errorf("no model specified")
}
if bearerExists { // model specified in bearer token takes precedence
log.Debug().Msgf("Using model from bearer token: %s", bearer)
modelFile = bearer
}
// Try to load the model with both
var llamaerr error
llamaOpts := []llama.ModelOption{}
if ctx != 0 {
llamaOpts = append(llamaOpts, llama.SetContext(ctx))
}
if f16 {
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
}
model, llamaerr = loader.LoadLLaMAModel(modelFile, llamaOpts...)
if llamaerr != nil {
gptModel, err = loader.LoadGPTJModel(modelFile)
if err != nil {
return fmt.Errorf("llama: %s gpt: %s", llamaerr.Error(), err.Error()) // llama failed first, so we want to catch both errors
}
}
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
mutexMap.Lock()
l, ok := mutexes[modelFile]
if !ok {
m := &sync.Mutex{}
mutexes[modelFile] = m
l = m
}
mutexMap.Unlock()
l.Lock()
defer l.Unlock()
// Set the parameters for the language model prediction
topP := input.TopP
if topP == 0 {
topP = 0.7
}
topK := input.TopK
if topK == 0 {
topK = 80
}
temperature := input.Temperature
if temperature == 0 {
temperature = 0.9
}
tokens := input.Maxtokens
if tokens == 0 {
tokens = 512
}
predInput := input.Prompt
if chat {
mess := []string{}
// TODO: encode roles
for _, i := range input.Messages {
mess = append(mess, i.Content)
}
predInput = strings.Join(mess, "\n")
}
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := loader.TemplatePrefix(modelFile, struct {
Input string
}{Input: predInput})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
}
result := []Choice{}
n := input.N
if input.N == 0 {
n = 1
}
var predFunc func() (string, error)
switch {
case gptModel != nil:
predFunc = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gptj.PredictOption{
gptj.SetTemperature(temperature),
gptj.SetTopP(topP),
gptj.SetTopK(topK),
gptj.SetTokens(tokens),
gptj.SetThreads(threads),
}
if input.Batch != 0 {
predictOptions = append(predictOptions, gptj.SetBatch(input.Batch))
}
if input.Seed != 0 {
predictOptions = append(predictOptions, gptj.SetSeed(input.Seed))
}
return gptModel.Predict(
predInput,
predictOptions...,
)
}
case model != nil:
predFunc = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []llama.PredictOption{
llama.SetTemperature(temperature),
llama.SetTopP(topP),
llama.SetTopK(topK),
llama.SetTokens(tokens),
llama.SetThreads(threads),
}
if input.Batch != 0 {
predictOptions = append(predictOptions, llama.SetBatch(input.Batch))
}
if input.F16 {
predictOptions = append(predictOptions, llama.EnableF16KV)
}
if input.IgnoreEOS {
predictOptions = append(predictOptions, llama.IgnoreEOS)
}
if input.Seed != 0 {
predictOptions = append(predictOptions, llama.SetSeed(input.Seed))
}
return model.Predict(
predInput,
predictOptions...,
)
}
}
for i := 0; i < n; i++ {
prediction, err := predFunc()
if err != nil {
return err
}
if input.Echo {
prediction = predInput + prediction
}
if chat {
result = append(result, Choice{Message: &Message{Role: "assistant", Content: prediction}})
} else {
result = append(result, Choice{Text: prediction})
}
}
jsonResult, _ := json.Marshal(result)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: result,
})
zerolog.SetGlobalLevel(zerolog.InfoLevel)
if options.Debug {
zerolog.SetGlobalLevel(zerolog.DebugLevel)
}
}
func listModels(loader *model.ModelLoader) func(ctx *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
models, err := loader.ListModels()
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.Loader.ModelPath)
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
startup.PreloadModelsConfigurations(options.ModelLibraryURL, options.Loader.ModelPath, options.ModelsURL...)
cl := config.NewConfigLoader()
if err := cl.LoadConfigs(options.Loader.ModelPath); err != nil {
log.Error().Msgf("error loading config files: %s", err.Error())
}
if options.ConfigFile != "" {
if err := cl.LoadConfigFile(options.ConfigFile); err != nil {
log.Error().Msgf("error loading config file: %s", err.Error())
}
}
if err := cl.Preload(options.Loader.ModelPath); err != nil {
log.Error().Msgf("error downloading models: %s", err.Error())
}
if options.PreloadJSONModels != "" {
if err := localai.ApplyGalleryFromString(options.Loader.ModelPath, options.PreloadJSONModels, cl, options.Galleries); err != nil {
return nil, nil, err
}
}
if options.PreloadModelsFromPath != "" {
if err := localai.ApplyGalleryFromFile(options.Loader.ModelPath, options.PreloadModelsFromPath, cl, options.Galleries); err != nil {
return nil, nil, err
}
}
if options.Debug {
for _, v := range cl.ListConfigs() {
cfg, _ := cl.GetConfig(v)
log.Debug().Msgf("Model: %s (config: %+v)", v, cfg)
}
}
if options.AssetsDestination != "" {
// Extract files from the embedded FS
err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
if err != nil {
return err
log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
}
dataModels := []OpenAIModel{}
for _, m := range models {
dataModels = append(dataModels, OpenAIModel{ID: m, Object: "model"})
}
return c.JSON(struct {
Object string `json:"object"`
Data []OpenAIModel `json:"data"`
}{
Object: "list",
Data: dataModels,
})
}
// turn off any process that was started by GRPC if the context is canceled
go func() {
<-options.Context.Done()
log.Debug().Msgf("Context canceled, shutting down")
options.Loader.StopAllGRPC()
}()
if options.WatchDog {
wd := model.NewWatchDog(
options.Loader,
options.WatchDogBusyTimeout,
options.WatchDogIdleTimeout,
options.WatchDogBusy,
options.WatchDogIdle)
options.Loader.SetWatchDog(wd)
go wd.Run()
go func() {
<-options.Context.Done()
log.Debug().Msgf("Context canceled, shutting down")
wd.Shutdown()
}()
}
return options, cl, nil
}
func Start(loader *model.ModelLoader, listenAddr string, threads, ctxSize int, f16 bool) error {
func App(opts ...options.AppOption) (*fiber.App, error) {
options, cl, err := Startup(opts...)
if err != nil {
return nil, fmt.Errorf("failed basic startup tasks with error %s", err.Error())
}
// Return errors as JSON responses
app := fiber.New(fiber.Config{
BodyLimit: options.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
DisableStartupMessage: options.DisableMessage,
// Override default error handler
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
// Status code defaults to 500
@@ -298,31 +131,158 @@ func Start(loader *model.ModelLoader, listenAddr string, threads, ctxSize int, f
}
// Send custom error page
return ctx.Status(code).JSON(struct {
Error string `json:"error"`
}{Error: err.Error()})
return ctx.Status(code).JSON(
schema.ErrorResponse{
Error: &schema.APIError{Message: err.Error(), Code: code},
},
)
},
})
if options.Debug {
app.Use(logger.New(logger.Config{
Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
}))
}
// Default middleware config
app.Use(recover.New())
app.Use(cors.New())
if options.Metrics != nil {
app.Use(metrics.APIMiddleware(options.Metrics))
}
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
mu := map[string]*sync.Mutex{}
var mumutex = &sync.Mutex{}
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
auth := func(c *fiber.Ctx) error {
if len(options.ApiKeys) == 0 {
return c.Next()
}
// Check for api_keys.json file
fileContent, err := os.ReadFile("api_keys.json")
if err == nil {
// Parse JSON content from the file
var fileKeys []string
err := json.Unmarshal(fileContent, &fileKeys)
if err != nil {
return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"})
}
// Add file keys to options.ApiKeys
options.ApiKeys = append(options.ApiKeys, fileKeys...)
}
if len(options.ApiKeys) == 0 {
return c.Next()
}
authHeader := c.Get("Authorization")
if authHeader == "" {
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
}
authHeaderParts := strings.Split(authHeader, " ")
if len(authHeaderParts) != 2 || authHeaderParts[0] != "Bearer" {
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid Authorization header format"})
}
apiKey := authHeaderParts[1]
for _, key := range options.ApiKeys {
if apiKey == key {
return c.Next()
}
}
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
}
if options.CORS {
var c func(ctx *fiber.Ctx) error
if options.CORSAllowOrigins == "" {
c = cors.New()
} else {
c = cors.New(cors.Config{AllowOrigins: options.CORSAllowOrigins})
}
app.Use(c)
}
// LocalAI API endpoints
galleryService := localai.NewGalleryService(options.Loader.ModelPath)
galleryService.Start(options.Context, cl)
app.Get("/version", auth, func(c *fiber.Ctx) error {
return c.JSON(struct {
Version string `json:"version"`
}{Version: internal.PrintableVersion()})
})
// Make sure directories exists
os.MkdirAll(options.ImageDir, 0755)
os.MkdirAll(options.AudioDir, 0755)
os.MkdirAll(options.Loader.ModelPath, 0755)
modelGalleryService := localai.CreateModelGalleryService(options.Galleries, options.Loader.ModelPath, galleryService)
app.Post("/models/apply", auth, modelGalleryService.ApplyModelGalleryEndpoint())
app.Get("/models/available", auth, modelGalleryService.ListModelFromGalleryEndpoint())
app.Get("/models/galleries", auth, modelGalleryService.ListModelGalleriesEndpoint())
app.Post("/models/galleries", auth, modelGalleryService.AddModelGalleryEndpoint())
app.Delete("/models/galleries", auth, modelGalleryService.RemoveModelGalleryEndpoint())
app.Get("/models/jobs/:uuid", auth, modelGalleryService.GetOpStatusEndpoint())
app.Get("/models/jobs", auth, modelGalleryService.GetAllStatusEndpoint())
// openAI compatible API endpoint
app.Post("/v1/chat/completions", openAIEndpoint(true, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/chat/completions", openAIEndpoint(true, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/v1/completions", openAIEndpoint(false, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/completions", openAIEndpoint(false, loader, threads, ctxSize, f16, mumutex, mu))
// chat
app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, options))
app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, options))
app.Get("/v1/models", listModels(loader))
app.Get("/models", listModels(loader))
// edit
app.Post("/v1/edits", auth, openai.EditEndpoint(cl, options))
app.Post("/edits", auth, openai.EditEndpoint(cl, options))
// Start the server
app.Listen(listenAddr)
return nil
// completion
app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, options))
app.Post("/completions", auth, openai.CompletionEndpoint(cl, options))
app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, options))
// embeddings
app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, options))
app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, options))
app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, options))
// audio
app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, options))
app.Post("/tts", auth, localai.TTSEndpoint(cl, options))
// images
app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, options))
if options.ImageDir != "" {
app.Static("/generated-images", options.ImageDir)
}
if options.AudioDir != "" {
app.Static("/generated-audio", options.AudioDir)
}
ok := func(c *fiber.Ctx) error {
return c.SendStatus(200)
}
// Kubernetes health checks
app.Get("/healthz", ok)
app.Get("/readyz", ok)
// Experimental Backend Statistics Module
backendMonitor := localai.NewBackendMonitor(cl, options) // Split out for now
app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitor))
app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitor))
// models
app.Get("/v1/models", auth, openai.ListModelsEndpoint(options.Loader, cl))
app.Get("/models", auth, openai.ListModelsEndpoint(options.Loader, cl))
app.Get("/metrics", metrics.MetricsHandler())
return app, nil
}

api/api_test.go Normal file (+861 lines)

@@ -0,0 +1,861 @@
package api_test
import (
"bytes"
"context"
"embed"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"runtime"
. "github.com/go-skynet/LocalAI/api"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/metrics"
"github.com/go-skynet/LocalAI/pkg/downloader"
"github.com/go-skynet/LocalAI/pkg/gallery"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"gopkg.in/yaml.v3"
openaigo "github.com/otiai10/openaigo"
"github.com/sashabaranov/go-openai"
"github.com/sashabaranov/go-openai/jsonschema"
)
type modelApplyRequest struct {
ID string `json:"id"`
URL string `json:"url"`
Name string `json:"name"`
Overrides map[string]interface{} `json:"overrides"`
}
func getModelStatus(url string) (response map[string]interface{}) {
// Create the HTTP request
resp, err := http.Get(url)
if err != nil {
fmt.Println("Error creating request:", err)
return
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
fmt.Println("Error reading response body:", err)
return
}
// Unmarshal the response into a map[string]interface{}
err = json.Unmarshal(body, &response)
if err != nil {
fmt.Println("Error unmarshaling JSON response:", err)
return
}
return
}
func getModels(url string) (response []gallery.GalleryModel) {
downloader.GetURI(url, func(url string, i []byte) error {
// Unmarshal YAML data into a struct
return json.Unmarshal(i, &response)
})
return
}
func postModelApplyRequest(url string, request modelApplyRequest) (response map[string]interface{}) {
//url := "http://localhost:AI/models/apply"
// Create the request payload
payload, err := json.Marshal(request)
if err != nil {
fmt.Println("Error marshaling JSON:", err)
return
}
// Create the HTTP request
req, err := http.NewRequest("POST", url, bytes.NewBuffer(payload))
if err != nil {
fmt.Println("Error creating request:", err)
return
}
req.Header.Set("Content-Type", "application/json")
// Make the request
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
fmt.Println("Error making request:", err)
return
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
fmt.Println("Error reading response body:", err)
return
}
// Unmarshal the response into a map[string]interface{}
err = json.Unmarshal(body, &response)
if err != nil {
fmt.Println("Error unmarshaling JSON response:", err)
return
}
return
}
//go:embed backend-assets/*
var backendAssets embed.FS
var _ = Describe("API test", func() {
var app *fiber.App
var modelLoader *model.ModelLoader
var client *openai.Client
var client2 *openaigo.Client
var c context.Context
var cancel context.CancelFunc
var tmpdir string
commonOpts := []options.AppOption{
options.WithDebug(true),
options.WithDisableMessage(true),
}
Context("API with ephemeral models", func() {
BeforeEach(func() {
var err error
tmpdir, err = os.MkdirTemp("", "")
Expect(err).ToNot(HaveOccurred())
modelLoader = model.NewModelLoader(tmpdir)
c, cancel = context.WithCancel(context.Background())
g := []gallery.GalleryModel{
{
Name: "bert",
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
},
{
Name: "bert2",
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
Overrides: map[string]interface{}{"foo": "bar"},
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"}},
},
}
out, err := yaml.Marshal(g)
Expect(err).ToNot(HaveOccurred())
err = os.WriteFile(filepath.Join(tmpdir, "gallery_simple.yaml"), out, 0644)
Expect(err).ToNot(HaveOccurred())
galleries := []gallery.Gallery{
{
Name: "test",
URL: "file://" + filepath.Join(tmpdir, "gallery_simple.yaml"),
},
}
metricsService, err := metrics.SetupMetrics()
Expect(err).ToNot(HaveOccurred())
app, err = App(
append(commonOpts,
options.WithMetrics(metricsService),
options.WithContext(c),
options.WithGalleries(galleries),
options.WithModelLoader(modelLoader), options.WithBackendAssets(backendAssets), options.WithBackendAssetsOutput(tmpdir))...)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig("")
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
client2 = openaigo.NewClient("")
client2.BaseURL = defaultConfig.BaseURL
// Wait for API to be ready
client = openai.NewClientWithConfig(defaultConfig)
Eventually(func() error {
_, err := client.ListModels(context.TODO())
return err
}, "2m").ShouldNot(HaveOccurred())
})
AfterEach(func() {
cancel()
app.Shutdown()
os.RemoveAll(tmpdir)
})
Context("Applying models", func() {
It("applies models from a gallery", func() {
models := getModels("http://127.0.0.1:9090/models/available")
Expect(len(models)).To(Equal(2), fmt.Sprint(models))
Expect(models[0].Installed).To(BeFalse(), fmt.Sprint(models))
Expect(models[1].Installed).To(BeFalse(), fmt.Sprint(models))
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
ID: "test@bert2",
})
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
uuid := response["uuid"].(string)
resp := map[string]interface{}{}
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
fmt.Println(response)
resp = response
return response["processed"].(bool)
}, "360s", "10s").Should(Equal(true))
Expect(resp["message"]).ToNot(ContainSubstring("error"))
dat, err := os.ReadFile(filepath.Join(tmpdir, "bert2.yaml"))
Expect(err).ToNot(HaveOccurred())
_, err = os.ReadFile(filepath.Join(tmpdir, "foo.yaml"))
Expect(err).ToNot(HaveOccurred())
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["backend"]).To(Equal("bert-embeddings"))
Expect(content["foo"]).To(Equal("bar"))
models = getModels("http://127.0.0.1:9090/models/available")
Expect(len(models)).To(Equal(2), fmt.Sprint(models))
Expect(models[0].Name).To(Or(Equal("bert"), Equal("bert2")))
Expect(models[1].Name).To(Or(Equal("bert"), Equal("bert2")))
for _, m := range models {
if m.Name == "bert2" {
Expect(m.Installed).To(BeTrue())
} else {
Expect(m.Installed).To(BeFalse())
}
}
})
It("overrides models", func() {
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
Name: "bert",
Overrides: map[string]interface{}{
"backend": "llama",
},
})
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
uuid := response["uuid"].(string)
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
return response["processed"].(bool)
}, "360s", "10s").Should(Equal(true))
dat, err := os.ReadFile(filepath.Join(tmpdir, "bert.yaml"))
Expect(err).ToNot(HaveOccurred())
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["backend"]).To(Equal("llama"))
})
It("apply models without overrides", func() {
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
Name: "bert",
Overrides: map[string]interface{}{},
})
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
uuid := response["uuid"].(string)
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
return response["processed"].(bool)
}, "360s", "10s").Should(Equal(true))
dat, err := os.ReadFile(filepath.Join(tmpdir, "bert.yaml"))
Expect(err).ToNot(HaveOccurred())
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["backend"]).To(Equal("bert-embeddings"))
})
It("runs openllama(llama-ggml backend)", Label("llama"), func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "github:go-skynet/model-gallery/openllama_3b.yaml",
Name: "openllama_3b",
Overrides: map[string]interface{}{"backend": "llama-ggml", "mmap": true, "f16": true, "context_size": 128},
})
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
uuid := response["uuid"].(string)
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
return response["processed"].(bool)
}, "360s", "10s").Should(Equal(true))
By("testing completion")
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "openllama_3b", Prompt: "Count up to five: one, two, three, four, "})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).To(ContainSubstring("five"))
By("testing functions")
resp2, err := client.CreateChatCompletion(
context.TODO(),
openai.ChatCompletionRequest{
Model: "openllama_3b",
Messages: []openai.ChatCompletionMessage{
{
Role: "user",
Content: "What is the weather like in San Francisco (celsius)?",
},
},
Functions: []openai.FunctionDefinition{
openai.FunctionDefinition{
Name: "get_current_weather",
Description: "Get the current weather",
Parameters: jsonschema.Definition{
Type: jsonschema.Object,
Properties: map[string]jsonschema.Definition{
"location": {
Type: jsonschema.String,
Description: "The city and state, e.g. San Francisco, CA",
},
"unit": {
Type: jsonschema.String,
Enum: []string{"celcius", "fahrenheit"},
},
},
Required: []string{"location"},
},
},
},
})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp2.Choices)).To(Equal(1))
Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
var res map[string]string
err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
Expect(err).ToNot(HaveOccurred())
Expect(res["location"]).To(Equal("San Francisco, California, United States"), fmt.Sprint(res))
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
})
It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
modelName := "codellama"
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "github:go-skynet/model-gallery/codellama-7b-instruct.yaml",
Name: modelName,
Overrides: map[string]interface{}{"backend": "llama", "mmap": true, "f16": true, "context_size": 128},
})
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
uuid := response["uuid"].(string)
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
return response["processed"].(bool)
}, "360s", "10s").Should(Equal(true))
By("testing chat")
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: modelName, Messages: []openai.ChatCompletionMessage{
{
Role: "user",
Content: "How much is 2+2?",
},
}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")))
By("testing functions")
resp2, err := client.CreateChatCompletion(
context.TODO(),
openai.ChatCompletionRequest{
Model: modelName,
Messages: []openai.ChatCompletionMessage{
{
Role: "user",
Content: "What is the weather like in San Francisco (celsius)?",
},
},
Functions: []openai.FunctionDefinition{
openai.FunctionDefinition{
Name: "get_current_weather",
Description: "Get the current weather",
Parameters: jsonschema.Definition{
Type: jsonschema.Object,
Properties: map[string]jsonschema.Definition{
"location": {
Type: jsonschema.String,
Description: "The city and state, e.g. San Francisco, CA",
},
"unit": {
Type: jsonschema.String,
Enum: []string{"celcius", "fahrenheit"},
},
},
Required: []string{"location"},
},
},
},
})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp2.Choices)).To(Equal(1))
Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
var res map[string]string
err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
Expect(err).ToNot(HaveOccurred())
Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res))
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
})
It("runs gpt4all", Label("gpt4all"), func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "github:go-skynet/model-gallery/gpt4all-j.yaml",
Name: "gpt4all-j",
})
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
uuid := response["uuid"].(string)
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
return response["processed"].(bool)
}, "960s", "10s").Should(Equal(true))
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-j", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "How are you?"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).To(ContainSubstring("well"))
})
})
})
Context("Model gallery", func() {
BeforeEach(func() {
var err error
tmpdir, err = os.MkdirTemp("", "")
Expect(err).ToNot(HaveOccurred())
modelLoader = model.NewModelLoader(tmpdir)
c, cancel = context.WithCancel(context.Background())
galleries := []gallery.Gallery{
{
Name: "model-gallery",
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/index.yaml",
},
}
metricsService, err := metrics.SetupMetrics()
Expect(err).ToNot(HaveOccurred())
app, err = App(
append(commonOpts,
options.WithContext(c),
options.WithMetrics(metricsService),
options.WithAudioDir(tmpdir),
options.WithImageDir(tmpdir),
options.WithGalleries(galleries),
options.WithModelLoader(modelLoader),
options.WithBackendAssets(backendAssets),
options.WithBackendAssetsOutput(tmpdir))...,
)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig("")
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
client2 = openaigo.NewClient("")
client2.BaseURL = defaultConfig.BaseURL
// Wait for API to be ready
client = openai.NewClientWithConfig(defaultConfig)
Eventually(func() error {
_, err := client.ListModels(context.TODO())
return err
}, "2m").ShouldNot(HaveOccurred())
})
AfterEach(func() {
cancel()
app.Shutdown()
os.RemoveAll(tmpdir)
})
It("installs and is capable to run tts", Label("tts"), func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
ID: "model-gallery@voice-en-us-kathleen-low",
})
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
uuid := response["uuid"].(string)
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
fmt.Println(response)
return response["processed"].(bool)
}, "360s", "10s").Should(Equal(true))
// An HTTP Post to the /tts endpoint should return a wav audio file
resp, err := http.Post("http://127.0.0.1:9090/tts", "application/json", bytes.NewBuffer([]byte(`{"input": "Hello world", "model": "en-us-kathleen-low.onnx"}`)))
Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
dat, err := io.ReadAll(resp.Body)
Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
Expect(resp.StatusCode).To(Equal(200), fmt.Sprint(string(dat)))
Expect(resp.Header.Get("Content-Type")).To(Equal("audio/x-wav"))
})
It("installs and is capable to generate images", Label("stablediffusion"), func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
ID: "model-gallery@stablediffusion",
Overrides: map[string]interface{}{
"parameters": map[string]interface{}{"model": "stablediffusion_assets"},
},
})
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
uuid := response["uuid"].(string)
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
fmt.Println(response)
return response["processed"].(bool)
}, "360s", "10s").Should(Equal(true))
resp, err := http.Post(
"http://127.0.0.1:9090/v1/images/generations",
"application/json",
bytes.NewBuffer([]byte(`{
"prompt": "floating hair, portrait, ((loli)), ((one girl)), cute face, hidden hands, asymmetrical bangs, beautiful detailed eyes, eye shadow, hair ornament, ribbons, bowties, buttons, pleated skirt, (((masterpiece))), ((best quality)), colorful|((part of the head)), ((((mutated hands and fingers)))), deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, Octane renderer, lowres, bad anatomy, bad hands, text",
"mode": 2, "seed":9000,
"size": "256x256", "n":2}`)))
// The response should contain an URL
Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
dat, err := io.ReadAll(resp.Body)
Expect(err).ToNot(HaveOccurred(), string(dat))
Expect(string(dat)).To(ContainSubstring("http://127.0.0.1:9090/"), string(dat))
Expect(string(dat)).To(ContainSubstring(".png"), string(dat))
})
})
Context("API query", func() {
BeforeEach(func() {
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
c, cancel = context.WithCancel(context.Background())
metricsService, err := metrics.SetupMetrics()
Expect(err).ToNot(HaveOccurred())
app, err = App(
append(commonOpts,
options.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
options.WithContext(c),
options.WithModelLoader(modelLoader),
options.WithMetrics(metricsService),
)...)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig("")
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
client2 = openaigo.NewClient("")
client2.BaseURL = defaultConfig.BaseURL
// Wait for API to be ready
client = openai.NewClientWithConfig(defaultConfig)
Eventually(func() error {
_, err := client.ListModels(context.TODO())
return err
}, "2m").ShouldNot(HaveOccurred())
})
AfterEach(func() {
cancel()
app.Shutdown()
})
It("returns the models list", func() {
models, err := client.ListModels(context.TODO())
Expect(err).ToNot(HaveOccurred())
Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
})
It("can generate completions", func() {
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
})
It("can generate chat completions ", func() {
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("can generate completions from model configs", func() {
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: "abcdedfghikl"})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
})
It("can generate chat completions from model configs", func() {
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("returns errors", func() {
backends := len(model.AutoLoadBackends) + 1 // +1 for huggingface
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring(fmt.Sprintf("error, status code: 500, message: could not load model - all backends returned error: %d errors occurred:", backends)))
})
It("transcribes audio", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
resp, err := client.CreateTranscription(
context.Background(),
openai.AudioRequest{
Model: openai.Whisper1,
FilePath: filepath.Join(os.Getenv("TEST_DIR"), "audio.wav"),
},
)
Expect(err).ToNot(HaveOccurred())
Expect(resp.Text).To(ContainSubstring("This is the Micro Machine Man presenting"))
})
It("calculate embeddings", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
resp, err := client.CreateEmbeddings(
context.Background(),
openai.EmbeddingRequest{
Model: openai.AdaEmbeddingV2,
Input: []string{"sun", "cat"},
},
)
Expect(err).ToNot(HaveOccurred(), err)
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
sunEmbedding := resp.Data[0].Embedding
resp2, err := client.CreateEmbeddings(
context.Background(),
openai.EmbeddingRequest{
Model: openai.AdaEmbeddingV2,
Input: []string{"sun"},
},
)
Expect(err).ToNot(HaveOccurred())
Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
})
Context("External gRPC calls", func() {
It("calculate embeddings with sentencetransformers", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
resp, err := client.CreateEmbeddings(
context.Background(),
openai.EmbeddingRequest{
Model: openai.AdaCodeSearchCode,
Input: []string{"sun", "cat"},
},
)
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
sunEmbedding := resp.Data[0].Embedding
resp2, err := client.CreateEmbeddings(
context.Background(),
openai.EmbeddingRequest{
Model: openai.AdaCodeSearchCode,
Input: []string{"sun"},
},
)
Expect(err).ToNot(HaveOccurred())
Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
Expect(resp2.Data[0].Embedding).ToNot(Equal(resp.Data[1].Embedding))
})
})
Context("backends", func() {
It("runs rwkv completion", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,"})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices) > 0).To(BeTrue())
Expect(resp.Choices[0].Text).To(ContainSubstring("five"))
stream, err := client.CreateCompletionStream(context.TODO(), openai.CompletionRequest{
Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,", Stream: true,
})
Expect(err).ToNot(HaveOccurred())
defer stream.Close()
tokens := 0
text := ""
for {
response, err := stream.Recv()
if errors.Is(err, io.EOF) {
break
}
Expect(err).ToNot(HaveOccurred())
text += response.Choices[0].Text
tokens++
}
Expect(text).ToNot(BeEmpty())
Expect(text).To(ContainSubstring("five"))
Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
})
It("runs rwkv chat completion", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
resp, err := client.CreateChatCompletion(context.TODO(),
openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices) > 0).To(BeTrue())
Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("Sure"), ContainSubstring("five")))
stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
Expect(err).ToNot(HaveOccurred())
defer stream.Close()
tokens := 0
text := ""
for {
response, err := stream.Recv()
if errors.Is(err, io.EOF) {
break
}
Expect(err).ToNot(HaveOccurred())
text += response.Choices[0].Delta.Content
tokens++
}
Expect(text).ToNot(BeEmpty())
Expect(text).To(Or(ContainSubstring("Sure"), ContainSubstring("five")))
Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
})
})
})
Context("Config file", func() {
BeforeEach(func() {
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
c, cancel = context.WithCancel(context.Background())
metricsService, err := metrics.SetupMetrics()
Expect(err).ToNot(HaveOccurred())
app, err = App(
append(commonOpts,
options.WithContext(c),
options.WithMetrics(metricsService),
options.WithModelLoader(modelLoader),
options.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig("")
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
client2 = openaigo.NewClient("")
client2.BaseURL = defaultConfig.BaseURL
// Wait for API to be ready
client = openai.NewClientWithConfig(defaultConfig)
Eventually(func() error {
_, err := client.ListModels(context.TODO())
return err
}, "2m").ShouldNot(HaveOccurred())
})
AfterEach(func() {
cancel()
app.Shutdown()
})
It("can generate chat completions from config file (list1)", func() {
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "abcdedfghikl"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("can generate chat completions from config file (list2)", func() {
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "abcdedfghikl"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("can generate edit completions from config file", func() {
request := openaigo.EditCreateRequestBody{
Model: "list2",
Instruction: "foo",
Input: "bar",
}
resp, err := client2.CreateEdit(context.Background(), request)
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
})
})
})

api/apt_suite_test.go Normal file (+13 lines)

@@ -0,0 +1,13 @@
package api_test
import (
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
func TestLocalAI(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "LocalAI test suite")
}
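These specs plug into the standard `go test` runner via Ginkgo's `RunSpecs`; assuming the environment variables referenced in the specs above (for example `MODELS_PATH`) are set, the suite can typically be run with a plain `go test` on the `api` package.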

api/backend/embeddings.go Normal file (+92 lines)

@@ -0,0 +1,92 @@
package backend
import (
"fmt"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/pkg/grpc"
model "github.com/go-skynet/LocalAI/pkg/model"
)
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.Config, o *options.Option) (func() ([]float32, error), error) {
if !c.Embeddings {
return nil, fmt.Errorf("endpoint disabled for this model by API configuration")
}
modelFile := c.Model
grpcOpts := gRPCModelOpts(c)
var inferenceModel interface{}
var err error
opts := modelOpts(c, o, []model.Option{
model.WithLoadGRPCLoadModelOpts(grpcOpts),
model.WithThreads(uint32(c.Threads)),
model.WithAssetDir(o.AssetsDestination),
model.WithModel(modelFile),
model.WithContext(o.Context),
})
if c.Backend == "" {
inferenceModel, err = loader.GreedyLoader(opts...)
} else {
opts = append(opts, model.WithBackendString(c.Backend))
inferenceModel, err = loader.BackendLoader(opts...)
}
if err != nil {
return nil, err
}
var fn func() ([]float32, error)
switch model := inferenceModel.(type) {
case grpc.Backend:
fn = func() ([]float32, error) {
predictOptions := gRPCPredictOpts(c, loader.ModelPath)
if len(tokens) > 0 {
embeds := []int32{}
for _, t := range tokens {
embeds = append(embeds, int32(t))
}
predictOptions.EmbeddingTokens = embeds
res, err := model.Embeddings(o.Context, predictOptions)
if err != nil {
return nil, err
}
return res.Embeddings, nil
}
predictOptions.Embeddings = s
res, err := model.Embeddings(o.Context, predictOptions)
if err != nil {
return nil, err
}
return res.Embeddings, nil
}
default:
fn = func() ([]float32, error) {
return nil, fmt.Errorf("embeddings not supported by the backend")
}
}
return func() ([]float32, error) {
embeds, err := fn()
if err != nil {
return embeds, err
}
// Remove trailing 0s
for i := len(embeds) - 1; i >= 0; i-- {
if embeds[i] == 0.0 {
embeds = embeds[:i]
} else {
break
}
}
return embeds, nil
}, nil
}
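
ModelEmbedding separates backend resolution from inference: the model is loaded (or greedily auto-detected when no backend is configured) once, and the returned closure is what actually computes the vector. A minimal caller sketch, assuming a loader and options are already wired up; the helper below is illustrative and not part of this diff:

package example

import (
	"github.com/go-skynet/LocalAI/api/backend"
	config "github.com/go-skynet/LocalAI/api/config"
	"github.com/go-skynet/LocalAI/api/options"
	model "github.com/go-skynet/LocalAI/pkg/model"
)

// embedText is a hypothetical helper around ModelEmbedding.
func embedText(loader *model.ModelLoader, cfg config.Config, o *options.Option, text string) ([]float32, error) {
	cfg.Embeddings = true // the endpoint is disabled unless the model config opts in
	embedFn, err := backend.ModelEmbedding(text, nil, loader, cfg, o) // nil tokens: embed the raw string
	if err != nil {
		return nil, err
	}
	return embedFn() // trailing zeros are already stripped by the wrapper
}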

61
api/backend/image.go Normal file

@@ -0,0 +1,61 @@
package backend
import (
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
model "github.com/go-skynet/LocalAI/pkg/model"
)
func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, c config.Config, o *options.Option) (func() error, error) {
opts := modelOpts(c, o, []model.Option{
model.WithBackendString(c.Backend),
model.WithAssetDir(o.AssetsDestination),
model.WithThreads(uint32(c.Threads)),
model.WithContext(o.Context),
model.WithModel(c.Model),
model.WithLoadGRPCLoadModelOpts(&proto.ModelOptions{
CUDA: c.CUDA || c.Diffusers.CUDA,
SchedulerType: c.Diffusers.SchedulerType,
PipelineType: c.Diffusers.PipelineType,
CFGScale: c.Diffusers.CFGScale,
LoraAdapter: c.LoraAdapter,
LoraScale: c.LoraScale,
LoraBase: c.LoraBase,
IMG2IMG: c.Diffusers.IMG2IMG,
CLIPModel: c.Diffusers.ClipModel,
CLIPSubfolder: c.Diffusers.ClipSubFolder,
CLIPSkip: int32(c.Diffusers.ClipSkip),
ControlNet: c.Diffusers.ControlNet,
}),
})
inferenceModel, err := loader.BackendLoader(
opts...,
)
if err != nil {
return nil, err
}
fn := func() error {
_, err := inferenceModel.GenerateImage(
o.Context,
&proto.GenerateImageRequest{
Height: int32(height),
Width: int32(width),
Mode: int32(mode),
Step: int32(step),
Seed: int32(seed),
CLIPSkip: int32(c.Diffusers.ClipSkip),
PositivePrompt: positive_prompt,
NegativePrompt: negative_prompt,
Dst: dst,
Src: src,
EnableParameters: c.Diffusers.EnableParameters,
})
return err
}
return fn, nil
}
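
ImageGeneration follows the same deferred pattern: the diffusion model is loaded eagerly, and the returned closure renders the image when invoked. A hedged caller sketch; the dimensions, step count, paths, and the mode/seed values are illustrative, since their exact semantics are backend-specific:

package example

import (
	"github.com/go-skynet/LocalAI/api/backend"
	config "github.com/go-skynet/LocalAI/api/config"
	"github.com/go-skynet/LocalAI/api/options"
	model "github.com/go-skynet/LocalAI/pkg/model"
)

// generateImage is a hypothetical wrapper around ImageGeneration.
func generateImage(loader *model.ModelLoader, cfg config.Config, o *options.Option) error {
	fn, err := backend.ImageGeneration(512, 512, 0, 25, -1, // height, width, mode, step, seed
		"a watercolor lighthouse", "", // positive and negative prompts
		"", "/tmp/out.png", // src (empty for text-to-image) and dst
		loader, cfg, o)
	if err != nil {
		return err
	}
	return fn()
}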

167
api/backend/llm.go Normal file

@@ -0,0 +1,167 @@
package backend
import (
"context"
"os"
"regexp"
"strings"
"sync"
"unicode/utf8"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/pkg/gallery"
"github.com/go-skynet/LocalAI/pkg/grpc"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/utils"
)
type LLMResponse struct {
Response string // should this be []byte?
Usage TokenUsage
}
type TokenUsage struct {
Prompt int
Completion int
}
func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
modelFile := c.Model
grpcOpts := gRPCModelOpts(c)
var inferenceModel grpc.Backend
var err error
opts := modelOpts(c, o, []model.Option{
model.WithLoadGRPCLoadModelOpts(grpcOpts),
model.WithThreads(uint32(c.Threads)), // some models use this to allocate threads during startup
model.WithAssetDir(o.AssetsDestination),
model.WithModel(modelFile),
model.WithContext(o.Context),
})
if c.Backend != "" {
opts = append(opts, model.WithBackendString(c.Backend))
}
// Check if the modelFile exists, if it doesn't try to load it from the gallery
if o.AutoloadGalleries { // experimental
if _, err := os.Stat(modelFile); os.IsNotExist(err) {
utils.ResetDownloadTimers()
// if we failed to load the model, we try to download it
err := gallery.InstallModelFromGalleryByName(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction)
if err != nil {
return nil, err
}
}
}
if c.Backend == "" {
inferenceModel, err = loader.GreedyLoader(opts...)
} else {
inferenceModel, err = loader.BackendLoader(opts...)
}
if err != nil {
return nil, err
}
// in gRPC, the backend is supposed to answer with a single token if streaming is not supported
fn := func() (LLMResponse, error) {
opts := gRPCPredictOpts(c, loader.ModelPath)
opts.Prompt = s
opts.Images = images
tokenUsage := TokenUsage{}
// Check the per-model feature flag for usage, since tokenCallback may have a cost.
// Defaults to off, since it is still experimental
if c.FeatureFlag.Enabled("usage") {
userTokenCallback := tokenCallback
if userTokenCallback == nil {
userTokenCallback = func(token string, usage TokenUsage) bool {
return true
}
}
promptInfo, pErr := inferenceModel.TokenizeString(ctx, opts)
if pErr == nil && promptInfo.Length > 0 {
tokenUsage.Prompt = int(promptInfo.Length)
}
tokenCallback = func(token string, usage TokenUsage) bool {
tokenUsage.Completion++
return userTokenCallback(token, tokenUsage)
}
}
if tokenCallback != nil {
ss := ""
var partialRune []byte
err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) {
partialRune = append(partialRune, chars...)
for len(partialRune) > 0 {
r, size := utf8.DecodeRune(partialRune)
if r == utf8.RuneError {
// incomplete rune, wait for more bytes
break
}
tokenCallback(string(r), tokenUsage)
ss += string(r)
partialRune = partialRune[size:]
}
})
return LLMResponse{
Response: ss,
Usage: tokenUsage,
}, err
} else {
// TODO: Is the chicken bit the only way to get here? Is that acceptable?
reply, err := inferenceModel.Predict(ctx, opts)
if err != nil {
return LLMResponse{}, err
}
return LLMResponse{
Response: string(reply.Message),
Usage: tokenUsage,
}, err
}
}
return fn, nil
}
var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
var mu sync.Mutex = sync.Mutex{}
func Finetune(config config.Config, input, prediction string) string {
if config.Echo {
prediction = input + prediction
}
for _, c := range config.Cutstrings {
mu.Lock()
reg, ok := cutstrings[c]
if !ok {
cutstrings[c] = regexp.MustCompile(c)
reg = cutstrings[c]
}
mu.Unlock()
prediction = reg.ReplaceAllString(prediction, "")
}
for _, c := range config.TrimSpace {
prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
}
for _, c := range config.TrimSuffix {
prediction = strings.TrimSpace(strings.TrimSuffix(prediction, c))
}
return prediction
}
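
The PredictStream callback above buffers incoming bytes until they form complete UTF-8 runes, so multi-byte characters split across gRPC chunks are never emitted half-decoded. A self-contained sketch of the same technique with contrived chunk boundaries; it additionally checks utf8.FullRune so a genuinely invalid byte (which the loop above would leave stuck in the buffer) is flushed as U+FFFD instead:

package main

import (
	"fmt"
	"unicode/utf8"
)

func main() {
	// "€ ok", with the 3-byte euro sign split across chunks
	chunks := [][]byte{{0xE2}, {0x82}, {0xAC}, []byte(" ok")}
	var partialRune []byte
	for _, chars := range chunks {
		partialRune = append(partialRune, chars...)
		for len(partialRune) > 0 {
			r, size := utf8.DecodeRune(partialRune)
			if r == utf8.RuneError && !utf8.FullRune(partialRune) {
				break // incomplete rune, wait for more bytes
			}
			fmt.Printf("%c", r) // stand-in for tokenCallback(string(r), usage)
			partialRune = partialRune[size:]
		}
	}
	fmt.Println()
}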

129
api/backend/options.go Normal file

@@ -0,0 +1,129 @@
package backend
import (
"os"
"path/filepath"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
model "github.com/go-skynet/LocalAI/pkg/model"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
)
func modelOpts(c config.Config, o *options.Option, opts []model.Option) []model.Option {
if o.SingleBackend {
opts = append(opts, model.WithSingleActiveBackend())
}
if o.ParallelBackendRequests {
opts = append(opts, model.EnableParallelRequests)
}
if c.GRPC.Attempts != 0 {
opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts))
}
if c.GRPC.AttemptsSleepTime != 0 {
opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
}
for k, v := range o.ExternalGRPCBackends {
opts = append(opts, model.WithExternalBackend(k, v))
}
return opts
}
func gRPCModelOpts(c config.Config) *pb.ModelOptions {
b := 512
if c.Batch != 0 {
b = c.Batch
}
return &pb.ModelOptions{
ContextSize: int32(c.ContextSize),
Seed: int32(c.Seed),
NBatch: int32(b),
NoMulMatQ: c.NoMulMatQ,
CUDA: c.CUDA, // diffusers, transformers
DraftModel: c.DraftModel,
AudioPath: c.VallE.AudioPath,
Quantization: c.Quantization,
MMProj: c.MMProj,
YarnExtFactor: c.YarnExtFactor,
YarnAttnFactor: c.YarnAttnFactor,
YarnBetaFast: c.YarnBetaFast,
YarnBetaSlow: c.YarnBetaSlow,
LoraAdapter: c.LoraAdapter,
LoraBase: c.LoraBase,
LoraScale: c.LoraScale,
NGQA: c.NGQA,
RMSNormEps: c.RMSNormEps,
F16Memory: c.F16,
MLock: c.MMlock,
RopeFreqBase: c.RopeFreqBase,
RopeScaling: c.RopeScaling,
Type: c.ModelType,
RopeFreqScale: c.RopeFreqScale,
NUMA: c.NUMA,
Embeddings: c.Embeddings,
LowVRAM: c.LowVRAM,
NGPULayers: int32(c.NGPULayers),
MMap: c.MMap,
MainGPU: c.MainGPU,
Threads: int32(c.Threads),
TensorSplit: c.TensorSplit,
// AutoGPTQ
ModelBaseName: c.AutoGPTQ.ModelBaseName,
Device: c.AutoGPTQ.Device,
UseTriton: c.AutoGPTQ.Triton,
UseFastTokenizer: c.AutoGPTQ.UseFastTokenizer,
// RWKV
Tokenizer: c.Tokenizer,
}
}
func gRPCPredictOpts(c config.Config, modelPath string) *pb.PredictOptions {
promptCachePath := ""
if c.PromptCachePath != "" {
p := filepath.Join(modelPath, c.PromptCachePath)
os.MkdirAll(filepath.Dir(p), 0755)
promptCachePath = p
}
return &pb.PredictOptions{
Temperature: float32(c.Temperature),
TopP: float32(c.TopP),
NDraft: c.NDraft,
TopK: int32(c.TopK),
Tokens: int32(c.Maxtokens),
Threads: int32(c.Threads),
PromptCacheAll: c.PromptCacheAll,
PromptCacheRO: c.PromptCacheRO,
PromptCachePath: promptCachePath,
F16KV: c.F16,
DebugMode: c.Debug,
Grammar: c.Grammar,
NegativePromptScale: c.NegativePromptScale,
RopeFreqBase: c.RopeFreqBase,
RopeFreqScale: c.RopeFreqScale,
NegativePrompt: c.NegativePrompt,
Mirostat: int32(c.LLMConfig.Mirostat),
MirostatETA: float32(c.LLMConfig.MirostatETA),
MirostatTAU: float32(c.LLMConfig.MirostatTAU),
Debug: c.Debug,
StopPrompts: c.StopWords,
Repeat: int32(c.RepeatPenalty),
NKeep: int32(c.Keep),
Batch: int32(c.Batch),
IgnoreEOS: c.IgnoreEOS,
Seed: int32(c.Seed),
FrequencyPenalty: float32(c.FrequencyPenalty),
MLock: c.MMlock,
MMap: c.MMap,
MainGPU: c.MainGPU,
TensorSplit: c.TensorSplit,
TailFreeSamplingZ: float32(c.TFZ),
TypicalP: float32(c.TypicalP),
}
}

39
api/backend/transcript.go Normal file

@@ -0,0 +1,39 @@
package backend
import (
"context"
"fmt"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/schema"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
model "github.com/go-skynet/LocalAI/pkg/model"
)
func ModelTranscription(audio, language string, loader *model.ModelLoader, c config.Config, o *options.Option) (*schema.Result, error) {
opts := modelOpts(c, o, []model.Option{
model.WithBackendString(model.WhisperBackend),
model.WithModel(c.Model),
model.WithContext(o.Context),
model.WithThreads(uint32(c.Threads)),
model.WithAssetDir(o.AssetsDestination),
})
whisperModel, err := o.Loader.BackendLoader(opts...)
if err != nil {
return nil, err
}
if whisperModel == nil {
return nil, fmt.Errorf("could not load whisper model")
}
return whisperModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
Dst: audio,
Language: language,
Threads: uint32(c.Threads),
})
}
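
ModelTranscription always forces the whisper backend, so a caller only supplies the audio path, the language, and a config naming the model file. A hedged sketch; the helper and file names are illustrative:

package example

import (
	"github.com/go-skynet/LocalAI/api/backend"
	config "github.com/go-skynet/LocalAI/api/config"
	"github.com/go-skynet/LocalAI/api/options"
	"github.com/go-skynet/LocalAI/api/schema"
	model "github.com/go-skynet/LocalAI/pkg/model"
)

// transcribe is a hypothetical wrapper; cfg.Model is assumed to name a
// whisper model file (e.g. "ggml-whisper-base.bin") under the model path.
func transcribe(loader *model.ModelLoader, cfg config.Config, o *options.Option) (*schema.Result, error) {
	return backend.ModelTranscription("/tmp/audio.wav", "en", loader, cfg, o)
}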

79
api/backend/tts.go Normal file

@@ -0,0 +1,79 @@
package backend
import (
"context"
"fmt"
"os"
"path/filepath"
api_config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/utils"
)
func generateUniqueFileName(dir, baseName, ext string) string {
counter := 1
fileName := baseName + ext
for {
filePath := filepath.Join(dir, fileName)
_, err := os.Stat(filePath)
if os.IsNotExist(err) {
return fileName
}
counter++
fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext)
}
}
func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *options.Option) (string, *proto.Result, error) {
bb := backend
if bb == "" {
bb = model.PiperBackend
}
opts := modelOpts(api_config.Config{}, o, []model.Option{
model.WithBackendString(bb),
model.WithModel(modelFile),
model.WithContext(o.Context),
model.WithAssetDir(o.AssetsDestination),
})
piperModel, err := o.Loader.BackendLoader(opts...)
if err != nil {
return "", nil, err
}
if piperModel == nil {
return "", nil, fmt.Errorf("could not load piper model")
}
if err := os.MkdirAll(o.AudioDir, 0755); err != nil {
return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
}
fileName := generateUniqueFileName(o.AudioDir, "piper", ".wav")
filePath := filepath.Join(o.AudioDir, fileName)
// If the model file is not empty, we pass it joined with the model path
modelPath := ""
if modelFile != "" {
if bb != model.TransformersMusicGen {
modelPath = filepath.Join(o.Loader.ModelPath, modelFile)
if err := utils.VerifyPath(modelPath, o.Loader.ModelPath); err != nil {
return "", nil, err
}
} else {
modelPath = modelFile
}
}
res, err := piperModel.TTS(context.Background(), &proto.TTSRequest{
Text: text,
Model: modelPath,
Dst: filePath,
})
return filePath, res, err
}
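
ModelTTS defaults to the piper backend when none is given, writes the result under o.AudioDir with a collision-free name (piper.wav, piper_2.wav, ...), and returns the final path. A usage sketch; the model file name is illustrative:

package example

import (
	"github.com/go-skynet/LocalAI/api/backend"
	"github.com/go-skynet/LocalAI/api/options"
)

// speak is a hypothetical helper: an empty backend string selects piper.
func speak(o *options.Option, text string) (string, error) {
	filePath, _, err := backend.ModelTTS("", text, "en-us-amy-low.onnx", o.Loader, o)
	return filePath, err
}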

374
api/config/config.go Normal file

@@ -0,0 +1,374 @@
package api_config
import (
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
"sync"
"github.com/go-skynet/LocalAI/pkg/downloader"
"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v3"
)
type Config struct {
PredictionOptions `yaml:"parameters"`
Name string `yaml:"name"`
F16 bool `yaml:"f16"`
Threads int `yaml:"threads"`
Debug bool `yaml:"debug"`
Roles map[string]string `yaml:"roles"`
Embeddings bool `yaml:"embeddings"`
Backend string `yaml:"backend"`
TemplateConfig TemplateConfig `yaml:"template"`
PromptStrings, InputStrings []string `yaml:"-"`
InputToken [][]int `yaml:"-"`
functionCallString, functionCallNameString string `yaml:"-"`
FunctionsConfig Functions `yaml:"function"`
FeatureFlag FeatureFlag `yaml:"feature_flags"` // Feature flag registry. We move fast, and features may break on a per-model/backend basis. A registry of (usually temporary) flags that let something be aborted early.
// LLM configs (GPT4ALL, Llama.cpp, ...)
LLMConfig `yaml:",inline"`
// AutoGPTQ specifics
AutoGPTQ AutoGPTQ `yaml:"autogptq"`
// Diffusers
Diffusers Diffusers `yaml:"diffusers"`
Step int `yaml:"step"`
// GRPC Options
GRPC GRPC `yaml:"grpc"`
// Vall-e-x
VallE VallE `yaml:"vall-e"`
// CUDA
// Explicitly enable CUDA or not (some backends might need it)
CUDA bool `yaml:"cuda"`
DownloadFiles []File `yaml:"download_files"`
Description string `yaml:"description"`
Usage string `yaml:"usage"`
}
type File struct {
Filename string `yaml:"filename" json:"filename"`
SHA256 string `yaml:"sha256" json:"sha256"`
URI string `yaml:"uri" json:"uri"`
}
type VallE struct {
AudioPath string `yaml:"audio_path"`
}
type FeatureFlag map[string]*bool
func (ff FeatureFlag) Enabled(s string) bool {
v, exist := ff[s]
return exist && v != nil && *v
}
type GRPC struct {
Attempts int `yaml:"attempts"`
AttemptsSleepTime int `yaml:"attempts_sleep_time"`
}
type Diffusers struct {
CUDA bool `yaml:"cuda"`
PipelineType string `yaml:"pipeline_type"`
SchedulerType string `yaml:"scheduler_type"`
EnableParameters string `yaml:"enable_parameters"` // A comma-separated list of parameters to enable on the pipeline
CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale
IMG2IMG bool `yaml:"img2img"` // Image-to-image diffusion
ClipSkip int `yaml:"clip_skip"` // Number of CLIP layers to skip
ClipModel string `yaml:"clip_model"` // Clip model to use
ClipSubFolder string `yaml:"clip_subfolder"` // Subfolder to use for clip model
ControlNet string `yaml:"control_net"`
}
type LLMConfig struct {
SystemPrompt string `yaml:"system_prompt"`
TensorSplit string `yaml:"tensor_split"`
MainGPU string `yaml:"main_gpu"`
RMSNormEps float32 `yaml:"rms_norm_eps"`
NGQA int32 `yaml:"ngqa"`
PromptCachePath string `yaml:"prompt_cache_path"`
PromptCacheAll bool `yaml:"prompt_cache_all"`
PromptCacheRO bool `yaml:"prompt_cache_ro"`
MirostatETA float64 `yaml:"mirostat_eta"`
MirostatTAU float64 `yaml:"mirostat_tau"`
Mirostat int `yaml:"mirostat"`
NGPULayers int `yaml:"gpu_layers"`
MMap bool `yaml:"mmap"`
MMlock bool `yaml:"mmlock"`
LowVRAM bool `yaml:"low_vram"`
Grammar string `yaml:"grammar"`
StopWords []string `yaml:"stopwords"`
Cutstrings []string `yaml:"cutstrings"`
TrimSpace []string `yaml:"trimspace"`
TrimSuffix []string `yaml:"trimsuffix"`
ContextSize int `yaml:"context_size"`
NUMA bool `yaml:"numa"`
LoraAdapter string `yaml:"lora_adapter"`
LoraBase string `yaml:"lora_base"`
LoraScale float32 `yaml:"lora_scale"`
NoMulMatQ bool `yaml:"no_mulmatq"`
DraftModel string `yaml:"draft_model"`
NDraft int32 `yaml:"n_draft"`
Quantization string `yaml:"quantization"`
MMProj string `yaml:"mmproj"`
RopeScaling string `yaml:"rope_scaling"`
ModelType string `yaml:"type"`
YarnExtFactor float32 `yaml:"yarn_ext_factor"`
YarnAttnFactor float32 `yaml:"yarn_attn_factor"`
YarnBetaFast float32 `yaml:"yarn_beta_fast"`
YarnBetaSlow float32 `yaml:"yarn_beta_slow"`
}
type AutoGPTQ struct {
ModelBaseName string `yaml:"model_base_name"`
Device string `yaml:"device"`
Triton bool `yaml:"triton"`
UseFastTokenizer bool `yaml:"use_fast_tokenizer"`
}
type Functions struct {
DisableNoAction bool `yaml:"disable_no_action"`
NoActionFunctionName string `yaml:"no_action_function_name"`
NoActionDescriptionName string `yaml:"no_action_description_name"`
}
type TemplateConfig struct {
Chat string `yaml:"chat"`
ChatMessage string `yaml:"chat_message"`
Completion string `yaml:"completion"`
Edit string `yaml:"edit"`
Functions string `yaml:"function"`
}
type ConfigLoader struct {
configs map[string]Config
sync.Mutex
}
func (c *Config) SetFunctionCallString(s string) {
c.functionCallString = s
}
func (c *Config) SetFunctionCallNameString(s string) {
c.functionCallNameString = s
}
func (c *Config) ShouldUseFunctions() bool {
// Use functions unless the request explicitly set function_call to "none"
return ((c.functionCallString != "none" || c.functionCallString == "") || c.ShouldCallSpecificFunction())
}
func (c *Config) ShouldCallSpecificFunction() bool {
return len(c.functionCallNameString) > 0
}
func (c *Config) FunctionToCall() string {
return c.functionCallNameString
}
func defaultPredictOptions(modelFile string) PredictionOptions {
return PredictionOptions{
TopP: 0.7,
TopK: 80,
Maxtokens: 512,
Temperature: 0.9,
Model: modelFile,
}
}
func DefaultConfig(modelFile string) *Config {
return &Config{
PredictionOptions: defaultPredictOptions(modelFile),
}
}
func NewConfigLoader() *ConfigLoader {
return &ConfigLoader{
configs: make(map[string]Config),
}
}
func ReadConfigFile(file string) ([]*Config, error) {
c := &[]*Config{}
f, err := os.ReadFile(file)
if err != nil {
return nil, fmt.Errorf("cannot read config file: %w", err)
}
if err := yaml.Unmarshal(f, c); err != nil {
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
}
return *c, nil
}
func ReadConfig(file string) (*Config, error) {
c := &Config{}
f, err := os.ReadFile(file)
if err != nil {
return nil, fmt.Errorf("cannot read config file: %w", err)
}
if err := yaml.Unmarshal(f, c); err != nil {
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
}
return c, nil
}
func (cm *ConfigLoader) LoadConfigFile(file string) error {
cm.Lock()
defer cm.Unlock()
c, err := ReadConfigFile(file)
if err != nil {
return fmt.Errorf("cannot load config file: %w", err)
}
for _, cc := range c {
cm.configs[cc.Name] = *cc
}
return nil
}
func (cm *ConfigLoader) LoadConfig(file string) error {
cm.Lock()
defer cm.Unlock()
c, err := ReadConfig(file)
if err != nil {
return fmt.Errorf("cannot read config file: %w", err)
}
cm.configs[c.Name] = *c
return nil
}
func (cm *ConfigLoader) GetConfig(m string) (Config, bool) {
cm.Lock()
defer cm.Unlock()
v, exists := cm.configs[m]
return v, exists
}
func (cm *ConfigLoader) GetAllConfigs() []Config {
cm.Lock()
defer cm.Unlock()
var res []Config
for _, v := range cm.configs {
res = append(res, v)
}
return res
}
func (cm *ConfigLoader) ListConfigs() []string {
cm.Lock()
defer cm.Unlock()
var res []string
for k := range cm.configs {
res = append(res, k)
}
return res
}
// Preload prepares models that are not local files, e.g. URLs or Hugging Face repositories
func (cm *ConfigLoader) Preload(modelPath string) error {
cm.Lock()
defer cm.Unlock()
status := func(fileName, current, total string, percent float64) {
utils.DisplayDownloadFunction(fileName, current, total, percent)
}
log.Info().Msgf("Preloading models from %s", modelPath)
for i, config := range cm.configs {
// Download files and verify their SHA
for _, file := range config.DownloadFiles {
log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
if err := utils.VerifyPath(file.Filename, modelPath); err != nil {
return err
}
// Create file path
filePath := filepath.Join(modelPath, file.Filename)
if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
return err
}
}
modelURL := config.PredictionOptions.Model
modelURL = downloader.ConvertURL(modelURL)
if downloader.LooksLikeURL(modelURL) {
// md5 of the model URL
md5Name := utils.MD5(modelURL)
// check if file exists
if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
if err != nil {
return err
}
}
cc := cm.configs[i]
c := &cc
c.PredictionOptions.Model = md5Name
cm.configs[i] = *c
}
if cm.configs[i].Name != "" {
log.Info().Msgf("Model name: %s", cm.configs[i].Name)
}
if cm.configs[i].Description != "" {
log.Info().Msgf("Model description: %s", cm.configs[i].Description)
}
if cm.configs[i].Usage != "" {
log.Info().Msgf("Model usage: \n%s", cm.configs[i].Usage)
}
}
return nil
}
func (cm *ConfigLoader) LoadConfigs(path string) error {
cm.Lock()
defer cm.Unlock()
entries, err := os.ReadDir(path)
if err != nil {
return err
}
files := make([]fs.FileInfo, 0, len(entries))
for _, entry := range entries {
info, err := entry.Info()
if err != nil {
return err
}
files = append(files, info)
}
for _, file := range files {
// Skip anything that is not a YAML config file (templates, .keep files, ...)
if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") {
continue
}
c, err := ReadConfig(filepath.Join(path, file.Name()))
if err == nil {
cm.configs[c.Name] = *c
}
}
return nil
}
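
ReadConfigFile expects a YAML list of model definitions (the test below relies on a file defining list1 and list2), while ReadConfig parses a single-document file. A sketch of loading a multi-model file; the path is illustrative:

package main

import (
	"fmt"

	api_config "github.com/go-skynet/LocalAI/api/config"
)

func main() {
	// Assumed to be a YAML list of model definitions
	configs, err := api_config.ReadConfigFile("models/config.yaml")
	if err != nil {
		panic(err)
	}
	for _, c := range configs {
		fmt.Println(c.Name)
	}
}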

56
api/config/config_test.go Normal file

@@ -0,0 +1,56 @@
package api_config_test
import (
"os"
. "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/pkg/model"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("Test cases for config related functions", func() {
var (
configFile string
)
Context("Test Read configuration functions", func() {
configFile = os.Getenv("CONFIG_FILE")
It("Test ReadConfigFile", func() {
config, err := ReadConfigFile(configFile)
Expect(err).To(BeNil())
Expect(config).ToNot(BeNil())
// two configs in config.yaml
Expect(config[0].Name).To(Equal("list1"))
Expect(config[1].Name).To(Equal("list2"))
})
It("Test LoadConfigs", func() {
cm := NewConfigLoader()
opts := options.NewOptions()
modelLoader := model.NewModelLoader(os.Getenv("MODELS_PATH"))
options.WithModelLoader(modelLoader)(opts)
err := cm.LoadConfigs(opts.Loader.ModelPath)
Expect(err).To(BeNil())
Expect(cm.ListConfigs()).ToNot(BeNil())
// config should include the gpt4all model's config
Expect(cm.ListConfigs()).To(ContainElements("gpt4all"))
// config should include the gpt4all-2 model's config
Expect(cm.ListConfigs()).To(ContainElements("gpt4all-2"))
// config should include the text-embedding-ada-002 model's config
Expect(cm.ListConfigs()).To(ContainElements("text-embedding-ada-002"))
// config should include the rwkv_test model's config
Expect(cm.ListConfigs()).To(ContainElements("rwkv_test"))
// config should include the whisper-1 model's config
Expect(cm.ListConfigs()).To(ContainElements("whisper-1"))
})
})
})

50
api/config/prediction.go Normal file

@@ -0,0 +1,50 @@
package api_config
type PredictionOptions struct {
// Also part of the OpenAI official spec
Model string `json:"model" yaml:"model"`
// Also part of the OpenAI official spec
Language string `json:"language"`
// Also part of the OpenAI official spec. Use it for returning multiple results
N int `json:"n"`
// Common options between all the API calls, part of the OpenAI spec
TopP float64 `json:"top_p" yaml:"top_p"`
TopK int `json:"top_k" yaml:"top_k"`
Temperature float64 `json:"temperature" yaml:"temperature"`
Maxtokens int `json:"max_tokens" yaml:"max_tokens"`
Echo bool `json:"echo"`
// Custom parameters - not present in the OpenAI API
Batch int `json:"batch" yaml:"batch"`
F16 bool `json:"f16" yaml:"f16"`
IgnoreEOS bool `json:"ignore_eos" yaml:"ignore_eos"`
RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
Keep int `json:"n_keep" yaml:"n_keep"`
MirostatETA float64 `json:"mirostat_eta" yaml:"mirostat_eta"`
MirostatTAU float64 `json:"mirostat_tau" yaml:"mirostat_tau"`
Mirostat int `json:"mirostat" yaml:"mirostat"`
FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
TFZ float64 `json:"tfz" yaml:"tfz"`
TypicalP float64 `json:"typical_p" yaml:"typical_p"`
Seed int `json:"seed" yaml:"seed"`
NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"`
RopeFreqBase float32 `json:"rope_freq_base" yaml:"rope_freq_base"`
RopeFreqScale float32 `json:"rope_freq_scale" yaml:"rope_freq_scale"`
NegativePromptScale float32 `json:"negative_prompt_scale" yaml:"negative_prompt_scale"`
// AutoGPTQ
UseFastTokenizer bool `json:"use_fast_tokenizer" yaml:"use_fast_tokenizer"`
// Diffusers
ClipSkip int `json:"clip_skip" yaml:"clip_skip"`
// RWKV (?)
Tokenizer string `json:"tokenizer" yaml:"tokenizer"`
}
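
Because every field carries a json tag and (where relevant) a yaml tag, the same struct is populated both from OpenAI-style request bodies and from the parameters block of a model YAML. A quick sketch of the JSON side; the values are illustrative:

package main

import (
	"encoding/json"
	"fmt"

	api_config "github.com/go-skynet/LocalAI/api/config"
)

func main() {
	body := []byte(`{"model":"list1","temperature":0.9,"top_p":0.7,"max_tokens":256}`)
	var p api_config.PredictionOptions
	if err := json.Unmarshal(body, &p); err != nil {
		panic(err)
	}
	fmt.Printf("%s temp=%.1f top_p=%.1f max=%d\n", p.Model, p.Temperature, p.TopP, p.Maxtokens)
}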

162
api/localai/backend_monitor.go Normal file

@@ -0,0 +1,162 @@
package localai
import (
"context"
"fmt"
"strings"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/LocalAI/api/options"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
gopsutil "github.com/shirou/gopsutil/v3/process"
)
type BackendMonitorRequest struct {
Model string `json:"model" yaml:"model"`
}
type BackendMonitorResponse struct {
MemoryInfo *gopsutil.MemoryInfoStat
MemoryPercent float32
CPUPercent float64
}
type BackendMonitor struct {
configLoader *config.ConfigLoader
options *options.Option // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name.
}
func NewBackendMonitor(configLoader *config.ConfigLoader, options *options.Option) BackendMonitor {
return BackendMonitor{
configLoader: configLoader,
options: options,
}
}
func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*BackendMonitorResponse, error) {
config, exists := bm.configLoader.GetConfig(model)
var backend string
if exists {
backend = config.Model
} else {
// Last ditch effort: use it raw, see if a backend happens to match.
backend = model
}
if !strings.HasSuffix(backend, ".bin") {
backend = fmt.Sprintf("%s.bin", backend)
}
pid, err := bm.options.Loader.GetGRPCPID(backend)
if err != nil {
log.Error().Msgf("model %s : failed to find pid %+v", model, err)
return nil, err
}
// The name is slightly frightening, but this does _not_ create a new process: it looks up an existing process by PID.
backendProcess, err := gopsutil.NewProcess(int32(pid))
if err != nil {
log.Error().Msgf("model %s [PID %d] : error getting process info %+v", model, pid, err)
return nil, err
}
memInfo, err := backendProcess.MemoryInfo()
if err != nil {
log.Error().Msgf("model %s [PID %d] : error getting memory info %+v", model, pid, err)
return nil, err
}
memPercent, err := backendProcess.MemoryPercent()
if err != nil {
log.Error().Msgf("model %s [PID %d] : error getting memory percent %+v", model, pid, err)
return nil, err
}
cpuPercent, err := backendProcess.CPUPercent()
if err != nil {
log.Error().Msgf("model %s [PID %d] : error getting cpu percent %+v", model, pid, err)
return nil, err
}
return &BackendMonitorResponse{
MemoryInfo: memInfo,
MemoryPercent: memPercent,
CPUPercent: cpuPercent,
}, nil
}
func (bm BackendMonitor) getModelLoaderIDFromCtx(c *fiber.Ctx) (string, error) {
input := new(BackendMonitorRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return "", err
}
config, exists := bm.configLoader.GetConfig(input.Model)
var backendId string
if exists {
backendId = config.Model
} else {
// Last ditch effort: use it raw, see if a backend happens to match.
backendId = input.Model
}
if !strings.HasSuffix(backendId, ".bin") {
backendId = fmt.Sprintf("%s.bin", backendId)
}
return backendId, nil
}
func BackendMonitorEndpoint(bm BackendMonitor) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
backendId, err := bm.getModelLoaderIDFromCtx(c)
if err != nil {
return err
}
model := bm.options.Loader.CheckIsLoaded(backendId)
if model == "" {
return fmt.Errorf("backend %s is not currently loaded", backendId)
}
status, rpcErr := model.GRPC(false, nil).Status(context.TODO())
if rpcErr != nil {
log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error())
val, slbErr := bm.SampleLocalBackendProcess(backendId)
if slbErr != nil {
return fmt.Errorf("backend %s experienced an error retrieving status info via rpc: %s, then failed local node process sample: %s", backendId, rpcErr.Error(), slbErr.Error())
}
return c.JSON(proto.StatusResponse{
State: proto.StatusResponse_ERROR,
Memory: &proto.MemoryUsageData{
Total: val.MemoryInfo.VMS,
Breakdown: map[string]uint64{
"gopsutil-RSS": val.MemoryInfo.RSS,
},
},
})
}
return c.JSON(status)
}
}
func BackendShutdownEndpoint(bm BackendMonitor) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
backendId, err := bm.getModelLoaderIDFromCtx(c)
if err != nil {
return err
}
return bm.options.Loader.ShutdownModel(backendId)
}
}
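
The monitor endpoint first asks the backend itself over gRPC and only falls back to sampling the local process with gopsutil. In-process, the sampler can also be called directly; a hedged sketch, with the import path inferred from the layout above and the model name illustrative:

package example

import (
	"fmt"

	config "github.com/go-skynet/LocalAI/api/config"
	"github.com/go-skynet/LocalAI/api/localai"
	"github.com/go-skynet/LocalAI/api/options"
)

func printBackendUsage(cl *config.ConfigLoader, o *options.Option) error {
	bm := localai.NewBackendMonitor(cl, o)
	// "gpt4all" is resolved to its backend id (suffixing ".bin" if needed)
	resp, err := bm.SampleLocalBackendProcess("gpt4all")
	if err != nil {
		return err
	}
	fmt.Printf("RSS=%d bytes, CPU=%.1f%%\n", resp.MemoryInfo.RSS, resp.CPUPercent)
	return nil
}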

326
api/localai/gallery.go Normal file

@@ -0,0 +1,326 @@
package localai
import (
"context"
"fmt"
"os"
"slices"
"strings"
"sync"
json "github.com/json-iterator/go"
"gopkg.in/yaml.v3"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/pkg/gallery"
"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
"github.com/rs/zerolog/log"
)
type galleryOp struct {
req gallery.GalleryModel
id string
galleries []gallery.Gallery
galleryName string
}
type galleryOpStatus struct {
FileName string `json:"file_name"`
Error error `json:"error"`
Processed bool `json:"processed"`
Message string `json:"message"`
Progress float64 `json:"progress"`
TotalFileSize string `json:"file_size"`
DownloadedFileSize string `json:"downloaded_size"`
}
type galleryApplier struct {
modelPath string
sync.Mutex
C chan galleryOp
statuses map[string]*galleryOpStatus
}
func NewGalleryService(modelPath string) *galleryApplier {
return &galleryApplier{
modelPath: modelPath,
C: make(chan galleryOp),
statuses: make(map[string]*galleryOpStatus),
}
}
func prepareModel(modelPath string, req gallery.GalleryModel, cm *config.ConfigLoader, downloadStatus func(string, string, string, float64)) error {
config, err := gallery.GetGalleryConfigFromURL(req.URL)
if err != nil {
return err
}
config.Files = append(config.Files, req.AdditionalFiles...)
return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus)
}
func (g *galleryApplier) updateStatus(s string, op *galleryOpStatus) {
g.Lock()
defer g.Unlock()
g.statuses[s] = op
}
func (g *galleryApplier) getStatus(s string) *galleryOpStatus {
g.Lock()
defer g.Unlock()
return g.statuses[s]
}
func (g *galleryApplier) getAllStatus() map[string]*galleryOpStatus {
g.Lock()
defer g.Unlock()
return g.statuses
}
func (g *galleryApplier) Start(c context.Context, cm *config.ConfigLoader) {
go func() {
for {
select {
case <-c.Done():
return
case op := <-g.C:
utils.ResetDownloadTimers()
g.updateStatus(op.id, &galleryOpStatus{Message: "processing", Progress: 0})
// updates the status with an error
updateError := func(e error) {
g.updateStatus(op.id, &galleryOpStatus{Error: e, Processed: true, Message: "error: " + e.Error()})
}
// progressCallback records the status update and displays the download progress
progressCallback := func(fileName string, current string, total string, percentage float64) {
g.updateStatus(op.id, &galleryOpStatus{Message: "processing", FileName: fileName, Progress: percentage, TotalFileSize: total, DownloadedFileSize: current})
utils.DisplayDownloadFunction(fileName, current, total, percentage)
}
var err error
// if the request contains a gallery name, we apply the gallery from the gallery list
if op.galleryName != "" {
if strings.Contains(op.galleryName, "@") {
err = gallery.InstallModelFromGallery(op.galleries, op.galleryName, g.modelPath, op.req, progressCallback)
} else {
err = gallery.InstallModelFromGalleryByName(op.galleries, op.galleryName, g.modelPath, op.req, progressCallback)
}
} else {
err = prepareModel(g.modelPath, op.req, cm, progressCallback)
}
if err != nil {
updateError(err)
continue
}
// Reload models
err = cm.LoadConfigs(g.modelPath)
if err != nil {
updateError(err)
continue
}
err = cm.Preload(g.modelPath)
if err != nil {
updateError(err)
continue
}
g.updateStatus(op.id, &galleryOpStatus{Processed: true, Message: "completed", Progress: 100})
}
}
}()
}
type galleryModel struct {
gallery.GalleryModel `yaml:",inline"` // https://github.com/go-yaml/yaml/issues/63
ID string `json:"id"`
}
func processRequests(modelPath, s string, cm *config.ConfigLoader, galleries []gallery.Gallery, requests []galleryModel) error {
var err error
for _, r := range requests {
utils.ResetDownloadTimers()
if r.ID == "" {
err = prepareModel(modelPath, r.GalleryModel, cm, utils.DisplayDownloadFunction)
} else {
if strings.Contains(r.ID, "@") {
err = gallery.InstallModelFromGallery(
galleries, r.ID, modelPath, r.GalleryModel, utils.DisplayDownloadFunction)
} else {
err = gallery.InstallModelFromGalleryByName(
galleries, r.ID, modelPath, r.GalleryModel, utils.DisplayDownloadFunction)
}
}
}
return err
}
func ApplyGalleryFromFile(modelPath, s string, cm *config.ConfigLoader, galleries []gallery.Gallery) error {
dat, err := os.ReadFile(s)
if err != nil {
return err
}
var requests []galleryModel
if err := yaml.Unmarshal(dat, &requests); err != nil {
return err
}
return processRequests(modelPath, s, cm, galleries, requests)
}
func ApplyGalleryFromString(modelPath, s string, cm *config.ConfigLoader, galleries []gallery.Gallery) error {
var requests []galleryModel
err := json.Unmarshal([]byte(s), &requests)
if err != nil {
return err
}
return processRequests(modelPath, s, cm, galleries, requests)
}
// Endpoint Service
type ModelGalleryService struct {
galleries []gallery.Gallery
modelPath string
galleryApplier *galleryApplier
}
type GalleryModel struct {
ID string `json:"id"`
gallery.GalleryModel
}
func CreateModelGalleryService(galleries []gallery.Gallery, modelPath string, galleryApplier *galleryApplier) ModelGalleryService {
return ModelGalleryService{
galleries: galleries,
modelPath: modelPath,
galleryApplier: galleryApplier,
}
}
func (mgs *ModelGalleryService) GetOpStatusEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
status := mgs.galleryApplier.getStatus(c.Params("uuid"))
if status == nil {
return fmt.Errorf("could not find any status for ID")
}
return c.JSON(status)
}
}
func (mgs *ModelGalleryService) GetAllStatusEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
return c.JSON(mgs.galleryApplier.getAllStatus())
}
}
func (mgs *ModelGalleryService) ApplyModelGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(GalleryModel)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
}
uuid, err := uuid.NewUUID()
if err != nil {
return err
}
mgs.galleryApplier.C <- galleryOp{
req: input.GalleryModel,
id: uuid.String(),
galleryName: input.ID,
galleries: mgs.galleries,
}
return c.JSON(struct {
ID string `json:"uuid"`
StatusURL string `json:"status"`
}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
}
}
func (mgs *ModelGalleryService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
models, err := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath)
if err != nil {
return err
}
log.Debug().Msgf("Models found from galleries: %+v", models)
for _, m := range models {
log.Debug().Msgf("Model found from galleries: %+v", m)
}
dat, err := json.Marshal(models)
if err != nil {
return err
}
return c.Send(dat)
}
}
// NOTE: This is different (and much simpler!) from the endpoint above: it JUST lists the model galleries that have been loaded, not their contents!
func (mgs *ModelGalleryService) ListModelGalleriesEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
log.Debug().Msgf("Listing model galleries %+v", mgs.galleries)
dat, err := json.Marshal(mgs.galleries)
if err != nil {
return err
}
return c.Send(dat)
}
}
func (mgs *ModelGalleryService) AddModelGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(gallery.Gallery)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
}
if slices.ContainsFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
return gallery.Name == input.Name
}) {
return fmt.Errorf("%s already exists", input.Name)
}
log.Debug().Msgf("Adding %+v to gallery list", *input)
mgs.galleries = append(mgs.galleries, *input)
// Marshal after appending, so the response includes the newly added gallery
dat, err := json.Marshal(mgs.galleries)
if err != nil {
return err
}
return c.Send(dat)
}
}
func (mgs *ModelGalleryService) RemoveModelGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(gallery.Gallery)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
}
if !slices.ContainsFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
return gallery.Name == input.Name
}) {
return fmt.Errorf("%s is not currently registered", input.Name)
}
mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
return gallery.Name == input.Name
})
return c.Send(nil)
}
}
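
Gallery installs are asynchronous: Start consumes ops from the channel in a goroutine, and progress is exposed through the UUID-keyed status map that the jobs endpoint polls. An in-package sketch of driving the applier directly; the job id, model path, and gallery reference are illustrative:

package localai

import (
	"context"
	"time"

	config "github.com/go-skynet/LocalAI/api/config"
	"github.com/go-skynet/LocalAI/pkg/gallery"
)

// applyFromGallery is a hypothetical helper; it has to live in package
// localai because galleryOp and getStatus are unexported.
func applyFromGallery(ctx context.Context, cm *config.ConfigLoader, galleries []gallery.Gallery) {
	g := NewGalleryService("/models")
	g.Start(ctx, cm)
	g.C <- galleryOp{
		id:          "job-1",              // the endpoint generates a fresh UUID here
		galleryName: "model-gallery@bert", // "@" selects InstallModelFromGallery
		galleries:   galleries,
	}
	// Poll the status map the same way the jobs endpoint does
	for {
		if st := g.getStatus("job-1"); st != nil && st.Processed {
			break
		}
		time.Sleep(time.Second)
	}
}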

32
api/localai/localai.go Normal file

@@ -0,0 +1,32 @@
package localai
import (
"github.com/go-skynet/LocalAI/api/backend"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/gofiber/fiber/v2"
)
type TTSRequest struct {
Model string `json:"model" yaml:"model"`
Input string `json:"input" yaml:"input"`
Backend string `json:"backend" yaml:"backend"`
}
func TTSEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(TTSRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
}
filePath, _, err := backend.ModelTTS(input.Backend, input.Input, input.Model, o.Loader, o)
if err != nil {
return err
}
return c.Download(filePath)
}
}
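
From a client's perspective, the endpoint takes a small JSON body and streams the generated file back via c.Download. A hedged client sketch in Go; the server address, route path, and model name are assumptions, not shown in this diff:

package main

import (
	"bytes"
	"encoding/json"
	"io"
	"net/http"
	"os"
)

func main() {
	// Field names follow the TTSRequest struct above; values are illustrative.
	body, _ := json.Marshal(map[string]string{
		"model":   "en-us-amy-low.onnx",
		"input":   "Hello from LocalAI",
		"backend": "piper",
	})
	resp, err := http.Post("http://localhost:8080/tts", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := os.Create("out.wav")
	defer out.Close()
	io.Copy(out, resp.Body)
}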

399
api/openai/chat.go Normal file

@@ -0,0 +1,399 @@
package openai
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"strings"
"time"
"github.com/go-skynet/LocalAI/api/backend"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/api/schema"
"github.com/go-skynet/LocalAI/pkg/grammar"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
)
func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
emptyMessage := ""
id := uuid.New().String()
created := int(time.Now().Unix())
process := func(s string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
initialMessage := schema.OpenAIResponse{
ID: id,
Created: created,
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}},
Object: "chat.completion.chunk",
}
responses <- initialMessage
ComputeChoices(req, s, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
resp := schema.OpenAIResponse{
ID: id,
Created: created,
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
Object: "chat.completion.chunk",
Usage: schema.OpenAIUsage{
PromptTokens: usage.Prompt,
CompletionTokens: usage.Completion,
TotalTokens: usage.Prompt + usage.Completion,
},
}
responses <- resp
return true
})
close(responses)
}
return func(c *fiber.Ctx) error {
processFunctions := false
funcs := grammar.Functions{}
modelFile, input, err := readInput(c, o, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
log.Debug().Msgf("Configuration read: %+v", config)
// Allow the user to set custom actions via config file
// to be "embedded" in each model
noActionName := "answer"
noActionDescription := "use this action to answer without performing any action"
if config.FunctionsConfig.NoActionFunctionName != "" {
noActionName = config.FunctionsConfig.NoActionFunctionName
}
if config.FunctionsConfig.NoActionDescriptionName != "" {
noActionDescription = config.FunctionsConfig.NoActionDescriptionName
}
if input.ResponseFormat.Type == "json_object" {
input.Grammar = grammar.JSONBNF
}
// process functions if we have any defined or if we have a function call string
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
log.Debug().Msgf("Response needs to process functions")
processFunctions = true
noActionGrammar := grammar.Function{
Name: noActionName,
Description: noActionDescription,
Parameters: map[string]interface{}{
"properties": map[string]interface{}{
"message": map[string]interface{}{
"type": "string",
"description": "The message to reply the user with",
}},
},
}
// Append the no action function
funcs = append(funcs, input.Functions...)
if !config.FunctionsConfig.DisableNoAction {
funcs = append(funcs, noActionGrammar)
}
// Force picking one of the functions by the request
if config.FunctionToCall() != "" {
funcs = funcs.Select(config.FunctionToCall())
}
// Update input grammar
jsStruct := funcs.ToJSONStructure()
config.Grammar = jsStruct.Grammar("")
} else if input.JSONFunctionGrammarObject != nil {
config.Grammar = input.JSONFunctionGrammarObject.Grammar("")
}
// functions are not supported in stream mode (yet?)
toStream := input.Stream && !processFunctions
log.Debug().Msgf("Parameters: %+v", config)
var predInput string
suppressConfigSystemPrompt := false
mess := []string{}
for messageIndex, i := range input.Messages {
var content string
role := i.Role
// For function calls we may want to customize the role, to make it clearer that the assistant invoked a JSON action
// If an "assistant_function_call" role is defined, we use it; otherwise we use the role passed in the request
if i.FunctionCall != nil && i.Role == "assistant" {
roleFn := "assistant_function_call"
r := config.Roles[roleFn]
if r != "" {
role = roleFn
}
}
r := config.Roles[role]
contentExists := i.Content != nil && i.StringContent != ""
// First attempt to populate content via a chat message specific template
if config.TemplateConfig.ChatMessage != "" {
chatMessageData := model.ChatMessageTemplateData{
SystemPrompt: config.SystemPrompt,
Role: r,
RoleName: role,
Content: i.StringContent,
MessageIndex: messageIndex,
}
templatedChatMessage, err := o.Loader.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
if err != nil {
log.Error().Msgf("error processing message %+v using template \"%s\": %v. Skipping!", chatMessageData, config.TemplateConfig.ChatMessage, err)
} else {
if templatedChatMessage == "" {
log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData)
continue // TODO: This continue is here intentionally to skip over the `mess = append(mess, content)` below, and to prevent the fmt.Sprint-based fallback from running
}
log.Debug().Msgf("templated message for chat: %s", templatedChatMessage)
content = templatedChatMessage
}
}
// If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
if content == "" {
if r != "" {
if contentExists {
content = fmt.Sprint(r, i.StringContent)
}
if i.FunctionCall != nil {
j, err := json.Marshal(i.FunctionCall)
if err == nil {
if contentExists {
content += "\n" + fmt.Sprint(r, " ", string(j))
} else {
content = fmt.Sprint(r, " ", string(j))
}
}
}
} else {
if contentExists {
content = fmt.Sprint(i.StringContent)
}
if i.FunctionCall != nil {
j, err := json.Marshal(i.FunctionCall)
if err == nil {
if contentExists {
content += "\n" + string(j)
} else {
content = string(j)
}
}
}
}
// Special handling for system: we care whether it was printed at all, not about the r branch, so check separately
if contentExists && role == "system" {
suppressConfigSystemPrompt = true
}
}
mess = append(mess, content)
}
predInput = strings.Join(mess, "\n")
log.Debug().Msgf("Prompt (before templating): %s", predInput)
if toStream {
log.Debug().Msgf("Stream request received")
c.Context().SetContentType("text/event-stream")
//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
// c.Set("Content-Type", "text/event-stream")
c.Set("Cache-Control", "no-cache")
c.Set("Connection", "keep-alive")
c.Set("Transfer-Encoding", "chunked")
}
templateFile := ""
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
if o.Loader.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
templateFile = config.Model
}
if config.TemplateConfig.Chat != "" && !processFunctions {
templateFile = config.TemplateConfig.Chat
}
if config.TemplateConfig.Functions != "" && processFunctions {
templateFile = config.TemplateConfig.Functions
}
if templateFile != "" {
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
SystemPrompt: config.SystemPrompt,
SuppressSystemPrompt: suppressConfigSystemPrompt,
Input: predInput,
Functions: funcs,
})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
} else {
log.Debug().Msgf("Template failed loading: %s", err.Error())
}
}
log.Debug().Msgf("Prompt (after templating): %s", predInput)
if processFunctions {
log.Debug().Msgf("Grammar: %+v", config.Grammar)
}
if toStream {
responses := make(chan schema.OpenAIResponse)
go process(predInput, input, config, o.Loader, responses)
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
usage := &schema.OpenAIUsage{}
for ev := range responses {
usage = &ev.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
enc.Encode(ev)
log.Debug().Msgf("Sending chunk: %s", buf.String())
_, err := fmt.Fprintf(w, "data: %v\n", buf.String())
if err != nil {
log.Debug().Msgf("Sending chunk failed: %v", err)
input.Cancel()
break
}
w.Flush()
}
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{
{
FinishReason: "stop",
Index: 0,
Delta: &schema.Message{Content: &emptyMessage},
}},
Object: "chat.completion.chunk",
Usage: *usage,
}
respData, _ := json.Marshal(resp)
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
w.WriteString("data: [DONE]\n\n")
w.Flush()
}))
return nil
}
result, tokenUsage, err := ComputeChoices(input, predInput, config, o, o.Loader, func(s string, c *[]schema.Choice) {
if processFunctions {
// As we have to change the result before processing, we can't stream the answer (yet?)
ss := map[string]interface{}{}
// This prevents newlines from breaking JSON parsing for clients
s = utils.EscapeNewLines(s)
json.Unmarshal([]byte(s), &ss)
log.Debug().Msgf("Function return: %s %+v", s, ss)
// The grammar defines the function name as "function", while OpenAI returns "name"
func_name := ss["function"]
// Similarly, while arguments here is a map[string]interface{}, OpenAI actually wants a stringified object
args := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
d, _ := json.Marshal(args)
ss["arguments"] = string(d)
ss["name"] = func_name
// if the no-action function was picked, reply with a plain message
if func_name == noActionName {
log.Debug().Msgf("nothing to do, computing a reply")
// If there is a message that the LLM already sends as part of the JSON reply, use it
arguments := map[string]interface{}{}
json.Unmarshal([]byte(d), &arguments)
m, exists := arguments["message"]
if exists {
switch message := m.(type) {
case string:
if message != "" {
log.Debug().Msgf("Reply received from LLM: %s", message)
message = backend.Finetune(*config, predInput, message)
log.Debug().Msgf("Reply received from LLM(finetuned): %s", message)
*c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &message}})
return
}
}
}
log.Debug().Msgf("No action received from LLM, without a message, computing a reply")
// Otherwise ask the LLM to understand the JSON output and the context, and return a message
// Note: this costs (in terms of CPU) another computation
config.Grammar = ""
images := []string{}
for _, m := range input.Messages {
images = append(images, m.StringImages...)
}
predFunc, err := backend.ModelInference(input.Context, predInput, images, o.Loader, *config, o, nil)
if err != nil {
log.Error().Msgf("inference error: %s", err.Error())
return
}
prediction, err := predFunc()
if err != nil {
log.Error().Msgf("inference error: %s", err.Error())
return
}
fineTunedResponse := backend.Finetune(*config, predInput, prediction.Response)
*c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &fineTunedResponse}})
} else {
// otherwise reply with the function call
*c = append(*c, schema.Choice{
FinishReason: "function_call",
Message: &schema.Message{Role: "assistant", FunctionCall: ss},
})
}
return
}
*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
}, nil)
if err != nil {
return err
}
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: result,
Object: "chat.completion",
Usage: schema.OpenAIUsage{
PromptTokens: tokenUsage.Prompt,
CompletionTokens: tokenUsage.Completion,
TotalTokens: tokenUsage.Prompt + tokenUsage.Completion,
},
}
respData, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", respData)
// Return the prediction in the response body
return c.JSON(resp)
}
}
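
When streaming, the handler emits each chunk as a data: line of JSON and terminates the stream with data: [DONE], mirroring OpenAI's server-sent-events framing. A sketch of a Go client consuming that stream; the address and request body are illustrative, while the /v1/chat/completions route matches the OpenAI-compatible API the tests above exercise:

package main

import (
	"bufio"
	"bytes"
	"fmt"
	"net/http"
	"strings"
)

func main() {
	reqBody := []byte(`{"model":"list1","stream":true,"messages":[{"role":"user","content":"hi"}]}`)
	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewReader(reqBody))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()
		if !strings.HasPrefix(line, "data: ") {
			continue // skip blank separator lines between events
		}
		payload := strings.TrimPrefix(line, "data: ")
		if payload == "[DONE]" {
			break
		}
		fmt.Println("chunk:", payload) // each payload is one OpenAIResponse JSON document
	}
}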

199
api/openai/completion.go Normal file

@@ -0,0 +1,199 @@
package openai
import (
"bufio"
"bytes"
"encoding/json"
"errors"
"fmt"
"time"
"github.com/go-skynet/LocalAI/api/backend"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/api/schema"
"github.com/go-skynet/LocalAI/pkg/grammar"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
)
// https://platform.openai.com/docs/api-reference/completions
func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
id := uuid.New().String()
created := int(time.Now().Unix())
process := func(s string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
ComputeChoices(req, s, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
resp := schema.OpenAIResponse{
ID: id,
Created: created,
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{
{
Index: 0,
Text: s,
},
},
Object: "text_completion",
Usage: schema.OpenAIUsage{
PromptTokens: usage.Prompt,
CompletionTokens: usage.Completion,
TotalTokens: usage.Prompt + usage.Completion,
},
}
log.Debug().Msgf("Sending goroutine: %s", s)
responses <- resp
return true
})
close(responses)
}
return func(c *fiber.Ctx) error {
modelFile, input, err := readInput(c, o, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
log.Debug().Msgf("`input`: %+v", input)
config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
if input.ResponseFormat.Type == "json_object" {
input.Grammar = grammar.JSONBNF
}
log.Debug().Msgf("Parameter Config: %+v", config)
if input.Stream {
log.Debug().Msgf("Stream request received")
c.Context().SetContentType("text/event-stream")
//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
//c.Set("Content-Type", "text/event-stream")
c.Set("Cache-Control", "no-cache")
c.Set("Connection", "keep-alive")
c.Set("Transfer-Encoding", "chunked")
}
templateFile := ""
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
if o.Loader.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
templateFile = config.Model
}
if config.TemplateConfig.Completion != "" {
templateFile = config.TemplateConfig.Completion
}
if input.Stream {
if len(config.PromptStrings) > 1 {
return errors.New("cannot handle more than 1 `PromptStrings` when Streaming")
}
predInput := config.PromptStrings[0]
if templateFile != "" {
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
Input: predInput,
})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
}
}
responses := make(chan schema.OpenAIResponse)
go process(predInput, input, config, o.Loader, responses)
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
for ev := range responses {
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
enc.Encode(ev)
log.Debug().Msgf("Sending chunk: %s", buf.String())
fmt.Fprintf(w, "data: %v\n", buf.String())
w.Flush()
}
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{
{
Index: 0,
FinishReason: "stop",
},
},
Object: "text_completion",
}
respData, _ := json.Marshal(resp)
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
w.WriteString("data: [DONE]\n\n")
w.Flush()
}))
return nil
}
var result []schema.Choice
totalTokenUsage := backend.TokenUsage{}
for k, i := range config.PromptStrings {
if templateFile != "" {
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
SystemPrompt: config.SystemPrompt,
Input: i,
})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
}
}
r, tokenUsage, err := ComputeChoices(
input, i, config, o, o.Loader, func(s string, c *[]schema.Choice) {
*c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k})
}, nil)
if err != nil {
return err
}
totalTokenUsage.Prompt += tokenUsage.Prompt
totalTokenUsage.Completion += tokenUsage.Completion
result = append(result, r...)
}
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: result,
Object: "text_completion",
Usage: schema.OpenAIUsage{
PromptTokens: totalTokenUsage.Prompt,
CompletionTokens: totalTokenUsage.Completion,
TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion,
},
}
jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(resp)
}
}

94
api/openai/edit.go Normal file

@@ -0,0 +1,94 @@
package openai
import (
"encoding/json"
"fmt"
"time"
"github.com/go-skynet/LocalAI/api/backend"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/api/schema"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
"github.com/rs/zerolog/log"
)
func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
modelFile, input, err := readInput(c, o, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
log.Debug().Msgf("Parameter Config: %+v", config)
templateFile := ""
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
if o.Loader.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
templateFile = config.Model
}
if config.TemplateConfig.Edit != "" {
templateFile = config.TemplateConfig.Edit
}
var result []schema.Choice
totalTokenUsage := backend.TokenUsage{}
for _, i := range config.InputStrings {
if templateFile != "" {
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{
Input: i,
Instruction: input.Instruction,
SystemPrompt: config.SystemPrompt,
})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
}
}
r, tokenUsage, err := ComputeChoices(input, i, config, o, o.Loader, func(s string, c *[]schema.Choice) {
*c = append(*c, schema.Choice{Text: s})
}, nil)
if err != nil {
return err
}
totalTokenUsage.Prompt += tokenUsage.Prompt
totalTokenUsage.Completion += tokenUsage.Completion
result = append(result, r...)
}
id := uuid.New().String()
created := int(time.Now().Unix())
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: result,
Object: "edit",
Usage: schema.OpenAIUsage{
PromptTokens: totalTokenUsage.Prompt,
CompletionTokens: totalTokenUsage.Completion,
TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion,
},
}
jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(resp)
}
}
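A client sketch for this endpoint (not part of this diff), assuming a server on localhost:8080 and a placeholder model name; "instruction" and "input" map to the fields read by EditEndpoint above.

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	body := []byte(`{"model": "example-model",
	                 "instruction": "Fix the spelling mistakes",
	                 "input": "What day of the wek is it?"}`)
	resp, err := http.Post("http://localhost:8080/v1/edits", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out)) // a schema.OpenAIResponse with Object "edit"
}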

api/openai/embeddings.go (new file, 78 lines)

@@ -0,0 +1,78 @@
package openai
import (
"encoding/json"
"fmt"
"time"
"github.com/go-skynet/LocalAI/api/backend"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/schema"
"github.com/google/uuid"
"github.com/go-skynet/LocalAI/api/options"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
)
// https://platform.openai.com/docs/api-reference/embeddings
func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
model, input, err := readInput(c, o, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(model, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
log.Debug().Msgf("Parameter Config: %+v", config)
items := []schema.Item{}
for i, s := range config.InputToken {
// get the model function to call for the result
embedFn, err := backend.ModelEmbedding("", s, o.Loader, *config, o)
if err != nil {
return err
}
embeddings, err := embedFn()
if err != nil {
return err
}
items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
}
for i, s := range config.InputStrings {
// get the model function to call for the result
embedFn, err := backend.ModelEmbedding(s, []int{}, o.Loader, *config, o)
if err != nil {
return err
}
embeddings, err := embedFn()
if err != nil {
return err
}
items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
}
id := uuid.New().String()
created := int(time.Now().Unix())
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Data: items,
Object: "list",
}
jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(resp)
}
}
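A client sketch (not part of this diff), assuming a server on localhost:8080 and a placeholder embedding model; a string "input" lands in config.InputStrings above, while token arrays land in config.InputToken.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	body := []byte(`{"model": "example-embedding-model", "input": "A long time ago"}`)
	resp, err := http.Post("http://localhost:8080/v1/embeddings", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	var out struct {
		Data []struct {
			Embedding []float32 `json:"embedding"`
			Index     int       `json:"index"`
		} `json:"data"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(len(out.Data[0].Embedding), "dimensions")
}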

api/openai/image.go (new file, 239 lines)

@@ -0,0 +1,239 @@
package openai
import (
"bufio"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/go-skynet/LocalAI/api/schema"
"github.com/google/uuid"
"github.com/go-skynet/LocalAI/api/backend"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
)
func downloadFile(url string) (string, error) {
// Get the data
resp, err := http.Get(url)
if err != nil {
return "", err
}
defer resp.Body.Close()
// Create the file
out, err := os.CreateTemp("", "image")
if err != nil {
return "", err
}
defer out.Close()
// Write the body to file
_, err = io.Copy(out, resp.Body)
return out.Name(), err
}
// https://platform.openai.com/docs/api-reference/images/create
/*
*
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "A cute baby sea otter",
"n": 1,
"size": "512x512"
}'
*
*/
func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
m, input, err := readInput(c, o, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
if m == "" {
m = model.StableDiffusionBackend
}
log.Debug().Msgf("Loading model: %+v", m)
config, input, err := readConfig(m, input, cm, o.Loader, o.Debug, 0, 0, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
src := ""
if input.File != "" {
fileData := []byte{}
// check if input.File is a URL; if so, download it and save it
// to a temporary file
if strings.HasPrefix(input.File, "http://") || strings.HasPrefix(input.File, "https://") {
out, err := downloadFile(input.File)
if err != nil {
return fmt.Errorf("failed downloading file:%w", err)
}
defer os.RemoveAll(out)
fileData, err = os.ReadFile(out)
if err != nil {
return fmt.Errorf("failed reading file:%w", err)
}
} else {
// base 64 decode the file and write it somewhere
// that we will cleanup
fileData, err = base64.StdEncoding.DecodeString(input.File)
if err != nil {
return err
}
}
// Create a temporary file
outputFile, err := os.CreateTemp(o.ImageDir, "b64")
if err != nil {
return err
}
// write the base64 result
writer := bufio.NewWriter(outputFile)
_, err = writer.Write(fileData)
if err != nil {
outputFile.Close()
return err
}
outputFile.Close()
src = outputFile.Name()
defer os.RemoveAll(src)
}
log.Debug().Msgf("Parameter Config: %+v", config)
switch config.Backend {
case "stablediffusion":
config.Backend = model.StableDiffusionBackend
case "tinydream":
config.Backend = model.TinyDreamBackend
case "":
config.Backend = model.StableDiffusionBackend
}
sizeParts := strings.Split(input.Size, "x")
if len(sizeParts) != 2 {
return fmt.Errorf("Invalid value for 'size'")
}
width, err := strconv.Atoi(sizeParts[0])
if err != nil {
return fmt.Errorf("Invalid value for 'size'")
}
height, err := strconv.Atoi(sizeParts[1])
if err != nil {
return fmt.Errorf("Invalid value for 'size'")
}
b64JSON := false
if input.ResponseFormat.Type == "b64_json" {
b64JSON = true
}
// src and clip_skip
var result []schema.Item
for _, i := range config.PromptStrings {
n := input.N
if input.N == 0 {
n = 1
}
for j := 0; j < n; j++ {
prompts := strings.Split(i, "|")
positive_prompt := prompts[0]
negative_prompt := ""
if len(prompts) > 1 {
negative_prompt = prompts[1]
}
mode := 0
step := config.Step
if step == 0 {
step = 15
}
if input.Mode != 0 {
mode = input.Mode
}
if input.Step != 0 {
step = input.Step
}
tempDir := ""
if !b64JSON {
tempDir = o.ImageDir
}
// Create a temporary file
outputFile, err := os.CreateTemp(tempDir, "b64")
if err != nil {
return err
}
outputFile.Close()
output := outputFile.Name() + ".png"
// Rename the temporary file
err = os.Rename(outputFile.Name(), output)
if err != nil {
return err
}
baseURL := c.BaseURL()
fn, err := backend.ImageGeneration(height, width, mode, step, input.Seed, positive_prompt, negative_prompt, src, output, o.Loader, *config, o)
if err != nil {
return err
}
if err := fn(); err != nil {
return err
}
item := &schema.Item{}
if b64JSON {
defer os.RemoveAll(output)
data, err := os.ReadFile(output)
if err != nil {
return err
}
item.B64JSON = base64.StdEncoding.EncodeToString(data)
} else {
base := filepath.Base(output)
item.URL = baseURL + "/generated-images/" + base
}
result = append(result, *item)
}
}
id := uuid.New().String()
created := int(time.Now().Unix())
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Data: result,
}
jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(resp)
}
}
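Complementing the curl sample above, a Go sketch (not part of this diff) that requests a base64 response and writes the decoded PNG, assuming a server on localhost:8080. The "|" separator yields a negative prompt, as parsed above, and "size" is WIDTHxHEIGHT.

package main

import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"net/http"
	"os"
)

func main() {
	body := []byte(`{"prompt": "A cute baby sea otter|blurry, low quality",
	                 "size": "256x256",
	                 "response_format": {"type": "b64_json"}}`)
	resp, err := http.Post("http://localhost:8080/v1/images/generations",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	var out struct {
		Data []struct {
			B64JSON string `json:"b64_json"`
		} `json:"data"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	img, err := base64.StdEncoding.DecodeString(out.Data[0].B64JSON)
	if err != nil {
		panic(err)
	}
	os.WriteFile("otter.png", img, 0o644)
}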

api/openai/inference.go (new file, 55 lines)

@@ -0,0 +1,55 @@
package openai
import (
"github.com/go-skynet/LocalAI/api/backend"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/api/schema"
model "github.com/go-skynet/LocalAI/pkg/model"
)
func ComputeChoices(
req *schema.OpenAIRequest,
predInput string,
config *config.Config,
o *options.Option,
loader *model.ModelLoader,
cb func(string, *[]schema.Choice),
tokenCallback func(string, backend.TokenUsage) bool) ([]schema.Choice, backend.TokenUsage, error) {
n := req.N // number of completions to return
result := []schema.Choice{}
if n == 0 {
n = 1
}
images := []string{}
for _, m := range req.Messages {
images = append(images, m.StringImages...)
}
// get the model function to call for the result
predFunc, err := backend.ModelInference(req.Context, predInput, images, loader, *config, o, tokenCallback)
if err != nil {
return result, backend.TokenUsage{}, err
}
tokenUsage := backend.TokenUsage{}
for i := 0; i < n; i++ {
prediction, err := predFunc()
if err != nil {
return result, backend.TokenUsage{}, err
}
tokenUsage.Prompt += prediction.Usage.Prompt
tokenUsage.Completion += prediction.Usage.Completion
finetunedResponse := backend.Finetune(*config, predInput, prediction.Response)
cb(finetunedResponse, &result)
//result = append(result, Choice{Text: prediction})
}
return result, tokenUsage, err
}
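ComputeChoices decouples generation from response shaping: each predicted (and fine-tuned) string is handed to cb, which decides how it becomes a schema.Choice. A self-contained sketch of the same callback pattern, with hypothetical names and no LocalAI imports:

package main

import "fmt"

type choice struct {
	Text  string
	Index int
}

// computeN runs predict n times and lets the caller decide how each result
// is folded into the choice slice, mirroring ComputeChoices above.
func computeN(n int, predict func() string, cb func(string, *[]choice)) []choice {
	var out []choice
	for i := 0; i < n; i++ {
		cb(predict(), &out)
	}
	return out
}

func main() {
	i := 0
	res := computeN(3,
		func() string { i++; return fmt.Sprintf("draft %d", i) },
		func(s string, c *[]choice) { *c = append(*c, choice{Text: s, Index: len(*c)}) })
	fmt.Println(res)
}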

api/openai/list.go (new file, 69 lines)

@@ -0,0 +1,69 @@
package openai
import (
"regexp"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/schema"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
)
func ListModelsEndpoint(loader *model.ModelLoader, cm *config.ConfigLoader) func(ctx *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
models, err := loader.ListModels()
if err != nil {
return err
}
var mm map[string]interface{} = map[string]interface{}{}
dataModels := []schema.OpenAIModel{}
var filterFn func(name string) bool
filter := c.Query("filter")
// If filter is not specified, do not filter the list by model name
if filter == "" {
filterFn = func(_ string) bool { return true }
} else {
// If filter _IS_ specified, we compile it to a regex which is used to create the filterFn
rxp, err := regexp.Compile(filter)
if err != nil {
return err
}
filterFn = func(name string) bool {
return rxp.MatchString(name)
}
}
// By default, exclude any loose files that are already referenced by a configuration file.
excludeConfigured := c.QueryBool("excludeConfigured", true)
// Start with the known configurations
for _, c := range cm.GetAllConfigs() {
if excludeConfigured {
mm[c.Model] = nil
}
if filterFn(c.Name) {
dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"})
}
}
// Then iterate through the loose files:
for _, m := range models {
// and add each one only if it shouldn't be skipped.
if _, exists := mm[m]; !exists && filterFn(m) {
dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"})
}
}
return c.JSON(struct {
Object string `json:"object"`
Data []schema.OpenAIModel `json:"data"`
}{
Object: "list",
Data: dataModels,
})
}
}
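A sketch of querying this endpoint (not part of this diff), assuming a server on localhost:8080: "filter" is compiled to a regular expression server-side, and excludeConfigured=false also lists loose model files already referenced by a configuration.

package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	resp, err := http.Get("http://localhost:8080/v1/models?filter=llama&excludeConfigured=false")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out)) // {"object":"list","data":[{"id":"...","object":"model"},...]}
}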

api/openai/request.go (new file, 336 lines)

@@ -0,0 +1,336 @@
package openai
import (
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"os"
"path/filepath"
"strings"
config "github.com/go-skynet/LocalAI/api/config"
options "github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/api/schema"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
)
func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *schema.OpenAIRequest, error) {
loader := o.Loader
input := new(schema.OpenAIRequest)
ctx, cancel := context.WithCancel(o.Context)
input.Context = ctx
input.Cancel = cancel
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return "", nil, fmt.Errorf("failed parsing request body: %w", err)
}
modelFile := input.Model
if c.Params("model") != "" {
modelFile = c.Params("model")
}
received, _ := json.Marshal(input)
log.Debug().Msgf("Request received: %s", string(received))
// Set model from bearer token, if available
// (TrimPrefix, not TrimLeft: TrimLeft treats "Bearer " as a character set
// and could also strip leading characters from the model name itself)
bearer := strings.TrimPrefix(c.Get("authorization"), "Bearer ")
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
// If no model was specified, take the first available
if modelFile == "" && !bearerExists && randomModel {
models, _ := loader.ListModels()
if len(models) > 0 {
modelFile = models[0]
log.Debug().Msgf("No model specified, using: %s", modelFile)
} else {
log.Debug().Msgf("No model specified, returning error")
return "", nil, fmt.Errorf("no model specified")
}
}
// A model found in the bearer token takes precedence
if bearerExists {
log.Debug().Msgf("Using model from bearer token: %s", bearer)
modelFile = bearer
}
return modelFile, input, nil
}
// getBase64Image checks whether the string is a URL; if it is, it downloads the
// image into memory, encodes it in base64 and returns the base64 string
func getBase64Image(s string) (string, error) {
if strings.HasPrefix(s, "http") {
// download the image
resp, err := http.Get(s)
if err != nil {
return "", err
}
defer resp.Body.Close()
// read the image data into memory
data, err := ioutil.ReadAll(resp.Body)
if err != nil {
return "", err
}
// encode the image data in base64
encoded := base64.StdEncoding.EncodeToString(data)
// return the base64 string
return encoded, nil
}
// if the string instead is prefixed with "data:image/jpeg;base64,", drop it
if strings.HasPrefix(s, "data:image/jpeg;base64,") {
return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil
}
return "", fmt.Errorf("not valid string")
}
func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
if input.Echo {
config.Echo = input.Echo
}
if input.TopK != 0 {
config.TopK = input.TopK
}
if input.TopP != 0 {
config.TopP = input.TopP
}
if input.Backend != "" {
config.Backend = input.Backend
}
if input.ClipSkip != 0 {
config.Diffusers.ClipSkip = input.ClipSkip
}
if input.ModelBaseName != "" {
config.AutoGPTQ.ModelBaseName = input.ModelBaseName
}
if input.NegativePromptScale != 0 {
config.NegativePromptScale = input.NegativePromptScale
}
if input.UseFastTokenizer {
config.UseFastTokenizer = input.UseFastTokenizer
}
if input.NegativePrompt != "" {
config.NegativePrompt = input.NegativePrompt
}
if input.RopeFreqBase != 0 {
config.RopeFreqBase = input.RopeFreqBase
}
if input.RopeFreqScale != 0 {
config.RopeFreqScale = input.RopeFreqScale
}
if input.Grammar != "" {
config.Grammar = input.Grammar
}
if input.Temperature != 0 {
config.Temperature = input.Temperature
}
if input.Maxtokens != 0 {
config.Maxtokens = input.Maxtokens
}
switch stop := input.Stop.(type) {
case string:
if stop != "" {
config.StopWords = append(config.StopWords, stop)
}
case []interface{}:
for _, pp := range stop {
if s, ok := pp.(string); ok {
config.StopWords = append(config.StopWords, s)
}
}
}
// Decode each request's message content
index := 0
for i, m := range input.Messages {
switch content := m.Content.(type) {
case string:
input.Messages[i].StringContent = content
case []interface{}:
dat, _ := json.Marshal(content)
c := []schema.Content{}
json.Unmarshal(dat, &c)
for _, pp := range c {
if pp.Type == "text" {
input.Messages[i].StringContent = pp.Text
} else if pp.Type == "image_url" {
// Detect if pp.ImageURL is a URL; if it is, download the image and encode it in base64:
base64, err := getBase64Image(pp.ImageURL.URL)
if err == nil {
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
// set a placeholder for each image
input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + input.Messages[i].StringContent
index++
} else {
fmt.Print("Failed encoding image", err)
}
}
}
}
}
if input.RepeatPenalty != 0 {
config.RepeatPenalty = input.RepeatPenalty
}
if input.Keep != 0 {
config.Keep = input.Keep
}
if input.Batch != 0 {
config.Batch = input.Batch
}
if input.F16 {
config.F16 = input.F16
}
if input.IgnoreEOS {
config.IgnoreEOS = input.IgnoreEOS
}
if input.Seed != 0 {
config.Seed = input.Seed
}
if input.Mirostat != 0 {
config.LLMConfig.Mirostat = input.Mirostat
}
if input.MirostatETA != 0 {
config.LLMConfig.MirostatETA = input.MirostatETA
}
if input.MirostatTAU != 0 {
config.LLMConfig.MirostatTAU = input.MirostatTAU
}
if input.TypicalP != 0 {
config.TypicalP = input.TypicalP
}
switch inputs := input.Input.(type) {
case string:
if inputs != "" {
config.InputStrings = append(config.InputStrings, inputs)
}
case []interface{}:
for _, pp := range inputs {
switch i := pp.(type) {
case string:
config.InputStrings = append(config.InputStrings, i)
case []interface{}:
tokens := []int{}
for _, ii := range i {
tokens = append(tokens, int(ii.(float64)))
}
config.InputToken = append(config.InputToken, tokens)
}
}
}
// Can be either a string or an object
switch fnc := input.FunctionCall.(type) {
case string:
if fnc != "" {
config.SetFunctionCallString(fnc)
}
case map[string]interface{}:
var name string
n, exists := fnc["name"]
if exists {
nn, e := n.(string)
if e {
name = nn
}
}
config.SetFunctionCallNameString(name)
}
switch p := input.Prompt.(type) {
case string:
config.PromptStrings = append(config.PromptStrings, p)
case []interface{}:
for _, pp := range p {
if s, ok := pp.(string); ok {
config.PromptStrings = append(config.PromptStrings, s)
}
}
}
}
func readConfig(modelFile string, input *schema.OpenAIRequest, cm *config.ConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.Config, *schema.OpenAIRequest, error) {
// Load a config file if present after the model name
modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
var cfg *config.Config
defaults := func() {
cfg = config.DefaultConfig(modelFile)
cfg.ContextSize = ctx
cfg.Threads = threads
cfg.F16 = f16
cfg.Debug = debug
}
cfgExisting, exists := cm.GetConfig(modelFile)
if !exists {
if _, err := os.Stat(modelConfig); err == nil {
if err := cm.LoadConfig(modelConfig); err != nil {
return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
}
cfgExisting, exists = cm.GetConfig(modelFile)
if exists {
cfg = &cfgExisting
} else {
defaults()
}
} else {
defaults()
}
} else {
cfg = &cfgExisting
}
// Set the parameters for the language model prediction
updateConfig(cfg, input)
// Don't allow 0 as a thread setting
if cfg.Threads == 0 {
if threads != 0 {
cfg.Threads = threads
} else {
cfg.Threads = 4
}
}
// Enforce debug flag if passed from CLI
if debug {
cfg.Debug = true
}
return cfg, input, nil
}
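The type switches in updateConfig above normalize OpenAI fields that may arrive in several JSON shapes. A self-contained sketch of the pattern for the "stop" field, with hypothetical names and no LocalAI imports:

package main

import (
	"encoding/json"
	"fmt"
)

type req struct {
	Stop interface{} `json:"stop"`
}

// stopWords normalizes the OpenAI-style "stop" field, which may arrive as
// a single string or as a list of strings, mirroring updateConfig.
func stopWords(v interface{}) []string {
	var out []string
	switch s := v.(type) {
	case string:
		if s != "" {
			out = append(out, s)
		}
	case []interface{}:
		for _, e := range s {
			if str, ok := e.(string); ok {
				out = append(out, str)
			}
		}
	}
	return out
}

func main() {
	for _, raw := range []string{`{"stop":"\n"}`, `{"stop":["a","b"]}`} {
		var r req
		json.Unmarshal([]byte(raw), &r)
		fmt.Println(stopWords(r.Stop))
	}
}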


@@ -0,0 +1,71 @@
package openai
import (
"fmt"
"io"
"net/http"
"os"
"path"
"path/filepath"
"github.com/go-skynet/LocalAI/api/backend"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
)
// https://platform.openai.com/docs/api-reference/audio/create
func TranscriptEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
m, input, err := readInput(c, o, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(m, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
// retrieve the file data from the request
file, err := c.FormFile("file")
if err != nil {
return err
}
f, err := file.Open()
if err != nil {
return err
}
defer f.Close()
dir, err := os.MkdirTemp("", "whisper")
if err != nil {
return err
}
defer os.RemoveAll(dir)
dst := filepath.Join(dir, path.Base(file.Filename))
dstFile, err := os.Create(dst)
if err != nil {
return err
}
if _, err := io.Copy(dstFile, f); err != nil {
log.Debug().Msgf("Audio file copying error %+v - %+v - err %+v", file.Filename, dst, err)
return err
}
log.Debug().Msgf("Audio file copied to: %+v", dst)
tr, err := backend.ModelTranscription(dst, input.Language, o.Loader, *config, o)
if err != nil {
return err
}
log.Debug().Msgf("Trascribed: %+v", tr)
// TODO: handle different outputs here
return c.Status(http.StatusOK).JSON(tr)
}
}
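A client sketch (not part of this diff) that uploads an audio file as the multipart "file" field read above; the route (/v1/audio/transcriptions), the file name and the model name are assumptions based on the OpenAI API reference linked in the source.

package main

import (
	"bytes"
	"fmt"
	"io"
	"mime/multipart"
	"net/http"
	"os"
)

func main() {
	var buf bytes.Buffer
	mw := multipart.NewWriter(&buf)
	fw, _ := mw.CreateFormFile("file", "audio.wav")
	f, err := os.Open("audio.wav")
	if err != nil {
		panic(err)
	}
	defer f.Close()
	io.Copy(fw, f)
	mw.WriteField("model", "whisper-1")
	mw.Close()

	resp, err := http.Post("http://localhost:8080/v1/audio/transcriptions",
		mw.FormDataContentType(), &buf)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out)) // a schema.Result with segments and full text
}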

api/options/options.go (new file, 262 lines)

@@ -0,0 +1,262 @@
package options
import (
"context"
"embed"
"encoding/json"
"time"
"github.com/go-skynet/LocalAI/metrics"
"github.com/go-skynet/LocalAI/pkg/gallery"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
)
type Option struct {
Context context.Context
ConfigFile string
Loader *model.ModelLoader
UploadLimitMB, Threads, ContextSize int
F16 bool
Debug, DisableMessage bool
ImageDir string
AudioDir string
CORS bool
PreloadJSONModels string
PreloadModelsFromPath string
CORSAllowOrigins string
ApiKeys []string
Metrics *metrics.Metrics
ModelLibraryURL string
Galleries []gallery.Gallery
BackendAssets embed.FS
AssetsDestination string
ExternalGRPCBackends map[string]string
AutoloadGalleries bool
SingleBackend bool
ParallelBackendRequests bool
WatchDogIdle bool
WatchDogBusy bool
WatchDog bool
ModelsURL []string
WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration
}
type AppOption func(*Option)
func NewOptions(o ...AppOption) *Option {
opt := &Option{
Context: context.Background(),
UploadLimitMB: 15,
Threads: 1,
ContextSize: 512,
Debug: true,
DisableMessage: true,
}
for _, oo := range o {
oo(opt)
}
return opt
}
func WithModelsURL(urls ...string) AppOption {
return func(o *Option) {
o.ModelsURL = urls
}
}
func WithCors(b bool) AppOption {
return func(o *Option) {
o.CORS = b
}
}
func WithModelLibraryURL(url string) AppOption {
return func(o *Option) {
o.ModelLibraryURL = url
}
}
var EnableWatchDog = func(o *Option) {
o.WatchDog = true
}
var EnableWatchDogIdleCheck = func(o *Option) {
o.WatchDog = true
o.WatchDogIdle = true
}
var EnableWatchDogBusyCheck = func(o *Option) {
o.WatchDog = true
o.WatchDogBusy = true
}
func SetWatchDogBusyTimeout(t time.Duration) AppOption {
return func(o *Option) {
o.WatchDogBusyTimeout = t
}
}
func SetWatchDogIdleTimeout(t time.Duration) AppOption {
return func(o *Option) {
o.WatchDogIdleTimeout = t
}
}
var EnableSingleBackend = func(o *Option) {
o.SingleBackend = true
}
var EnableParallelBackendRequests = func(o *Option) {
o.ParallelBackendRequests = true
}
var EnableGalleriesAutoload = func(o *Option) {
o.AutoloadGalleries = true
}
func WithExternalBackend(name string, uri string) AppOption {
return func(o *Option) {
if o.ExternalGRPCBackends == nil {
o.ExternalGRPCBackends = make(map[string]string)
}
o.ExternalGRPCBackends[name] = uri
}
}
func WithCorsAllowOrigins(b string) AppOption {
return func(o *Option) {
o.CORSAllowOrigins = b
}
}
func WithBackendAssetsOutput(out string) AppOption {
return func(o *Option) {
o.AssetsDestination = out
}
}
func WithBackendAssets(f embed.FS) AppOption {
return func(o *Option) {
o.BackendAssets = f
}
}
func WithStringGalleries(galls string) AppOption {
return func(o *Option) {
if galls == "" {
log.Debug().Msgf("no galleries to load")
o.Galleries = []gallery.Gallery{}
return
}
var galleries []gallery.Gallery
if err := json.Unmarshal([]byte(galls), &galleries); err != nil {
log.Error().Msgf("failed loading galleries: %s", err.Error())
}
o.Galleries = append(o.Galleries, galleries...)
}
}
func WithGalleries(galleries []gallery.Gallery) AppOption {
return func(o *Option) {
o.Galleries = append(o.Galleries, galleries...)
}
}
func WithContext(ctx context.Context) AppOption {
return func(o *Option) {
o.Context = ctx
}
}
func WithYAMLConfigPreload(configFile string) AppOption {
return func(o *Option) {
o.PreloadModelsFromPath = configFile
}
}
func WithJSONStringPreload(configFile string) AppOption {
return func(o *Option) {
o.PreloadJSONModels = configFile
}
}
func WithConfigFile(configFile string) AppOption {
return func(o *Option) {
o.ConfigFile = configFile
}
}
func WithModelLoader(loader *model.ModelLoader) AppOption {
return func(o *Option) {
o.Loader = loader
}
}
func WithUploadLimitMB(limit int) AppOption {
return func(o *Option) {
o.UploadLimitMB = limit
}
}
func WithThreads(threads int) AppOption {
return func(o *Option) {
o.Threads = threads
}
}
func WithContextSize(ctxSize int) AppOption {
return func(o *Option) {
o.ContextSize = ctxSize
}
}
func WithF16(f16 bool) AppOption {
return func(o *Option) {
o.F16 = f16
}
}
func WithDebug(debug bool) AppOption {
return func(o *Option) {
o.Debug = debug
}
}
func WithDisableMessage(disableMessage bool) AppOption {
return func(o *Option) {
o.DisableMessage = disableMessage
}
}
func WithAudioDir(audioDir string) AppOption {
return func(o *Option) {
o.AudioDir = audioDir
}
}
func WithImageDir(imageDir string) AppOption {
return func(o *Option) {
o.ImageDir = imageDir
}
}
func WithApiKeys(apiKeys []string) AppOption {
return func(o *Option) {
o.ApiKeys = apiKeys
}
}
func WithMetrics(meter *metrics.Metrics) AppOption {
return func(o *Option) {
o.Metrics = meter
}
}
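A sketch of composing an Option with the functional-options API above (import path as in this tree; not a full server bootstrap, and the values are placeholders):

package main

import (
	"context"
	"fmt"

	"github.com/go-skynet/LocalAI/api/options"
)

func main() {
	opt := options.NewOptions(
		options.WithContext(context.Background()),
		options.WithThreads(8),
		options.WithContextSize(1024),
		options.WithCorsAllowOrigins("*"),
	)
	// Defaults (UploadLimitMB 15, Debug true, ...) apply first; each
	// AppOption then mutates the struct in order.
	fmt.Println(opt.Threads, opt.ContextSize, opt.UploadLimitMB)
}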

api/schema/openai.go (new file, 135 lines)

@@ -0,0 +1,135 @@
package schema
import (
"context"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/pkg/grammar"
)
// APIError provides error information returned by the OpenAI API.
type APIError struct {
Code any `json:"code,omitempty"`
Message string `json:"message"`
Param *string `json:"param,omitempty"`
Type string `json:"type"`
}
type ErrorResponse struct {
Error *APIError `json:"error,omitempty"`
}
type OpenAIUsage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
}
type Item struct {
Embedding []float32 `json:"embedding"`
Index int `json:"index"`
Object string `json:"object,omitempty"`
// Images
URL string `json:"url,omitempty"`
B64JSON string `json:"b64_json,omitempty"`
}
type OpenAIResponse struct {
Created int `json:"created,omitempty"`
Object string `json:"object,omitempty"`
ID string `json:"id,omitempty"`
Model string `json:"model,omitempty"`
Choices []Choice `json:"choices,omitempty"`
Data []Item `json:"data,omitempty"`
Usage OpenAIUsage `json:"usage"`
}
type Choice struct {
Index int `json:"index"`
FinishReason string `json:"finish_reason,omitempty"`
Message *Message `json:"message,omitempty"`
Delta *Message `json:"delta,omitempty"`
Text string `json:"text,omitempty"`
}
type Content struct {
Type string `json:"type" yaml:"type"`
Text string `json:"text" yaml:"text"`
ImageURL ContentURL `json:"image_url" yaml:"image_url"`
}
type ContentURL struct {
URL string `json:"url" yaml:"url"`
}
type Message struct {
// The message role
Role string `json:"role,omitempty" yaml:"role"`
// The message content
Content interface{} `json:"content" yaml:"content"`
StringContent string `json:"string_content,omitempty" yaml:"string_content,omitempty"`
StringImages []string `json:"string_images,omitempty" yaml:"string_images,omitempty"`
// A result of a function call
FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
}
type OpenAIModel struct {
ID string `json:"id"`
Object string `json:"object"`
}
type ChatCompletionResponseFormatType string
type ChatCompletionResponseFormat struct {
Type ChatCompletionResponseFormatType `json:"type,omitempty"`
}
type OpenAIRequest struct {
config.PredictionOptions
Context context.Context
Cancel context.CancelFunc
// whisper
File string `json:"file" validate:"required"`
//whisper/image
ResponseFormat ChatCompletionResponseFormat `json:"response_format"`
// image
Size string `json:"size"`
// Prompt is read only by completion/image API calls
Prompt interface{} `json:"prompt" yaml:"prompt"`
// Edit endpoint
Instruction string `json:"instruction" yaml:"instruction"`
Input interface{} `json:"input" yaml:"input"`
Stop interface{} `json:"stop" yaml:"stop"`
// Messages is read only by chat/completion API calls
Messages []Message `json:"messages" yaml:"messages"`
// A list of available functions to call
Functions []grammar.Function `json:"functions" yaml:"functions"`
FunctionCall interface{} `json:"function_call" yaml:"function_call"` // might be a string or an object
Stream bool `json:"stream"`
// Image (not supported by OpenAI)
Mode int `json:"mode"`
Step int `json:"step"`
// A grammar to constrain the LLM output
Grammar string `json:"grammar" yaml:"grammar"`
JSONFunctionGrammarObject *grammar.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"`
Backend string `json:"backend" yaml:"backend"`
// AutoGPTQ
ModelBaseName string `json:"model_base_name" yaml:"model_base_name"`
}
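A sketch (not part of this diff) showing how the response schema above serializes; the omitempty tags keep unused fields, such as Data here, out of the payload, while Usage is always emitted.

package main

import (
	"encoding/json"
	"fmt"

	"github.com/go-skynet/LocalAI/api/schema"
)

func main() {
	resp := schema.OpenAIResponse{
		ID:      "cmpl-123",
		Object:  "text_completion",
		Model:   "example-model",
		Choices: []schema.Choice{{Index: 0, Text: "Hello!", FinishReason: "stop"}},
		Usage:   schema.OpenAIUsage{PromptTokens: 1, CompletionTokens: 2, TotalTokens: 3},
	}
	out, _ := json.MarshalIndent(resp, "", "  ")
	fmt.Println(string(out))
}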

api/schema/whisper.go (new file, 16 lines)

@@ -0,0 +1,16 @@
package schema
import "time"
type Segment struct {
Id int `json:"id"`
Start time.Duration `json:"start"`
End time.Duration `json:"end"`
Text string `json:"text"`
Tokens []int `json:"tokens"`
}
type Result struct {
Segments []Segment `json:"segments"`
Text string `json:"text"`
}

assets.go (new file, 6 lines)

@@ -0,0 +1,6 @@
package main
import "embed"
//go:embed backend-assets/*
var backendAssets embed.FS

backend/backend.proto (new file, 210 lines)

@@ -0,0 +1,210 @@
syntax = "proto3";
option go_package = "github.com/go-skynet/LocalAI/pkg/grpc/proto";
option java_multiple_files = true;
option java_package = "io.skynet.localai.backend";
option java_outer_classname = "LocalAIBackend";
package backend;
service Backend {
rpc Health(HealthMessage) returns (Reply) {}
rpc Predict(PredictOptions) returns (Reply) {}
rpc LoadModel(ModelOptions) returns (Result) {}
rpc PredictStream(PredictOptions) returns (stream Reply) {}
rpc Embedding(PredictOptions) returns (EmbeddingResult) {}
rpc GenerateImage(GenerateImageRequest) returns (Result) {}
rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
rpc TTS(TTSRequest) returns (Result) {}
rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
rpc Status(HealthMessage) returns (StatusResponse) {}
}
message HealthMessage {}
// The request message containing the user's name.
message PredictOptions {
string Prompt = 1;
int32 Seed = 2;
int32 Threads = 3;
int32 Tokens = 4;
int32 TopK = 5;
int32 Repeat = 6;
int32 Batch = 7;
int32 NKeep = 8;
float Temperature = 9;
float Penalty = 10;
bool F16KV = 11;
bool DebugMode = 12;
repeated string StopPrompts = 13;
bool IgnoreEOS = 14;
float TailFreeSamplingZ = 15;
float TypicalP = 16;
float FrequencyPenalty = 17;
float PresencePenalty = 18;
int32 Mirostat = 19;
float MirostatETA = 20;
float MirostatTAU = 21;
bool PenalizeNL = 22;
string LogitBias = 23;
bool MLock = 25;
bool MMap = 26;
bool PromptCacheAll = 27;
bool PromptCacheRO = 28;
string Grammar = 29;
string MainGPU = 30;
string TensorSplit = 31;
float TopP = 32;
string PromptCachePath = 33;
bool Debug = 34;
repeated int32 EmbeddingTokens = 35;
string Embeddings = 36;
float RopeFreqBase = 37;
float RopeFreqScale = 38;
float NegativePromptScale = 39;
string NegativePrompt = 40;
int32 NDraft = 41;
repeated string Images = 42;
}
// The response message containing the result
message Reply {
bytes message = 1;
}
message ModelOptions {
string Model = 1;
int32 ContextSize = 2;
int32 Seed = 3;
int32 NBatch = 4;
bool F16Memory = 5;
bool MLock = 6;
bool MMap = 7;
bool VocabOnly = 8;
bool LowVRAM = 9;
bool Embeddings = 10;
bool NUMA = 11;
int32 NGPULayers = 12;
string MainGPU = 13;
string TensorSplit = 14;
int32 Threads = 15;
string LibrarySearchPath = 16;
float RopeFreqBase = 17;
float RopeFreqScale = 18;
float RMSNormEps = 19;
int32 NGQA = 20;
string ModelFile = 21;
// AutoGPTQ
string Device = 22;
bool UseTriton = 23;
string ModelBaseName = 24;
bool UseFastTokenizer = 25;
// Diffusers
string PipelineType = 26;
string SchedulerType = 27;
bool CUDA = 28;
float CFGScale = 29;
bool IMG2IMG = 30;
string CLIPModel = 31;
string CLIPSubfolder = 32;
int32 CLIPSkip = 33;
string ControlNet = 48;
string Tokenizer = 34;
// LLM (llama.cpp)
string LoraBase = 35;
string LoraAdapter = 36;
float LoraScale = 42;
bool NoMulMatQ = 37;
string DraftModel = 39;
string AudioPath = 38;
// vllm
string Quantization = 40;
string MMProj = 41;
string RopeScaling = 43;
float YarnExtFactor = 44;
float YarnAttnFactor = 45;
float YarnBetaFast = 46;
float YarnBetaSlow = 47;
string Type = 49;
}
message Result {
string message = 1;
bool success = 2;
}
message EmbeddingResult {
repeated float embeddings = 1;
}
message TranscriptRequest {
string dst = 2;
string language = 3;
uint32 threads = 4;
}
message TranscriptResult {
repeated TranscriptSegment segments = 1;
string text = 2;
}
message TranscriptSegment {
int32 id = 1;
int64 start = 2;
int64 end = 3;
string text = 4;
repeated int32 tokens = 5;
}
message GenerateImageRequest {
int32 height = 1;
int32 width = 2;
int32 mode = 3;
int32 step = 4;
int32 seed = 5;
string positive_prompt = 6;
string negative_prompt = 7;
string dst = 8;
string src = 9;
// Diffusers
string EnableParameters = 10;
int32 CLIPSkip = 11;
}
message TTSRequest {
string text = 1;
string model = 2;
string dst = 3;
}
message TokenizationResponse {
int32 length = 1;
repeated int32 tokens = 2;
}
message MemoryUsageData {
uint64 total = 1;
map<string, uint64> breakdown = 2;
}
message StatusResponse {
enum State {
UNINITIALIZED = 0;
BUSY = 1;
READY = 2;
ERROR = -1;
}
State state = 1;
MemoryUsageData memory = 2;
}
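A sketch of a Go client for this service (not part of this diff), using the stubs generated in the next file and the default per-model backend address used by the servers later in this diff (localhost:50051):

package main

import (
	"context"
	"fmt"
	"time"

	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

func main() {
	conn, err := grpc.Dial("localhost:50051",
		grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		panic(err)
	}
	defer conn.Close()

	client := pb.NewBackendClient(conn)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	if _, err := client.Health(ctx, &pb.HealthMessage{}); err != nil {
		panic(err)
	}
	fmt.Println("backend is healthy")
}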

backend/backend_grpc.pb.go (new file, 457 lines)

@@ -0,0 +1,457 @@
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.2.0
// - protoc v4.23.4
// source: backend/backend.proto
package proto
import (
context "context"
grpc "google.golang.org/grpc"
codes "google.golang.org/grpc/codes"
status "google.golang.org/grpc/status"
)
// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
// Requires gRPC-Go v1.32.0 or later.
const _ = grpc.SupportPackageIsVersion7
// BackendClient is the client API for Backend service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type BackendClient interface {
Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error)
Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error)
LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error)
PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error)
Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error)
GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error)
AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error)
TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error)
TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error)
Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error)
}
type backendClient struct {
cc grpc.ClientConnInterface
}
func NewBackendClient(cc grpc.ClientConnInterface) BackendClient {
return &backendClient{cc}
}
func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) {
out := new(Reply)
err := c.cc.Invoke(ctx, "/backend.Backend/Health", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) {
out := new(Reply)
err := c.cc.Invoke(ctx, "/backend.Backend/Predict", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) {
out := new(Result)
err := c.cc.Invoke(ctx, "/backend.Backend/LoadModel", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *backendClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) {
stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], "/backend.Backend/PredictStream", opts...)
if err != nil {
return nil, err
}
x := &backendPredictStreamClient{stream}
if err := x.ClientStream.SendMsg(in); err != nil {
return nil, err
}
if err := x.ClientStream.CloseSend(); err != nil {
return nil, err
}
return x, nil
}
type Backend_PredictStreamClient interface {
Recv() (*Reply, error)
grpc.ClientStream
}
type backendPredictStreamClient struct {
grpc.ClientStream
}
func (x *backendPredictStreamClient) Recv() (*Reply, error) {
m := new(Reply)
if err := x.ClientStream.RecvMsg(m); err != nil {
return nil, err
}
return m, nil
}
func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) {
out := new(EmbeddingResult)
err := c.cc.Invoke(ctx, "/backend.Backend/Embedding", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) {
out := new(Result)
err := c.cc.Invoke(ctx, "/backend.Backend/GenerateImage", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) {
out := new(TranscriptResult)
err := c.cc.Invoke(ctx, "/backend.Backend/AudioTranscription", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) {
out := new(Result)
err := c.cc.Invoke(ctx, "/backend.Backend/TTS", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *backendClient) TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) {
out := new(TokenizationResponse)
err := c.cc.Invoke(ctx, "/backend.Backend/TokenizeString", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *backendClient) Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) {
out := new(StatusResponse)
err := c.cc.Invoke(ctx, "/backend.Backend/Status", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
// BackendServer is the server API for Backend service.
// All implementations must embed UnimplementedBackendServer
// for forward compatibility
type BackendServer interface {
Health(context.Context, *HealthMessage) (*Reply, error)
Predict(context.Context, *PredictOptions) (*Reply, error)
LoadModel(context.Context, *ModelOptions) (*Result, error)
PredictStream(*PredictOptions, Backend_PredictStreamServer) error
Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error)
GenerateImage(context.Context, *GenerateImageRequest) (*Result, error)
AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error)
TTS(context.Context, *TTSRequest) (*Result, error)
TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error)
Status(context.Context, *HealthMessage) (*StatusResponse, error)
mustEmbedUnimplementedBackendServer()
}
// UnimplementedBackendServer must be embedded to have forward compatible implementations.
type UnimplementedBackendServer struct {
}
func (UnimplementedBackendServer) Health(context.Context, *HealthMessage) (*Reply, error) {
return nil, status.Errorf(codes.Unimplemented, "method Health not implemented")
}
func (UnimplementedBackendServer) Predict(context.Context, *PredictOptions) (*Reply, error) {
return nil, status.Errorf(codes.Unimplemented, "method Predict not implemented")
}
func (UnimplementedBackendServer) LoadModel(context.Context, *ModelOptions) (*Result, error) {
return nil, status.Errorf(codes.Unimplemented, "method LoadModel not implemented")
}
func (UnimplementedBackendServer) PredictStream(*PredictOptions, Backend_PredictStreamServer) error {
return status.Errorf(codes.Unimplemented, "method PredictStream not implemented")
}
func (UnimplementedBackendServer) Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) {
return nil, status.Errorf(codes.Unimplemented, "method Embedding not implemented")
}
func (UnimplementedBackendServer) GenerateImage(context.Context, *GenerateImageRequest) (*Result, error) {
return nil, status.Errorf(codes.Unimplemented, "method GenerateImage not implemented")
}
func (UnimplementedBackendServer) AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error) {
return nil, status.Errorf(codes.Unimplemented, "method AudioTranscription not implemented")
}
func (UnimplementedBackendServer) TTS(context.Context, *TTSRequest) (*Result, error) {
return nil, status.Errorf(codes.Unimplemented, "method TTS not implemented")
}
func (UnimplementedBackendServer) TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method TokenizeString not implemented")
}
func (UnimplementedBackendServer) Status(context.Context, *HealthMessage) (*StatusResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method Status not implemented")
}
func (UnimplementedBackendServer) mustEmbedUnimplementedBackendServer() {}
// UnsafeBackendServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to BackendServer will
// result in compilation errors.
type UnsafeBackendServer interface {
mustEmbedUnimplementedBackendServer()
}
func RegisterBackendServer(s grpc.ServiceRegistrar, srv BackendServer) {
s.RegisterService(&Backend_ServiceDesc, srv)
}
func _Backend_Health_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(HealthMessage)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BackendServer).Health(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/backend.Backend/Health",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).Health(ctx, req.(*HealthMessage))
}
return interceptor(ctx, in, info, handler)
}
func _Backend_Predict_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(PredictOptions)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BackendServer).Predict(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/backend.Backend/Predict",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).Predict(ctx, req.(*PredictOptions))
}
return interceptor(ctx, in, info, handler)
}
func _Backend_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(ModelOptions)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BackendServer).LoadModel(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/backend.Backend/LoadModel",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).LoadModel(ctx, req.(*ModelOptions))
}
return interceptor(ctx, in, info, handler)
}
func _Backend_PredictStream_Handler(srv interface{}, stream grpc.ServerStream) error {
m := new(PredictOptions)
if err := stream.RecvMsg(m); err != nil {
return err
}
return srv.(BackendServer).PredictStream(m, &backendPredictStreamServer{stream})
}
type Backend_PredictStreamServer interface {
Send(*Reply) error
grpc.ServerStream
}
type backendPredictStreamServer struct {
grpc.ServerStream
}
func (x *backendPredictStreamServer) Send(m *Reply) error {
return x.ServerStream.SendMsg(m)
}
func _Backend_Embedding_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(PredictOptions)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BackendServer).Embedding(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/backend.Backend/Embedding",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).Embedding(ctx, req.(*PredictOptions))
}
return interceptor(ctx, in, info, handler)
}
func _Backend_GenerateImage_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(GenerateImageRequest)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BackendServer).GenerateImage(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/backend.Backend/GenerateImage",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).GenerateImage(ctx, req.(*GenerateImageRequest))
}
return interceptor(ctx, in, info, handler)
}
func _Backend_AudioTranscription_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(TranscriptRequest)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BackendServer).AudioTranscription(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/backend.Backend/AudioTranscription",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).AudioTranscription(ctx, req.(*TranscriptRequest))
}
return interceptor(ctx, in, info, handler)
}
func _Backend_TTS_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(TTSRequest)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BackendServer).TTS(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/backend.Backend/TTS",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).TTS(ctx, req.(*TTSRequest))
}
return interceptor(ctx, in, info, handler)
}
func _Backend_TokenizeString_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(PredictOptions)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BackendServer).TokenizeString(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/backend.Backend/TokenizeString",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).TokenizeString(ctx, req.(*PredictOptions))
}
return interceptor(ctx, in, info, handler)
}
func _Backend_Status_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(HealthMessage)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BackendServer).Status(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/backend.Backend/Status",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).Status(ctx, req.(*HealthMessage))
}
return interceptor(ctx, in, info, handler)
}
// Backend_ServiceDesc is the grpc.ServiceDesc for Backend service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
var Backend_ServiceDesc = grpc.ServiceDesc{
ServiceName: "backend.Backend",
HandlerType: (*BackendServer)(nil),
Methods: []grpc.MethodDesc{
{
MethodName: "Health",
Handler: _Backend_Health_Handler,
},
{
MethodName: "Predict",
Handler: _Backend_Predict_Handler,
},
{
MethodName: "LoadModel",
Handler: _Backend_LoadModel_Handler,
},
{
MethodName: "Embedding",
Handler: _Backend_Embedding_Handler,
},
{
MethodName: "GenerateImage",
Handler: _Backend_GenerateImage_Handler,
},
{
MethodName: "AudioTranscription",
Handler: _Backend_AudioTranscription_Handler,
},
{
MethodName: "TTS",
Handler: _Backend_TTS_Handler,
},
{
MethodName: "TokenizeString",
Handler: _Backend_TokenizeString_Handler,
},
{
MethodName: "Status",
Handler: _Backend_Status_Handler,
},
},
Streams: []grpc.StreamDesc{
{
StreamName: "PredictStream",
Handler: _Backend_PredictStream_Handler,
ServerStreams: true,
},
},
Metadata: "backend/backend.proto",
}
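On the server side, implementations embed UnimplementedBackendServer so that new RPCs added to the service don't break compilation. A minimal sketch (illustrative names, not part of this diff) that overrides only Health and lets every other method fall through to the Unimplemented stubs:

package main

import (
	"context"
	"net"

	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
	"google.golang.org/grpc"
)

// echoBackend overrides only Health; all other RPCs return codes.Unimplemented
// via the embedded UnimplementedBackendServer.
type echoBackend struct {
	pb.UnimplementedBackendServer
}

func (echoBackend) Health(context.Context, *pb.HealthMessage) (*pb.Reply, error) {
	return &pb.Reply{Message: []byte("OK")}, nil
}

func main() {
	lis, err := net.Listen("tcp", "localhost:50051")
	if err != nil {
		panic(err)
	}
	s := grpc.NewServer()
	pb.RegisterBackendServer(s, echoBackend{})
	if err := s.Serve(lis); err != nil {
		panic(err)
	}
}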

backend/cpp/grpc/.gitignore (new vendored file, 3 lines)

@@ -0,0 +1,3 @@
installed_packages/
grpc_build/
grpc_repo/

backend/cpp/grpc/Makefile (new file, 66 lines)

@@ -0,0 +1,66 @@
# Basic platform detection
HOST_SYSTEM = $(shell uname | cut -f 1 -d_)
SYSTEM ?= $(HOST_SYSTEM)
TAG_LIB_GRPC?=v1.59.0
GIT_REPO_LIB_GRPC?=https://github.com/grpc/grpc.git
GIT_CLONE_DEPTH?=1
NUM_BUILD_THREADS?=$(shell nproc --ignore=1)
INSTALLED_PACKAGES=installed_packages
GRPC_REPO=grpc_repo
GRPC_BUILD=grpc_build
export CMAKE_ARGS?=
CMAKE_ARGS+=-DCMAKE_BUILD_TYPE=Release
CMAKE_ARGS+=-DgRPC_INSTALL=ON
CMAKE_ARGS+=-DEXECUTABLE_OUTPUT_PATH=../$(INSTALLED_PACKAGES)/grpc/bin
CMAKE_ARGS+=-DLIBRARY_OUTPUT_PATH=../$(INSTALLED_PACKAGES)/grpc/lib
CMAKE_ARGS+=-DgRPC_BUILD_TESTS=OFF
CMAKE_ARGS+=-DgRPC_BUILD_CSHARP_EXT=OFF
CMAKE_ARGS+=-DgRPC_BUILD_GRPC_CPP_PLUGIN=ON
CMAKE_ARGS+=-DgRPC_BUILD_GRPC_CSHARP_PLUGIN=OFF
CMAKE_ARGS+=-DgRPC_BUILD_GRPC_NODE_PLUGIN=OFF
CMAKE_ARGS+=-DgRPC_BUILD_GRPC_OBJECTIVE_C_PLUGIN=OFF
CMAKE_ARGS+=-DgRPC_BUILD_GRPC_PHP_PLUGIN=OFF
CMAKE_ARGS+=-DgRPC_BUILD_GRPC_PYTHON_PLUGIN=ON
CMAKE_ARGS+=-DgRPC_BUILD_GRPC_RUBY_PLUGIN=OFF
CMAKE_ARGS+=-Dprotobuf_WITH_ZLIB=ON
CMAKE_ARGS+=-DRE2_BUILD_TESTING=OFF
CMAKE_ARGS+=-DCMAKE_INSTALL_PREFIX=../$(INSTALLED_PACKAGES)
# Windows needs OPENSSL_NO_ASM set. This results in slower crypto performance, but the build fails otherwise.
# It may be resolvable, but for now it's set. More info: https://stackoverflow.com/a/75240504/480673
ifeq ($(SYSTEM),MSYS)
CMAKE_ARGS+=-DOPENSSL_NO_ASM=ON
endif
ifeq ($(SYSTEM),MINGW64)
CMAKE_ARGS+=-DOPENSSL_NO_ASM=ON
endif
ifeq ($(SYSTEM),MINGW32)
CMAKE_ARGS+=-DOPENSSL_NO_ASM=ON
endif
ifeq ($(SYSTEM),CYGWIN)
CMAKE_ARGS+=-DOPENSSL_NO_ASM=ON
endif
$(INSTALLED_PACKAGES): grpc_build
$(GRPC_REPO):
git clone --depth $(GIT_CLONE_DEPTH) -b $(TAG_LIB_GRPC) $(GIT_REPO_LIB_GRPC) $(GRPC_REPO)/grpc
cd $(GRPC_REPO)/grpc && git submodule update --init --recursive --depth $(GIT_CLONE_DEPTH)
$(GRPC_BUILD): $(GRPC_REPO)
mkdir -p $(GRPC_BUILD)
cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . -- -j ${NUM_BUILD_THREADS} && cmake --build . --target install -- -j ${NUM_BUILD_THREADS}
build: $(INSTALLED_PACKAGES)
rebuild:
rm -rf grpc_build
$(MAKE) grpc_build
clean:
rm -rf grpc_build
rm -rf grpc_repo
rm -rf installed_packages


@@ -0,0 +1,82 @@
## XXX: In some versions of CMake, clip wasn't being built before llama.
## This is a hack for now, but it should be fixed in the future.
set(TARGET myclip)
add_library(${TARGET} clip.cpp clip.h)
install(TARGETS ${TARGET} LIBRARY)
target_link_libraries(${TARGET} PRIVATE common ggml ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if (NOT MSVC)
target_compile_options(${TARGET} PRIVATE -Wno-cast-qual) # stb_image.h
endif()
set(TARGET grpc-server)
# END CLIP hack
set(CMAKE_CXX_STANDARD 17)
cmake_minimum_required(VERSION 3.15)
set(TARGET grpc-server)
set(_PROTOBUF_LIBPROTOBUF libprotobuf)
set(_REFLECTION grpc++_reflection)
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
# Set correct Homebrew install folder for Apple Silicon and Intel Macs
if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64")
set(HOMEBREW_DEFAULT_PREFIX "/opt/homebrew")
else()
set(HOMEBREW_DEFAULT_PREFIX "/usr/local")
endif()
link_directories("${HOMEBREW_DEFAULT_PREFIX}/lib")
include_directories("${HOMEBREW_DEFAULT_PREFIX}/include")
endif()
find_package(absl CONFIG REQUIRED)
find_package(Protobuf CONFIG REQUIRED)
find_package(gRPC CONFIG REQUIRED)
find_program(_PROTOBUF_PROTOC protoc)
set(_GRPC_GRPCPP grpc++)
find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${Protobuf_INCLUDE_DIRS})
message(STATUS "Using protobuf version ${Protobuf_VERSION} | Protobuf_INCLUDE_DIRS: ${Protobuf_INCLUDE_DIRS} | CMAKE_CURRENT_BINARY_DIR: ${CMAKE_CURRENT_BINARY_DIR}")
# Proto file
get_filename_component(hw_proto "../../../../../../backend/backend.proto" ABSOLUTE)
get_filename_component(hw_proto_path "${hw_proto}" PATH)
# Generated sources
set(hw_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/backend.pb.cc")
set(hw_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/backend.pb.h")
set(hw_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/backend.grpc.pb.cc")
set(hw_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/backend.grpc.pb.h")
add_custom_command(
OUTPUT "${hw_proto_srcs}" "${hw_proto_hdrs}" "${hw_grpc_srcs}" "${hw_grpc_hdrs}"
COMMAND ${_PROTOBUF_PROTOC}
ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}"
--cpp_out "${CMAKE_CURRENT_BINARY_DIR}"
-I "${hw_proto_path}"
--plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}"
"${hw_proto}"
DEPENDS "${hw_proto}")
# hw_grpc_proto
add_library(hw_grpc_proto
${hw_grpc_srcs}
${hw_grpc_hdrs}
${hw_proto_srcs}
${hw_proto_hdrs} )
add_executable(${TARGET} grpc-server.cpp json.hpp )
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
absl::flags_parse
gRPC::${_REFLECTION}
gRPC::${_GRPC_GRPCPP}
protobuf::${_PROTOBUF_LIBPROTOBUF})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if(TARGET BUILD_INFO)
add_dependencies(${TARGET} BUILD_INFO)
endif()


@@ -0,0 +1,53 @@
LLAMA_VERSION?=
CMAKE_ARGS?=
BUILD_TYPE?=
# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
CMAKE_ARGS+=-DLLAMA_CUBLAS=ON
# If build type is openblas then we set -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
# to CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
CMAKE_ARGS+=-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
# If build type is clblast (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblast)
CMAKE_ARGS+=-DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
endif
llama.cpp:
git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
if [ -z "$(LLAMA_VERSION)" ]; then \
exit 1; \
fi
cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1
llama.cpp/examples/grpc-server:
mkdir -p llama.cpp/examples/grpc-server
cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
## XXX: In some versions of CMake, clip wasn't being built before llama.
## This is a hack for now, but it should be fixed in the future.
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
rebuild:
cp -rfv $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
rm -rf grpc-server
$(MAKE) grpc-server
clean:
rm -rf llama.cpp
rm -rf grpc-server
grpc-server: llama.cpp llama.cpp/examples/grpc-server
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release
cp llama.cpp/build/bin/grpc-server .

(file diff suppressed because it is too large)

backend/cpp/llama/json.hpp (new file, 24596 lines; diff suppressed because it is too large)


@@ -0,0 +1,21 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &Image{}); err != nil {
panic(err)
}
}


@@ -0,0 +1,33 @@
package main
// This is a wrapper to satisfy the gRPC service interface.
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc.)
import (
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/LocalAI/pkg/stablediffusion"
)
type Image struct {
base.SingleThread
stablediffusion *stablediffusion.StableDiffusion
}
func (image *Image) Load(opts *pb.ModelOptions) error {
var err error
// Note: the Model here is a path to a directory containing the model files
image.stablediffusion, err = stablediffusion.New(opts.ModelFile)
return err
}
func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
return image.stablediffusion.GenerateImage(
int(opts.Height),
int(opts.Width),
int(opts.Mode),
int(opts.Step),
int(opts.Seed),
opts.PositivePrompt,
opts.NegativePrompt,
opts.Dst)
}
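For illustration, a request the wrapper above would accept (a sketch; field names are taken from the usage in GenerateImage, all values are made up):

func exampleImageRequest(image *Image) error {
	// Dst is the output path the backend writes the generated image to
	return image.GenerateImage(&pb.GenerateImageRequest{
		Height:         512,
		Width:          512,
		Mode:           0, // assumption: 0 selects the default txt2img mode
		Step:           25,
		Seed:           42,
		PositivePrompt: "a lighthouse at dawn, oil painting",
		NegativePrompt: "blurry, low quality",
		Dst:            "/tmp/out.png",
	})
}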


@@ -0,0 +1,21 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &Image{}); err != nil {
panic(err)
}
}


@@ -0,0 +1,32 @@
package main
// This is a wrapper to satisfy the gRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc.)
import (
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/LocalAI/pkg/tinydream"
)
type Image struct {
base.SingleThread
tinydream *tinydream.TinyDream
}
func (image *Image) Load(opts *pb.ModelOptions) error {
var err error
// Note: the Model here is a path to a directory containing the model files
image.tinydream, err = tinydream.New(opts.ModelFile)
return err
}
func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
return image.tinydream.GenerateImage(
int(opts.Height),
int(opts.Width),
int(opts.Step),
int(opts.Seed),
opts.PositivePrompt,
opts.NegativePrompt,
opts.Dst)
}


@@ -0,0 +1,34 @@
package main
// This is a wrapper to satisfy the gRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc.)
import (
bert "github.com/go-skynet/go-bert.cpp"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)
type Embeddings struct {
base.SingleThread
bert *bert.Bert
}
func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
model, err := bert.New(opts.ModelFile)
llm.bert = model
return err
}
func (llm *Embeddings) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
if len(opts.EmbeddingTokens) > 0 {
tokens := []int{}
for _, t := range opts.EmbeddingTokens {
tokens = append(tokens, int(t))
}
return llm.bert.TokenEmbeddings(tokens, bert.SetThreads(int(opts.Threads)))
}
return llm.bert.Embeddings(opts.Embeddings, bert.SetThreads(int(opts.Threads)))
}
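For illustration, the two request shapes the Embeddings method above distinguishes (a sketch; field types are inferred from how they are used in the wrapper, values are made up):

func exampleEmbeddingRequests() (*pb.PredictOptions, *pb.PredictOptions) {
	// pre-tokenized input: routed to bert.TokenEmbeddings above
	byTokens := &pb.PredictOptions{Threads: 4, EmbeddingTokens: []int32{101, 2023, 102}}
	// raw text: routed to bert.Embeddings above
	byText := &pb.PredictOptions{Threads: 4, Embeddings: "hello world"}
	return byTokens, byText
}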


@@ -0,0 +1,21 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &Embeddings{}); err != nil {
panic(err)
}
}


@@ -0,0 +1,23 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &transformers.Dolly{}); err != nil {
panic(err)
}
}


@@ -0,0 +1,23 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &transformers.Falcon{}); err != nil {
panic(err)
}
}


@@ -0,0 +1,23 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &transformers.GPT2{}); err != nil {
panic(err)
}
}


@@ -0,0 +1,62 @@
package main
// This is a wrapper to satisfy the gRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc.)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
)
type LLM struct {
base.SingleThread
gpt4all *gpt4all.Model
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
model, err := gpt4all.New(opts.ModelFile,
gpt4all.SetThreads(int(opts.Threads)),
gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath))
llm.gpt4all = model
return err
}
func buildPredictOptions(opts *pb.PredictOptions) []gpt4all.PredictOption {
predictOptions := []gpt4all.PredictOption{
gpt4all.SetTemperature(float64(opts.Temperature)),
gpt4all.SetTopP(float64(opts.TopP)),
gpt4all.SetTopK(int(opts.TopK)),
gpt4all.SetTokens(int(opts.Tokens)),
}
if opts.Batch != 0 {
predictOptions = append(predictOptions, gpt4all.SetBatch(int(opts.Batch)))
}
return predictOptions
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
predictOptions := buildPredictOptions(opts)
go func() {
llm.gpt4all.SetTokenCallback(func(token string) bool {
results <- token
return true
})
_, err := llm.gpt4all.Predict(opts.Prompt, predictOptions...)
if err != nil {
fmt.Println("err: ", err)
}
llm.gpt4all.SetTokenCallback(nil)
close(results)
}()
return nil
}
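A minimal caller sketch for the streaming contract above (drainStream is a hypothetical helper; assumes Load has already succeeded). PredictStream returns immediately, and the goroutine closes the channel when generation ends, so draining it is a plain range loop:

func drainStream(llm *LLM, opts *pb.PredictOptions) error {
	results := make(chan string)
	if err := llm.PredictStream(opts, results); err != nil {
		return err
	}
	for token := range results {
		fmt.Print(token) // tokens arrive in generation order
	}
	return nil
}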


@@ -0,0 +1,21 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
panic(err)
}
}


@@ -0,0 +1,23 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &transformers.GPTJ{}); err != nil {
panic(err)
}
}


@@ -0,0 +1,23 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &transformers.GPTNeoX{}); err != nil {
panic(err)
}
}


@@ -0,0 +1,58 @@
package main
// This is a wrapper to satisfy the gRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc.)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/LocalAI/pkg/langchain"
)
type LLM struct {
base.Base
langchain *langchain.HuggingFace
model string
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
var err error
// propagate the constructor error instead of discarding it
llm.langchain, err = langchain.NewHuggingFace(opts.Model)
llm.model = opts.Model
return err
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
o := []langchain.PredictOption{
langchain.SetModel(llm.model),
langchain.SetMaxTokens(int(opts.Tokens)),
langchain.SetTemperature(float64(opts.Temperature)),
langchain.SetStopWords(opts.StopPrompts),
}
pred, err := llm.langchain.PredictHuggingFace(opts.Prompt, o...)
if err != nil {
return "", err
}
return pred.Completion, nil
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
o := []langchain.PredictOption{
langchain.SetModel(llm.model),
langchain.SetMaxTokens(int(opts.Tokens)),
langchain.SetTemperature(float64(opts.Temperature)),
langchain.SetStopWords(opts.StopPrompts),
}
go func() {
res, err := llm.langchain.PredictHuggingFace(opts.Prompt, o...)
if err != nil {
fmt.Println("err: ", err)
close(results)
return // avoid dereferencing a nil result on error
}
results <- res.Completion
close(results)
}()
return nil
}


@@ -0,0 +1,21 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
panic(err)
}
}


@@ -0,0 +1,204 @@
package main
// This is a wrapper to satisfy the gRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc.)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/go-llama.cpp"
)
type LLM struct {
base.SingleThread
llama *llama.LLama
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
ropeFreqBase := float32(10000)
ropeFreqScale := float32(1)
if opts.RopeFreqBase != 0 {
ropeFreqBase = opts.RopeFreqBase
}
if opts.RopeFreqScale != 0 {
ropeFreqScale = opts.RopeFreqScale
}
llamaOpts := []llama.ModelOption{
llama.WithRopeFreqBase(ropeFreqBase),
llama.WithRopeFreqScale(ropeFreqScale),
}
if opts.NGQA != 0 {
llamaOpts = append(llamaOpts, llama.WithGQA(int(opts.NGQA)))
}
if opts.RMSNormEps != 0 {
llamaOpts = append(llamaOpts, llama.WithRMSNormEPS(opts.RMSNormEps))
}
if opts.ContextSize != 0 {
llamaOpts = append(llamaOpts, llama.SetContext(int(opts.ContextSize)))
}
if opts.F16Memory {
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
}
if opts.Embeddings {
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
}
if opts.NGPULayers != 0 {
llamaOpts = append(llamaOpts, llama.SetGPULayers(int(opts.NGPULayers)))
}
llamaOpts = append(llamaOpts, llama.SetMMap(opts.MMap))
llamaOpts = append(llamaOpts, llama.SetMainGPU(opts.MainGPU))
llamaOpts = append(llamaOpts, llama.SetTensorSplit(opts.TensorSplit))
if opts.NBatch != 0 {
llamaOpts = append(llamaOpts, llama.SetNBatch(int(opts.NBatch)))
} else {
llamaOpts = append(llamaOpts, llama.SetNBatch(512))
}
if opts.NUMA {
llamaOpts = append(llamaOpts, llama.EnableNUMA)
}
if opts.LowVRAM {
llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
}
model, err := llama.New(opts.ModelFile, llamaOpts...)
llm.llama = model
return err
}
func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
ropeFreqBase := float32(10000)
ropeFreqScale := float32(1)
if opts.RopeFreqBase != 0 {
ropeFreqBase = opts.RopeFreqBase
}
if opts.RopeFreqScale != 0 {
ropeFreqScale = opts.RopeFreqScale
}
predictOptions := []llama.PredictOption{
llama.SetTemperature(opts.Temperature),
llama.SetTopP(opts.TopP),
llama.SetTopK(int(opts.TopK)),
llama.SetTokens(int(opts.Tokens)),
llama.SetThreads(int(opts.Threads)),
llama.WithGrammar(opts.Grammar),
llama.SetRopeFreqBase(ropeFreqBase),
llama.SetRopeFreqScale(ropeFreqScale),
llama.SetNegativePromptScale(opts.NegativePromptScale),
llama.SetNegativePrompt(opts.NegativePrompt),
}
if opts.PromptCacheAll {
predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
}
if opts.PromptCacheRO {
predictOptions = append(predictOptions, llama.EnablePromptCacheRO)
}
// Expected absolute path
if opts.PromptCachePath != "" {
predictOptions = append(predictOptions, llama.SetPathPromptCache(opts.PromptCachePath))
}
if opts.Mirostat != 0 {
predictOptions = append(predictOptions, llama.SetMirostat(int(opts.Mirostat)))
}
if opts.MirostatETA != 0 {
predictOptions = append(predictOptions, llama.SetMirostatETA(opts.MirostatETA))
}
if opts.MirostatTAU != 0 {
predictOptions = append(predictOptions, llama.SetMirostatTAU(opts.MirostatTAU))
}
if opts.Debug {
predictOptions = append(predictOptions, llama.Debug)
}
predictOptions = append(predictOptions, llama.SetStopWords(opts.StopPrompts...))
if opts.PresencePenalty != 0 {
predictOptions = append(predictOptions, llama.SetPenalty(opts.PresencePenalty))
}
if opts.NKeep != 0 {
predictOptions = append(predictOptions, llama.SetNKeep(int(opts.NKeep)))
}
if opts.Batch != 0 {
predictOptions = append(predictOptions, llama.SetBatch(int(opts.Batch)))
}
if opts.F16KV {
predictOptions = append(predictOptions, llama.EnableF16KV)
}
if opts.IgnoreEOS {
predictOptions = append(predictOptions, llama.IgnoreEOS)
}
if opts.Seed != 0 {
predictOptions = append(predictOptions, llama.SetSeed(int(opts.Seed)))
}
//predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))
predictOptions = append(predictOptions, llama.SetFrequencyPenalty(opts.FrequencyPenalty))
predictOptions = append(predictOptions, llama.SetMlock(opts.MLock))
predictOptions = append(predictOptions, llama.SetMemoryMap(opts.MMap))
predictOptions = append(predictOptions, llama.SetPredictionMainGPU(opts.MainGPU))
predictOptions = append(predictOptions, llama.SetPredictionTensorSplit(opts.TensorSplit))
predictOptions = append(predictOptions, llama.SetTailFreeSamplingZ(opts.TailFreeSamplingZ))
predictOptions = append(predictOptions, llama.SetTypicalP(opts.TypicalP))
return predictOptions
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
return llm.llama.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
predictOptions := buildPredictOptions(opts)
predictOptions = append(predictOptions, llama.SetTokenCallback(func(token string) bool {
results <- token
return true
}))
go func() {
_, err := llm.llama.Predict(opts.Prompt, predictOptions...)
if err != nil {
fmt.Println("err: ", err)
}
close(results)
}()
return nil
}
func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
predictOptions := buildPredictOptions(opts)
if len(opts.EmbeddingTokens) > 0 {
tokens := []int{}
for _, t := range opts.EmbeddingTokens {
tokens = append(tokens, int(t))
}
return llm.llama.TokenEmbeddings(tokens, predictOptions...)
}
return llm.llama.Embeddings(opts.Embeddings, predictOptions...)
}


@@ -0,0 +1,19 @@
package main
import (
"flag"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
panic(err)
}
}


@@ -0,0 +1,257 @@
package main
// This is a wrapper to satisfy the gRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc.)
import (
"fmt"
"path/filepath"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/go-llama.cpp"
)
type LLM struct {
base.SingleThread
llama *llama.LLama
draftModel *llama.LLama
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
ropeFreqBase := float32(10000)
ropeFreqScale := float32(1)
if opts.RopeFreqBase != 0 {
ropeFreqBase = opts.RopeFreqBase
}
if opts.RopeFreqScale != 0 {
ropeFreqScale = opts.RopeFreqScale
}
llamaOpts := []llama.ModelOption{
llama.WithRopeFreqBase(ropeFreqBase),
llama.WithRopeFreqScale(ropeFreqScale),
}
if opts.NoMulMatQ {
llamaOpts = append(llamaOpts, llama.SetMulMatQ(false))
}
// Get base path of opts.ModelFile and use the same for lora (assume the same path)
basePath := filepath.Dir(opts.ModelFile)
if opts.LoraAdapter != "" {
llamaOpts = append(llamaOpts, llama.SetLoraAdapter(filepath.Join(basePath, opts.LoraAdapter)))
}
if opts.LoraBase != "" {
llamaOpts = append(llamaOpts, llama.SetLoraBase(filepath.Join(basePath, opts.LoraBase)))
}
if opts.ContextSize != 0 {
llamaOpts = append(llamaOpts, llama.SetContext(int(opts.ContextSize)))
}
if opts.F16Memory {
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
}
if opts.Embeddings {
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
}
if opts.NGPULayers != 0 {
llamaOpts = append(llamaOpts, llama.SetGPULayers(int(opts.NGPULayers)))
}
llamaOpts = append(llamaOpts, llama.SetMMap(opts.MMap))
llamaOpts = append(llamaOpts, llama.SetMainGPU(opts.MainGPU))
llamaOpts = append(llamaOpts, llama.SetTensorSplit(opts.TensorSplit))
if opts.NBatch != 0 {
llamaOpts = append(llamaOpts, llama.SetNBatch(int(opts.NBatch)))
} else {
llamaOpts = append(llamaOpts, llama.SetNBatch(512))
}
if opts.NUMA {
llamaOpts = append(llamaOpts, llama.EnableNUMA)
}
if opts.LowVRAM {
llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
}
if opts.DraftModel != "" {
// https://github.com/ggerganov/llama.cpp/blob/71ca2fad7d6c0ef95ef9944fb3a1a843e481f314/examples/speculative/speculative.cpp#L40
llamaOpts = append(llamaOpts, llama.SetPerplexity(true))
}
model, err := llama.New(opts.ModelFile, llamaOpts...)
if err != nil {
return err
}
if opts.DraftModel != "" {
// opts.DraftModel is relative to opts.ModelFile, so we need to get the basepath of opts.ModelFile
if !filepath.IsAbs(opts.DraftModel) {
dir := filepath.Dir(opts.ModelFile)
opts.DraftModel = filepath.Join(dir, opts.DraftModel)
}
draftModel, err := llama.New(opts.DraftModel, llamaOpts...)
if err != nil {
return err
}
llm.draftModel = draftModel
}
llm.llama = model
return nil
}
func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
ropeFreqBase := float32(10000)
ropeFreqScale := float32(1)
if opts.RopeFreqBase != 0 {
ropeFreqBase = opts.RopeFreqBase
}
if opts.RopeFreqScale != 0 {
ropeFreqScale = opts.RopeFreqScale
}
predictOptions := []llama.PredictOption{
llama.SetTemperature(opts.Temperature),
llama.SetTopP(opts.TopP),
llama.SetTopK(int(opts.TopK)),
llama.SetTokens(int(opts.Tokens)),
llama.SetThreads(int(opts.Threads)),
llama.WithGrammar(opts.Grammar),
llama.SetRopeFreqBase(ropeFreqBase),
llama.SetRopeFreqScale(ropeFreqScale),
llama.SetNegativePromptScale(opts.NegativePromptScale),
llama.SetNegativePrompt(opts.NegativePrompt),
}
if opts.PromptCacheAll {
predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
}
if opts.PromptCacheRO {
predictOptions = append(predictOptions, llama.EnablePromptCacheRO)
}
// Expected absolute path
if opts.PromptCachePath != "" {
predictOptions = append(predictOptions, llama.SetPathPromptCache(opts.PromptCachePath))
}
if opts.Mirostat != 0 {
predictOptions = append(predictOptions, llama.SetMirostat(int(opts.Mirostat)))
}
if opts.MirostatETA != 0 {
predictOptions = append(predictOptions, llama.SetMirostatETA(opts.MirostatETA))
}
if opts.MirostatTAU != 0 {
predictOptions = append(predictOptions, llama.SetMirostatTAU(opts.MirostatTAU))
}
if opts.Debug {
predictOptions = append(predictOptions, llama.Debug)
}
predictOptions = append(predictOptions, llama.SetStopWords(opts.StopPrompts...))
if opts.PresencePenalty != 0 {
predictOptions = append(predictOptions, llama.SetPenalty(opts.PresencePenalty))
}
if opts.NKeep != 0 {
predictOptions = append(predictOptions, llama.SetNKeep(int(opts.NKeep)))
}
if opts.Batch != 0 {
predictOptions = append(predictOptions, llama.SetBatch(int(opts.Batch)))
}
if opts.F16KV {
predictOptions = append(predictOptions, llama.EnableF16KV)
}
if opts.IgnoreEOS {
predictOptions = append(predictOptions, llama.IgnoreEOS)
}
if opts.Seed != 0 {
predictOptions = append(predictOptions, llama.SetSeed(int(opts.Seed)))
}
if opts.NDraft != 0 {
predictOptions = append(predictOptions, llama.SetNDraft(int(opts.NDraft)))
}
//predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))
predictOptions = append(predictOptions, llama.SetFrequencyPenalty(opts.FrequencyPenalty))
predictOptions = append(predictOptions, llama.SetMlock(opts.MLock))
predictOptions = append(predictOptions, llama.SetMemoryMap(opts.MMap))
predictOptions = append(predictOptions, llama.SetPredictionMainGPU(opts.MainGPU))
predictOptions = append(predictOptions, llama.SetPredictionTensorSplit(opts.TensorSplit))
predictOptions = append(predictOptions, llama.SetTailFreeSamplingZ(opts.TailFreeSamplingZ))
predictOptions = append(predictOptions, llama.SetTypicalP(opts.TypicalP))
return predictOptions
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
if llm.draftModel != nil {
return llm.llama.SpeculativeSampling(llm.draftModel, opts.Prompt, buildPredictOptions(opts)...)
}
return llm.llama.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
predictOptions := buildPredictOptions(opts)
predictOptions = append(predictOptions, llama.SetTokenCallback(func(token string) bool {
results <- token
return true
}))
go func() {
var err error
if llm.draftModel != nil {
// use predictOptions so the token callback attached above is included
_, err = llm.llama.SpeculativeSampling(llm.draftModel, opts.Prompt, predictOptions...)
} else {
_, err = llm.llama.Predict(opts.Prompt, predictOptions...)
}
if err != nil {
fmt.Println("err: ", err)
}
close(results)
}()
return nil
}
func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
predictOptions := buildPredictOptions(opts)
if len(opts.EmbeddingTokens) > 0 {
tokens := []int{}
for _, t := range opts.EmbeddingTokens {
tokens = append(tokens, int(t))
}
return llm.llama.TokenEmbeddings(tokens, predictOptions...)
}
return llm.llama.Embeddings(opts.Embeddings, predictOptions...)
}
func (llm *LLM) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) {
predictOptions := buildPredictOptions(opts)
l, tokens, err := llm.llama.TokenizeString(opts.Prompt, predictOptions...)
if err != nil {
return pb.TokenizationResponse{}, err
}
return pb.TokenizationResponse{
Length: l,
Tokens: tokens,
}, nil
}
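A small usage sketch for TokenizeString above (exampleTokenize is hypothetical; assumes a loaded model):

func exampleTokenize(llm *LLM, prompt string) {
	resp, err := llm.TokenizeString(&pb.PredictOptions{Prompt: prompt})
	if err != nil {
		fmt.Println("tokenize err:", err)
		return
	}
	// Length is the token count, Tokens the raw ids
	fmt.Println(resp.Length, resp.Tokens)
}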


@@ -0,0 +1,23 @@
package main
// GRPC Falcon server
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
panic(err)
}
}


@@ -0,0 +1,23 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &transformers.MPT{}); err != nil {
panic(err)
}
}


@@ -0,0 +1,23 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &transformers.Replit{}); err != nil {
panic(err)
}
}


@@ -0,0 +1,21 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
panic(err)
}
}


@@ -0,0 +1,95 @@
package main
// This is a wrapper to satisfy the gRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc.)
import (
"fmt"
"path/filepath"
"github.com/donomii/go-rwkv.cpp"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)
const tokenizerSuffix = ".tokenizer.json"
type LLM struct {
base.SingleThread
rwkv *rwkv.RwkvState
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
tokenizerFile := opts.Tokenizer
if tokenizerFile == "" {
modelFile := filepath.Base(opts.ModelFile)
tokenizerFile = modelFile + tokenizerSuffix
}
modelPath := filepath.Dir(opts.ModelFile)
tokenizerPath := filepath.Join(modelPath, tokenizerFile)
model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads()))
if model == nil {
return fmt.Errorf("could not load model")
}
llm.rwkv = model
return nil
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
stopWord := "\n"
if len(opts.StopPrompts) > 0 {
stopWord = opts.StopPrompts[0]
}
if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
return "", err
}
response := llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), nil)
return response, nil
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
stopWord := "\n"
if len(opts.StopPrompts) > 0 {
stopWord = opts.StopPrompts[0]
}
if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
fmt.Println("Error processing input: ", err)
return
}
llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), func(s string) bool {
results <- s
return true
})
close(results)
}()
return nil
}
func (llm *LLM) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) {
tokens, err := llm.rwkv.Tokenizer.Encode(opts.Prompt)
if err != nil {
return pb.TokenizationResponse{}, err
}
l := len(tokens)
i32Tokens := make([]int32, l)
for i, t := range tokens {
i32Tokens[i] = int32(t.ID)
}
return pb.TokenizationResponse{
Length: int32(l),
Tokens: i32Tokens,
}, nil
}
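The tokenizer lookup in Load above follows a simple naming convention; a sketch with a made-up path (defaultTokenizerPath is a hypothetical helper):

// given ModelFile "/models/rwkv-raven.bin" and no explicit Tokenizer option,
// Load resolves "/models/rwkv-raven.bin.tokenizer.json" next to the model
func defaultTokenizerPath(modelFile string) string {
	return filepath.Join(filepath.Dir(modelFile), filepath.Base(modelFile)+tokenizerSuffix)
}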


@@ -0,0 +1,23 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &transformers.Starcoder{}); err != nil {
panic(err)
}
}


@@ -0,0 +1,44 @@
package transformers
// This is a wrapper to satisfy the gRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc.)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)
type Dolly struct {
base.SingleThread
dolly *transformers.Dolly
}
func (llm *Dolly) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewDolly(opts.ModelFile)
llm.dolly = model
return err
}
func (llm *Dolly) Predict(opts *pb.PredictOptions) (string, error) {
return llm.dolly.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
// Streaming is not supported by this backend; PredictStream falls back to a single Predict call
func (llm *Dolly) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
res, err := llm.dolly.Predict(opts.Prompt, buildPredictOptions(opts)...)
if err != nil {
fmt.Println("err: ", err)
}
results <- res
close(results)
}()
return nil
}


@@ -0,0 +1,43 @@
package transformers
// This is a wrapper to satisfy the gRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc.)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)
type Falcon struct {
base.SingleThread
falcon *transformers.Falcon
}
func (llm *Falcon) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewFalcon(opts.ModelFile)
llm.falcon = model
return err
}
func (llm *Falcon) Predict(opts *pb.PredictOptions) (string, error) {
return llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
// Streaming is not supported by this backend; PredictStream falls back to a single Predict call
func (llm *Falcon) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
res, err := llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...)
if err != nil {
fmt.Println("err: ", err)
}
results <- res
close(results)
}()
return nil
}


@@ -0,0 +1,42 @@
package transformers
// This is a wrapper to satisfy the gRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc.)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)
type GPT2 struct {
base.SingleThread
gpt2 *transformers.GPT2
}
func (llm *GPT2) Load(opts *pb.ModelOptions) error {
model, err := transformers.New(opts.ModelFile)
llm.gpt2 = model
return err
}
func (llm *GPT2) Predict(opts *pb.PredictOptions) (string, error) {
return llm.gpt2.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
// Streaming is not supported by this backend; PredictStream falls back to a single Predict call
func (llm *GPT2) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
res, err := llm.gpt2.Predict(opts.Prompt, buildPredictOptions(opts)...)
if err != nil {
fmt.Println("err: ", err)
}
results <- res
close(results)
}()
return nil
}


@@ -0,0 +1,42 @@
package transformers
// This is a wrapper to satisfy the gRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc.)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)
type GPTJ struct {
base.SingleThread
gptj *transformers.GPTJ
}
func (llm *GPTJ) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewGPTJ(opts.ModelFile)
llm.gptj = model
return err
}
func (llm *GPTJ) Predict(opts *pb.PredictOptions) (string, error) {
return llm.gptj.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
// Streaming is not supported by this backend; PredictStream falls back to a single Predict call
func (llm *GPTJ) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
res, err := llm.gptj.Predict(opts.Prompt, buildPredictOptions(opts)...)
if err != nil {
fmt.Println("err: ", err)
}
results <- res
close(results)
}()
return nil
}


@@ -0,0 +1,42 @@
package transformers
// This is a wrapper to satisfy the gRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc.)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)
type GPTNeoX struct {
base.SingleThread
gptneox *transformers.GPTNeoX
}
func (llm *GPTNeoX) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewGPTNeoX(opts.ModelFile)
llm.gptneox = model
return err
}
func (llm *GPTNeoX) Predict(opts *pb.PredictOptions) (string, error) {
return llm.gptneox.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
// Streaming is not supported by this backend; PredictStream falls back to a single Predict call
func (llm *GPTNeoX) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
res, err := llm.gptneox.Predict(opts.Prompt, buildPredictOptions(opts)...)
if err != nil {
fmt.Println("err: ", err)
}
results <- res
close(results)
}()
return nil
}


@@ -0,0 +1,42 @@
package transformers
// This is a wrapper to satisfy the gRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc.)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)
type MPT struct {
base.SingleThread
mpt *transformers.MPT
}
func (llm *MPT) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewMPT(opts.ModelFile)
llm.mpt = model
return err
}
func (llm *MPT) Predict(opts *pb.PredictOptions) (string, error) {
return llm.mpt.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
// Streaming is not supported by this backend; PredictStream falls back to a single Predict call
func (llm *MPT) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
res, err := llm.mpt.Predict(opts.Prompt, buildPredictOptions(opts)...)
if err != nil {
fmt.Println("err: ", err)
}
results <- res
close(results)
}()
return nil
}


@@ -0,0 +1,26 @@
package transformers
import (
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)
func buildPredictOptions(opts *pb.PredictOptions) []transformers.PredictOption {
predictOptions := []transformers.PredictOption{
transformers.SetTemperature(float64(opts.Temperature)),
transformers.SetTopP(float64(opts.TopP)),
transformers.SetTopK(int(opts.TopK)),
transformers.SetTokens(int(opts.Tokens)),
transformers.SetThreads(int(opts.Threads)),
}
if opts.Batch != 0 {
predictOptions = append(predictOptions, transformers.SetBatch(int(opts.Batch)))
}
if opts.Seed != 0 {
predictOptions = append(predictOptions, transformers.SetSeed(int(opts.Seed)))
}
return predictOptions
}
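Zero values act as "unset" in the builder above. A sketch request (hypothetical values) that overrides only the sampling knobs and keeps the backend defaults for batch size and seed:

func exampleOptions() []transformers.PredictOption {
	return buildPredictOptions(&pb.PredictOptions{
		Temperature: 0.7,
		TopP:        0.9,
		TopK:        40,
		Tokens:      128,
		Threads:     4,
		// Batch and Seed left at zero: neither option is appended
	})
}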


@@ -0,0 +1,42 @@
package transformers
// This is a wrapper to satisfy the gRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc.)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
)
type Replit struct {
base.SingleThread
replit *transformers.Replit
}
func (llm *Replit) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewReplit(opts.ModelFile)
llm.replit = model
return err
}
func (llm *Replit) Predict(opts *pb.PredictOptions) (string, error) {
return llm.replit.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
// Streaming is not supported by this backend; PredictStream falls back to a single Predict call
func (llm *Replit) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
res, err := llm.replit.Predict(opts.Prompt, buildPredictOptions(opts)...)
if err != nil {
fmt.Println("err: ", err)
}
results <- res
close(results)
}()
return nil
}

Some files were not shown because too many files have changed in this diff.