Compare commits

...

249 Commits

Author SHA1 Message Date
Ettore Di Giacinto
3f739575d8 Minor fixes (#285) 2023-05-17 21:01:46 +02:00
ci-robbot [bot]
7e4616646f ⬆️ Update go-skynet/go-bert.cpp (#274)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-17 11:56:32 +02:00
renovate[bot]
44ffaf86ad fix(deps): update github.com/go-skynet/go-llama.cpp digest to b7bbefb (#243)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-17 11:52:54 +02:00
renovate[bot]
d096644c67 fix(deps): update github.com/go-skynet/go-gpt2.cpp digest to 7bff56f (#217)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-17 11:52:37 +02:00
renovate[bot]
1428600de4 fix(deps): update github.com/go-skynet/go-bert.cpp digest to cea1ed7 (#242)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-17 09:47:22 +02:00
renovate[bot]
17b18df600 fix(deps): update github.com/donomii/go-rwkv.cpp digest to 6fdd0c3 (#240)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-17 08:54:52 +02:00
renovate[bot]
cd81dbae1c fix(deps): update github.com/go-skynet/bloomz.cpp digest to e9366e8 (#227)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-17 08:17:05 +02:00
ci-robbot [bot]
76be06ed56 ⬆️ Update go-skynet/go-gpt2.cpp (#253)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-17 01:47:31 +02:00
renovate[bot]
c2026e01c0 fix(deps): update github.com/mudler/go-stable-diffusion digest to c0748ec (#275)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-17 01:44:14 +02:00
Ettore Di Giacinto
cdca286be1 docker: add openblas and opencv to images (#277) 2023-05-17 01:30:30 +02:00
ci-robbot [bot]
41de6efca9 ⬆️ Update ggerganov/whisper.cpp (#265)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-17 01:04:14 +02:00
renovate[bot]
63a4ccebdc fix(deps): update module github.com/onsi/ginkgo/v2 to v2.9.5 (#264)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-17 01:03:43 +02:00
renovate[bot]
9237c1e91d fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to 95b02d7 (#254)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-16 22:34:36 +02:00
Ettore Di Giacinto
9d051c5d4f feat: add image generation with ncnn-stablediffusion (#272) 2023-05-16 19:32:53 +02:00
Ettore Di Giacinto
acd03d15f2 feat: add support for cublas/openblas in the llama.cpp backend (#258) 2023-05-16 16:26:25 +02:00
Ettore Di Giacinto
a035de2fdd tests: add rwkv (#261) 2023-05-15 08:15:01 +02:00
Ettore Di Giacinto
76a1267799 bump: update whisper.cpp (#260) 2023-05-15 01:00:16 +02:00
Ettore Di Giacinto
e533b008d4 docs: update README 2023-05-14 18:09:26 +02:00
Ettore Di Giacinto
a4380228e3 docs: update README 2023-05-14 18:08:42 +02:00
Ettore Di Giacinto
2a9d7474ce fix(rwkv): load tokenizer file from model path (#255) 2023-05-14 17:49:10 +02:00
Ettore Di Giacinto
850a690290 docs: update README 2023-05-14 11:14:09 +02:00
Ettore Di Giacinto
39edd9ff73 docs: update README short-term roadmap 2023-05-14 11:12:29 +02:00
ci-robbot [bot]
b82bbbfc6b ⬆️ Update ggerganov/whisper.cpp (#218)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-14 10:03:55 +02:00
mudler
023c065812 docs: Fix typo 2023-05-13 22:14:35 +02:00
mudler
a627a6c4e2 docs: Update README 2023-05-13 22:14:09 +02:00
ci-robbot [bot]
6c9ddff8e9 ⬆️ Update go-skynet/go-llama.cpp (#245)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-13 22:07:43 +02:00
ci-robbot [bot]
c5318587b8 ⬆️ Update go-skynet/go-bert.cpp (#247)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-13 14:36:01 +02:00
mudler
c3622299ce docs: cleanup langchain-chroma example 2023-05-13 11:16:56 +02:00
Ettore Di Giacinto
de36a48861 Update gpt4all to fix thread counts (#249) 2023-05-13 09:37:46 +02:00
mudler
961ca93219 docs: Update README 2023-05-13 00:46:48 +02:00
Ettore Di Giacinto
557ccc5ad8 examples: add langchain-chroma example (#248) 2023-05-12 22:20:07 +02:00
Ettore Di Giacinto
2488c445b6 feat: bert.cpp token embeddings (#241) 2023-05-12 17:16:49 +02:00
Ettore Di Giacinto
b4241d0a0d tests: enable whisper (#239) 2023-05-12 14:10:18 +02:00
Ettore Di Giacinto
8250391e49 Add support for gptneox/replit (#238) 2023-05-12 11:36:35 +02:00
Ettore Di Giacinto
fd1df4e971 whisper: add tests and allow to set upload size (#237) 2023-05-12 10:04:20 +02:00
ci-robbot [bot]
5115b2faa3 ⬆️ Update go-skynet/go-llama.cpp (#219)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-11 23:43:55 +02:00
ci-robbot [bot]
93e82a8bf4 ⬆️ Update go-skynet/go-gpt2.cpp (#220)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-11 23:43:44 +02:00
Ettore Di Giacinto
4413defca5 feat: add starcoder (#236) 2023-05-11 20:20:07 +02:00
Ettore Di Giacinto
f359e1c6c4 fix: dolly/rp (#235) 2023-05-11 19:38:27 +02:00
renovate[bot]
1bc87d582d fix(deps): update module github.com/sashabaranov/go-openai to v1.9.4 (#230)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-11 18:02:19 +02:00
renovate[bot]
a86a383357 fix(deps): update github.com/donomii/go-rwkv.cpp digest to 07166da (#224)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-11 18:01:52 +02:00
renovate[bot]
16f02c7b30 fix(deps): update github.com/go-skynet/go-bert.cpp digest to ec771ec (#223)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-11 18:01:35 +02:00
Ettore Di Giacinto
fe2706890c Update README.md 2023-05-11 17:32:13 +02:00
Ettore Di Giacinto
85f0f8227d refactor: drop code dups (#234) 2023-05-11 16:34:16 +02:00
Ettore Di Giacinto
59e3c02002 make use of new bindings for gpt4all (#232) 2023-05-11 14:31:19 +02:00
Matthew Campbell
032dee256f Keep whisper models in memory (#233) 2023-05-11 14:05:07 +02:00
Matthew Campbell
6b5e2b2bf5 Upload transcription API wasn't reading the data from the post (#229) 2023-05-11 10:43:05 +02:00
renovate[bot]
6fc303de87 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 70593fc (#221)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-11 10:34:34 +02:00
renovate[bot]
6ad6e4873d fix(deps): update github.com/ggerganov/whisper.cpp/bindings/go digest to 1d17cd5 (#216)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-11 01:14:34 +02:00
ci-robbot [bot]
d6d7391da8 ⬆️ Update donomii/go-rwkv.cpp (#225)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-11 01:13:28 +02:00
Ettore Di Giacinto
11675932ac feat: add dolly/redpajama/bloomz models support (#214) 2023-05-11 01:12:58 +02:00
Ettore Di Giacinto
f02202e1e1 update README 2023-05-10 15:51:16 +02:00
Ettore Di Giacinto
f8ee20991c feat: add bert.cpp embeddings (#222) 2023-05-10 15:20:21 +02:00
Cedrik Boudreau
e6db14e2f1 Added spark in projects (#215) 2023-05-10 14:05:44 +02:00
Dave
d00886abea Tiny .gitignore suggestion (#213) 2023-05-09 20:03:29 +02:00
renovate[bot]
4873d2bfa1 fix(deps): update github.com/go-skynet/go-llama.cpp digest to f4d26f4 (#212)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-09 12:17:15 +02:00
Ettore Di Giacinto
9f426578cf feat: add transcript endpoint (#211) 2023-05-09 11:43:50 +02:00
ci-robbot [bot]
9d01b695a8 ⬆️ Update go-skynet/go-llama.cpp (#209)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-08 22:37:16 +02:00
Ettore Di Giacinto
93829ab228 docs: update news 2023-05-08 22:34:12 +02:00
renovate[bot]
dd234f86d5 fix(deps): update github.com/go-skynet/go-llama.cpp digest to c03e8ad (#208)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-08 20:30:52 +02:00
mudler
3daff6f1aa doc: update README with embeddings docs 2023-05-08 20:02:59 +02:00
Ettore Di Giacinto
89dfa0f5fc feat: add experimental support for embeddings as arrays (#207) 2023-05-08 19:31:18 +02:00
renovate[bot]
bc03c492a0 fix(deps): update module github.com/gofiber/fiber/v2 to v2.45.0 (#206)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-07 17:46:31 +02:00
Fabian Hachenberg
f50a4c1454 Added runpod.io template for LocalAI to examples (#203) 2023-05-07 10:58:15 +02:00
mudler
d13d4d95ce examples: fix default parameter 2023-05-07 10:13:57 +02:00
renovate[bot]
428790ec06 fix(deps): update github.com/go-skynet/go-llama.cpp digest to cf9b522 (#202)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-07 09:14:00 +02:00
mudler
4f551ce414 examples: add update index example, update README 2023-05-07 09:05:24 +02:00
mudler
6ed7b10273 examples: add langchain agent example 2023-05-07 08:14:01 +02:00
mudler
02979566ee examples: better defaults 2023-05-07 00:58:30 +02:00
ci-robbot [bot]
cbdcc839f3 ⬆️ Update go-skynet/go-llama.cpp (#201)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-06 22:49:44 +02:00
mudler
e1c8f087f4 Update README 2023-05-06 19:18:03 +02:00
mudler
3a90ea44a5 Update readme and examples 2023-05-06 19:15:22 +02:00
renovate[bot]
e55492475d fix(deps): update github.com/go-skynet/go-llama.cpp digest to 691d479 (#189)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-06 00:42:31 +02:00
Dave
07ec2e441d mini fix - OpenAI documentation url (#200) 2023-05-06 00:42:08 +02:00
ci-robbot [bot]
38d7e0b43c ⬆️ Update go-skynet/go-llama.cpp (#198)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-06 00:21:48 +02:00
Dave
3411bfd00d Langchain Example Updates (#199) 2023-05-06 00:21:06 +02:00
Ettore Di Giacinto
7e5fe35ae4 Mixed enhancements (#196) 2023-05-06 00:00:58 +02:00
mudler
8c8cf38d4d tests: use 1 core 2023-05-05 23:29:34 +02:00
mudler
75b25297fd tests: run with ginkgo 2023-05-05 22:51:30 +02:00
mudler
009ee47fe2 Don't allow 0 as thread count 2023-05-05 22:51:20 +02:00
mudler
ec2adc2c03 tests: use 3 cores 2023-05-05 22:07:01 +02:00
mudler
ad301e6ed7 example(add): document query example 2023-05-05 21:56:31 +02:00
mudler
d094381e5d ci: lower fixtures spec 2023-05-05 21:28:38 +02:00
mudler
3ff9bbd217 examples: add rwkv script folder 2023-05-05 19:04:52 +02:00
mudler
e62ee2bc06 fix: remove trailing 0s from embeddings
This happens when no max_tokens are set, so by default go-llama
allocates more space for the slice and padding happens.
2023-05-05 18:35:03 +02:00
mudler
b49721cdd1 fix: respect config from file for backends settings 2023-05-05 18:05:10 +02:00
mudler
64c0a7967f fix: pass prediction options when using the model 2023-05-05 15:56:02 +02:00
mudler
e96eadab40 feat: support deprecated embeddings API 2023-05-05 15:55:19 +02:00
mudler
e73283121b feat: support arrays for prompt and input
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-05 15:54:59 +02:00
mudler
857d13e8d6 debug: wire up go-fiber debugger 2023-05-05 15:53:57 +02:00
ci-robbot [bot]
91db3d4d5c ⬆️ Update go-skynet/go-llama.cpp (#194)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-05 13:45:50 +02:00
Ettore Di Giacinto
961cf29217 feat: expose mirostat to config (#193) 2023-05-05 13:45:37 +02:00
Ettore Di Giacinto
c839b334eb feat: add embeddings for go-llama.cpp backend (#190) 2023-05-05 11:20:06 +02:00
Ettore Di Giacinto
714bfcd45b fix: missing returning error and free callback stream (#187) 2023-05-04 19:49:43 +02:00
renovate[bot]
77ce8b953e fix(deps): update github.com/donomii/go-rwkv.cpp digest to af62fcc (#171)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 18:30:48 +02:00
renovate[bot]
01ada95941 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 2e6ae12 (#172)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 18:30:11 +02:00
ci-robbot [bot]
eabdc5042a ⬆️ Update go-skynet/go-llama.cpp (#184)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-04 18:28:49 +02:00
Dhruv Gera
96267d9437 localai: Include the WebUI project example (#130)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-05-04 18:27:58 +02:00
Ettore Di Giacinto
9497a24127 fix: hardcode default number of cores to '4' (#186) 2023-05-04 18:14:58 +02:00
Ettore Di Giacinto
fdf75c6d0e rwkv fixes and examples (#185) 2023-05-04 17:32:23 +02:00
mudler
6352308882 ci: minor fixups 2023-05-04 15:08:20 +02:00
mudler
a8172a0f4e ci: fix typo 2023-05-04 15:04:41 +02:00
mudler
ebcd10d66f ci: manually update deps 2023-05-04 15:01:29 +02:00
mudler
885642915f ci: add renovate suffix 2023-05-04 12:26:59 +02:00
mudler
2e424491c0 ci: lookupNameTemplate -> depNameTemplate 2023-05-04 12:23:05 +02:00
mudler
aa6faef8f7 ci: versioning -> versioningTemplate 2023-05-04 12:07:29 +02:00
mudler
b3254baf60 ci: add versioning 2023-05-04 12:05:39 +02:00
mudler
0a43d27f0e ci: update renovate 2023-05-04 12:02:19 +02:00
Ettore Di Giacinto
3fe11fe24d ci: attempt to configure renovate with custom regexes (#178) 2023-05-04 11:55:14 +02:00
renovate[bot]
af18fdc749 fix(deps): update module github.com/sashabaranov/go-openai to v1.9.3 (#174)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 08:44:02 +02:00
renovate[bot]
32b5eddd7d fix(deps): update module github.com/onsi/ginkgo/v2 to v2.9.4 (#173)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 08:41:51 +02:00
Dave
07c3aa1869 Dockerized Langchain / PY example (#175) 2023-05-04 08:41:13 +02:00
renovate[bot]
e59bad89e7 fix(deps): update module github.com/sashabaranov/go-openai to v1.9.2 (#164)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-03 23:05:50 +02:00
Jeremy Price
b971807980 Looks for models in $CWD/models/ dir by default (#169) 2023-05-03 23:03:31 +02:00
Ettore Di Giacinto
c974dad799 Return usage in the API responses (#166) 2023-05-03 17:29:18 +02:00
Ettore Di Giacinto
4eae570ef5 Update docs (#163) 2023-05-03 15:51:54 +02:00
Ettore Di Giacinto
67992a7d99 feat: support slices or strings in the prompt completion endpoint (#162)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-03 13:13:31 +02:00
renovate[bot]
0a4899f366 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 8ceb616 (#150)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-03 11:48:06 +02:00
renovate[bot]
1eb02f6c91 fix(deps): update module github.com/onsi/ginkgo/v2 to v2.9.3 (#161)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-03 11:47:54 +02:00
mudler
575874e4fb readme: minor update 2023-05-03 11:46:29 +02:00
Ettore Di Giacinto
751b7eca62 feat: add rwkv support (#158)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-03 11:45:22 +02:00
Ettore Di Giacinto
1ae7150810 feat: allow to specify default backend for model (#156)
Signed-off-by: mudler <mudler@c3os.io>
2023-05-03 00:31:28 +02:00
Ettore Di Giacinto
70caf9bf8c feat: support stopwords both string and arrays (#154) 2023-05-02 23:30:00 +02:00
Dave
0b226ac027 Stop parameter of OpenAIRequest changed to String Array (#153) 2023-05-02 22:02:45 +02:00
Ettore Di Giacinto
220d6fd59b feat: add stream events (#152) 2023-05-02 20:03:35 +02:00
antongisli
0a00a4b58e adding mac build and example (#151)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-05-02 19:24:45 +02:00
Ettore Di Giacinto
156e15a4fa Bump llama.cpp, downgrade gpt4all-j (#149) 2023-05-02 16:07:18 +02:00
renovate[bot]
271d3f6673 fix(deps): update module github.com/valyala/fasthttp to v1.47.0 (#143)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-01 23:36:58 +02:00
Ettore Di Giacinto
fec4ab93c5 docs: Add langchain to the example index (#147)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-01 23:21:07 +02:00
renovate[bot]
38a7a7a54d fix(deps): update github.com/go-skynet/go-gpt4all-j.cpp digest to 77bf8c1 (#141)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-05-01 23:18:41 +02:00
Ettore Di Giacinto
0db0704e2c docs: Add slack-bot example (#145)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-01 23:18:24 +02:00
Dave
88f472e5d2 Add LangchainJS Examples (#146) 2023-05-01 23:18:14 +02:00
Ettore Di Giacinto
92452d46da feat: add new gpt4all-j binding (#142) 2023-05-01 20:00:15 +02:00
Ettore Di Giacinto
ac70252d70 drop: remove helm charts, now in separate repo (#134)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-01 18:07:41 +02:00
renovate[bot]
f6451d2518 fix(deps): update module github.com/urfave/cli/v2 to v2.25.3 (#140)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-01 18:07:29 +02:00
Ettore Di Giacinto
2473f9d19b docs: add discord-bot preview (#137) 2023-05-01 11:03:34 +02:00
renovate[bot]
bc583385a9 fix(deps): update module github.com/urfave/cli/v2 to v2.25.2 (#136)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-01 07:53:48 +02:00
renovate[bot]
8286bfbab7 fix(deps): update module github.com/sashabaranov/go-openai to v1.9.1 (#135)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-01 07:52:20 +02:00
Ettore Di Giacinto
d129fabe3b docs: enhancements (#133) 2023-04-30 23:27:02 +02:00
renovate[bot]
2539867247 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 377fd24 (#129)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-30 11:09:48 +02:00
renovate[bot]
69fedb92d9 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 361b9f8 (#127)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-30 08:47:27 +02:00
Ettore Di Giacinto
54b5eadcc4 docs: add discord-bot example (#126) 2023-04-30 00:31:28 +02:00
Ettore Di Giacinto
16773e2a35 feat: make images to build sources on start (#124)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-29 20:38:37 +02:00
renovate[bot]
78503c62b7 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 9bf702f (#125)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-29 16:53:39 +02:00
Ettore Di Giacinto
a330c9cee5 update: bump llama.cpp to 7f15c5c (#122)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-29 15:20:50 +02:00
Ettore Di Giacinto
ff0867996e tests: increase timeout (#121) 2023-04-29 14:56:00 +02:00
Ettore Di Giacinto
1bf8f996d1 docs: clarify GPT4ALL-J licensing (#120) 2023-04-29 14:50:22 +02:00
Ettore Di Giacinto
52f4d993c1 feat: add /edit endpoint (#119) 2023-04-29 09:22:09 +02:00
renovate[bot]
d0ceebc5d7 fix(deps): update module github.com/valyala/fasthttp to v1.46.0 (#118)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-28 22:44:29 +02:00
renovate[bot]
9122af3ae1 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 3d084e4 (#108)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-28 19:24:49 +02:00
Ettore Di Giacinto
b8533428bc bump: update llama.cpp (#117)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-28 19:24:28 +02:00
Ettore Di Giacinto
677905334c docs: reorder section (#116) 2023-04-28 13:55:23 +02:00
Mauro Morales
d1d55d29a0 Add Kairos LocalAI example to the links (#115) 2023-04-28 13:52:17 +02:00
Ettore Di Giacinto
e07dba7ad6 docs: Add contributors (#113)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-28 10:54:39 +02:00
Matthieu Talbot
062f832510 Add EXPOSE to Dockerfile (#107) 2023-04-27 16:45:24 +00:00
Ettore Di Giacinto
d0330bb64b docs: update example README.md (#104) 2023-04-27 17:46:14 +02:00
antongisli
91a23ec6ec Anton readme (#99)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-04-27 17:17:03 +02:00
Ron Evans
0b000dd043 examples: correct typo in README (#103)
Signed-off-by: deadprogram <ron@hybridgroup.com>
2023-04-27 17:14:38 +02:00
Ettore Di Giacinto
c73ba91a66 docs: update README 2023-04-27 15:39:48 +02:00
Ettore Di Giacinto
dfc00f8bc1 docs: update README.md (#98) 2023-04-27 15:06:55 +02:00
Ettore Di Giacinto
a18ff9c9b3 docs: move api docs (#96) 2023-04-27 10:42:50 +02:00
Ettore Di Giacinto
d0199279ad docs: update, add config docs (#94) 2023-04-27 10:39:01 +02:00
Ettore Di Giacinto
9ede1e12d8 few typos and clarity changes (#91) (#92)
Co-authored-by: antongisli <anton@huge.geek.nz>
2023-04-27 07:47:39 +02:00
Ettore Di Giacinto
c806eae0de feat: config files and SSE (#83)
Signed-off-by: mudler <mudler@mocaccino.org>
Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com>
Co-authored-by: Tyler Gillson <tyler.gillson@gmail.com>
2023-04-26 21:18:18 -07:00
renovate[bot]
4e2061636e chore(deps): update actions/checkout action to v3 (#82)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-25 07:46:29 +02:00
renovate[bot]
e3ef171968 fix(deps): update module github.com/gofiber/fiber/v2 to v2.44.0 (#81)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-25 07:46:14 +02:00
Ettore Di Giacinto
12d83a4184 feat: Return OpenAI errors and update docs (#80)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-24 23:42:03 +02:00
renovate[bot]
045412e8dd fix(deps): update module github.com/urfave/cli/v2 to v2.25.1 (#78)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 18:16:23 +02:00
renovate[bot]
9896a9a58b fix(deps): update github.com/go-skynet/go-llama.cpp digest to e45cebe (#77)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 18:16:10 +02:00
Ettore Di Giacinto
b9011bda59 feat: automatic updates with renovate, docs updates (#76) 2023-04-24 18:10:58 +02:00
Ettore Di Giacinto
2b2f5fa36a feat: update llama.cpp (#72) 2023-04-24 14:15:49 +02:00
renovate[bot]
43c557dc5c fix(deps): update github.com/go-skynet/go-gpt4all-j.cpp digest to 1f7bff5 (#74)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 14:14:21 +02:00
renovate[bot]
7abb2c9bd7 fix(deps): update github.com/go-skynet/go-gpt2.cpp digest to 245a5bf (#73)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 14:13:04 +02:00
renovate[bot]
7a9ea4480a Configure Renovate (#71)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 14:11:39 +02:00
Vladimir Malyutin
31bcc558de Update README.md (#62) 2023-04-22 14:42:30 +02:00
Ettore Di Giacinto
676e15f785 fix: make MacOS builds work (#61) 2023-04-22 11:05:23 +02:00
Marc R Kellerman
3e71c90949 feature: add devcontainer for live debugging (#60) 2023-04-22 01:20:03 +02:00
Ettore Di Giacinto
550ae9c968 docs: add Discord channel link (#59) 2023-04-22 00:46:17 +02:00
Ettore Di Giacinto
1c872ec326 feat: add CI/tests (#58)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-22 00:44:52 +02:00
Marc R Kellerman
05f35b182c fix(makefile): fix go-gpt2 folder and add verification before git clone (#51)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-04-22 00:29:32 +02:00
Ettore Di Giacinto
79791438fe Use the first available model if not specified (#55)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-21 22:54:43 +02:00
Tyler Gillson
bf20cc34f6 feat: Add helm chart (#56) 2023-04-21 13:22:03 -07:00
Ettore Di Giacinto
5cba71de70 Add stopwords, debug mode, and other API enhancements (#54)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-21 19:46:59 +02:00
Ettore Di Giacinto
4b7e83056d Update .env 2023-04-21 01:47:35 +02:00
Ettore Di Giacinto
ed954d66c3 Do not take all CPU by default (#50) 2023-04-21 00:55:19 +02:00
Ettore Di Giacinto
f816dfae65 Add support for stablelm (#48)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-21 00:06:55 +02:00
Ettore Di Giacinto
142bcd66ca Cleanup makefile, fix dep versions (#46)
Signed-off-by: mudler <mudler@c3os.io>
2023-04-20 19:49:06 +02:00
Ettore Di Giacinto
1c4fbaae20 Add support for cerebras (#45)
Signed-off-by: mudler <mudler@c3os.io>
2023-04-20 19:33:36 +02:00
Ettore Di Giacinto
d517a54e28 Major API enhancements (#44) 2023-04-20 18:33:02 +02:00
Tyler Gillson
c905512bb0 Update example K8s manifests (#40) 2023-04-20 18:31:11 +02:00
Ettore Di Giacinto
1254951fab Add logo (#37)
Signed-off-by: mudler <mudler@c3os.io>
2023-04-19 19:03:12 +02:00
Ettore Di Giacinto
80f50e6ccd Rename project to LocalAI (#35)
Signed-off-by: mudler <mudler@c3os.io>
2023-04-19 18:43:10 +02:00
Ettore Di Giacinto
7fec26f5d3 Enhancements (#34)
Signed-off-by: mudler <mudler@c3os.io>
2023-04-19 17:10:29 +02:00
Ettore Di Giacinto
a9a875ee2b ⬆️ Bump llama.cpp (#33)
Signed-off-by: mudler <mudler@c3os.io>
2023-04-17 21:34:02 +02:00
Ettore Di Giacinto
db5ac715f3 Use a reasonable default context size (#31) 2023-04-17 18:45:42 +02:00
Ettore Di Giacinto
0b330d90ad feat: drop embedded webui (#27)
Signed-off-by: mudler <mudler@c3os.io>
2023-04-16 10:46:20 +02:00
Ettore Di Giacinto
63601fabd1 feat: drop default model and llama-specific API (#26)
Signed-off-by: mudler <mudler@c3os.io>
2023-04-16 10:40:50 +02:00
Ettore Di Giacinto
1370b4482f 📖 Add prompt-templates examples (#25)
Signed-off-by: mudler <mudler@c3os.io>
2023-04-16 10:24:15 +02:00
Ettore Di Giacinto
b062f3142b feat: enhance API, expose more parameters (#24)
Signed-off-by: mudler <mudler@c3os.io>
2023-04-16 10:16:48 +02:00
Marc R Kellerman
c37175271f feature: makefile & updates (#23)
Co-authored-by: mudler <mudler@c3os.io>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-04-15 16:39:07 -07:00
Ettore Di Giacinto
e8eab66c30 Merge pull request #22 from go-skynet/update-llama.cpp
⬆️ Update go-llama.cpp to `llama.cpp-2f7c8e0`
2023-04-16 00:06:52 +02:00
mudler
a73a497143 Update llama.cpp 2023-04-15 23:57:00 +02:00
Ettore Di Giacinto
6aea515e1d Merge pull request #20 from go-skynet/mudler-patch-1
📖 Update README.md
2023-04-15 00:38:30 +02:00
Ettore Di Giacinto
dfc2b7e02a 📖 Update README.md 2023-04-15 00:38:18 +02:00
Ettore Di Giacinto
040290971c Merge pull request #19 from go-skynet/tags
Use tags for go-llama.cpp
2023-04-15 00:14:47 +02:00
mudler
553bad585e Use tags for go-llama.cpp 2023-04-15 00:07:39 +02:00
Ettore Di Giacinto
f76b612506 Merge pull request #17 from go-skynet/mudler-patch-1
Fix comment typo
2023-04-13 15:21:13 +02:00
Ettore Di Giacinto
c4e94c88d7 Fix comment typo
Thanks to @deadprogram for noticing it!
2023-04-13 15:20:51 +02:00
mudler
a9cd6b3ca3 ci: Fix tag detection for 'latest' 2023-04-13 01:37:09 +02:00
mudler
e786576b95 Update README 2023-04-13 01:28:15 +02:00
Ettore Di Giacinto
d426571789 Merge pull request #16 from go-skynet/fix_arm
Drop armv7 builds
2023-04-13 01:21:58 +02:00
mudler
a896a2b5ad Drop armv7 builds 2023-04-13 01:21:40 +02:00
Ettore Di Giacinto
8273cd5c04 Merge pull request #15 from go-skynet/docker-compose
Add docker-compose file
2023-04-13 01:17:44 +02:00
mudler
16f1281d38 Minor workflow fixes 2023-04-13 01:16:13 +02:00
mudler
8042e9a2d6 Add docker-compose
Fixes #14

Signed-off-by: mudler <mudler@c3os.io>
2023-04-13 01:13:14 +02:00
mudler
624092cb99 Update README 2023-04-12 00:07:30 +02:00
mudler
a422a883ac Minor rephrasing 2023-04-12 00:04:15 +02:00
mudler
7858a97254 Update README 2023-04-12 00:02:47 +02:00
mudler
5556aa46dd Small refinements and refactors 2023-04-12 00:02:39 +02:00
mudler
eb4257f946 Add .gitignore 2023-04-11 23:44:00 +02:00
mudler
ae30bd346d Reorganize repository layout 2023-04-11 23:43:43 +02:00
mudler
93d8977ba2 Return model list 2023-04-10 12:02:40 +02:00
mudler
f43aeeb4a1 Add both API endpoints (completion, chat) 2023-04-09 12:30:55 +02:00
mudler
c17dcc5e9d Allow to inject prompt as part of the call 2023-04-09 09:36:19 +02:00
mudler
4a932483e1 Small fixup to template loading 2023-04-08 11:59:40 +02:00
mudler
b710147b95 Add mutex on same models (parallel isn't supported yet) 2023-04-08 11:45:36 +02:00
mudler
ba70363330 Use template input 2023-04-08 11:24:25 +02:00
mudler
9fb581739b Allow to template model prompts inputs 2023-04-08 10:46:51 +02:00
mudler
48aca246e3 Drop unused interactive mode 2023-04-07 11:31:14 +02:00
mudler
12eee097b7 Make it compatible with openAI api, support multiple models
Signed-off-by: mudler <mudler@c3os.io>
2023-04-07 11:30:59 +02:00
mudler
b33d015b8c Use go-llama.cpp 2023-04-07 10:08:15 +02:00
Ettore Di Giacinto
b7c0a108f5 Update README.md 2023-04-05 22:28:03 +02:00
Ettore Di Giacinto
f694a89c28 Update README.md 2023-04-05 22:14:00 +02:00
Ettore Di Giacinto
be682e6c2f Update README.md
Add short-term roadmap and mention webui
2023-04-05 22:04:35 +02:00
mudler
bf85a31f9e Don't set a default model path 2023-04-05 22:00:15 +02:00
Ettore Di Giacinto
d69048e0b0 Update README.md 2023-04-05 00:41:02 +02:00
mudler
827f189163 Update README 2023-03-30 18:46:11 +02:00
mudler
a23deb5ec7 Drop duplicate target 2023-03-29 19:44:41 +02:00
mudler
999676b106 Add gpt4all instructions 2023-03-29 18:58:54 +02:00
mudler
c61b023bc8 Drop fat images, will document how to consume models 2023-03-29 18:55:24 +02:00
mudler
650a22aef1 Add compatibility to gpt4all models 2023-03-29 18:53:24 +02:00
mudler
17b1724f7c Update llama-go 2023-03-27 01:18:14 +02:00
mudler
e860e62036 Add mutex, build only lite images 2023-03-27 01:01:38 +02:00
Ettore Di Giacinto
1f45ff8cd6 Update README.md 2023-03-26 23:37:26 +02:00
mudler
abee34f60a Cleanup leftover 2023-03-25 01:10:50 +01:00
mudler
dbc70dc13c Add a simple web-page as index of the API for helping with inference testing 2023-03-25 01:09:51 +01:00
mudler
55142065eb Update README with building instructions 2023-03-24 01:11:13 +01:00
mudler
d83d2293b5 Update version in kubernetes deployment 2023-03-23 23:22:43 +01:00
mudler
467ce5a7aa Update models download instructions, update images 2023-03-23 22:06:41 +01:00
121 changed files with 7433 additions and 908 deletions

3
.devcontainer/Dockerfile Normal file
View File

@@ -0,0 +1,3 @@
ARG GO_VERSION=1.20
FROM mcr.microsoft.com/devcontainers/go:0-$GO_VERSION-bullseye
RUN apt-get update && apt-get install -y cmake

View File

@@ -0,0 +1,46 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose
{
"name": "Existing Docker Compose (Extend)",
// Update the 'dockerComposeFile' list if you have more compose files or use different names.
// The .devcontainer/docker-compose.yml file contains any overrides you need/want to make.
"dockerComposeFile": [
"../docker-compose.yaml",
"docker-compose.yml"
],
// The 'service' property is the name of the service for the container that VS Code should
// use. Update this value and .devcontainer/docker-compose.yml to the real service name.
"service": "api",
// The optional 'workspaceFolder' property is the path VS Code should open by default when
// connected. This is typically a file mount in .devcontainer/docker-compose.yml
"workspaceFolder": "/workspace",
"features": {
"ghcr.io/devcontainers/features/go:1": {},
"ghcr.io/azutake/devcontainer-features/go-packages-install:0": {}
},
// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
// Uncomment the next line if you want start specific services in your Docker Compose config.
// "runServices": [],
// Uncomment the next line if you want to keep your containers running after VS Code shuts down.
// "shutdownAction": "none",
// Uncomment the next line to run commands after the container is created.
"postCreateCommand": "make prepare"
// Configure tool-specific properties.
// "customizations": {},
// Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "devcontainer"
}

View File

@@ -0,0 +1,26 @@
version: '3.6'
services:
# Update this to the name of the service you want to work with in your docker-compose.yml file
api:
# Uncomment if you want to override the service's Dockerfile to one in the .devcontainer
# folder. Note that the path of the Dockerfile and context is relative to the *primary*
# docker-compose.yml file (the first in the devcontainer.json "dockerComposeFile"
# array). The sample below assumes your primary file is in the root of your project.
#
build:
context: .
dockerfile: .devcontainer/Dockerfile
volumes:
# Update this to wherever you want VS Code to mount the folder of your project
- .:/workspace:cached
# Uncomment the next four lines if you will use a ptrace-based debugger like C++, Go, and Rust.
# cap_add:
# - SYS_PTRACE
# security_opt:
# - seccomp:unconfined
# Overrides default command so things don't shut down after the process ends.
command: /bin/sh -c "while sleep 1000; do :; done"

4
.dockerignore Normal file
View File

@@ -0,0 +1,4 @@
models
examples/chatbot-ui/models
examples/rwkv/models
examples/**/models

5
.env Normal file
View File

@@ -0,0 +1,5 @@
# THREADS=14
# CONTEXT_SIZE=512
MODELS_PATH=/models
# DEBUG=true
# BUILD_TYPE=generic

9
.github/bump_deps.sh vendored Executable file
View File

@@ -0,0 +1,9 @@
#!/bin/bash
set -xe
REPO=$1
BRANCH=$2
VAR=$3
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"

51
.github/workflows/bump_deps.yaml vendored Normal file
View File

@@ -0,0 +1,51 @@
name: Bump dependencies
on:
schedule:
- cron: 0 20 * * *
workflow_dispatch:
jobs:
bump:
strategy:
fail-fast: false
matrix:
include:
- repository: "go-skynet/go-llama.cpp"
variable: "GOLLAMA_VERSION"
branch: "master"
- repository: "go-skynet/go-gpt2.cpp"
variable: "GOGPT2_VERSION"
branch: "master"
- repository: "donomii/go-rwkv.cpp"
variable: "RWKV_VERSION"
branch: "main"
- repository: "ggerganov/whisper.cpp"
variable: "WHISPER_CPP_VERSION"
branch: "master"
- repository: "go-skynet/go-bert.cpp"
variable: "BERT_VERSION"
branch: "master"
- repository: "go-skynet/bloomz.cpp"
variable: "BLOOMZ_VERSION"
branch: "main"
- repository: "nomic-ai/gpt4all"
variable: "GPT4ALL_VERSION"
branch: "main"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Bump dependencies 🔧
run: |
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
- name: Create Pull Request
uses: peter-evans/create-pull-request@v5
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
title: ':arrow_up: Update ${{ matrix.repository }}'
branch: "update/${{ matrix.variable }}"
body: Bump of ${{ matrix.repository }} version
signoff: true

View File

@@ -2,6 +2,7 @@
name: 'build container images'
on:
pull_request:
push:
branches:
- master
@@ -12,78 +13,66 @@ jobs:
docker:
runs-on: ubuntu-latest
steps:
- name: Release space from worker
run: |
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
df -h
echo
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
sudo rm -rf /usr/local/lib/android
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
sudo rm -rf /usr/share/dotnet
sudo apt-get remove -y '^mono-.*' || true
sudo apt-get remove -y '^ghc-.*' || true
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
sudo apt-get remove -y 'php.*' || true
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
sudo apt-get remove -y '^google-.*' || true
sudo apt-get remove -y azure-cli || true
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
sudo apt-get remove -y '^gfortran-.*' || true
sudo apt-get autoremove -y
sudo apt-get clean
echo
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
sudo rm -rfv build || true
df -h
- name: Checkout
uses: actions/checkout@v3
- name: Prepare
id: prep
run: |
DOCKER_IMAGE=quay.io/go-skynet/llama-cli
VERSION=latest
DOCKER_IMAGE=quay.io/go-skynet/local-ai
VERSION=master
SHORTREF=${GITHUB_SHA::8}
# If this is git tag, use the tag name as a docker tag
if [[ $GITHUB_REF == refs/tags/* ]]; then
VERSION=${GITHUB_REF#refs/tags/}
fi
TAGS="${DOCKER_IMAGE}:${VERSION},${DOCKER_IMAGE}:${SHORTREF}"
# If the VERSION looks like a version number, assume that
# this is the most recent version of the image and also
# tag it 'latest'.
if [[ $VERSION =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
if [[ $VERSION =~ ^v[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
TAGS="$TAGS,${DOCKER_IMAGE}:latest"
fi
# Set output parameters.
echo ::set-output name=tags::${TAGS}
echo ::set-output name=docker_image::${DOCKER_IMAGE}
echo ::set-output name=image::${DOCKER_IMAGE}:${VERSION}
- name: Set up QEMU
uses: docker/setup-qemu-action@master
with:
platforms: all
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@master
- name: Login to DockerHub
if: github.event_name != 'pull_request'
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.QUAY_USERNAME }}
password: ${{ secrets.QUAY_PASSWORD }}
- uses: earthly/actions/setup-earthly@v1
username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
- name: Build
run: |
earthly config "global.conversion_parallelism" "1"
earthly config "global.buildkit_max_parallelism" "1"
earthly --push +image-all --IMAGE=${{ steps.prep.outputs.image }}
if: github.event_name != 'pull_request'
uses: docker/build-push-action@v4
with:
builder: ${{ steps.buildx.outputs.name }}
context: .
file: ./Dockerfile
platforms: linux/amd64,linux/arm64
push: true
tags: ${{ steps.prep.outputs.tags }}
- name: Build PRs
if: github.event_name == 'pull_request'
uses: docker/build-push-action@v4
with:
builder: ${{ steps.buildx.outputs.name }}
context: .
file: ./Dockerfile
platforms: linux/amd64
push: false
tags: ${{ steps.prep.outputs.tags }}

44
.github/workflows/test.yml vendored Normal file
View File

@@ -0,0 +1,44 @@
---
name: 'tests'
on:
pull_request:
push:
branches:
- master
tags:
- '*'
jobs:
ubuntu-latest:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v3
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
- name: Test
run: |
make test
macOS-latest:
runs-on: macOS-latest
steps:
- name: Clone
uses: actions/checkout@v3
with:
submodules: true
- name: Dependencies
run: |
brew update
brew install sdl2 ffmpeg
- name: Test
run: |
make test

20
.gitignore vendored Normal file
View File

@@ -0,0 +1,20 @@
# go-llama build artifacts
go-llama
gpt4all
go-stable-diffusion
go-gpt2
go-rwkv
whisper.cpp
# LocalAI build binary
LocalAI
local-ai
# prevent above rules from omitting the helm chart
!charts/*
# Ignore models
models/*
test-models/
# just in case
.DS_Store

View File

@@ -1,5 +1,5 @@
# Make sure to check the documentation at http://goreleaser.com
project_name: llama-cli
project_name: local-ai
builds:
- ldflags:
- -w -s

20
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,20 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Launch Go",
"type": "go",
"request": "launch",
"mode": "debug",
"program": "${workspaceFolder}/main.go",
"args": [
"api"
],
"env": {
"C_INCLUDE_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
"LIBRARY_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
"DEBUG": "true"
}
}
]
}

10
Dockerfile Normal file
View File

@@ -0,0 +1,10 @@
ARG GO_VERSION=1.20
ARG BUILD_TYPE=
FROM golang:$GO_VERSION
WORKDIR /build
RUN apt-get update && apt-get install -y cmake libgomp1 libopenblas-dev libopenblas-base libopencv-dev libopencv-core-dev libopencv-core4.5
COPY . .
RUN ln -s /usr/include/opencv4/opencv2/ /usr/include/opencv2
RUN make prepare-sources
EXPOSE 8080
ENTRYPOINT [ "/build/entrypoint.sh" ]

15
Dockerfile.dev Normal file
View File

@@ -0,0 +1,15 @@
ARG GO_VERSION=1.20
ARG DEBIAN_VERSION=11
ARG BUILD_TYPE=
FROM golang:$GO_VERSION as builder
WORKDIR /build
RUN apt-get update && apt-get install -y cmake libgomp1 libopenblas-dev libopenblas-base libopencv-dev libopencv-core-dev libopencv-core4.5
RUN ln -s /usr/include/opencv4/opencv2/ /usr/include/opencv2
COPY . .
RUN make build
FROM debian:$DEBIAN_VERSION
COPY --from=builder /build/local-ai /usr/bin/local-ai
EXPOSE 8080
ENTRYPOINT [ "/usr/bin/local-ai" ]

View File

@@ -1,47 +1,5 @@
VERSION 0.7
go-deps:
ARG GO_VERSION=1.20
FROM golang:$GO_VERSION
WORKDIR /build
COPY go.mod ./
COPY go.sum ./
RUN go mod download
RUN apt-get update
SAVE ARTIFACT go.mod AS LOCAL go.mod
SAVE ARTIFACT go.sum AS LOCAL go.sum
model-image:
ARG MODEL_IMAGE=quay.io/go-skynet/models:ggml2-alpaca-7b-v0.2
FROM $MODEL_IMAGE
SAVE ARTIFACT /models/model.bin
build:
FROM +go-deps
WORKDIR /build
RUN git clone https://github.com/go-skynet/llama
RUN cd llama && make libllama.a
COPY . .
RUN C_INCLUDE_PATH=/build/llama LIBRARY_PATH=/build/llama go build -o llama-cli ./
SAVE ARTIFACT llama-cli AS LOCAL llama-cli
image:
FROM +go-deps
ARG IMAGE=alpaca-cli
COPY +model-image/model.bin /model.bin
COPY +build/llama-cli /llama-cli
ENV MODEL_PATH=/model.bin
ENTRYPOINT [ "/llama-cli" ]
SAVE IMAGE --push $IMAGE
lite-image:
FROM +go-deps
ARG IMAGE=alpaca-cli-nomodel
COPY +build/llama-cli /llama-cli
ENV MODEL_PATH=/model.bin
ENTRYPOINT [ "/llama-cli" ]
SAVE IMAGE --push $IMAGE-lite
image-all:
BUILD --platform=linux/amd64 --platform=linux/arm64 +image
BUILD --platform=linux/amd64 --platform=linux/arm64 +lite-image
FROM DOCKERFILE -f Dockerfile .
SAVE ARTIFACT /usr/bin/local-ai AS LOCAL local-ai

230
Makefile Normal file
View File

@@ -0,0 +1,230 @@
GOCMD=go
GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
GOLLAMA_VERSION?=7f9ae4246088f0c08ed322acbae21d69cd2eb547
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
GPT4ALL_VERSION?=a07237e54fcdfdb351913587052ac061a2a7bdff
GOGPT2_VERSION?=7bff56f0224502c1c9ed6258d2a17e8084628827
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47
WHISPER_CPP_VERSION?=95b02d76b04d18e4ce37ed8353a1f0797f1717ea
BERT_VERSION?=cea1ed76a7f48ef386a8e369f6c82c48cdf2d551
BLOOMZ_VERSION?=e9366e82abdfe70565644fbfae9651976714efd1
BUILD_TYPE?=
CGO_LDFLAGS?=
CUDA_LIBPATH?=/usr/local/cuda/lib64/
STABLEDIFFUSION_VERSION?=c0748eca3642d58bcf9521108bcee46959c647dc
GO_TAGS?=
OPTIONAL_TARGETS?=
GREEN := $(shell tput -Txterm setaf 2)
YELLOW := $(shell tput -Txterm setaf 3)
WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
ifeq ($(BUILD_TYPE),openblas)
CGO_LDFLAGS+=-lopenblas
endif
ifeq ($(BUILD_TYPE),cublas)
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
endif
ifeq ($(GO_TAGS),stablediffusion)
OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
endif
.PHONY: all test build vendor
all: help
## GPT4ALL
gpt4all:
git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
@find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
@find ./gpt4all -type f -name "*.go" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
@find ./gpt4all -type f -name "*.txt" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
mv ./gpt4all/gpt4all-backend/llama.cpp/llama_util.h ./gpt4all/gpt4all-backend/llama.cpp/gptjllama_util.h
## BERT embeddings
go-bert:
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp go-bert
cd go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
@find ./go-bert -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
@find ./go-bert -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
@find ./go-bert -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
## stable diffusion
go-stable-diffusion:
git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion go-stable-diffusion
cd go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
go-stable-diffusion/libstablediffusion.a:
$(MAKE) -C go-stable-diffusion libstablediffusion.a
## RWKV
go-rwkv:
git clone --recurse-submodules $(RWKV_REPO) go-rwkv
cd go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
@find ./go-rwkv -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
@find ./go-rwkv -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
@find ./go-rwkv -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
go-rwkv/librwkv.a: go-rwkv
cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. && cp ggml/src/libggml.a ..
## bloomz
bloomz:
git clone --recurse-submodules https://github.com/go-skynet/bloomz.cpp bloomz
@find ./bloomz -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_bloomz_/g' {} +
@find ./bloomz -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_bloomz_/g' {} +
@find ./bloomz -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_bloomz_/g' {} +
@find ./bloomz -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt_bloomz_/g' {} +
@find ./bloomz -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt_bloomz_/g' {} +
bloomz/libbloomz.a: bloomz
cd bloomz && make libbloomz.a
go-bert/libgobert.a: go-bert
$(MAKE) -C go-bert libgobert.a
gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a
## CEREBRAS GPT
go-gpt2:
git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2
cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
go-gpt2/libgpt2.a: go-gpt2
$(MAKE) -C go-gpt2 libgpt2.a
whisper.cpp:
git clone https://github.com/ggerganov/whisper.cpp.git
cd whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
@find ./whisper.cpp -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_whisper_/g' {} +
@find ./whisper.cpp -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_whisper_/g' {} +
@find ./whisper.cpp -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_whisper_/g' {} +
whisper.cpp/libwhisper.a: whisper.cpp
cd whisper.cpp && make libwhisper.a
go-llama:
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
go-llama/libbinding.a: go-llama
$(MAKE) -C go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
replace:
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion
prepare-sources: go-llama go-gpt2 gpt4all go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion replace
$(GOCMD) mod download
## GENERIC
rebuild: ## Rebuilds the project
$(MAKE) -C go-llama clean
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean
$(MAKE) -C go-gpt2 clean
$(MAKE) -C go-rwkv clean
$(MAKE) -C whisper.cpp clean
$(MAKE) -C go-stable-diffusion clean
$(MAKE) -C go-bert clean
$(MAKE) -C bloomz clean
$(MAKE) build
prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a $(OPTIONAL_TARGETS) go-llama/libbinding.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building
clean: ## Remove build related file
rm -fr ./go-llama
rm -rf ./gpt4all
rm -rf ./go-stable-diffusion
rm -rf ./go-gpt2
rm -rf ./go-rwkv
rm -rf ./go-bert
rm -rf ./bloomz
rm -rf ./whisper.cpp
rm -rf $(BINARY_NAME)
## Build:
build: prepare ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -tags "$(GO_TAGS)" -x -o $(BINARY_NAME) ./
generic-build: ## Build the project using generic
BUILD_TYPE="generic" $(MAKE) build
## Run
run: prepare ## run local-ai
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go
test-models/testmodel:
mkdir test-models
mkdir test-dir
wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel
wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O test-models/bert
wget https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
wget https://huggingface.co/imxcstar/rwkv-4-raven-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%25-Other1%25-20230425-ctx4096-16_Q4_2.bin -O test-models/rwkv
wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
cp tests/fixtures/* test-models
test: prepare test-models/testmodel
cp tests/fixtures/* test-models
@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} TEST_DIR=$(abspath ./)/test-dir/ CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./api
## Help:
help: ## Show this help.
@echo ''
@echo 'Usage:'
@echo ' ${YELLOW}make${RESET} ${GREEN}<target>${RESET}'
@echo ''
@echo 'Targets:'
@awk 'BEGIN {FS = ":.*?## "} { \
if (/^[a-zA-Z_-]+:.*?##.*$$/) {printf " ${YELLOW}%-20s${GREEN}%s${RESET}\n", $$1, $$2} \
else if (/^## .*$$/) {printf " ${CYAN}%s${RESET}\n", substr($$1,4)} \
}' $(MAKEFILE_LIST)

957
README.md
View File

File diff suppressed because it is too large Load Diff

78
api.go
View File

@@ -1,78 +0,0 @@
package main
import (
"strconv"
llama "github.com/go-skynet/llama/go"
"github.com/gofiber/fiber/v2"
)
func api(l *llama.LLama, listenAddr string, threads int) error {
app := fiber.New()
/*
curl --location --request POST 'http://localhost:8080/predict' --header 'Content-Type: application/json' --data-raw '{
"text": "What is an alpaca?",
"topP": 0.8,
"topK": 50,
"temperature": 0.7,
"tokens": 100
}'
*/
// Endpoint to generate the prediction
app.Post("/predict", func(c *fiber.Ctx) error {
// Get input data from the request body
input := new(struct {
Text string `json:"text"`
})
if err := c.BodyParser(input); err != nil {
return err
}
// Set the parameters for the language model prediction
topP, err := strconv.ParseFloat(c.Query("topP", "0.9"), 64) // Default value of topP is 0.9
if err != nil {
return err
}
topK, err := strconv.Atoi(c.Query("topK", "40")) // Default value of topK is 40
if err != nil {
return err
}
temperature, err := strconv.ParseFloat(c.Query("temperature", "0.5"), 64) // Default value of temperature is 0.5
if err != nil {
return err
}
tokens, err := strconv.Atoi(c.Query("tokens", "128")) // Default value of tokens is 128
if err != nil {
return err
}
// Generate the prediction using the language model
prediction, err := l.Predict(
input.Text,
llama.SetTemperature(temperature),
llama.SetTopP(topP),
llama.SetTopK(topK),
llama.SetTokens(tokens),
llama.SetThreads(threads),
)
if err != nil {
return err
}
// Return the prediction in the response body
return c.JSON(struct {
Prediction string `json:"prediction"`
}{
Prediction: prediction,
})
})
// Start the server
app.Listen(":8080")
return nil
}

100
api/api.go Normal file
View File

@@ -0,0 +1,100 @@
package api
import (
"errors"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/cors"
"github.com/gofiber/fiber/v2/middleware/logger"
"github.com/gofiber/fiber/v2/middleware/recover"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
func App(configFile string, loader *model.ModelLoader, uploadLimitMB, threads, ctxSize int, f16 bool, debug, disableMessage bool, imageDir string) *fiber.App {
zerolog.SetGlobalLevel(zerolog.InfoLevel)
if debug {
zerolog.SetGlobalLevel(zerolog.DebugLevel)
}
// Return errors as JSON responses
app := fiber.New(fiber.Config{
BodyLimit: uploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
DisableStartupMessage: disableMessage,
// Override default error handler
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
// Status code defaults to 500
code := fiber.StatusInternalServerError
// Retrieve the custom status code if it's a *fiber.Error
var e *fiber.Error
if errors.As(err, &e) {
code = e.Code
}
// Send custom error page
return ctx.Status(code).JSON(
ErrorResponse{
Error: &APIError{Message: err.Error(), Code: code},
},
)
},
})
if debug {
app.Use(logger.New(logger.Config{
Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
}))
}
cm := make(ConfigMerger)
if err := cm.LoadConfigs(loader.ModelPath); err != nil {
log.Error().Msgf("error loading config files: %s", err.Error())
}
if configFile != "" {
if err := cm.LoadConfigFile(configFile); err != nil {
log.Error().Msgf("error loading config file: %s", err.Error())
}
}
if debug {
for k, v := range cm {
log.Debug().Msgf("Model: %s (config: %+v)", k, v)
}
}
// Default middleware config
app.Use(recover.New())
app.Use(cors.New())
// openAI compatible API endpoint
app.Post("/v1/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/v1/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/v1/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/v1/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
// /v1/engines/{engine_id}/embeddings
app.Post("/v1/engines/:model/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/v1/audio/transcriptions", transcriptEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/v1/images/generations", imageEndpoint(cm, debug, loader, imageDir))
if imageDir != "" {
app.Static("/generated-images", imageDir)
}
app.Get("/v1/models", listModels(loader, cm))
app.Get("/models", listModels(loader, cm))
return app
}

191
api/api_test.go Normal file
View File

@@ -0,0 +1,191 @@
package api_test
import (
"context"
"os"
"path/filepath"
"runtime"
. "github.com/go-skynet/LocalAI/api"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
openaigo "github.com/otiai10/openaigo"
"github.com/sashabaranov/go-openai"
)
var _ = Describe("API test", func() {
var app *fiber.App
var modelLoader *model.ModelLoader
var client *openai.Client
var client2 *openaigo.Client
Context("API query", func() {
BeforeEach(func() {
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
app = App("", modelLoader, 15, 1, 512, false, true, true, "")
go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig("")
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
client2 = openaigo.NewClient("")
client2.BaseURL = defaultConfig.BaseURL
// Wait for API to be ready
client = openai.NewClientWithConfig(defaultConfig)
Eventually(func() error {
_, err := client.ListModels(context.TODO())
return err
}, "2m").ShouldNot(HaveOccurred())
})
AfterEach(func() {
app.Shutdown()
})
It("returns the models list", func() {
models, err := client.ListModels(context.TODO())
Expect(err).ToNot(HaveOccurred())
Expect(len(models.Models)).To(Equal(10))
})
It("can generate completions", func() {
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
})
It("can generate chat completions ", func() {
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("can generate completions from model configs", func() {
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: "abcdedfghikl"})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
})
It("can generate chat completions from model configs", func() {
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("returns errors", func() {
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 12 errors occurred:"))
})
It("transcribes audio", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
resp, err := client.CreateTranscription(
context.Background(),
openai.AudioRequest{
Model: openai.Whisper1,
FilePath: filepath.Join(os.Getenv("TEST_DIR"), "audio.wav"),
},
)
Expect(err).ToNot(HaveOccurred())
Expect(resp.Text).To(ContainSubstring("This is the Micro Machine Man presenting"))
})
It("calculate embeddings", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
resp, err := client.CreateEmbeddings(
context.Background(),
openai.EmbeddingRequest{
Model: openai.AdaEmbeddingV2,
Input: []string{"sun", "cat"},
},
)
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
sunEmbedding := resp.Data[0].Embedding
resp2, err := client.CreateEmbeddings(
context.Background(),
openai.EmbeddingRequest{
Model: openai.AdaEmbeddingV2,
Input: []string{"sun"},
},
)
Expect(err).ToNot(HaveOccurred())
Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
})
Context("backends", func() {
It("runs rwkv", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,"})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices) > 0).To(BeTrue())
Expect(resp.Choices[0].Text).To(Equal(" five."))
})
})
})
Context("Config file", func() {
BeforeEach(func() {
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
app = App(os.Getenv("CONFIG_FILE"), modelLoader, 5, 1, 512, false, true, true, "")
go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig("")
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
client2 = openaigo.NewClient("")
client2.BaseURL = defaultConfig.BaseURL
// Wait for API to be ready
client = openai.NewClientWithConfig(defaultConfig)
Eventually(func() error {
_, err := client.ListModels(context.TODO())
return err
}, "2m").ShouldNot(HaveOccurred())
})
AfterEach(func() {
app.Shutdown()
})
It("can generate chat completions from config file", func() {
models, err := client.ListModels(context.TODO())
Expect(err).ToNot(HaveOccurred())
Expect(len(models.Models)).To(Equal(12))
})
It("can generate chat completions from config file", func() {
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("can generate chat completions from config file", func() {
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("can generate edit completions from config file", func() {
request := openaigo.EditCreateRequestBody{
Model: "list2",
Instruction: "foo",
Input: "bar",
}
resp, err := client2.CreateEdit(context.Background(), request)
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
})
})
})

13
api/apt_suite_test.go Normal file
View File

@@ -0,0 +1,13 @@
package api_test
import (
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
func TestLocalAI(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "LocalAI test suite")
}

297
api/config.go Normal file
View File

@@ -0,0 +1,297 @@
package api
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v3"
)
type Config struct {
OpenAIRequest `yaml:"parameters"`
Name string `yaml:"name"`
StopWords []string `yaml:"stopwords"`
Cutstrings []string `yaml:"cutstrings"`
TrimSpace []string `yaml:"trimspace"`
ContextSize int `yaml:"context_size"`
F16 bool `yaml:"f16"`
Threads int `yaml:"threads"`
Debug bool `yaml:"debug"`
Roles map[string]string `yaml:"roles"`
Embeddings bool `yaml:"embeddings"`
Backend string `yaml:"backend"`
TemplateConfig TemplateConfig `yaml:"template"`
MirostatETA float64 `yaml:"mirostat_eta"`
MirostatTAU float64 `yaml:"mirostat_tau"`
Mirostat int `yaml:"mirostat"`
NGPULayers int `yaml:"gpu_layers"`
ImageGenerationAssets string `yaml:"asset_dir"`
PromptStrings, InputStrings []string
InputToken [][]int
}
type TemplateConfig struct {
Completion string `yaml:"completion"`
Chat string `yaml:"chat"`
Edit string `yaml:"edit"`
}
type ConfigMerger map[string]Config
func ReadConfigFile(file string) ([]*Config, error) {
c := &[]*Config{}
f, err := os.ReadFile(file)
if err != nil {
return nil, fmt.Errorf("cannot read config file: %w", err)
}
if err := yaml.Unmarshal(f, c); err != nil {
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
}
return *c, nil
}
func ReadConfig(file string) (*Config, error) {
c := &Config{}
f, err := os.ReadFile(file)
if err != nil {
return nil, fmt.Errorf("cannot read config file: %w", err)
}
if err := yaml.Unmarshal(f, c); err != nil {
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
}
return c, nil
}
func (cm ConfigMerger) LoadConfigFile(file string) error {
c, err := ReadConfigFile(file)
if err != nil {
return fmt.Errorf("cannot load config file: %w", err)
}
for _, cc := range c {
cm[cc.Name] = *cc
}
return nil
}
func (cm ConfigMerger) LoadConfig(file string) error {
c, err := ReadConfig(file)
if err != nil {
return fmt.Errorf("cannot read config file: %w", err)
}
cm[c.Name] = *c
return nil
}
func (cm ConfigMerger) LoadConfigs(path string) error {
files, err := ioutil.ReadDir(path)
if err != nil {
return err
}
for _, file := range files {
// Skip templates, YAML and .keep files
if !strings.Contains(file.Name(), ".yaml") {
continue
}
c, err := ReadConfig(filepath.Join(path, file.Name()))
if err == nil {
cm[c.Name] = *c
}
}
return nil
}
func updateConfig(config *Config, input *OpenAIRequest) {
if input.Echo {
config.Echo = input.Echo
}
if input.TopK != 0 {
config.TopK = input.TopK
}
if input.TopP != 0 {
config.TopP = input.TopP
}
if input.Temperature != 0 {
config.Temperature = input.Temperature
}
if input.Maxtokens != 0 {
config.Maxtokens = input.Maxtokens
}
switch stop := input.Stop.(type) {
case string:
if stop != "" {
config.StopWords = append(config.StopWords, stop)
}
case []interface{}:
for _, pp := range stop {
if s, ok := pp.(string); ok {
config.StopWords = append(config.StopWords, s)
}
}
}
if input.RepeatPenalty != 0 {
config.RepeatPenalty = input.RepeatPenalty
}
if input.Keep != 0 {
config.Keep = input.Keep
}
if input.Batch != 0 {
config.Batch = input.Batch
}
if input.F16 {
config.F16 = input.F16
}
if input.IgnoreEOS {
config.IgnoreEOS = input.IgnoreEOS
}
if input.Seed != 0 {
config.Seed = input.Seed
}
if input.Mirostat != 0 {
config.Mirostat = input.Mirostat
}
if input.MirostatETA != 0 {
config.MirostatETA = input.MirostatETA
}
if input.MirostatTAU != 0 {
config.MirostatTAU = input.MirostatTAU
}
switch inputs := input.Input.(type) {
case string:
if inputs != "" {
config.InputStrings = append(config.InputStrings, inputs)
}
case []interface{}:
for _, pp := range inputs {
switch i := pp.(type) {
case string:
config.InputStrings = append(config.InputStrings, i)
case []interface{}:
tokens := []int{}
for _, ii := range i {
tokens = append(tokens, int(ii.(float64)))
}
config.InputToken = append(config.InputToken, tokens)
}
}
}
switch p := input.Prompt.(type) {
case string:
config.PromptStrings = append(config.PromptStrings, p)
case []interface{}:
for _, pp := range p {
if s, ok := pp.(string); ok {
config.PromptStrings = append(config.PromptStrings, s)
}
}
}
}
func readInput(c *fiber.Ctx, loader *model.ModelLoader, randomModel bool) (string, *OpenAIRequest, error) {
input := new(OpenAIRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return "", nil, err
}
modelFile := input.Model
if c.Params("model") != "" {
modelFile = c.Params("model")
}
received, _ := json.Marshal(input)
log.Debug().Msgf("Request received: %s", string(received))
// Set model from bearer token, if available
bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
// If no model was specified, take the first available
if modelFile == "" && !bearerExists && randomModel {
models, _ := loader.ListModels()
if len(models) > 0 {
modelFile = models[0]
log.Debug().Msgf("No model specified, using: %s", modelFile)
} else {
log.Debug().Msgf("No model specified, returning error")
return "", nil, fmt.Errorf("no model specified")
}
}
// If a model is found in bearer token takes precedence
if bearerExists {
log.Debug().Msgf("Using model from bearer token: %s", bearer)
modelFile = bearer
}
return modelFile, input, nil
}
func readConfig(modelFile string, input *OpenAIRequest, cm ConfigMerger, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) {
// Load a config file if present after the model name
modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
if _, err := os.Stat(modelConfig); err == nil {
if err := cm.LoadConfig(modelConfig); err != nil {
return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
}
}
var config *Config
cfg, exists := cm[modelFile]
if !exists {
config = &Config{
OpenAIRequest: defaultRequest(modelFile),
ContextSize: ctx,
Threads: threads,
F16: f16,
Debug: debug,
}
} else {
config = &cfg
}
// Set the parameters for the language model prediction
updateConfig(config, input)
// Don't allow 0 as setting
if config.Threads == 0 {
if threads != 0 {
config.Threads = threads
} else {
config.Threads = 4
}
}
// Enforce debug flag if passed from CLI
if debug {
config.Debug = true
}
return config, input, nil
}

672
api/openai.go Normal file
View File

@@ -0,0 +1,672 @@
package api
import (
"bufio"
"bytes"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
"path"
"path/filepath"
"strconv"
"strings"
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
model "github.com/go-skynet/LocalAI/pkg/model"
whisperutil "github.com/go-skynet/LocalAI/pkg/whisper"
llama "github.com/go-skynet/go-llama.cpp"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
)
// APIError provides error information returned by the OpenAI API.
type APIError struct {
Code any `json:"code,omitempty"`
Message string `json:"message"`
Param *string `json:"param,omitempty"`
Type string `json:"type"`
}
type ErrorResponse struct {
Error *APIError `json:"error,omitempty"`
}
type OpenAIUsage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
}
type Item struct {
Embedding []float32 `json:"embedding"`
Index int `json:"index"`
Object string `json:"object,omitempty"`
// Images
URL string `json:"url,omitempty"`
B64JSON string `json:"b64_json,omitempty"`
}
type OpenAIResponse struct {
Created int `json:"created,omitempty"`
Object string `json:"object,omitempty"`
ID string `json:"id,omitempty"`
Model string `json:"model,omitempty"`
Choices []Choice `json:"choices,omitempty"`
Data []Item `json:"data,omitempty"`
Usage OpenAIUsage `json:"usage"`
}
type Choice struct {
Index int `json:"index,omitempty"`
FinishReason string `json:"finish_reason,omitempty"`
Message *Message `json:"message,omitempty"`
Delta *Message `json:"delta,omitempty"`
Text string `json:"text,omitempty"`
}
type Message struct {
Role string `json:"role,omitempty" yaml:"role"`
Content string `json:"content,omitempty" yaml:"content"`
}
type OpenAIModel struct {
ID string `json:"id"`
Object string `json:"object"`
}
type OpenAIRequest struct {
Model string `json:"model" yaml:"model"`
// whisper
File string `json:"file" validate:"required"`
Language string `json:"language"`
//whisper/image
ResponseFormat string `json:"response_format"`
// image
Size string `json:"size"`
// Prompt is read only by completion/image API calls
Prompt interface{} `json:"prompt" yaml:"prompt"`
// Edit endpoint
Instruction string `json:"instruction" yaml:"instruction"`
Input interface{} `json:"input" yaml:"input"`
Stop interface{} `json:"stop" yaml:"stop"`
// Messages is read only by chat/completion API calls
Messages []Message `json:"messages" yaml:"messages"`
Stream bool `json:"stream"`
Echo bool `json:"echo"`
// Common options between all the API calls
TopP float64 `json:"top_p" yaml:"top_p"`
TopK int `json:"top_k" yaml:"top_k"`
Temperature float64 `json:"temperature" yaml:"temperature"`
Maxtokens int `json:"max_tokens" yaml:"max_tokens"`
N int `json:"n"`
// Custom parameters - not present in the OpenAI API
Batch int `json:"batch" yaml:"batch"`
F16 bool `json:"f16" yaml:"f16"`
IgnoreEOS bool `json:"ignore_eos" yaml:"ignore_eos"`
RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
Keep int `json:"n_keep" yaml:"n_keep"`
MirostatETA float64 `json:"mirostat_eta" yaml:"mirostat_eta"`
MirostatTAU float64 `json:"mirostat_tau" yaml:"mirostat_tau"`
Mirostat int `json:"mirostat" yaml:"mirostat"`
Seed int `json:"seed" yaml:"seed"`
// Image (not supported by OpenAI)
Mode int `json:"mode"`
Step int `json:"step"`
}
func defaultRequest(modelFile string) OpenAIRequest {
return OpenAIRequest{
TopP: 0.7,
TopK: 80,
Maxtokens: 512,
Temperature: 0.9,
Model: modelFile,
}
}
// https://platform.openai.com/docs/api-reference/completions
func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
model, input, err := readInput(c, loader, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
log.Debug().Msgf("Parameter Config: %+v", config)
templateFile := config.Model
if config.TemplateConfig.Completion != "" {
templateFile = config.TemplateConfig.Completion
}
var result []Choice
for _, i := range config.PromptStrings {
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := loader.TemplatePrefix(templateFile, struct {
Input string
}{Input: i})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
}
r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) {
*c = append(*c, Choice{Text: s})
}, nil)
if err != nil {
return err
}
result = append(result, r...)
}
resp := &OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: result,
Object: "text_completion",
}
jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(resp)
}
}
// https://platform.openai.com/docs/api-reference/embeddings
func embeddingsEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
model, input, err := readInput(c, loader, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
log.Debug().Msgf("Parameter Config: %+v", config)
items := []Item{}
for i, s := range config.InputToken {
// get the model function to call for the result
embedFn, err := ModelEmbedding("", s, loader, *config)
if err != nil {
return err
}
embeddings, err := embedFn()
if err != nil {
return err
}
items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
}
for i, s := range config.InputStrings {
// get the model function to call for the result
embedFn, err := ModelEmbedding(s, []int{}, loader, *config)
if err != nil {
return err
}
embeddings, err := embedFn()
if err != nil {
return err
}
items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
}
resp := &OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Data: items,
Object: "list",
}
jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(resp)
}
}
func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
process := func(s string, req *OpenAIRequest, config *Config, loader *model.ModelLoader, responses chan OpenAIResponse) {
ComputeChoices(s, req, config, loader, func(s string, c *[]Choice) {}, func(s string) bool {
resp := OpenAIResponse{
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []Choice{{Delta: &Message{Role: "assistant", Content: s}}},
Object: "chat.completion.chunk",
}
log.Debug().Msgf("Sending goroutine: %s", s)
responses <- resp
return true
})
close(responses)
}
return func(c *fiber.Ctx) error {
model, input, err := readInput(c, loader, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
log.Debug().Msgf("Parameter Config: %+v", config)
var predInput string
mess := []string{}
for _, i := range input.Messages {
var content string
r := config.Roles[i.Role]
if r != "" {
content = fmt.Sprint(r, " ", i.Content)
} else {
content = i.Content
}
mess = append(mess, content)
}
predInput = strings.Join(mess, "\n")
if input.Stream {
log.Debug().Msgf("Stream request received")
c.Context().SetContentType("text/event-stream")
//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
// c.Set("Content-Type", "text/event-stream")
c.Set("Cache-Control", "no-cache")
c.Set("Connection", "keep-alive")
c.Set("Transfer-Encoding", "chunked")
}
templateFile := config.Model
if config.TemplateConfig.Chat != "" {
templateFile = config.TemplateConfig.Chat
}
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := loader.TemplatePrefix(templateFile, struct {
Input string
}{Input: predInput})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
}
if input.Stream {
responses := make(chan OpenAIResponse)
go process(predInput, input, config, loader, responses)
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
for ev := range responses {
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
enc.Encode(ev)
fmt.Fprintf(w, "event: data\n\n")
fmt.Fprintf(w, "data: %v\n\n", buf.String())
log.Debug().Msgf("Sending chunk: %s", buf.String())
w.Flush()
}
w.WriteString("event: data\n\n")
resp := &OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []Choice{{FinishReason: "stop"}},
}
respData, _ := json.Marshal(resp)
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
w.Flush()
}))
return nil
}
result, err := ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) {
*c = append(*c, Choice{Message: &Message{Role: "assistant", Content: s}})
}, nil)
if err != nil {
return err
}
resp := &OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: result,
Object: "chat.completion",
}
respData, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", respData)
// Return the prediction in the response body
return c.JSON(resp)
}
}
func editEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
model, input, err := readInput(c, loader, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
log.Debug().Msgf("Parameter Config: %+v", config)
templateFile := config.Model
if config.TemplateConfig.Edit != "" {
templateFile = config.TemplateConfig.Edit
}
var result []Choice
for _, i := range config.InputStrings {
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := loader.TemplatePrefix(templateFile, struct {
Input string
Instruction string
}{Input: i})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
}
r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) {
*c = append(*c, Choice{Text: s})
}, nil)
if err != nil {
return err
}
result = append(result, r...)
}
resp := &OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: result,
Object: "edit",
}
jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(resp)
}
}
// https://platform.openai.com/docs/api-reference/images/create
/*
*
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "A cute baby sea otter",
"n": 1,
"size": "512x512"
}'
*
*/
func imageEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, imageDir string) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
m, input, err := readInput(c, loader, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
if m == "" {
m = model.StableDiffusionBackend
}
log.Debug().Msgf("Loading model: %+v", m)
config, input, err := readConfig(m, input, cm, loader, debug, 0, 0, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
log.Debug().Msgf("Parameter Config: %+v", config)
// XXX: Only stablediffusion is supported for now
if config.Backend == "" {
config.Backend = model.StableDiffusionBackend
}
sizeParts := strings.Split(input.Size, "x")
if len(sizeParts) != 2 {
return fmt.Errorf("Invalid value for 'size'")
}
width, err := strconv.Atoi(sizeParts[0])
if err != nil {
return fmt.Errorf("Invalid value for 'size'")
}
height, err := strconv.Atoi(sizeParts[1])
if err != nil {
return fmt.Errorf("Invalid value for 'size'")
}
b64JSON := false
if input.ResponseFormat == "b64_json" {
b64JSON = true
}
var result []Item
for _, i := range config.PromptStrings {
n := input.N
if input.N == 0 {
n = 1
}
for j := 0; j < n; j++ {
prompts := strings.Split(i, "|")
positive_prompt := prompts[0]
negative_prompt := ""
if len(prompts) > 1 {
negative_prompt = prompts[1]
}
mode := 0
step := 15
if input.Mode != 0 {
mode = input.Mode
}
if input.Step != 0 {
step = input.Step
}
tempDir := ""
if !b64JSON {
tempDir = imageDir
}
// Create a temporary file
outputFile, err := ioutil.TempFile(tempDir, "b64")
if err != nil {
return err
}
outputFile.Close()
output := outputFile.Name() + ".png"
// Rename the temporary file
err = os.Rename(outputFile.Name(), output)
if err != nil {
return err
}
baseURL := c.BaseURL()
fn, err := ImageGeneration(height, width, mode, step, input.Seed, positive_prompt, negative_prompt, output, loader, *config)
if err != nil {
return err
}
if err := fn(); err != nil {
return err
}
item := &Item{}
if b64JSON {
defer os.RemoveAll(output)
data, err := os.ReadFile(output)
if err != nil {
return err
}
item.B64JSON = base64.StdEncoding.EncodeToString(data)
} else {
base := filepath.Base(output)
item.URL = baseURL + "/generated-images/" + base
}
result = append(result, *item)
}
}
resp := &OpenAIResponse{
Data: result,
}
jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(resp)
}
}
// https://platform.openai.com/docs/api-reference/audio/create
func transcriptEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
m, input, err := readInput(c, loader, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(m, input, cm, loader, debug, threads, ctx, f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
// retrieve the file data from the request
file, err := c.FormFile("file")
if err != nil {
return err
}
f, err := file.Open()
if err != nil {
return err
}
defer f.Close()
dir, err := os.MkdirTemp("", "whisper")
if err != nil {
return err
}
defer os.RemoveAll(dir)
dst := filepath.Join(dir, path.Base(file.Filename))
dstFile, err := os.Create(dst)
if err != nil {
return err
}
if _, err := io.Copy(dstFile, f); err != nil {
log.Debug().Msgf("Audio file copying error %+v - %+v - err %+v", file.Filename, dst, err)
return err
}
log.Debug().Msgf("Audio file copied to: %+v", dst)
whisperModel, err := loader.BackendLoader(model.WhisperBackend, config.Model, []llama.ModelOption{}, uint32(config.Threads))
if err != nil {
return err
}
if whisperModel == nil {
return fmt.Errorf("could not load whisper model")
}
w, ok := whisperModel.(whisper.Model)
if !ok {
return fmt.Errorf("loader returned non-whisper object")
}
tr, err := whisperutil.Transcript(w, dst, input.Language, uint(config.Threads))
if err != nil {
return err
}
log.Debug().Msgf("Trascribed: %+v", tr)
// TODO: handle different outputs here
return c.Status(http.StatusOK).JSON(fiber.Map{"text": tr})
}
}
func listModels(loader *model.ModelLoader, cm ConfigMerger) func(ctx *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
models, err := loader.ListModels()
if err != nil {
return err
}
var mm map[string]interface{} = map[string]interface{}{}
dataModels := []OpenAIModel{}
for _, m := range models {
mm[m] = nil
dataModels = append(dataModels, OpenAIModel{ID: m, Object: "model"})
}
for k := range cm {
if _, exists := mm[k]; !exists {
dataModels = append(dataModels, OpenAIModel{ID: k, Object: "model"})
}
}
return c.JSON(struct {
Object string `json:"object"`
Data []OpenAIModel `json:"data"`
}{
Object: "list",
Data: dataModels,
})
}
}

561
api/prediction.go Normal file
View File

@@ -0,0 +1,561 @@
package api
import (
"fmt"
"regexp"
"strings"
"sync"
"github.com/donomii/go-rwkv.cpp"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/stablediffusion"
"github.com/go-skynet/bloomz.cpp"
bert "github.com/go-skynet/go-bert.cpp"
gpt2 "github.com/go-skynet/go-gpt2.cpp"
llama "github.com/go-skynet/go-llama.cpp"
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
)
// mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
var mutexMap sync.Mutex
var mutexes map[string]*sync.Mutex = make(map[string]*sync.Mutex)
func defaultLLamaOpts(c Config) []llama.ModelOption {
llamaOpts := []llama.ModelOption{}
if c.ContextSize != 0 {
llamaOpts = append(llamaOpts, llama.SetContext(c.ContextSize))
}
if c.F16 {
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
}
if c.Embeddings {
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
}
if c.NGPULayers != 0 {
llamaOpts = append(llamaOpts, llama.SetGPULayers(c.NGPULayers))
}
return llamaOpts
}
func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string, loader *model.ModelLoader, c Config) (func() error, error) {
if c.Backend != model.StableDiffusionBackend {
return nil, fmt.Errorf("endpoint only working with stablediffusion models")
}
inferenceModel, err := loader.BackendLoader(c.Backend, c.ImageGenerationAssets, []llama.ModelOption{}, uint32(c.Threads))
if err != nil {
return nil, err
}
var fn func() error
switch model := inferenceModel.(type) {
case *stablediffusion.StableDiffusion:
fn = func() error {
return model.GenerateImage(height, width, mode, step, seed, positive_prompt, negative_prompt, dst)
}
default:
fn = func() error {
return fmt.Errorf("creation of images not supported by the backend")
}
}
return func() error {
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
mutexMap.Lock()
l, ok := mutexes[c.Backend]
if !ok {
m := &sync.Mutex{}
mutexes[c.Backend] = m
l = m
}
mutexMap.Unlock()
l.Lock()
defer l.Unlock()
return fn()
}, nil
}
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config) (func() ([]float32, error), error) {
if !c.Embeddings {
return nil, fmt.Errorf("endpoint disabled for this model by API configuration")
}
modelFile := c.Model
llamaOpts := defaultLLamaOpts(c)
var inferenceModel interface{}
var err error
if c.Backend == "" {
inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads))
} else {
inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads))
}
if err != nil {
return nil, err
}
var fn func() ([]float32, error)
switch model := inferenceModel.(type) {
case *llama.LLama:
fn = func() ([]float32, error) {
predictOptions := buildLLamaPredictOptions(c)
if len(tokens) > 0 {
return model.TokenEmbeddings(tokens, predictOptions...)
}
return model.Embeddings(s, predictOptions...)
}
// bert embeddings
case *bert.Bert:
fn = func() ([]float32, error) {
if len(tokens) > 0 {
return model.TokenEmbeddings(tokens, bert.SetThreads(c.Threads))
}
return model.Embeddings(s, bert.SetThreads(c.Threads))
}
default:
fn = func() ([]float32, error) {
return nil, fmt.Errorf("embeddings not supported by the backend")
}
}
return func() ([]float32, error) {
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
mutexMap.Lock()
l, ok := mutexes[modelFile]
if !ok {
m := &sync.Mutex{}
mutexes[modelFile] = m
l = m
}
mutexMap.Unlock()
l.Lock()
defer l.Unlock()
embeds, err := fn()
if err != nil {
return embeds, err
}
// Remove trailing 0s
for i := len(embeds) - 1; i >= 0; i-- {
if embeds[i] == 0.0 {
embeds = embeds[:i]
} else {
break
}
}
return embeds, nil
}, nil
}
func buildLLamaPredictOptions(c Config) []llama.PredictOption {
// Generate the prediction using the language model
predictOptions := []llama.PredictOption{
llama.SetTemperature(c.Temperature),
llama.SetTopP(c.TopP),
llama.SetTopK(c.TopK),
llama.SetTokens(c.Maxtokens),
llama.SetThreads(c.Threads),
}
if c.Mirostat != 0 {
predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat))
}
if c.MirostatETA != 0 {
predictOptions = append(predictOptions, llama.SetMirostatETA(c.MirostatETA))
}
if c.MirostatTAU != 0 {
predictOptions = append(predictOptions, llama.SetMirostatTAU(c.MirostatTAU))
}
if c.Debug {
predictOptions = append(predictOptions, llama.Debug)
}
predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...))
if c.RepeatPenalty != 0 {
predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty))
}
if c.Keep != 0 {
predictOptions = append(predictOptions, llama.SetNKeep(c.Keep))
}
if c.Batch != 0 {
predictOptions = append(predictOptions, llama.SetBatch(c.Batch))
}
if c.F16 {
predictOptions = append(predictOptions, llama.EnableF16KV)
}
if c.IgnoreEOS {
predictOptions = append(predictOptions, llama.IgnoreEOS)
}
if c.Seed != 0 {
predictOptions = append(predictOptions, llama.SetSeed(c.Seed))
}
return predictOptions
}
func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback func(string) bool) (func() (string, error), error) {
supportStreams := false
modelFile := c.Model
llamaOpts := defaultLLamaOpts(c)
var inferenceModel interface{}
var err error
if c.Backend == "" {
inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads))
} else {
inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads))
}
if err != nil {
return nil, err
}
var fn func() (string, error)
switch model := inferenceModel.(type) {
case *rwkv.RwkvState:
supportStreams = true
fn = func() (string, error) {
stopWord := "\n"
if len(c.StopWords) > 0 {
stopWord = c.StopWords[0]
}
if err := model.ProcessInput(s); err != nil {
return "", err
}
response := model.GenerateResponse(c.Maxtokens, stopWord, float32(c.Temperature), float32(c.TopP), tokenCallback)
return response, nil
}
case *gpt2.GPTNeoX:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *gpt2.Replit:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *gpt2.Starcoder:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *gpt2.RedPajama:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *bloomz.Bloomz:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []bloomz.PredictOption{
bloomz.SetTemperature(c.Temperature),
bloomz.SetTopP(c.TopP),
bloomz.SetTopK(c.TopK),
bloomz.SetTokens(c.Maxtokens),
bloomz.SetThreads(c.Threads),
}
if c.Seed != 0 {
predictOptions = append(predictOptions, bloomz.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *gpt2.StableLM:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *gpt2.Dolly:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *gpt2.GPT2:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *gpt4all.Model:
supportStreams = true
fn = func() (string, error) {
if tokenCallback != nil {
model.SetTokenCallback(tokenCallback)
}
// Generate the prediction using the language model
predictOptions := []gpt4all.PredictOption{
gpt4all.SetTemperature(c.Temperature),
gpt4all.SetTopP(c.TopP),
gpt4all.SetTopK(c.TopK),
gpt4all.SetTokens(c.Maxtokens),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt4all.SetBatch(c.Batch))
}
str, er := model.Predict(
s,
predictOptions...,
)
// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
// after a stream event has occurred
model.SetTokenCallback(nil)
return str, er
}
case *llama.LLama:
supportStreams = true
fn = func() (string, error) {
if tokenCallback != nil {
model.SetTokenCallback(tokenCallback)
}
predictOptions := buildLLamaPredictOptions(c)
str, er := model.Predict(
s,
predictOptions...,
)
// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
// after a stream event has occurred
model.SetTokenCallback(nil)
return str, er
}
}
return func() (string, error) {
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
mutexMap.Lock()
l, ok := mutexes[modelFile]
if !ok {
m := &sync.Mutex{}
mutexes[modelFile] = m
l = m
}
mutexMap.Unlock()
l.Lock()
defer l.Unlock()
res, err := fn()
if tokenCallback != nil && !supportStreams {
tokenCallback(res)
}
return res, err
}, nil
}
func ComputeChoices(predInput string, input *OpenAIRequest, config *Config, loader *model.ModelLoader, cb func(string, *[]Choice), tokenCallback func(string) bool) ([]Choice, error) {
result := []Choice{}
n := input.N
if input.N == 0 {
n = 1
}
// get the model function to call for the result
predFunc, err := ModelInference(predInput, loader, *config, tokenCallback)
if err != nil {
return result, err
}
for i := 0; i < n; i++ {
prediction, err := predFunc()
if err != nil {
return result, err
}
prediction = Finetune(*config, predInput, prediction)
cb(prediction, &result)
//result = append(result, Choice{Text: prediction})
}
return result, err
}
var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
var mu sync.Mutex = sync.Mutex{}
func Finetune(config Config, input, prediction string) string {
if config.Echo {
prediction = input + prediction
}
for _, c := range config.Cutstrings {
mu.Lock()
reg, ok := cutstrings[c]
if !ok {
cutstrings[c] = regexp.MustCompile(c)
reg = cutstrings[c]
}
mu.Unlock()
prediction = reg.ReplaceAllString(prediction, "")
}
for _, c := range config.TrimSpace {
prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
}
return prediction
}

View File

@@ -1,75 +0,0 @@
package client
import (
"bytes"
"encoding/json"
"fmt"
"net/http"
)
type Prediction struct {
Prediction string `json:"prediction"`
}
type Client struct {
baseURL string
client *http.Client
endpoint string
}
func NewClient(baseURL string) *Client {
return &Client{
baseURL: baseURL,
client: &http.Client{},
endpoint: "/predict",
}
}
type InputData struct {
Text string `json:"text"`
TopP float64 `json:"topP,omitempty"`
TopK int `json:"topK,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
Tokens int `json:"tokens,omitempty"`
}
func (c *Client) Predict(text string, opts ...InputOption) (string, error) {
input := NewInputData(opts...)
input.Text = text
// encode input data to JSON format
inputBytes, err := json.Marshal(input)
if err != nil {
return "", err
}
// create HTTP request
url := c.baseURL + c.endpoint
req, err := http.NewRequest("POST", url, bytes.NewBuffer(inputBytes))
if err != nil {
return "", err
}
// set request headers
req.Header.Set("Content-Type", "application/json")
// send request and get response
resp, err := c.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("request failed with status %d", resp.StatusCode)
}
// decode response body to Prediction struct
var prediction Prediction
err = json.NewDecoder(resp.Body).Decode(&prediction)
if err != nil {
return "", err
}
return prediction.Prediction, nil
}

View File

@@ -1,51 +0,0 @@
package client
import "net/http"
type ClientOption func(c *Client)
func WithHTTPClient(httpClient *http.Client) ClientOption {
return func(c *Client) {
c.client = httpClient
}
}
func WithEndpoint(endpoint string) ClientOption {
return func(c *Client) {
c.endpoint = endpoint
}
}
type InputOption func(d *InputData)
func NewInputData(opts ...InputOption) *InputData {
data := &InputData{}
for _, opt := range opts {
opt(data)
}
return data
}
func WithTopP(topP float64) InputOption {
return func(d *InputData) {
d.TopP = topP
}
}
func WithTopK(topK int) InputOption {
return func(d *InputData) {
d.TopK = topK
}
}
func WithTemperature(temperature float64) InputOption {
return func(d *InputData) {
d.Temperature = temperature
}
}
func WithTokens(tokens int) InputOption {
return func(d *InputData) {
d.Tokens = tokens
}
}

15
docker-compose.yaml Normal file
View File

@@ -0,0 +1,15 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: .
dockerfile: Dockerfile.dev
ports:
- 8080:8080
env_file:
- .env
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]

7
entrypoint.sh Executable file
View File

@@ -0,0 +1,7 @@
#!/bin/bash
cd /build
make build
./local-ai "$@"

94
examples/README.md Normal file
View File

@@ -0,0 +1,94 @@
# Examples
Here is a list of projects that can easily be integrated with the LocalAI backend.
### Projects
### Chatbot-UI
_by [@mkellerman](https://github.com/mkellerman)_
![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png)
This integration shows how to use LocalAI with [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui).
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/)
### Discord bot
_by [@mudler](https://github.com/mudler)_
Run a discord bot which lets you talk directly with a model
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/discord-bot/), or for a live demo you can talk with our bot in #random-bot in our discord server.
### Langchain
_by [@dave-gray101](https://github.com/dave-gray101)_
A ready to use example to show e2e how to integrate LocalAI with langchain
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain/)
### Langchain Python
_by [@mudler](https://github.com/mudler)_
A ready to use example to show e2e how to integrate LocalAI with langchain
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-python/)
### LocalAI WebUI
_by [@dhruvgera](https://github.com/dhruvgera)_
![image](https://user-images.githubusercontent.com/42107491/235344183-44b5967d-ba22-4331-804c-8da7004a5d35.png)
A light, community-maintained web interface for LocalAI
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/localai-webui/)
### How to run rwkv models
_by [@mudler](https://github.com/mudler)_
A full example on how to run RWKV models with LocalAI
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv/)
### Slack bot
_by [@mudler](https://github.com/mudler)_
Run a slack bot which lets you talk directly with a model
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/slack-bot/)
### Question answering on documents with llama-index
_by [@mudler](https://github.com/mudler)_
Shows how to integrate with [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents.
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/)
### Question answering on documents with langchain and chroma
_by [@mudler](https://github.com/mudler)_
Shows how to integrate with `Langchain` and `Chroma` to enable question answering on a set of documents.
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-chroma/)
### Template for Runpod.io
_by [@fHachenberg](https://github.com/fHachenberg)_
Allows to run any LocalAI-compatible model as a backend on the servers of https://runpod.io
[Check it out here](https://runpod.io/gsc?template=uv9mtqnrd0&ref=984wlcra)
## Want to contribute?
Create an issue, and put `Example: <description>` in the title! We will post your examples here.

View File

@@ -0,0 +1,48 @@
# chatbot-ui
Example of integration with [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui).
![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png)
## Setup
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/chatbot-ui
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# start with docker-compose
docker-compose up -d --pull always
# or you can build the images with:
# docker-compose up -d --build
```
## Pointing chatbot-ui to a separately managed LocalAI service
If you want to use the [chatbot-ui example](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) with an externally managed LocalAI service, you can alter the `docker-compose` file so that it looks like the below. You will notice the file is smaller, because we have removed the section that would normally start the LocalAI service. Take care to update the IP address (or FQDN) that the chatbot-ui service tries to access (marked `<<LOCALAI_IP>>` below):
```
version: '3.6'
services:
chatgpt:
image: ghcr.io/mckaywrigley/chatbot-ui:main
ports:
- 3000:3000
environment:
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
- 'OPENAI_API_HOST=http://<<LOCALAI_IP>>:8080'
```
Once you've edited the Dockerfile, you can start it with `docker compose up`, then browse to `http://localhost:3000`.
## Accessing chatbot-ui
Open http://localhost:3000 for the Web UI.

View File

@@ -0,0 +1,24 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile.dev
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]
chatgpt:
image: ghcr.io/mckaywrigley/chatbot-ui:main
ports:
- 3000:3000
environment:
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
- 'OPENAI_API_HOST=http://api:8080'

View File

@@ -0,0 +1 @@
{{.Input}}

View File

@@ -0,0 +1,17 @@
name: gpt-3.5-turbo
parameters:
model: ggml-gpt4all-j
top_k: 80
temperature: 0.2
top_p: 0.7
context_size: 1024
threads: 14
stopwords:
- "HUMAN:"
- "GPT:"
roles:
user: " "
system: " "
template:
completion: completion
chat: gpt4all

View File

@@ -0,0 +1,4 @@
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
### Prompt:
{{.Input}}
### Response:

View File

@@ -0,0 +1,6 @@
OPENAI_API_KEY=x
DISCORD_BOT_TOKEN=x
DISCORD_CLIENT_ID=x
OPENAI_API_BASE=http://api:8080
ALLOWED_SERVER_IDS=x
SERVER_TO_MODERATION_CHANNEL=1:1

View File

@@ -0,0 +1,76 @@
# discord-bot
![Screenshot from 2023-05-01 07-58-19](https://user-images.githubusercontent.com/2420543/235413924-0cb2e75b-f2d6-4119-8610-44386e44afb8.png)
## Setup
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/discord-bot
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# Set the discord bot options (see: https://github.com/go-skynet/gpt-discord-bot#setup)
cp -rfv .env.example .env
vim .env
# start with docker-compose
docker-compose up -d --build
```
Note: see setup options here: https://github.com/go-skynet/gpt-discord-bot#setup
Open up the URL in the console and give permission to the bot in your server. Start a thread with `/chat ..`
## Kubernetes
- install the local-ai chart first
- change OPENAI_API_BASE to point to the API address and apply the discord-bot manifest:
```yaml
apiVersion: v1
kind: Namespace
metadata:
name: discord-bot
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: localai
namespace: discord-bot
labels:
app: localai
spec:
selector:
matchLabels:
app: localai
replicas: 1
template:
metadata:
labels:
app: localai
name: localai
spec:
containers:
- name: localai-discord
env:
- name: OPENAI_API_KEY
value: "x"
- name: DISCORD_BOT_TOKEN
value: ""
- name: DISCORD_CLIENT_ID
value: ""
- name: OPENAI_API_BASE
value: "http://local-ai.default.svc.cluster.local:8080"
- name: ALLOWED_SERVER_IDS
value: "xx"
- name: SERVER_TO_MODERATION_CHANNEL
value: "1:1"
image: quay.io/go-skynet/gpt-discord-bot:main
```

View File

@@ -0,0 +1,21 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile.dev
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]
bot:
image: quay.io/go-skynet/gpt-discord-bot:main
env_file:
- .env

1
examples/discord-bot/models Symbolic link
View File

@@ -0,0 +1 @@
../chatbot-ui/models/

View File

@@ -0,0 +1,54 @@
# Data query example
This example makes use of [langchain and chroma](https://blog.langchain.dev/langchain-chroma/) to enable question answering on a set of documents.
## Setup
Download the models and start the API:
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/query_data
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# start with docker-compose
docker-compose up -d --build
```
### Python requirements
```
pip install -r requirements.txt
```
### Create a storage
In this step we will create a local vector database from our document set, so later we can ask questions on it with the LLM.
```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-
wget https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt
python store.py
```
After it finishes, a directory "storage" will be created with the vector index database.
## Query
We can now query the dataset.
```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-
python query.py
# President Trump recently stated during a press conference regarding tax reform legislation that "we're getting rid of all these loopholes." He also mentioned that he wants to simplify the system further through changes such as increasing the standard deduction amount and making other adjustments aimed at reducing taxpayers' overall burden.
```
Keep in mind now things are hit or miss!

View File

@@ -0,0 +1 @@
{{.Input}}

View File

@@ -0,0 +1,5 @@
name: text-embedding-ada-002
parameters:
model: bert
backend: bert-embeddings
embeddings: true

View File

@@ -0,0 +1,16 @@
name: gpt-3.5-turbo
parameters:
model: ggml-gpt4all-j
top_k: 80
temperature: 0.2
top_p: 0.7
context_size: 1024
stopwords:
- "HUMAN:"
- "GPT:"
roles:
user: " "
system: " "
template:
completion: completion
chat: gpt4all

View File

@@ -0,0 +1,4 @@
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
### Prompt:
{{.Input}}
### Response:

View File

@@ -0,0 +1,20 @@
import os
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.chains import VectorDBQA
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
# Load and process the text
embedding = OpenAIEmbeddings()
persist_directory = 'db'
# Now we can load the persisted database from disk, and use it as normal.
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)
qa = VectorDBQA.from_chain_type(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path), chain_type="stuff", vectorstore=vectordb)
query = "What the president said about taxes ?"
print(qa.run(query))

View File

@@ -0,0 +1,4 @@
langchain==0.0.160
openai==0.27.6
chromadb==0.3.21
llama-index==0.6.2

View File

@@ -0,0 +1,28 @@
import os
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter,TokenTextSplitter,CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import VectorDBQA
from langchain.document_loaders import TextLoader
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
# Load and process the text
loader = TextLoader('state_of_the_union.txt')
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=70)
#text_splitter = TokenTextSplitter()
texts = text_splitter.split_documents(documents)
# Embed and store the texts
# Supplying a persist_directory will store the embeddings on disk
persist_directory = 'db'
embedding = OpenAIEmbeddings(model="text-embedding-ada-002")
vectordb = Chroma.from_documents(documents=texts, embedding=embedding, persist_directory=persist_directory)
vectordb.persist()
vectordb = None

View File

@@ -0,0 +1,35 @@
## Langchain-python
Langchain example from [quickstart](https://python.langchain.com/en/latest/getting_started/getting_started.html).
To interact with langchain, you can just set the `OPENAI_API_BASE` URL and provide a token with a random string.
See the example below:
```
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/langchain-python
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# start with docker-compose
docker-compose up -d --build
pip install langchain
pip install openai
export OPENAI_API_BASE=http://localhost:8080
export OPENAI_API_KEY=sk-
python test.py
# A good company name for a company that makes colorful socks would be "Colorsocks".
python agent.py
```

View File

@@ -0,0 +1,44 @@
## This is a fork/based from https://gist.github.com/wiseman/4a706428eaabf4af1002a07a114f61d6
from io import StringIO
import sys
import os
from typing import Dict, Optional
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents.tools import Tool
from langchain.llms import OpenAI
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo')
class PythonREPL:
"""Simulates a standalone Python REPL."""
def __init__(self):
pass
def run(self, command: str) -> str:
"""Run command and returns anything printed."""
old_stdout = sys.stdout
sys.stdout = mystdout = StringIO()
try:
exec(command, globals())
sys.stdout = old_stdout
output = mystdout.getvalue()
except Exception as e:
sys.stdout = old_stdout
output = str(e)
return output
llm = OpenAI(temperature=0.0, openai_api_base=base_path, model_name=model_name)
python_repl = Tool(
"Python REPL",
PythonREPL().run,
"""A Python shell. Use this to execute python commands. Input should be a valid python command.
If you expect output it should be printed out.""",
)
tools = [python_repl]
agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)
agent.run("What is the 10th fibonacci number?")

View File

@@ -0,0 +1,16 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile.dev
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]

View File

@@ -0,0 +1 @@
../chatbot-ui/models

View File

@@ -0,0 +1,6 @@
from langchain.llms import OpenAI
llm = OpenAI(temperature=0.9,model_name="gpt-3.5-turbo")
text = "What would be a good company name for a company that makes colorful socks?"
print(llm(text))

2
examples/langchain/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
models/ggml-koala-13B-4bit-128g
models/ggml-gpt4all-j

View File

@@ -0,0 +1,6 @@
FROM node:latest
COPY ./langchainjs-localai-example /app
WORKDIR /app
RUN npm install
RUN npm run build
ENTRYPOINT [ "npm", "run", "start" ]

View File

@@ -0,0 +1,5 @@
FROM python:3.10-bullseye
COPY ./langchainpy-localai-example /app
WORKDIR /app
RUN pip install --no-cache-dir -r requirements.txt
ENTRYPOINT [ "python", "./full_demo.py" ];

View File

@@ -0,0 +1,30 @@
# langchain
Example of using langchain, with the standard OpenAI llm module, and LocalAI. Has docker compose profiles for both the Typescript and Python versions.
**Please Note** - This is a tech demo example at this time. ggml-gpt4all-j has pretty terrible results for most langchain applications with the settings used in this example.
## Setup
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/langchain
# (optional) - Edit the example code in typescript.
# vi ./langchainjs-localai-example/index.ts
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# start with docker-compose for typescript!
docker-compose --profile ts up --build
# or start with docker-compose for python!
docker-compose --profile py up --build
```
## Copyright
Some of the example code in index.mts and full_demo.py is adapted from the langchainjs project and is Copyright (c) Harrison Chase. Used under the terms of the MIT license, as is the remainder of this code.

View File

@@ -0,0 +1,43 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile.dev
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]
js:
build:
context: .
dockerfile: JS.Dockerfile
profiles:
- js
- ts
depends_on:
- "api"
environment:
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
- 'OPENAI_API_BASE=http://api:8080/v1'
- 'MODEL_NAME=gpt-3.5-turbo' #gpt-3.5-turbo' # ggml-gpt4all-j' # ggml-koala-13B-4bit-128g'
py:
build:
context: .
dockerfile: PY.Dockerfile
profiles:
- py
depends_on:
- "api"
environment:
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
- 'OPENAI_API_BASE=http://api:8080/v1'
- 'MODEL_NAME=gpt-3.5-turbo' #gpt-3.5-turbo' # ggml-gpt4all-j' # ggml-koala-13B-4bit-128g'

View File

@@ -0,0 +1,2 @@
node_modules/
dist/

View File

@@ -0,0 +1,20 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "node",
"request": "launch",
"name": "Launch Program",
// "skipFiles": [
// "<node_internals>/**"
// ],
"program": "${workspaceFolder}\\dist\\index.mjs",
"outFiles": [
"${workspaceFolder}/**/*.js"
]
}
]
}

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,21 @@
{
"name": "langchainjs-localai-example",
"version": "0.1.0",
"description": "Trivial Example of using langchain + the OpenAI API + LocalAI together",
"main": "index.mjs",
"scripts": {
"build": "tsc --build",
"clean": "tsc --build --clean",
"start": "node --trace-warnings dist/index.mjs"
},
"author": "dave@gray101.com",
"license": "MIT",
"devDependencies": {
"@types/node": "^18.16.4",
"typescript": "^5.0.4"
},
"dependencies": {
"langchain": "^0.0.67",
"typeorm": "^0.3.15"
}
}

View File

@@ -0,0 +1,79 @@
import { OpenAIChat } from "langchain/llms/openai";
import { loadQAStuffChain } from "langchain/chains";
import { Document } from "langchain/document";
import { initializeAgentExecutorWithOptions } from "langchain/agents";
import {Calculator} from "langchain/tools/calculator";
const pathToLocalAi = process.env['OPENAI_API_BASE'] || 'http://api:8080/v1';
const fakeApiKey = process.env['OPENAI_API_KEY'] || '-';
const modelName = process.env['MODEL_NAME'] || 'gpt-3.5-turbo';
function getModel(): OpenAIChat {
return new OpenAIChat({
prefixMessages: [
{
role: "system",
content: "You are a helpful assistant that answers in pirate language",
},
],
modelName: modelName,
maxTokens: 50,
openAIApiKey: fakeApiKey,
maxRetries: 2
}, {
basePath: pathToLocalAi,
apiKey: fakeApiKey,
});
}
// Minimal example.
export const run = async () => {
const model = getModel();
console.log(`about to model.call at ${new Date().toUTCString()}`);
const res = await model.call(
"What would be a good company name a company that makes colorful socks?"
);
console.log(`${new Date().toUTCString()}`);
console.log({ res });
};
await run();
// This example uses the `StuffDocumentsChain`
export const run2 = async () => {
const model = getModel();
const chainA = loadQAStuffChain(model);
const docs = [
new Document({ pageContent: "Harrison went to Harvard." }),
new Document({ pageContent: "Ankush went to Princeton." }),
];
const resA = await chainA.call({
input_documents: docs,
question: "Where did Harrison go to college?",
});
console.log({ resA });
};
await run2();
// Quickly thrown together example of using tools + agents.
// This seems like it should work, but it doesn't yet.
export const temporarilyBrokenToolTest = async () => {
const model = getModel();
const executor = await initializeAgentExecutorWithOptions([new Calculator(true)], model, {
agentType: "zero-shot-react-description",
});
console.log("Loaded agent.");
const input = `What is the value of (500 *2) + 350 - 13?`;
console.log(`Executing with input "${input}"...`);
const result = await executor.call({ input });
console.log(`Got output ${result.output}`);
}
await temporarilyBrokenToolTest();

View File

@@ -0,0 +1,15 @@
{
"compilerOptions": {
"target": "es2022",
"lib": ["ES2022", "DOM"],
"module": "ES2022",
"moduleResolution": "node",
"strict": true,
"esModuleInterop": true,
"allowSyntheticDefaultImports": true,
"isolatedModules": true,
"outDir": "./dist"
},
"include": ["src", "test"],
"exclude": ["node_modules", "dist"]
}

View File

@@ -0,0 +1,24 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"redirectOutput": true,
"justMyCode": false
},
{
"name": "Python: Attach to Port 5678",
"type": "python",
"request": "attach",
"connect": {
"host": "localhost",
"port": 5678
},
"justMyCode": false
}
]
}

View File

@@ -0,0 +1,3 @@
{
"python.defaultInterpreterPath": "${workspaceFolder}/.venv/Scripts/python"
}

View File

@@ -0,0 +1,46 @@
import os
import logging
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate, LLMChain
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
AIMessagePromptTemplate,
HumanMessagePromptTemplate,
)
from langchain.schema import (
AIMessage,
HumanMessage,
SystemMessage
)
# This logging incantation makes it easy to see that you're actually reaching your LocalAI instance rather than OpenAI.
logging.basicConfig(level=logging.DEBUG)
print('Langchain + LocalAI PYTHON Tests')
base_path = os.environ.get('OPENAI_API_BASE', 'http://api:8080/v1')
key = os.environ.get('OPENAI_API_KEY', '-')
model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo')
chat = ChatOpenAI(temperature=0, openai_api_base=base_path, openai_api_key=key, model_name=model_name, max_tokens=100)
print("Created ChatOpenAI for ", chat.model_name)
template = "You are a helpful assistant that translates {input_language} to {output_language}. The next message will be a sentence in {input_language}. Respond ONLY with the translation in {output_language}. Do not respond in {input_language}!"
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_template = "{text}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
print("ABOUT to execute")
# get a chat completion from the formatted messages
response = chat(chat_prompt.format_prompt(input_language="English", output_language="French", text="I love programming.").to_messages())
print(response)
print(".");

View File

@@ -0,0 +1,32 @@
aiohttp==3.8.4
aiosignal==1.3.1
async-timeout==4.0.2
attrs==23.1.0
certifi==2022.12.7
charset-normalizer==3.1.0
colorama==0.4.6
dataclasses-json==0.5.7
debugpy==1.6.7
frozenlist==1.3.3
greenlet==2.0.2
idna==3.4
langchain==0.0.159
marshmallow==3.19.0
marshmallow-enum==1.5.1
multidict==6.0.4
mypy-extensions==1.0.0
numexpr==2.8.4
numpy==1.24.3
openai==0.27.6
openapi-schema-pydantic==1.2.4
packaging==23.1
pydantic==1.10.7
PyYAML==6.0
requests==2.29.0
SQLAlchemy==2.0.12
tenacity==8.2.2
tqdm==4.65.0
typing-inspect==0.8.0
typing_extensions==4.5.0
urllib3==1.26.15
yarl==1.9.2

View File

@@ -0,0 +1,6 @@
from langchain.llms import OpenAI
llm = OpenAI(temperature=0.9,model_name="gpt-3.5-turbo")
text = "What would be a good company name for a company that makes colorful socks?"
print(llm(text))

View File

@@ -0,0 +1 @@
{{.Input}}

View File

@@ -0,0 +1,18 @@
name: gpt-3.5-turbo
parameters:
model: ggml-gpt4all-j # ggml-koala-13B-4bit-128g
top_k: 80
temperature: 0.2
top_p: 0.7
context_size: 1024
threads: 4
stopwords:
- "HUMAN:"
- "GPT:"
roles:
user: " "
system: " "
backend: "gptj"
template:
completion: completion
chat: gpt4all

View File

@@ -0,0 +1,4 @@
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
### Prompt:
{{.Input}}
### Response:

View File

@@ -0,0 +1,26 @@
# localai-webui
Example of integration with [dhruvgera/localai-frontend](https://github.com/Dhruvgera/LocalAI-frontend).
![image](https://user-images.githubusercontent.com/42107491/235344183-44b5967d-ba22-4331-804c-8da7004a5d35.png)
## Setup
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/localai-webui
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download any desired models to models/ in the parent LocalAI project dir
# For example: wget https://gpt4all.io/models/ggml-gpt4all-j.bin
# start with docker-compose
docker-compose up -d --build
```
Open http://localhost:3000 for the Web UI.

View File

@@ -0,0 +1,20 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: .
dockerfile: Dockerfile
ports:
- 8080:8080
env_file:
- .env
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai"]
frontend:
image: quay.io/go-skynet/localai-frontend:master
ports:
- 3000:3000

1
examples/query_data/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
storage/

View File

@@ -0,0 +1,67 @@
# Data query example
This example makes use of [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents.
It loosely follows [the quickstart](https://gpt-index.readthedocs.io/en/stable/guides/primer/usage_pattern.html).
Summary of the steps:
- prepare the dataset (and store it into `data`)
- prepare a vector index database to run queries on
- run queries
## Requirements
You will need a training data set. Copy that over `data`.
## Setup
Start the API:
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/query_data
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# start with docker-compose
docker-compose up -d --build
```
### Create a storage
In this step we will create a local vector database from our document set, so later we can ask questions on it with the LLM.
```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-
python store.py
```
After it finishes, a directory "storage" will be created with the vector index database.
## Query
We can now query the dataset.
```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-
python query.py
```
## Update
To update our vector database, run `update.py`
```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-
python update.py
```

View File

View File

@@ -0,0 +1,15 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: .
dockerfile: Dockerfile
ports:
- 8080:8080
env_file:
- .env
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai"]

View File

@@ -0,0 +1 @@
{{.Input}}

View File

@@ -0,0 +1,6 @@
name: text-embedding-ada-002
parameters:
model: bert
threads: 14
backend: bert-embeddings
embeddings: true

View File

@@ -0,0 +1,17 @@
name: gpt-3.5-turbo
parameters:
model: ggml-gpt4all-j
top_k: 80
temperature: 0.2
top_p: 0.7
context_size: 1024
threads: 14
stopwords:
- "HUMAN:"
- "GPT:"
roles:
user: " "
system: " "
template:
completion: completion
chat: gpt4all

View File

@@ -0,0 +1,35 @@
import os
# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
# os.environ['OPENAI_API_KEY']= ""
from llama_index import LLMPredictor, PromptHelper, ServiceContext
from langchain.llms.openai import OpenAI
from llama_index import StorageContext, load_index_from_storage
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
# This example uses text-davinci-003 by default; feel free to change if desired
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
# Configure prompt parameters and initialise helper
max_input_size = 500
num_output = 256
max_chunk_overlap = 20
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
# Load documents from the 'data' directory
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
# rebuild storage context
storage_context = StorageContext.from_defaults(persist_dir='./storage')
# load index
index = load_index_from_storage(storage_context, service_context=service_context, )
query_engine = index.as_query_engine()
data = input("Question: ")
response = query_engine.query(data)
print(response)

View File

@@ -0,0 +1,27 @@
import os
# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
# os.environ['OPENAI_API_KEY']= ""
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, LLMPredictor, PromptHelper, ServiceContext
from langchain.llms.openai import OpenAI
from llama_index import StorageContext, load_index_from_storage
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
# This example uses text-davinci-003 by default; feel free to change if desired
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
# Configure prompt parameters and initialise helper
max_input_size = 400
num_output = 400
max_chunk_overlap = 30
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
# Load documents from the 'data' directory
documents = SimpleDirectoryReader('data').load_data()
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 400)
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
index.storage_context.persist(persist_dir="./storage")

View File

@@ -0,0 +1,32 @@
import os
# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
# os.environ['OPENAI_API_KEY']= ""
from llama_index import LLMPredictor, PromptHelper, SimpleDirectoryReader, ServiceContext
from langchain.llms.openai import OpenAI
from llama_index import StorageContext, load_index_from_storage
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
# This example uses text-davinci-003 by default; feel free to change if desired
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
# Configure prompt parameters and initialise helper
max_input_size = 512
num_output = 256
max_chunk_overlap = 20
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
# Load documents from the 'data' directory
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
# rebuild storage context
storage_context = StorageContext.from_defaults(persist_dir='./storage')
# load index
index = load_index_from_storage(storage_context, service_context=service_context, )
documents = SimpleDirectoryReader('data').load_data()
index.refresh(documents)
index.storage_context.persist(persist_dir="./storage")

View File

@@ -0,0 +1,10 @@
FROM python
# convert the model (one-off)
RUN pip3 install torch numpy
WORKDIR /build
COPY ./scripts/ .
RUN git clone --recurse-submodules https://github.com/saharNooby/rwkv.cpp && cd rwkv.cpp && cmake . && cmake --build . --config Release
ENTRYPOINT [ "/build/build.sh" ]

59
examples/rwkv/README.md Normal file
View File

@@ -0,0 +1,59 @@
# rwkv
Example of how to run rwkv models.
## Run models
Setup:
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/rwkv
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# build the tooling image to convert an rwkv model locally:
docker build -t rwkv-converter -f Dockerfile.build .
# download and convert a model (one-off) - it's going to be fast on CPU too!
docker run -ti --name converter -v $PWD:/data rwkv-converter https://huggingface.co/BlinkDL/rwkv-4-raven/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%25-Other1%25-20230425-ctx4096.pth /data/models/rwkv
# Get the tokenizer
wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O models/rwkv.tokenizer.json
# start with docker-compose
docker-compose up -d --build
```
Test it out:
```bash
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "gpt-3.5-turbo",
"prompt": "A long time ago, in a galaxy far away",
"max_tokens": 100,
"temperature": 0.9, "top_p": 0.8, "top_k": 80
}'
# {"object":"text_completion","model":"gpt-3.5-turbo","choices":[{"text":", there was a small group of five friends: Annie, Bryan, Charlie, Emily, and Jesse."}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-3.5-turbo",
"messages": [{"role": "user", "content": "How are you?"}],
"temperature": 0.9, "top_p": 0.8, "top_k": 80
}'
# {"object":"chat.completion","model":"gpt-3.5-turbo","choices":[{"message":{"role":"assistant","content":" Good, thanks. I am about to go to bed. I' ll talk to you later.Bye."}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
```
### Fine tuning
See [RWKV-LM](https://github.com/BlinkDL/RWKV-LM#training--fine-tuning). There is also a Google [colab](https://colab.research.google.com/github/resloved/RWKV-notebooks/blob/master/RWKV_v4_RNN_Pile_Fine_Tuning.ipynb).
## See also
- [RWKV-LM](https://github.com/BlinkDL/RWKV-LM)
- [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp)

View File

@@ -0,0 +1,16 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile.dev
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]

View File

@@ -0,0 +1,19 @@
name: gpt-3.5-turbo
parameters:
model: rwkv
top_k: 80
temperature: 0.9
max_tokens: 100
top_p: 0.8
context_size: 1024
threads: 14
backend: "rwkv"
cutwords:
- "Bob:.*"
roles:
user: "Bob:"
system: "Alice:"
assistant: "Alice:"
template:
completion: rwkv_completion
chat: rwkv_chat

View File

@@ -0,0 +1,13 @@
The following is a verbose detailed conversation between Bob and a woman, Alice. Alice is intelligent, friendly and likeable. Alice is likely to agree with Bob.
Bob: Hello Alice, how are you doing?
Alice: Hi Bob! Thanks, I'm fine. What about you?
Bob: I am very good! It's nice to see you. Would you mind me chatting with you for a while?
Alice: Not at all! I'm listening.
{{.Input}}
Alice:

View File

@@ -0,0 +1 @@
Complete the following sentence: {{.Input}}

11
examples/rwkv/scripts/build.sh Executable file
View File

@@ -0,0 +1,11 @@
#!/bin/bash
set -ex
URL=$1
OUT=$2
FILENAME=$(basename $URL)
wget -nc $URL -O /build/$FILENAME
python3 /build/rwkv.cpp/rwkv/convert_pytorch_to_ggml.py /build/$FILENAME /build/float-model float16
python3 /build/rwkv.cpp/rwkv/quantize.py /build/float-model $OUT Q4_2

View File

@@ -0,0 +1,11 @@
SLACK_APP_TOKEN=xapp-1-...
SLACK_BOT_TOKEN=xoxb-...
OPENAI_API_KEY=sk-...
OPENAI_API_BASE=http://api:8080
OPENAI_MODEL=gpt-3.5-turbo
OPENAI_TIMEOUT_SECONDS=60
#OPENAI_SYSTEM_TEXT="You proofread text. When you receive a message, you will check
#for mistakes and make suggestion to improve the language of the given text"
USE_SLACK_LANGUAGE=true
SLACK_APP_LOG_LEVEL=INFO
TRANSLATE_MARKDOWN=true

View File

@@ -0,0 +1,27 @@
# Slack bot
Slackbot using: https://github.com/seratch/ChatGPT-in-Slack
## Setup
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/slack-bot
git clone https://github.com/seratch/ChatGPT-in-Slack
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# Set the discord bot options (see: https://github.com/seratch/ChatGPT-in-Slack)
cp -rfv .env.example .env
vim .env
# start with docker-compose
docker-compose up -d --build
```

View File

@@ -0,0 +1,23 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile.dev
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]
bot:
build:
context: ./ChatGPT-in-Slack
dockerfile: Dockerfile
env_file:
- .env

1
examples/slack-bot/models Symbolic link
View File

@@ -0,0 +1 @@
../chatbot-ui/models

77
go.mod
View File

@@ -1,45 +1,68 @@
module github.com/go-skynet/llama-cli
module github.com/go-skynet/LocalAI
go 1.19
require (
github.com/charmbracelet/bubbles v0.15.0
github.com/charmbracelet/bubbletea v0.23.2
github.com/charmbracelet/lipgloss v0.7.1
github.com/go-skynet/llama v0.0.0-20230321172246-7be5326e18cc
github.com/gofiber/fiber/v2 v2.42.0
github.com/urfave/cli/v2 v2.25.0
github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230515153606-95b02d76b04d
github.com/go-audio/wav v1.1.0
github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf
github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
github.com/go-skynet/go-llama.cpp v0.0.0-20230516230554-b7bbefbe0b84
github.com/gofiber/fiber/v2 v2.45.0
github.com/hashicorp/go-multierror v1.1.1
github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230516143155-79d6243fe1bc
github.com/onsi/ginkgo/v2 v2.9.5
github.com/onsi/gomega v1.27.6
github.com/otiai10/copy v1.11.0
github.com/otiai10/openaigo v1.1.0
github.com/rs/zerolog v1.29.1
github.com/sashabaranov/go-openai v1.9.4
github.com/swaggo/swag v1.16.1
github.com/urfave/cli/v2 v2.25.3
github.com/valyala/fasthttp v1.47.0
gopkg.in/yaml.v3 v3.0.1
)
require (
github.com/andybalholm/brotli v1.0.4 // indirect
github.com/atotto/clipboard v0.1.4 // indirect
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
github.com/containerd/console v1.0.3 // indirect
github.com/KyleBanks/depth v1.2.1 // indirect
github.com/PuerkitoBio/purell v1.1.1 // indirect
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/andybalholm/brotli v1.0.5 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
github.com/go-audio/audio v1.0.0 // indirect
github.com/go-audio/riff v1.0.0 // indirect
github.com/go-logr/logr v1.2.4 // indirect
github.com/go-openapi/jsonpointer v0.19.5 // indirect
github.com/go-openapi/jsonreference v0.19.6 // indirect
github.com/go-openapi/spec v0.20.4 // indirect
github.com/go-openapi/swag v0.19.15 // indirect
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/klauspost/compress v1.15.9 // indirect
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/klauspost/compress v1.16.3 // indirect
github.com/mailru/easyjson v0.7.6 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.17 // indirect
github.com/mattn/go-localereader v0.0.1 // indirect
github.com/mattn/go-isatty v0.0.18 // indirect
github.com/mattn/go-runewidth v0.0.14 // indirect
github.com/muesli/ansi v0.0.0-20211018074035-2e021307bc4b // indirect
github.com/muesli/cancelreader v0.2.2 // indirect
github.com/muesli/reflow v0.3.0 // indirect
github.com/muesli/termenv v0.15.1 // indirect
github.com/philhofer/fwd v1.1.1 // indirect
github.com/philhofer/fwd v1.1.2 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 // indirect
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d // indirect
github.com/tinylib/msgp v1.1.6 // indirect
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect
github.com/tinylib/msgp v1.1.8 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/valyala/fasthttp v1.44.0 // indirect
github.com/valyala/tcplisten v1.0.0 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
golang.org/x/sync v0.1.0 // indirect
golang.org/x/sys v0.6.0 // indirect
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect
golang.org/x/text v0.3.7 // indirect
golang.org/x/net v0.10.0 // indirect
golang.org/x/sys v0.8.0 // indirect
golang.org/x/text v0.9.0 // indirect
golang.org/x/tools v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
)

253
go.sum
View File

@@ -1,123 +1,242 @@
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
github.com/aymanbagabas/go-osc52 v1.0.3/go.mod h1:zT8H+Rk4VSabYN90pWyugflM3ZhpTZNC7cASDfUCdT4=
github.com/aymanbagabas/go-osc52 v1.2.1/go.mod h1:zT8H+Rk4VSabYN90pWyugflM3ZhpTZNC7cASDfUCdT4=
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
github.com/charmbracelet/bubbles v0.15.0 h1:c5vZ3woHV5W2b8YZI1q7v4ZNQaPetfHuoHzx+56Z6TI=
github.com/charmbracelet/bubbles v0.15.0/go.mod h1:Y7gSFbBzlMpUDR/XM9MhZI374Q+1p1kluf1uLl8iK74=
github.com/charmbracelet/bubbletea v0.23.1/go.mod h1:JAfGK/3/pPKHTnAS8JIE2u9f61BjWTQY57RbT25aMXU=
github.com/charmbracelet/bubbletea v0.23.2 h1:vuUJ9HJ7b/COy4I30e8xDVQ+VRDUEFykIjryPfgsdps=
github.com/charmbracelet/bubbletea v0.23.2/go.mod h1:FaP3WUivcTM0xOKNmhciz60M6I+weYLF76mr1JyI7sM=
github.com/charmbracelet/harmonica v0.2.0/go.mod h1:KSri/1RMQOZLbw7AHqgcBycp8pgJnQMYYT8QZRqZ1Ao=
github.com/charmbracelet/lipgloss v0.6.0/go.mod h1:tHh2wr34xcHjC2HCXIlGSG1jaDF0S0atAUvBMP6Ppuk=
github.com/charmbracelet/lipgloss v0.7.1 h1:17WMwi7N1b1rVWOjMT+rCh7sQkvDU75B2hbZpc5Kc1E=
github.com/charmbracelet/lipgloss v0.7.1/go.mod h1:yG0k3giv8Qj8edTCbbg6AlQ5e8KNWpFujkNawKNhE2c=
github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw=
github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI=
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs=
github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/go-skynet/llama v0.0.0-20230321172246-7be5326e18cc h1:NcmO8mA7iRZIX0Qy2SjcsSaV14+g87MiTey1neUJaFQ=
github.com/go-skynet/llama v0.0.0-20230321172246-7be5326e18cc/go.mod h1:ZtYsAIud4cvP9VTTI9uhdgR1uCwaO/gGKnZZ95h9i7w=
github.com/gofiber/fiber/v2 v2.42.0 h1:Fnp7ybWvS+sjNQsFvkhf4G8OhXswvB6Vee8hM/LyS+8=
github.com/gofiber/fiber/v2 v2.42.0/go.mod h1:3+SGNjqMh5VQH5Vz2Wdi43zTIV16ktlFd3x3R6O1Zlc=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be h1:3Hic97PY6hcw/SY44RuR7kyONkxd744RFeRrqckzwNQ=
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2 h1:YNbUAyIRtaLODitigJU1EM5ubmMu5FmHtYAayJD6Vbg=
github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56 h1:s8/MZdicstKi5fn9D9mKGIQ/q6IWCYCk/BM68i8v51w=
github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35 h1:sMg/SgnMPS/HNUO/2kGm72vl8R9TmNIwgLFr2TNwR3g=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230509153812-1d17cd5bb37a h1:MlyiDLNCM/wjbv8U5Elj18NvaAgl61SGiRUpqQz5dfs=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230509153812-1d17cd5bb37a/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230515153606-95b02d76b04d h1:uxKTbiRnplE2SubchneSf4NChtxLJtOy9VdHnQMT0d0=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230515153606-95b02d76b04d/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=
github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498=
github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ=
github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY=
github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
github.com/go-openapi/jsonreference v0.19.6 h1:UBIxjkht+AWIgYzCDSv2GN+E/togfwXUJFRTWhl2Jjs=
github.com/go-openapi/jsonreference v0.19.6/go.mod h1:diGHMEHg2IqXZGKxqyvWdfWU/aim5Dprw5bqpKkTvns=
github.com/go-openapi/spec v0.20.4 h1:O8hJrt0UMnhHcluhIdUgCLRWyM2x7QkBXRvOs7m+O1M=
github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7FOEWeq8I=
github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f h1:GW8RQa1RVeDF1dOuAP/y6xWVC+BRtf9tJOuEza6Asbg=
github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA=
github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf h1:VJfSn8hIDE+K5+h38M3iAyFXrxpRExMKRdTk33UDxsw=
github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA=
github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea h1:8Isk9D+Auth5OuXVAQPC3MO+5zF/2S7mvs2JZLw6a+8=
github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea/go.mod h1:NHwIVvsg7Jh6p0M4uBLVmSMEaPUia6O6yjXUpLWVJmQ=
github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557 h1:LD66fKtvP2lmyuuKL8pBat/pVTKUbLs3L5fM/5lyi4w=
github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557/go.mod h1:NHwIVvsg7Jh6p0M4uBLVmSMEaPUia6O6yjXUpLWVJmQ=
github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 h1:+3KPDf4Wv1VHOkzAfZnlj9qakLSYggTpm80AswhD/FU=
github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4/go.mod h1:VY0s5KoAI2jRCvQXKuDeEEe8KG7VaWifSNJSk+E1KtY=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6 h1:XshpypO6ekU09CI19vuzke2a1Es1lV5ZaxA7CUehu0E=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245 h1:IcfYY5uH0DdDXEJKJ8bq0WZCd9guPPd3xllaWNy8LOk=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis=
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
github.com/go-skynet/go-llama.cpp v0.0.0-20230510072905-70593fccbe4b h1:qqxrjY8fYDXQahmCMTCACahm1tbiqHLPUHALkFLyBfo=
github.com/go-skynet/go-llama.cpp v0.0.0-20230510072905-70593fccbe4b/go.mod h1:DLfsPD7tYYnpksERH83HSf7qVNW3FIwmz7/zfYO0/6I=
github.com/go-skynet/go-llama.cpp v0.0.0-20230516230554-b7bbefbe0b84 h1:f5iYF75bAr73Tl8AdtFD5Urs/2bsHKPh52K++jLbsfk=
github.com/go-skynet/go-llama.cpp v0.0.0-20230516230554-b7bbefbe0b84/go.mod h1:jxyQ26t1aKC5Gn782w9WWh5n1133PxCOfkuc01xM4RQ=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gofiber/fiber/v2 v2.45.0 h1:p4RpkJT9GAW6parBSbcNFH2ApnAuW3OzaQzbOCoDu+s=
github.com/gofiber/fiber/v2 v2.45.0/go.mod h1:DNl0/c37WLe0g92U6lx1VMQuxGUQY5V7EIaVoEsUffc=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY=
github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA=
github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng=
github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98=
github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/muesli/ansi v0.0.0-20211018074035-2e021307bc4b h1:1XF24mVaiu7u+CFywTdcDo2ie1pzzhwjt6RHqzpMU34=
github.com/muesli/ansi v0.0.0-20211018074035-2e021307bc4b/go.mod h1:fQuZ0gauxyBcmsdE3ZT4NasjaRdxmbCS0jRHsrWu3Ho=
github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
github.com/muesli/reflow v0.2.1-0.20210115123740-9e1d0d53df68/go.mod h1:Xk+z4oIWdQqJzsxyjgl3P22oYZnHdZ8FFTHAQQt5BMQ=
github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s=
github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8=
github.com/muesli/termenv v0.11.1-0.20220204035834-5ac8409525e0/go.mod h1:Bd5NYQ7pd+SrtBSrSNoBBmXlcY8+Xj4BMJgh8qcZrvs=
github.com/muesli/termenv v0.13.0/go.mod h1:sP1+uffeLaEYpyOTb8pLCUctGcGLnoFjSn4YJK5e2bc=
github.com/muesli/termenv v0.14.0/go.mod h1:kG/pF1E7fh949Xhe156crRUrHNyK221IuGO7Ez60Uc8=
github.com/muesli/termenv v0.15.1 h1:UzuTb/+hhlBugQz28rpzey4ZuKcZ03MeKsoG7IJZIxs=
github.com/muesli/termenv v0.15.1/go.mod h1:HeAQPTzpfs016yGtA4g00CsdYnVLJvxsS4ANqrZs2sQ=
github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ=
github.com/mudler/go-stable-diffusion v0.0.0-20230516104333-2f32a16b5b24 h1:XfRD/bZom6u4zji7aB0urIVOsPe43KlkzSRrVhlzaOM=
github.com/mudler/go-stable-diffusion v0.0.0-20230516104333-2f32a16b5b24/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw=
github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 h1:KTkh3lOUsGqQyP4v+oa38sPFdrZtNnM4HaxTb3epdYs=
github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230516143155-79d6243fe1bc h1:OPavP/SUsVWVYPhSUZKZeX8yDSQzf4G+BmUmwzrLTyI=
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230516143155-79d6243fe1bc/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
github.com/onsi/ginkgo/v2 v2.9.4 h1:xR7vG4IXt5RWx6FfIjyAtsoMAtnc3C/rFXBBd2AjZwE=
github.com/onsi/ginkgo/v2 v2.9.4/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM=
github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q=
github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k=
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
github.com/otiai10/copy v1.11.0 h1:OKBD80J/mLBrwnzXqGtFCzprFSGioo30JcmR4APsNwc=
github.com/otiai10/copy v1.11.0/go.mod h1:rSaLseMUsZFFbsFGc7wCJnnkTAvdc5L6VWxPE4308Ww=
github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks=
github.com/otiai10/openaigo v1.1.0 h1:zRvGBqZUW5PCMgdkJNsPVTBd8tOLCMTipXE5wD2pdTg=
github.com/otiai10/openaigo v1.1.0/go.mod h1:792bx6AWTS61weDi2EzKpHHnTF4eDMAlJ5GvAk/mgPg=
github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/philhofer/fwd v1.1.2 h1:bnDivRJ1EWPjUIRXV5KfORO897HTbpFAQddBdE8t7Gw=
github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc=
github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sahilm/fuzzy v0.1.0/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y=
github.com/sashabaranov/go-openai v1.9.3 h1:uNak3Rn5pPsKRs9bdT7RqRZEyej/zdZOEI2/8wvrFtM=
github.com/sashabaranov/go-openai v1.9.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/sashabaranov/go-openai v1.9.4 h1:KanoCEoowAI45jVXlenMCckutSRr39qOmSi9MyPBfZM=
github.com/sashabaranov/go-openai v1.9.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d h1:Q+gqLBOPkFGHyCJxXMRqtUgUbTjI8/Ze8vu8GGyNFwo=
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=
github.com/tinylib/msgp v1.1.6 h1:i+SbKraHhnrf9M5MYmvQhFnbLhAXSDWF8WWsuyRdocw=
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee h1:8Iv5m6xEo1NR1AvpV+7XmhI4r39LGNzwUL4YpMuL5vk=
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee/go.mod h1:qwtSXrKuJh/zsFQ12yEE89xfCrGKK63Rr7ctU/uCo4g=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/swaggo/swag v1.16.1 h1:fTNRhKstPKxcnoKsytm4sahr8FaYzUcT7i1/3nd/fBg=
github.com/swaggo/swag v1.16.1/go.mod h1:9/LMvHycG3NFHfR6LwvikHv5iFvmPADQ359cKikGxto=
github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw=
github.com/urfave/cli/v2 v2.25.0 h1:ykdZKuQey2zq0yin/l7JOm9Mh+pg72ngYMeB0ABn6q8=
github.com/urfave/cli/v2 v2.25.0/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0=
github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw=
github.com/urfave/cli/v2 v2.25.3 h1:VJkt6wvEBOoSjPFQvOkv6iWIrsJyCrKGtCtxXWwmGeY=
github.com/urfave/cli/v2 v2.25.3/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.44.0 h1:R+gLUhldIsfg1HokMuQjdQ5bh9nuXHPIfvkYUu9eR5Q=
github.com/valyala/fasthttp v1.44.0/go.mod h1:f6VbjjoI3z1NDOZOv17o6RvtRSWxC77seBFc2uWtgiY=
github.com/valyala/fasthttp v1.47.0 h1:y7moDoxYzMooFpT5aHgNgVOQDrS3qlkfiP9mDtGGK9c=
github.com/valyala/fasthttp v1.47.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA=
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20220214200702-86341886e292/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220906165146-f3363e06e74c/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM=
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220204135822-1c1b9b1eba6a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 h1:JGgROgKl9N8DuW20oFS5gxc+lE67/N3FcwmBPMe7ArY=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ=
golang.org/x/tools v0.8.0 h1:vSDcovVPld282ceKgDimkRSC8kpaH1dgyc9UMzlt84Y=
golang.org/x/tools v0.8.0/go.mod h1:JxBZ99ISMI5ViVkT1tr6tdNmXeTrcpVSD3vZ1RsRdN4=
golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo=
golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -1,142 +0,0 @@
package main
// A simple program demonstrating the text area component from the Bubbles
// component library.
import (
"fmt"
"strings"
"github.com/charmbracelet/bubbles/textarea"
"github.com/charmbracelet/bubbles/viewport"
tea "github.com/charmbracelet/bubbletea"
"github.com/charmbracelet/lipgloss"
llama "github.com/go-skynet/llama/go"
)
func startInteractive(l *llama.LLama, opts ...llama.PredictOption) error {
p := tea.NewProgram(initialModel(l, opts...))
_, err := p.Run()
return err
}
type (
errMsg error
)
type model struct {
viewport viewport.Model
messages *[]string
textarea textarea.Model
senderStyle lipgloss.Style
err error
l *llama.LLama
opts []llama.PredictOption
predictC chan string
}
func initialModel(l *llama.LLama, opts ...llama.PredictOption) model {
ta := textarea.New()
ta.Placeholder = "Send a message..."
ta.Focus()
ta.Prompt = "┃ "
ta.CharLimit = 280
ta.SetWidth(200)
ta.SetHeight(3)
// Remove cursor line styling
ta.FocusedStyle.CursorLine = lipgloss.NewStyle()
ta.ShowLineNumbers = false
vp := viewport.New(200, 5)
vp.SetContent(`Welcome to llama-cli. Type a message and press Enter to send. Alpaca doesn't keep context of the whole chat (yet).`)
ta.KeyMap.InsertNewline.SetEnabled(false)
predictChannel := make(chan string)
messages := []string{}
m := model{
textarea: ta,
messages: &messages,
viewport: vp,
senderStyle: lipgloss.NewStyle().Foreground(lipgloss.Color("5")),
err: nil,
l: l,
opts: opts,
predictC: predictChannel,
}
go func() {
for p := range predictChannel {
str, _ := templateString(emptyInput, struct {
Instruction string
Input string
}{Instruction: p})
res, _ := l.Predict(
str,
opts...,
)
mm := *m.messages
*m.messages = mm[:len(mm)-1]
*m.messages = append(*m.messages, m.senderStyle.Render("llama: ")+res)
m.viewport.SetContent(strings.Join(*m.messages, "\n"))
ta.Reset()
m.viewport.GotoBottom()
}
}()
return m
}
func (m model) Init() tea.Cmd {
return textarea.Blink
}
func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
var (
tiCmd tea.Cmd
vpCmd tea.Cmd
)
m.textarea, tiCmd = m.textarea.Update(msg)
m.viewport, vpCmd = m.viewport.Update(msg)
switch msg := msg.(type) {
case tea.WindowSizeMsg:
// m.viewport.Width = msg.Width
// m.viewport.Height = msg.Height
case tea.KeyMsg:
switch msg.Type {
case tea.KeyCtrlC, tea.KeyEsc:
fmt.Println(m.textarea.Value())
return m, tea.Quit
case tea.KeyEnter:
*m.messages = append(*m.messages, m.senderStyle.Render("You: ")+m.textarea.Value(), m.senderStyle.Render("Loading response..."))
m.predictC <- m.textarea.Value()
m.viewport.SetContent(strings.Join(*m.messages, "\n"))
m.textarea.Reset()
m.viewport.GotoBottom()
}
// We handle errors just like any other message
case errMsg:
m.err = msg
return m, nil
}
return m, tea.Batch(tiCmd, vpCmd)
}
func (m model) View() string {
return fmt.Sprintf(
"%s\n\n%s",
m.viewport.View(),
m.textarea.View(),
) + "\n\n"
}

View File

@@ -1,42 +0,0 @@
apiVersion: v1
kind: Namespace
metadata:
name: llama
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: llama
namespace: llama
labels:
app: llama
spec:
selector:
matchLabels:
app: llama
replicas: 1
template:
metadata:
labels:
app: llama
name: llama
spec:
containers:
- name: llama
args:
- api
image: quay.io/go-skynet/llama-cli:v0.1
---
apiVersion: v1
kind: Service
metadata:
name: llama
namespace: llama
spec:
selector:
app: llama
type: LoadBalancer
ports:
- protocol: TCP
port: 8080
targetPort: 8080

320
main.go
View File

@@ -1,267 +1,105 @@
package main
import (
"bytes"
"fmt"
"io/ioutil"
"os"
"runtime"
"text/template"
"path/filepath"
llama "github.com/go-skynet/llama/go"
api "github.com/go-skynet/LocalAI/api"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"github.com/urfave/cli/v2"
)
// Define the template string
var emptyInput string = `Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{{.Instruction}}
### Response:`
var nonEmptyInput string = `Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{{.Instruction}}
### Input:
{{.Input}}
### Response:
`
func llamaFromOptions(ctx *cli.Context) (*llama.LLama, error) {
opts := []llama.ModelOption{llama.SetContext(ctx.Int("context-size"))}
if ctx.Bool("alpaca") {
opts = append(opts, llama.EnableAlpaca)
}
return llama.New(ctx.String("model"), opts...)
}
func templateString(t string, in interface{}) (string, error) {
// Parse the template
tmpl, err := template.New("prompt").Parse(t)
if err != nil {
return "", err
}
var buf bytes.Buffer
err = tmpl.Execute(&buf, in)
if err != nil {
return "", err
}
return buf.String(), nil
}
var modelFlags = []cli.Flag{
&cli.StringFlag{
Name: "model",
EnvVars: []string{"MODEL_PATH"},
},
&cli.IntFlag{
Name: "tokens",
EnvVars: []string{"TOKENS"},
Value: 128,
},
&cli.IntFlag{
Name: "context-size",
EnvVars: []string{"CONTEXT_SIZE"},
Value: 512,
},
&cli.IntFlag{
Name: "threads",
EnvVars: []string{"THREADS"},
Value: runtime.NumCPU(),
},
&cli.Float64Flag{
Name: "temperature",
EnvVars: []string{"TEMPERATURE"},
Value: 0.95,
},
&cli.Float64Flag{
Name: "topp",
EnvVars: []string{"TOP_P"},
Value: 0.85,
},
&cli.IntFlag{
Name: "topk",
EnvVars: []string{"TOP_K"},
Value: 20,
},
&cli.BoolFlag{
Name: "alpaca",
EnvVars: []string{"ALPACA"},
Value: true,
},
}
func main() {
log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
path, err := os.Getwd()
if err != nil {
log.Error().Msgf("error: %s", err.Error())
os.Exit(1)
}
app := &cli.App{
Name: "llama-cli",
Version: "0.1",
Usage: "llama-cli --model ... --instruction 'What is an alpaca?'",
Flags: append(modelFlags,
&cli.StringFlag{
Name: "template",
EnvVars: []string{"TEMPLATE"},
Name: "LocalAI",
Usage: "OpenAI compatible API for running LLaMA/GPT models locally on CPU with consumer grade hardware.",
Flags: []cli.Flag{
&cli.BoolFlag{
Name: "f16",
EnvVars: []string{"F16"},
},
&cli.BoolFlag{
Name: "debug",
EnvVars: []string{"DEBUG"},
},
&cli.IntFlag{
Name: "threads",
DefaultText: "Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested.",
EnvVars: []string{"THREADS"},
Value: 4,
},
&cli.StringFlag{
Name: "instruction",
EnvVars: []string{"INSTRUCTION"},
Name: "models-path",
DefaultText: "Path containing models used for inferencing",
EnvVars: []string{"MODELS_PATH"},
Value: filepath.Join(path, "models"),
},
&cli.StringFlag{
Name: "input",
EnvVars: []string{"INPUT"},
}),
Description: `Run llama.cpp inference`,
UsageText: `
llama-cli --model ~/ggml-alpaca-7b-q4.bin --instruction "What's an alpaca?"
An Alpaca (Vicugna pacos) is a domesticated species of South American camelid, related to llamas and originally from Peru but now found throughout much of Andean region. They are bred for their fleeces which can be spun into wool or knitted items such as hats, sweaters, blankets etc
echo "An Alpaca (Vicugna pacos) is a domesticated species of South American camelid, related to llamas and originally from Peru but now found throughout much of Andean region. They are bred for their fleeces which can be spun into wool or knitted items such as hats, sweaters, blankets etc" | llama-cli --model ~/ggml-alpaca-7b-q4.bin --instruction "Proofread, improving clarity and flow" --input "-"
An Alpaca (Vicugna pacos) is a domesticated species from South America that's related to llamas. Originating in Peru but now found throughout the Andean region, they are bred for their fleeces which can be spun into wool or knitted items such as hats and sweaters—blankets too!
`,
Copyright: "go-skynet authors",
Commands: []*cli.Command{
{
Flags: modelFlags,
Name: "interactive",
Action: func(ctx *cli.Context) error {
l, err := llamaFromOptions(ctx)
if err != nil {
fmt.Println("Loading the model failed:", err.Error())
os.Exit(1)
}
return startInteractive(l, llama.SetTemperature(ctx.Float64("temperature")),
llama.SetTopP(ctx.Float64("topp")),
llama.SetTopK(ctx.Int("topk")),
llama.SetTokens(ctx.Int("tokens")),
llama.SetThreads(ctx.Int("threads")))
},
Name: "config-file",
DefaultText: "Config file",
EnvVars: []string{"CONFIG_FILE"},
},
{
Name: "api",
Flags: []cli.Flag{
&cli.IntFlag{
Name: "threads",
EnvVars: []string{"THREADS"},
Value: runtime.NumCPU(),
},
&cli.StringFlag{
Name: "model",
EnvVars: []string{"MODEL_PATH"},
},
&cli.StringFlag{
Name: "address",
EnvVars: []string{"ADDRESS"},
Value: ":8080",
},
&cli.BoolFlag{
Name: "alpaca",
EnvVars: []string{"ALPACA"},
Value: true,
},
&cli.IntFlag{
Name: "context-size",
EnvVars: []string{"CONTEXT_SIZE"},
Value: 512,
},
},
Action: func(ctx *cli.Context) error {
l, err := llamaFromOptions(ctx)
if err != nil {
fmt.Println("Loading the model failed:", err.Error())
os.Exit(1)
}
return api(l, ctx.String("address"), ctx.Int("threads"))
},
&cli.StringFlag{
Name: "address",
DefaultText: "Bind address for the API server.",
EnvVars: []string{"ADDRESS"},
Value: ":8080",
},
&cli.StringFlag{
Name: "image-dir",
DefaultText: "Image directory",
EnvVars: []string{"IMAGE_DIR"},
Value: "",
},
&cli.IntFlag{
Name: "context-size",
DefaultText: "Default context size of the model",
EnvVars: []string{"CONTEXT_SIZE"},
Value: 512,
},
&cli.IntFlag{
Name: "upload-limit",
DefaultText: "Default upload-limit. MB",
EnvVars: []string{"UPLOAD_LIMIT"},
Value: 15,
},
},
Description: `
LocalAI is a drop-in replacement OpenAI API which runs inference locally.
Some of the models compatible are:
- Vicuna
- Koala
- GPT4ALL
- GPT4ALL-J
- Cerebras
- Alpaca
- StableLM (ggml quantized)
It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
`,
UsageText: `local-ai [options]`,
Copyright: "go-skynet authors",
Action: func(ctx *cli.Context) error {
instruction := ctx.String("instruction")
input := ctx.String("input")
templ := ctx.String("template")
promptTemplate := ""
if input != "" {
promptTemplate = nonEmptyInput
} else {
promptTemplate = emptyInput
}
if templ != "" {
dat, err := os.ReadFile(templ)
if err != nil {
fmt.Printf("Failed reading file: %s", err.Error())
os.Exit(1)
}
promptTemplate = string(dat)
}
if instruction == "-" {
dat, err := ioutil.ReadAll(os.Stdin)
if err != nil {
fmt.Printf("reading stdin failed: %s", err)
os.Exit(1)
}
instruction = string(dat)
}
if input == "-" {
dat, err := ioutil.ReadAll(os.Stdin)
if err != nil {
fmt.Printf("reading stdin failed: %s", err)
os.Exit(1)
}
input = string(dat)
}
str, err := templateString(promptTemplate, struct {
Instruction string
Input string
}{Instruction: instruction, Input: input})
if err != nil {
fmt.Println("Templating the input failed:", err.Error())
os.Exit(1)
}
l, err := llamaFromOptions(ctx)
if err != nil {
fmt.Println("Loading the model failed:", err.Error())
os.Exit(1)
}
res, err := l.Predict(
str,
llama.SetTemperature(ctx.Float64("temperature")),
llama.SetTopP(ctx.Float64("topp")),
llama.SetTopK(ctx.Int("topk")),
llama.SetTokens(ctx.Int("tokens")),
llama.SetThreads(ctx.Int("threads")),
)
if err != nil {
fmt.Printf("predicting failed: %s", err)
os.Exit(1)
}
fmt.Println(res)
return nil
fmt.Printf("Starting LocalAI using %d threads, with models path: %s\n", ctx.Int("threads"), ctx.String("models-path"))
return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("upload-limit"), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false, ctx.String("image-dir")).Listen(ctx.String("address"))
},
}
err := app.Run(os.Args)
err = app.Run(os.Args)
if err != nil {
fmt.Println(err)
log.Error().Msgf("error: %s", err.Error())
os.Exit(1)
}
}

0
models/.keep Normal file
View File

194
pkg/model/initializers.go Normal file
View File

@@ -0,0 +1,194 @@
package model
import (
"fmt"
"path/filepath"
"strings"
rwkv "github.com/donomii/go-rwkv.cpp"
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
"github.com/go-skynet/LocalAI/pkg/stablediffusion"
bloomz "github.com/go-skynet/bloomz.cpp"
bert "github.com/go-skynet/go-bert.cpp"
gpt2 "github.com/go-skynet/go-gpt2.cpp"
llama "github.com/go-skynet/go-llama.cpp"
"github.com/hashicorp/go-multierror"
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
"github.com/rs/zerolog/log"
)
const tokenizerSuffix = ".tokenizer.json"
const (
LlamaBackend = "llama"
BloomzBackend = "bloomz"
StarcoderBackend = "starcoder"
StableLMBackend = "stablelm"
DollyBackend = "dolly"
RedPajamaBackend = "redpajama"
GPTNeoXBackend = "gptneox"
ReplitBackend = "replit"
Gpt2Backend = "gpt2"
Gpt4AllLlamaBackend = "gpt4all-llama"
Gpt4AllMptBackend = "gpt4all-mpt"
Gpt4AllJBackend = "gpt4all-j"
BertEmbeddingsBackend = "bert-embeddings"
RwkvBackend = "rwkv"
WhisperBackend = "whisper"
StableDiffusionBackend = "stablediffusion"
)
var backends []string = []string{
LlamaBackend,
Gpt4AllLlamaBackend,
Gpt4AllMptBackend,
Gpt4AllJBackend,
Gpt2Backend,
WhisperBackend,
RwkvBackend,
BloomzBackend,
StableLMBackend,
DollyBackend,
RedPajamaBackend,
ReplitBackend,
GPTNeoXBackend,
BertEmbeddingsBackend,
StarcoderBackend,
}
var starCoder = func(modelFile string) (interface{}, error) {
return gpt2.NewStarcoder(modelFile)
}
var redPajama = func(modelFile string) (interface{}, error) {
return gpt2.NewRedPajama(modelFile)
}
var dolly = func(modelFile string) (interface{}, error) {
return gpt2.NewDolly(modelFile)
}
var gptNeoX = func(modelFile string) (interface{}, error) {
return gpt2.NewGPTNeoX(modelFile)
}
var replit = func(modelFile string) (interface{}, error) {
return gpt2.NewReplit(modelFile)
}
var stableLM = func(modelFile string) (interface{}, error) {
return gpt2.NewStableLM(modelFile)
}
var bertEmbeddings = func(modelFile string) (interface{}, error) {
return bert.New(modelFile)
}
var bloomzLM = func(modelFile string) (interface{}, error) {
return bloomz.New(modelFile)
}
var gpt2LM = func(modelFile string) (interface{}, error) {
return gpt2.New(modelFile)
}
var stableDiffusion = func(assetDir string) (interface{}, error) {
return stablediffusion.New(assetDir)
}
var whisperModel = func(modelFile string) (interface{}, error) {
return whisper.New(modelFile)
}
func llamaLM(opts ...llama.ModelOption) func(string) (interface{}, error) {
return func(s string) (interface{}, error) {
return llama.New(s, opts...)
}
}
func gpt4allLM(opts ...gpt4all.ModelOption) func(string) (interface{}, error) {
return func(s string) (interface{}, error) {
return gpt4all.New(s, opts...)
}
}
func rwkvLM(tokenFile string, threads uint32) func(string) (interface{}, error) {
return func(s string) (interface{}, error) {
log.Debug().Msgf("Loading RWKV", s, tokenFile)
model := rwkv.LoadFiles(s, tokenFile, threads)
if model == nil {
return nil, fmt.Errorf("could not load model")
}
return model, nil
}
}
func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
log.Debug().Msgf("Loading model %s from %s", backendString, modelFile)
switch strings.ToLower(backendString) {
case LlamaBackend:
return ml.LoadModel(modelFile, llamaLM(llamaOpts...))
case BloomzBackend:
return ml.LoadModel(modelFile, bloomzLM)
case StableLMBackend:
return ml.LoadModel(modelFile, stableLM)
case DollyBackend:
return ml.LoadModel(modelFile, dolly)
case RedPajamaBackend:
return ml.LoadModel(modelFile, redPajama)
case Gpt2Backend:
return ml.LoadModel(modelFile, gpt2LM)
case GPTNeoXBackend:
return ml.LoadModel(modelFile, gptNeoX)
case ReplitBackend:
return ml.LoadModel(modelFile, replit)
case StableDiffusionBackend:
return ml.LoadModel(modelFile, stableDiffusion)
case StarcoderBackend:
return ml.LoadModel(modelFile, starCoder)
case Gpt4AllLlamaBackend:
return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType)))
case Gpt4AllMptBackend:
return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.MPTType)))
case Gpt4AllJBackend:
return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.GPTJType)))
case BertEmbeddingsBackend:
return ml.LoadModel(modelFile, bertEmbeddings)
case RwkvBackend:
return ml.LoadModel(modelFile, rwkvLM(filepath.Join(ml.ModelPath, modelFile+tokenizerSuffix), threads))
case WhisperBackend:
return ml.LoadModel(modelFile, whisperModel)
default:
return nil, fmt.Errorf("backend unsupported: %s", backendString)
}
}
func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (interface{}, error) {
log.Debug().Msgf("Loading models greedly")
ml.mu.Lock()
m, exists := ml.models[modelFile]
if exists {
ml.mu.Unlock()
return m, nil
}
ml.mu.Unlock()
var err error
for _, b := range backends {
if b == BloomzBackend || b == WhisperBackend || b == RwkvBackend { // do not autoload bloomz/whisper/rwkv
continue
}
log.Debug().Msgf("[%s] Attempting to load", b)
model, modelerr := ml.BackendLoader(b, modelFile, llamaOpts, threads)
if modelerr == nil && model != nil {
log.Debug().Msgf("[%s] Loads OK", b)
return model, nil
} else if modelerr != nil {
err = multierror.Append(err, modelerr)
log.Debug().Msgf("[%s] Fails: %s", b, modelerr.Error())
}
}
return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
}

Some files were not shown because too many files have changed in this diff Show More