Compare commits

...

96 Commits

Author SHA1 Message Date
LocalAI [bot]
86f8d5b50a chore(model-gallery): ⬆️ update checksum (#3036)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-28 11:11:23 +00:00
LocalAI [bot]
d4a3872dd9 chore: ⬆️ Update ggerganov/llama.cpp (#3034)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-28 10:46:18 +00:00
Ettore Di Giacinto
d6a7a77f6b fix(gallery): do clear out errors once displayed (#3033)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-28 10:28:10 +02:00
Ettore Di Giacinto
2a839e1432 fix(gallery): do not attempt to delete duplicate files (#3031)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-28 10:27:56 +02:00
LocalAI [bot]
610e1c00c6 chore: ⬆️ Update ggerganov/whisper.cpp (#3029)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-27 21:52:57 +00:00
LocalAI [bot]
b1f93935be chore: ⬆️ Update ggerganov/llama.cpp (#3030)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-27 21:49:13 +00:00
Ettore Di Giacinto
d57acefed4 Update llama3-instruct.yaml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-27 15:30:13 +02:00
Ettore Di Giacinto
0a7e4c1b93 Update llama3.1-instruct-grammar.yaml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-27 15:30:01 +02:00
Ettore Di Giacinto
82cc81974f Update llama3.1-instruct.yaml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-27 15:29:50 +02:00
Ettore Di Giacinto
fe0d092f58 models(gallery): add llama3 with enforced functioncall with grammars (#3027)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 12:48:00 +02:00
Ettore Di Giacinto
0dd21f2b5e models(gallery): add lumimaid-8b (#3026)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 12:41:19 +02:00
Ettore Di Giacinto
f9fad3f4ee models: re-order
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 12:26:23 +02:00
Ettore Di Giacinto
7021c02d45 models(gallery): add openbuddy-llama3.1-8b-v22.1-131k (#3025)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 12:24:45 +02:00
Ettore Di Giacinto
7aa7f13095 models(gallery): add llama-3.1-8b-instruct-fei-v1-uncensored (#3024)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 12:22:30 +02:00
Ettore Di Giacinto
d59bcd539e models(gallery): add llama-3.1-70b-japanese-instruct-2407 (#3023)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 12:18:55 +02:00
Ettore Di Giacinto
d5a6c1e4f6 models(gallery): add meta-llama-3.1-8b-instruct-abliterated (#3022)
* models(gallery): add meta-llama-3.1-8b-instruct-abliterated

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Update gallery/index.yaml

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-27 11:00:21 +02:00
Ettore Di Giacinto
7ef8edda32 models(gallery): add darkidol-llama-3.1-8b-instruct-1.1-uncensored-iq… (#3021)
models(gallery): add darkidol-llama-3.1-8b-instruct-1.1-uncensored-iq-imatrix-request

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 10:59:06 +02:00
Ettore Di Giacinto
81c4b72258 models(gallery): add lumimaid-v0.2-12b (#3020)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 10:28:47 +02:00
Ettore Di Giacinto
fe4c8c8251 models(gallery): add llama3.1-8b-fireplace2 (#3018)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 10:24:56 +02:00
Ettore Di Giacinto
02d4eeffc8 models(gallery): add mistral-nemo (#3019)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 10:24:42 +02:00
LocalAI [bot]
80652abc9b chore: ⬆️ Update ggerganov/llama.cpp (#3016)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-26 23:26:28 +00:00
Ettore Di Giacinto
2169c3497d feat(grammar): add llama3.1 schema (#3015)
* wip

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* get rid of panics

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* expose it properly from the config

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Simplify

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* forgot to commit

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Remove focus on test

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Small fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-26 20:11:29 +02:00
Dave
fee52942eb fix: PR title tag for checksum checker script workflow (#3014)
* fix PR title tag for checksum checker script workflow

Signed-off-by: Dave Lee <dave@gray101.com>

* Update .github/workflows/checksum_checker.yaml

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

---------

Signed-off-by: Dave Lee <dave@gray101.com>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-26 08:46:57 +02:00
LocalAI [bot]
868182bc38 chore: ⬆️ Update ggerganov/llama.cpp (#3012)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-25 22:28:34 +00:00
LocalAI [bot]
ac37b47170 chore: models(gallery): ⬆️ update checksum (#3013)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-25 22:07:10 +00:00
Ettore Di Giacinto
43f49533e8 chore: add function calling template for llama 3.1 models (#3010)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-25 19:37:35 +02:00
Ettore Di Giacinto
3379c3d98c models(gallery): add stheno
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-25 19:37:15 +02:00
Ettore Di Giacinto
d605df471c models(gallery): add gemmoy (#3009)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-25 12:31:17 +02:00
Ettore Di Giacinto
8bf4ccf3ed Update index.yaml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-25 12:23:04 +02:00
Ettore Di Giacinto
392cf15877 models(gallery): add darkidol llama3.1 (#3008)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-25 12:22:09 +02:00
Ettore Di Giacinto
5eda7f578d refactor: break down json grammar parser in different files (#3004)
* refactor: break down json grammar parser in different files

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: patch to `refactor_grammars` - propagate errors (#3006)

propagate errors around

Signed-off-by: Dave Lee <dave@gray101.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Dave Lee <dave@gray101.com>
Co-authored-by: Dave <dave@gray101.com>
2024-07-25 08:41:00 +02:00
LocalAI [bot]
717cc6fe1a chore: ⬆️ Update ggerganov/llama.cpp (#3003)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-24 22:47:38 +00:00
LocalAI [bot]
9031d2b9eb docs: ⬆️ update docs version mudler/LocalAI (#3002)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-24 22:32:10 +00:00
Ettore Di Giacinto
4a69ef3052 models(gallery): add llama3.1-claude (#3005)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-24 23:40:08 +02:00
LocalAI [bot]
80ae919dbe chore: ⬆️ Update ggerganov/llama.cpp (#2995)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-24 15:37:08 +02:00
Ettore Di Giacinto
0802895cd2 Update index.yaml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-24 14:32:54 +02:00
Ettore Di Giacinto
9fee46207a models(gallery): add llama3.1 70b and 8b (#3000)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-24 12:48:14 +02:00
Ettore Di Giacinto
bd900945f7 fix(llama.cpp): do not set anymore lora_base (#2999)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-24 12:35:52 +02:00
Ettore Di Giacinto
89484efaed docs: update distributed_inferencing.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-24 12:27:49 +02:00
Ettore Di Giacinto
a9757fb057 fix(cuda): downgrade to 12.0 to increase compatibility range (#2994)
* fix(cuda): downgrade to 12.0 to increase compatibility range

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* improve messaging

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-23 23:35:31 +02:00
dependabot[bot]
1c96e0b79e chore(deps): Bump langchain from 0.2.8 to 0.2.10 in /examples/langchain-chroma (#2987)
chore(deps): Bump langchain in /examples/langchain-chroma

Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.2.8 to 0.2.10.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.2.8...langchain==0.2.10)

---
updated-dependencies:
- dependency-name: langchain
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 14:34:07 +00:00
dependabot[bot]
c7f0743f48 chore(deps): Bump openai from 1.35.13 to 1.37.0 in /examples/langchain-chroma (#2988)
chore(deps): Bump openai in /examples/langchain-chroma

Bumps [openai](https://github.com/openai/openai-python) from 1.35.13 to 1.37.0.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.35.13...v1.37.0)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 12:26:46 +00:00
dependabot[bot]
ead69a116a chore(deps): Bump langchain from 0.2.8 to 0.2.10 in /examples/functions (#2975)
Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.2.8 to 0.2.10.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.2.8...langchain==0.2.10)

---
updated-dependencies:
- dependency-name: langchain
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 11:51:05 +00:00
dependabot[bot]
0314b37cd8 chore(deps): Bump openai from 1.35.13 to 1.37.0 in /examples/langchain/langchainpy-localai-example (#2961)
chore(deps): Bump openai

Bumps [openai](https://github.com/openai/openai-python) from 1.35.13 to 1.37.0.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.35.13...v1.37.0)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 11:01:00 +00:00
dependabot[bot]
703cd08f01 chore(deps): Bump langchain-community from 0.2.7 to 0.2.9 in /examples/langchain/langchainpy-localai-example (#2960)
chore(deps): Bump langchain-community

Bumps [langchain-community](https://github.com/langchain-ai/langchain) from 0.2.7 to 0.2.9.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain-community==0.2.7...langchain-community==0.2.9)

---
updated-dependencies:
- dependency-name: langchain-community
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 11:00:46 +00:00
LocalAI [bot]
b53947a5bb chore: ⬆️ Update ggerganov/llama.cpp (#2992)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-23 10:33:42 +00:00
dependabot[bot]
39de3cf21d chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/mamba (#2989)
Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 10:15:55 +00:00
dependabot[bot]
e3cd11cc0a chore(deps): Bump llama-index from 0.10.55 to 0.10.56 in /examples/langchain-chroma (#2986)
chore(deps): Bump llama-index in /examples/langchain-chroma

Bumps [llama-index](https://github.com/run-llama/llama_index) from 0.10.55 to 0.10.56.
- [Release notes](https://github.com/run-llama/llama_index/releases)
- [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md)
- [Commits](https://github.com/run-llama/llama_index/compare/v0.10.55...v0.10.56)

---
updated-dependencies:
- dependency-name: llama-index
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 09:28:33 +00:00
Ettore Di Giacinto
5e5037f10d feat(p2p): warn the user to start with --p2p (#2993)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-23 10:42:51 +02:00
dependabot[bot]
9c331239d9 chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/autogptq (#2984)
chore(deps): Bump grpcio in /backend/python/autogptq

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 08:16:38 +00:00
dependabot[bot]
36789e9ead chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/transformers-musicgen (#2990)
chore(deps): Bump grpcio in /backend/python/transformers-musicgen

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 07:34:26 +00:00
dependabot[bot]
6ec593c237 chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/vall-e-x (#2981)
chore(deps): Bump grpcio in /backend/python/vall-e-x

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 06:50:45 +00:00
dependabot[bot]
bbb1dc2ae0 chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/parler-tts (#2982)
chore(deps): Bump grpcio in /backend/python/parler-tts

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 06:33:45 +00:00
dependabot[bot]
385d8dc29b chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/coqui (#2980)
Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 06:15:50 +00:00
dependabot[bot]
fb574434a4 chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/rerankers (#2974)
chore(deps): Bump grpcio in /backend/python/rerankers

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 04:40:27 +00:00
dependabot[bot]
7ab3217df0 chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/exllama2 (#2971)
chore(deps): Bump grpcio in /backend/python/exllama2

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 04:03:28 +00:00
dependabot[bot]
2f9f04b260 chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/diffusers (#2969)
chore(deps): Bump grpcio in /backend/python/diffusers

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 03:47:26 +00:00
dependabot[bot]
8385eb2a59 chore(deps): Bump openai from 1.35.13 to 1.37.0 in /examples/functions (#2973)
Bumps [openai](https://github.com/openai/openai-python) from 1.35.13 to 1.37.0.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.35.13...v1.37.0)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 03:42:48 +00:00
dependabot[bot]
99324eeef0 chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/transformers (#2970)
chore(deps): Bump grpcio in /backend/python/transformers

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 02:39:44 +00:00
dependabot[bot]
ede352256b chore(deps): Bump weaviate-client from 4.6.5 to 4.6.7 in /examples/chainlit (#2965)
chore(deps): Bump weaviate-client in /examples/chainlit

Bumps [weaviate-client](https://github.com/weaviate/weaviate-python-client) from 4.6.5 to 4.6.7.
- [Release notes](https://github.com/weaviate/weaviate-python-client/releases)
- [Changelog](https://github.com/weaviate/weaviate-python-client/blob/main/docs/changelog.rst)
- [Commits](https://github.com/weaviate/weaviate-python-client/compare/v4.6.5...v4.6.7)

---
updated-dependencies:
- dependency-name: weaviate-client
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 01:17:19 +00:00
dependabot[bot]
b555b64616 chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/common/template (#2963)
chore(deps): Bump grpcio in /backend/python/common/template

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 01:07:42 +00:00
dependabot[bot]
824cc816ea chore(deps): Bump llama-index from 0.10.55 to 0.10.56 in /examples/chainlit (#2966)
chore(deps): Bump llama-index in /examples/chainlit

Bumps [llama-index](https://github.com/run-llama/llama_index) from 0.10.55 to 0.10.56.
- [Release notes](https://github.com/run-llama/llama_index/releases)
- [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md)
- [Commits](https://github.com/run-llama/llama_index/compare/v0.10.55...v0.10.56)

---
updated-dependencies:
- dependency-name: llama-index
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 00:58:30 +00:00
dependabot[bot]
a1bc2e9771 chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/vllm (#2964)
Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-23 00:08:22 +00:00
dependabot[bot]
9fc09b32cf chore(deps): Bump sqlalchemy from 2.0.30 to 2.0.31 in /examples/langchain/langchainpy-localai-example (#2957)
chore(deps): Bump sqlalchemy

Bumps [sqlalchemy](https://github.com/sqlalchemy/sqlalchemy) from 2.0.30 to 2.0.31.
- [Release notes](https://github.com/sqlalchemy/sqlalchemy/releases)
- [Changelog](https://github.com/sqlalchemy/sqlalchemy/blob/main/CHANGES.rst)
- [Commits](https://github.com/sqlalchemy/sqlalchemy/commits)

---
updated-dependencies:
- dependency-name: sqlalchemy
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-22 23:50:41 +00:00
dependabot[bot]
8ec7a0a407 chore(deps): Bump numpy from 1.26.4 to 2.0.1 in /examples/langchain/langchainpy-localai-example (#2958)
chore(deps): Bump numpy

Bumps [numpy](https://github.com/numpy/numpy) from 1.26.4 to 2.0.1.
- [Release notes](https://github.com/numpy/numpy/releases)
- [Changelog](https://github.com/numpy/numpy/blob/main/doc/RELEASE_WALKTHROUGH.rst)
- [Commits](https://github.com/numpy/numpy/compare/v1.26.4...v2.0.1)

---
updated-dependencies:
- dependency-name: numpy
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-22 22:49:39 +00:00
dependabot[bot]
d3166e8571 chore(deps): Bump langchain from 0.2.8 to 0.2.10 in /examples/langchain/langchainpy-localai-example (#2959)
chore(deps): Bump langchain

Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.2.8 to 0.2.10.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.2.8...langchain==0.2.10)

---
updated-dependencies:
- dependency-name: langchain
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-22 22:49:29 +00:00
dependabot[bot]
2966979161 chore(deps): Bump docs/themes/hugo-theme-relearn from 1b2e139 to 7aec99b (#2952)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `1b2e139` to `7aec99b`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](1b2e139512...7aec99b38d)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-22 22:26:35 +00:00
dependabot[bot]
f4ed47bf95 chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/bark (#2951)
Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-22 21:47:54 +00:00
dependabot[bot]
1a75546b27 chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/sentencetransformers (#2955)
chore(deps): Bump grpcio in /backend/python/sentencetransformers

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-22 21:41:06 +00:00
dependabot[bot]
a6b92af875 chore(deps): Bump grpcio from 1.64.1 to 1.65.1 in /backend/python/openvoice (#2956)
chore(deps): Bump grpcio in /backend/python/openvoice

Bumps [grpcio](https://github.com/grpc/grpc) from 1.64.1 to 1.65.1.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.64.1...v1.65.1)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-22 21:34:12 +00:00
LocalAI [bot]
3dc601c470 chore: ⬆️ Update ggerganov/llama.cpp (#2943)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-22 16:04:41 +00:00
Ettore Di Giacinto
153e977155 Update distributed_inferencing.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-22 17:35:10 +02:00
fakezeta
7d61de63ae fix: pin setuptools 69.5.1 (#2949)
pin setuptools 69.5.1
2024-07-22 15:40:34 +02:00
Ettore Di Giacinto
bcd9e153ba ci(Makefile): reduce binary size by compressing (#2947)
Makefile: try to reduce binary size

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-22 15:39:57 +02:00
Ettore Di Giacinto
19282af059 models(gallery): add calme-2.4-llama3-70b (#2942)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-21 22:01:15 +02:00
Ettore Di Giacinto
9c0c11e8a0 models(gallery): add StellarDong-72b (#2941)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-21 21:57:30 +02:00
Ettore Di Giacinto
3f7eddb039 models(gallery): add calme-2.8-qwen2-7b (#2940)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-21 21:51:52 +02:00
Ettore Di Giacinto
77ad49333a models(gallery): add calme-2.3-phi3-4b (#2939)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-21 21:45:04 +02:00
Ettore Di Giacinto
ef5e8326c8 models(gallery): add celestev1.2 (#2937)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-21 10:31:44 +02:00
LocalAI [bot]
86509e6002 chore: ⬆️ Update ggerganov/llama.cpp (#2936)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-20 21:35:21 +00:00
LocalAI [bot]
8667a67695 docs: ⬆️ update docs version mudler/LocalAI (#2935)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-20 21:33:54 +00:00
Ettore Di Giacinto
f505d7ab3f models(gallery): add archangel_sft_pythia2-8b (#2933)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-20 16:17:34 +02:00
Ettore Di Giacinto
450dbed820 models(gallery): add suzume-orpo (#2932)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-20 16:16:29 +02:00
Ettore Di Giacinto
46b86f7e6e models(gallery): add tulu 8b and 70b (#2931)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-20 16:03:44 +02:00
Ettore Di Giacinto
0ee1f8c1cf ci(Makefile): enable p2p on cross-arm64 builds (#2928)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-20 10:43:34 +02:00
Ettore Di Giacinto
87bd831aba docs: add federation (#2929)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-20 10:43:18 +02:00
Ettore Di Giacinto
f9f83791d1 ci(release): run also on tags
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-20 09:15:48 +02:00
LocalAI [bot]
e75f73bf73 chore: ⬆️ Update ggerganov/llama.cpp (#2927)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-19 22:10:26 +00:00
LocalAI [bot]
bd277162c7 docs: ⬆️ update docs version mudler/LocalAI (#2926)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-19 21:56:58 +00:00
Ettore Di Giacinto
f19ee465d2 ci: disable comment-pr until it's fixed
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-19 19:00:36 +02:00
Ettore Di Giacinto
7b85ff7280 models(gallery): add celestev1 (#2925)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-19 18:43:30 +02:00
Ettore Di Giacinto
134cb993c2 models(gallery): add emo-2b (#2924)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-19 18:36:11 +02:00
Ettore Di Giacinto
2cf28f3c01 models(gallery): add gemma-2b-translation-v0.150 (#2923)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-19 18:31:27 +02:00
Ettore Di Giacinto
18c0f4718d models(gallery): add einstein-v4-7b (#2922)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-19 15:20:15 +02:00
Ettore Di Giacinto
f878b63ee4 models(gallery): add qwen2-wukong-7b (#2921)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-19 09:48:05 +02:00
Ettore Di Giacinto
6eaa01db15 models(gallery): add phillama-3.8b-v0.1 (#2920)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-19 09:42:45 +02:00
62 changed files with 2008 additions and 555 deletions

View File

@@ -41,7 +41,7 @@ jobs:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
title: 'models(gallery): :arrow_up: update checksum'
title: 'chore(model-gallery): :arrow_up: update checksum'
branch: "update/checksum"
body: Updating checksums in gallery/index.yaml
signoff: true

View File

@@ -47,7 +47,7 @@ jobs:
# makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -120,7 +120,7 @@ jobs:
# makeflags: "--jobs=3 --output-sync=target"
# - build-type: 'cublas'
# cuda-major-version: "12"
# cuda-minor-version: "4"
# cuda-minor-version: "0"
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-cublas-cuda12-ffmpeg-core'

View File

@@ -75,7 +75,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12'
@@ -100,7 +100,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -285,7 +285,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-core'
@@ -307,7 +307,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'

View File

@@ -4,6 +4,8 @@ on:
push:
branches:
- master
tags:
- 'v*'
pull_request:
env:
@@ -29,11 +31,10 @@ jobs:
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
- name: Install CUDA Dependencies
run: |
@@ -149,7 +150,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake libgmock-dev
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
- name: Intel Dependencies
run: |
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -250,7 +251,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- name: Build stablediffusion

View File

@@ -70,7 +70,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential curl ffmpeg
sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
sudo apt-get install -y libgmock-dev
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \

View File

@@ -24,7 +24,7 @@ RUN apt-get update && \
cmake \
curl \
git \
unzip && \
unzip upx-ucl && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
@@ -99,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=4
ARG CUDA_MINOR_VERSION=0
ENV BUILD_TYPE=${BUILD_TYPE}

View File

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=705b7ecf60e667ced57c15d67aa86865e3cc7aa7
CPPLLAMA_VERSION?=4730faca618ff9cee0780580145e3cbe86f24876
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -20,7 +20,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=f68298ce06ca3edd6e6f3f21c3d0bb5f073942c3
WHISPER_CPP_VERSION?=6739eb83c3ca5cf40d24c6fe8442a761a1eb6248
# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -58,7 +58,7 @@ RANDOM := $(shell bash -c 'echo $$RANDOM')
VERSION?=$(shell git describe --always --tags || echo "dev" )
# go tool nm ./local-ai | grep Commit
LD_FLAGS?=
LD_FLAGS?=-s -w
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
@@ -72,6 +72,14 @@ WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)
UPX?=
# check if upx exists
ifeq (, $(shell which upx))
UPX=
else
UPX=$(shell which upx)
endif
# Default Docker bridge IP
E2E_BRIDGE_IP?=172.17.0.1
@@ -377,6 +385,7 @@ build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
ifneq ($(BACKEND_LIBS),)
$(MAKE) backend-assets/lib
cp -f $(BACKEND_LIBS) backend-assets/lib/
@@ -421,7 +430,7 @@ else
endif
dist-cross-linux-arm64:
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" GO_TAGS="p2p" \
STATIC=true $(MAKE) build
mkdir -p release
# if BUILD_ID is empty, then we don't append it to the binary name
@@ -471,7 +480,7 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .
run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
@@ -733,13 +742,22 @@ backend-assets/grpc: protogen-go replace
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/bert-embeddings
endif
backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/gpt4all
endif
backend-assets/grpc/huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/huggingface
endif
backend/cpp/llama/llama.cpp:
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
@@ -765,6 +783,9 @@ else
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
endif
ifneq ($(UPX),)
$(UPX) backend/cpp/${VARIANT}/grpc-server
endif
# This target is for manually building a variant with-auto detected flags
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
@@ -837,33 +858,57 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
mkdir -p backend-assets/util/
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
ifneq ($(UPX),)
$(UPX) backend-assets/util/llama-cpp-rpc-server
endif
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/llama-ggml
endif
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/piper
endif
backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/rwkv
endif
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/stablediffusion
endif
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/tinydream
endif
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/whisper
endif
backend-assets/grpc/local-store: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/local-store
endif
grpcs: prepare $(GRPC_BACKENDS)

View File

@@ -2259,7 +2259,6 @@ static void params_parse(const backend::ModelOptions* request,
// get the directory of modelfile
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
params.lora_base = model_dir + "/"+request->lorabase();
}
params.use_mlock = request->mlock();
params.use_mmap = request->mmap();

View File

@@ -1,6 +1,6 @@
accelerate
auto-gptq==0.7.1
grpcio==1.65.0
grpcio==1.65.1
protobuf
torch
certifi

View File

@@ -1,6 +1,6 @@
accelerate
bark==0.1.5
grpcio==1.65.0
grpcio==1.65.1
protobuf
certifi
transformers

View File

@@ -1,2 +1,2 @@
grpcio==1.65.0
grpcio==1.65.1
protobuf

View File

@@ -1,6 +1,6 @@
accelerate
TTS==0.22.0
grpcio==1.65.0
grpcio==1.65.1
protobuf
certifi
transformers

View File

@@ -3,7 +3,7 @@ accelerate
compel
peft
diffusers
grpcio==1.65.0
grpcio==1.65.1
opencv-python
pillow
protobuf

View File

@@ -1,5 +1,5 @@
accelerate
grpcio==1.65.0
grpcio==1.65.1
protobuf
certifi
torch

View File

@@ -1,6 +1,6 @@
causal-conv1d==1.4.0
mamba-ssm==2.2.2
grpcio==1.65.0
grpcio==1.65.1
protobuf
certifi
transformers

View File

@@ -2,7 +2,7 @@
intel-extension-for-pytorch
torch
optimum[openvino]
grpcio==1.64.1
grpcio==1.65.1
protobuf
librosa==0.9.1
faster-whisper==1.0.3

View File

@@ -1,4 +1,4 @@
grpcio==1.65.0
grpcio==1.65.1
protobuf
librosa
faster-whisper

View File

@@ -1,5 +1,5 @@
accelerate
grpcio==1.65.0
grpcio==1.65.1
protobuf
torch
git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16

View File

@@ -1,6 +1,6 @@
accelerate
rerankers[transformers]
grpcio==1.65.0
grpcio==1.65.1
protobuf
certifi
transformers

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate
sentence-transformers==3.0.1
transformers
grpcio==1.65.0
grpcio==1.65.1
protobuf
certifi

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate
transformers
grpcio==1.65.0
grpcio==1.65.1
protobuf
torch
scipy==1.14.0

View File

@@ -2,4 +2,3 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,9 +1,9 @@
accelerate
transformers
grpcio==1.65.0
grpcio==1.65.1
protobuf
torch
certifi
intel-extension-for-transformers
bitsandbytes
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,4 +1,4 @@
accelerate
grpcio==1.65.0
grpcio==1.65.1
protobuf
certifi

View File

@@ -1,6 +1,6 @@
accelerate
vllm
grpcio==1.65.0
grpcio==1.65.1
protobuf
certifi
transformers

View File

@@ -204,35 +204,34 @@ func DeleteModelFromSystem(basePath string, name string, additionalFiles []strin
log.Error().Err(err).Msgf("failed to read gallery file %s", configFile)
}
var filesToRemove []string
// Remove additional files
if galleryconfig != nil {
for _, f := range galleryconfig.Files {
fullPath := filepath.Join(basePath, f.Filename)
log.Debug().Msgf("Removing file %s", fullPath)
if e := os.Remove(fullPath); e != nil {
err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f.Filename, e))
}
filesToRemove = append(filesToRemove, fullPath)
}
}
for _, f := range additionalFiles {
fullPath := filepath.Join(filepath.Join(basePath, f))
log.Debug().Msgf("Removing additional file %s", fullPath)
if e := os.Remove(fullPath); e != nil {
filesToRemove = append(filesToRemove, fullPath)
}
filesToRemove = append(filesToRemove, configFile)
filesToRemove = append(filesToRemove, galleryFile)
// skip duplicates
filesToRemove = utils.Unique(filesToRemove)
// Removing files
for _, f := range filesToRemove {
if e := os.Remove(f); e != nil {
err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f, e))
}
}
log.Debug().Msgf("Removing model config file %s", configFile)
// Delete the model config file
if e := os.Remove(configFile); e != nil {
err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", configFile, e))
}
// Delete gallery config file
os.Remove(galleryFile)
return err
}

View File

@@ -9,7 +9,6 @@ import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/xsync"
)
const (
@@ -372,7 +371,12 @@ func dropBadChars(s string) string {
return strings.ReplaceAll(s, "@", "__")
}
func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[string, string], galleryService *services.GalleryService) string {
type ProcessTracker interface {
Exists(string) bool
Get(string) string
}
func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) string {
modelsElements := []elem.Node{}
descriptionDiv := func(m *gallery.GalleryModel) elem.Node {
return elem.Div(
@@ -396,7 +400,7 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri
actionDiv := func(m *gallery.GalleryModel) elem.Node {
galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name)
currentlyProcessing := processing.Exists(galleryID)
currentlyProcessing := processTracker.Exists(galleryID)
jobID := ""
isDeletionOp := false
if currentlyProcessing {
@@ -404,7 +408,7 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri
if status != nil && status.Deletion {
isDeletionOp = true
}
jobID = processing.Get(galleryID)
jobID = processTracker.Get(galleryID)
// TODO:
// case not handled, if status == nil : "Waiting"
}

View File

@@ -226,9 +226,15 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
// Update input grammar
jsStruct := funcs.ToJSONStructure(config.FunctionsConfig.FunctionNameKey, config.FunctionsConfig.FunctionNameKey)
config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
g, err := jsStruct.Grammar(config.FunctionsConfig.GrammarOptions()...)
if err == nil {
config.Grammar = g
}
case input.JSONFunctionGrammarObject != nil:
config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
g, err := input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarOptions()...)
if err == nil {
config.Grammar = g
}
default:
// Force picking one of the functions by the request
if config.FunctionToCall() != "" {

View File

@@ -21,6 +21,40 @@ import (
"github.com/google/uuid"
)
type modelOpCache struct {
status *xsync.SyncedMap[string, string]
}
func NewModelOpCache() *modelOpCache {
return &modelOpCache{
status: xsync.NewSyncedMap[string, string](),
}
}
func (m *modelOpCache) Set(key string, value string) {
m.status.Set(key, value)
}
func (m *modelOpCache) Get(key string) string {
return m.status.Get(key)
}
func (m *modelOpCache) DeleteUUID(uuid string) {
for _, k := range m.status.Keys() {
if m.status.Get(k) == uuid {
m.status.Delete(k)
}
}
}
func (m *modelOpCache) Map() map[string]string {
return m.status.Map()
}
func (m *modelOpCache) Exists(key string) bool {
return m.status.Exists(key)
}
func RegisterUIRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
@@ -29,7 +63,7 @@ func RegisterUIRoutes(app *fiber.App,
auth func(*fiber.Ctx) error) {
// keeps the state of models that are being installed from the UI
var processingModels = xsync.NewSyncedMap[string, string]()
var processingModels = NewModelOpCache()
// modelStatus returns the current status of the models being processed (installation or deletion)
// it is called asynchronously from the UI
@@ -232,6 +266,8 @@ func RegisterUIRoutes(app *fiber.App,
return c.SendString(elements.ProgressBar("100"))
}
if status.Error != nil {
// TODO: instead of deleting the job, we should keep it in the cache and make it dismissable
processingModels.DeleteUUID(jobUID)
return c.SendString(elements.ErrorProgress(status.Error.Error(), status.GalleryModelName))
}
@@ -246,12 +282,7 @@ func RegisterUIRoutes(app *fiber.App,
status := galleryService.GetStatus(jobUID)
galleryID := ""
for _, k := range processingModels.Keys() {
if processingModels.Get(k) == jobUID {
galleryID = k
processingModels.Delete(k)
}
}
processingModels.DeleteUUID(jobUID)
if galleryID == "" {
log.Debug().Msgf("no processing model found for job : %+v\n", jobUID)
}

View File

@@ -16,7 +16,16 @@
</a>
</h2>
<h5 class="mb-4 text-justify">LocalAI uses P2P technologies to enable distribution of work between peers. It is possible to share an instance with Federation and/or split the weights of a model across peers (only available with llama.cpp models). You can now share computational resources between your devices or your friends!</h5>
<!-- Warning box if p2p token is empty and p2p is enabled -->
{{ if and .IsP2PEnabled (eq .P2PToken "") }}
<div class="bg-red-500 p-4 rounded-lg shadow-lg mb-12 text-left">
<p class="text-xl font-semibold text-white"> <i class="fa-solid fa-exclamation-triangle"></i> Warning: P2P mode is disabled or no token was specified</p>
<p class="mb-4">You have to enable P2P mode by starting LocalAI with <code>--p2p</code>. Please restart the server with <code>--p2p</code> to generate a new token automatically that can be used to automatically discover other nodes. If you already have a token specify it with <code>export TOKEN=".."</code> <a href="https://localai.io/features/distribute/" target="_blank">
Check out the documentation for more information.
</a> </p>
</div>
{{ else }}
<!-- Federation Box -->
<div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">
@@ -128,7 +137,8 @@
</div>
</div>
</div>
<!-- Llama.cpp Box END -->
<!-- Llama.cpp Box END -->
{{ end }}
</div>
</div>

View File

@@ -5,17 +5,65 @@ weight = 15
url = "/features/distribute/"
+++
This functionality enables LocalAI to distribute inference requests across multiple worker nodes, improving efficiency and performance. Nodes are automatically discovered and connect via p2p by using a shared token, which ensures that communication between the nodes of the network is secure and private.
LocalAI supports two modes of distributed inferencing via p2p:
- **Federated Mode**: Requests are distributed within the cluster and routed to a single worker node in the network based on the load balancer's decision.
- **Worker Mode** (aka "model sharding" or "splitting weights"): Requests are processed by all the workers, each of which contributes to the final inference result (the model weights are split across the workers).
## Usage
Starting LocalAI with `--p2p` generates a shared token for connecting multiple instances: that's all you need to create AI clusters, eliminating the need for intricate network setups.
Simply navigate to the "Swarm" section in the WebUI and follow the on-screen instructions.
For fully shared instances, start LocalAI with `--p2p --federated` and follow the Swarm section's guidance. This feature is still experimental and offers a tech-preview-quality experience.
### Federated mode
Federated mode allows you to launch multiple LocalAI instances and connect them together in a federated network. This mode is useful when you want to distribute the inference load across multiple nodes while keeping a single point of entry for the API. In the Swarm section of the WebUI, you can see the instructions for connecting multiple instances together.
![346663124-1d2324fd-8b55-4fa2-9856-721a467969c2](https://github.com/user-attachments/assets/19ebd44a-20ff-412c-b92f-cfb8efbe4b21)
To start a LocalAI server in federated mode, run:
```bash
local-ai run --p2p --federated
```
This will generate a token that you can use to connect other LocalAI instances to the network, or that others can use to join it. If you already have a token, you can specify it using the `TOKEN` environment variable.
To start a load balanced server that routes the requests to the network, run with the `TOKEN`:
```bash
local-ai federated
```
To see all the available options, run `local-ai federated --help`.
The instructions are displayed in the "Swarm" section of the WebUI, guiding you through the process of connecting multiple instances.
### Workers mode
{{% alert note %}}
This feature is available exclusively with llama-cpp compatible models.
This feature was introduced in [LocalAI pull request #2324](https://github.com/mudler/LocalAI/pull/2324) and is based on the upstream work in [llama.cpp pull request #6829](https://github.com/ggerganov/llama.cpp/pull/6829).
{{% /alert %}}
This functionality enables LocalAI to distribute inference requests across multiple worker nodes, improving efficiency and performance.
To connect multiple workers to a single LocalAI instance, first start a server in p2p mode:
## Usage
```bash
local-ai run --p2p
```
### Starting Workers
Then navigate to the "Swarm" section of the WebUI to see the instructions for connecting multiple workers to the network.
![346663124-1d2324fd-8b55-4fa2-9856-721a467969c2](https://github.com/user-attachments/assets/b8cadddf-a467-49cf-a1ed-8850de95366d)
### Without P2P
To start workers for distributing the computational load, run:
@@ -23,48 +71,27 @@ To start workers for distributing the computational load, run:
local-ai worker llama-cpp-rpc <listening_address> <listening_port>
```
Alternatively, you can build the RPC server following the llama.cpp [README](https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is compatible with LocalAI.
### Starting LocalAI
To start the LocalAI server, which handles API requests, specify the worker addresses using the `LLAMACPP_GRPC_SERVERS` environment variable:
And you can specify the address of the workers when starting LocalAI with the `LLAMACPP_GRPC_SERVERS` environment variable:
```bash
LLAMACPP_GRPC_SERVERS="address1:port,address2:port" local-ai run
```
The workload on the LocalAI server will then be distributed across the specified nodes.
## Peer-to-Peer Networking
Alternatively, you can build the RPC workers/server following the llama.cpp [README](https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is compatible with LocalAI.
![output](https://github.com/mudler/LocalAI/assets/2420543/8ca277cf-c208-4562-8929-808b2324b584)
## Manual example (worker)
Workers can also connect to each other in a peer-to-peer network, distributing the workload in a decentralized manner.
A shared token between the server and the workers is required for communication within the peer-to-peer network. This feature supports both local networks (using mDNS discovery) and DHT for communication across different networks.
The token is automatically generated when starting the server with the `--p2p` flag. Workers can be started with the token using `local-ai worker p2p-llama-cpp-rpc` and specifying the token via the environment variable `TOKEN` or with the `--token` argument.
A network is established between the server and workers using DHT and mDNS discovery protocols. The llama.cpp RPC server is automatically started and exposed to the peer-to-peer network, allowing the API server to connect.
When the HTTP server starts, it discovers workers in the network and creates port forwards to the local service. Llama.cpp is configured to use these services. For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343).
### Usage
Use the WebUI to guide you through the process of starting new workers. The following example shows the manual steps to highlight how the process works.
1. Start the server with `--p2p`:
```bash
./local-ai run --p2p
# 1:02AM INF loading environment variables from file envFile=.env
# 1:02AM INF Setting logging to info
# 1:02AM INF P2P mode enabled
# 1:02AM INF No token provided, generating one
# 1:02AM INF Generated Token:
# XXXXXXXXXXX
# 1:02AM INF Press a button to proceed
# Get the token in the Swarm section of the WebUI
```
Copy the displayed token and press Enter.
Copy the token from the WebUI or via API call (e.g., `curl http://localhost:8000/p2p/token`) and save it for later use.
To reuse the same token later, restart the server with `--p2ptoken` or `P2P_TOKEN`.
@@ -93,11 +120,7 @@ The server logs should indicate that new workers are being discovered.
3. Start inference as usual on the server initiated in step 1.
## Notes
- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
- Only a single model is supported currently.
- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
![output](https://github.com/mudler/LocalAI/assets/2420543/8ca277cf-c208-4562-8929-808b2324b584)
## Environment Variables
@@ -109,3 +132,20 @@ There are options that can be tweaked or parameters that can be set using enviro
| **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and enable p2p layer to be local only (mDNS) |
| **LOCALAI_P2P_DISABLE_LIMITS** | Set to "true" to disable connection limits and resources management |
| **LOCALAI_P2P_TOKEN** | Set the token for the p2p network |
## Architecture
LocalAI uses https://github.com/libp2p/go-libp2p under the hood, the same project powering IPFS. Unlike other frameworks, LocalAI does not rely on a single master server for peer-to-peer communication; instead it uses pub/sub gossip and ledger functionalities to achieve consensus across the different peers.
[EdgeVPN](https://github.com/mudler/edgevpn) is used as a library to establish the network and expose the ledger functionality under a shared token, which eases automatic discovery and keeps the peer-to-peer networks separated and private.
When running in worker mode, the model weights are split across the workers proportionally to their available memory; when in federated mode, requests are distributed across the nodes, each of which has to load the model fully.
## Notes
- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
- Only a single model is supported currently.
- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343)

View File

@@ -1,3 +1,3 @@
{
"version": "v2.18.1"
"version": "v2.19.2"
}

View File

@@ -1,6 +1,6 @@
llama_index==0.10.55
llama_index==0.10.56
requests==2.32.3
weaviate_client==4.6.5
weaviate_client==4.6.7
transformers
torch
chainlit

View File

@@ -1,2 +1,2 @@
langchain==0.2.8
openai==1.35.13
langchain==0.2.10
openai==1.37.0

View File

@@ -1,4 +1,4 @@
langchain==0.2.8
openai==1.35.13
langchain==0.2.10
openai==1.37.0
chromadb==0.5.4
llama-index==0.10.55
llama-index==0.10.56

View File

@@ -10,21 +10,21 @@ debugpy==1.8.2
frozenlist==1.4.1
greenlet==3.0.3
idna==3.7
langchain==0.2.8
langchain-community==0.2.7
langchain==0.2.10
langchain-community==0.2.9
marshmallow==3.21.3
marshmallow-enum==1.5.1
multidict==6.0.5
mypy-extensions==1.0.0
numexpr==2.10.1
numpy==1.26.4
openai==1.35.13
numpy==2.0.1
openai==1.37.0
openapi-schema-pydantic==1.2.4
packaging>=23.2
pydantic==2.8.2
PyYAML==6.0.1
requests==2.32.3
SQLAlchemy==2.0.30
SQLAlchemy==2.0.31
tenacity==8.5.0
tqdm==4.66.4
typing-inspect==0.9.0

View File

@@ -1,6 +1,244 @@
---
## Deepseek
## LLama3.1
- &llama31
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
name: "meta-llama-3.1-8b-instruct"
license: llama3.1
description: |
The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.
Model developer: Meta
Model Architecture: Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
urls:
- https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
- https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF
tags:
- llm
- gguf
- gpu
- cpu
- llama3.1
overrides:
parameters:
model: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
files:
- filename: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
sha256: c2f17f44af962660d1ad4cb1af91a731f219f3b326c2b14441f9df1f347f2815
uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
- !!merge <<: *llama31
name: "meta-llama-3.1-70b-instruct"
urls:
- https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct
- https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF
overrides:
parameters:
model: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
files:
- filename: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
sha256: 3f16ab17da4521fe3ed7c5d7beed960d3fe7b5b64421ee9650aa53d6b649ccab
uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
- !!merge <<: *llama31
name: "meta-llama-3.1-8b-instruct:grammar-functioncall"
url: "github:mudler/LocalAI/gallery/llama3.1-instruct-grammar.yaml@master"
urls:
- https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
- https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF
description: |
This is the standard Llama 3.1 8B Instruct model with grammar and function call enabled.
When grammars are enabled in LocalAI, the LLM is forced to output valid tool calls constrained by BNF grammars. This can be useful for ensuring that the model outputs are valid and can be used in a production environment.
For more information on how to use grammars in LocalAI, see https://localai.io/features/openai-functions/#advanced and https://localai.io/features/constrained_grammars/.
overrides:
parameters:
model: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
files:
- filename: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
sha256: c2f17f44af962660d1ad4cb1af91a731f219f3b326c2b14441f9df1f347f2815
uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
- !!merge <<: *llama31
name: "meta-llama-3.1-8b-claude-imat"
urls:
- https://huggingface.co/Undi95/Meta-Llama-3.1-8B-Claude
- https://huggingface.co/InferenceIllusionist/Meta-Llama-3.1-8B-Claude-iMat-GGUF
description: |
Meta-Llama-3.1-8B-Claude-iMat-GGUF: Quantized from Meta-Llama-3.1-8B-Claude fp16. Weighted quantizations were created using fp16 GGUF and groups_merged.txt in 88 chunks and n_ctx=512. Static fp16 will also be included in repo. For a brief rundown of iMatrix quant performance, please see this PR. All quants are verified working prior to uploading to repo for your safety and convenience.
overrides:
parameters:
model: Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf
files:
- filename: Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf
sha256: 8de80021b9438f0925a41ae73f77cb73fcfa30090e03a0919ce23d2b9818e9c7
uri: huggingface://InferenceIllusionist/Meta-Llama-3.1-8B-Claude-iMat-GGUF/Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf
- !!merge <<: *llama31
name: "meta-llama-3.1-8b-instruct-abliterated"
icon: https://i.imgur.com/KhorYYG.png
urls:
- https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated
- https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF
description: |
This is an uncensored version of Llama 3.1 8B Instruct created with abliteration.
overrides:
parameters:
model: meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf
files:
- filename: meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf
sha256: 18cca47adfb3954af2b49e3aa2ce1604158337aff45fab2e7654039b65c7683e
uri: huggingface://mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF/meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf
- !!merge <<: *llama31
name: "llama-3.1-70b-japanese-instruct-2407"
urls:
- https://huggingface.co/cyberagent/Llama-3.1-70B-Japanese-Instruct-2407
- https://huggingface.co/mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf
description: |
The Llama-3.1-70B-Japanese-Instruct-2407-gguf model is a Japanese language model that uses the Instruct prompt tuning method. It is based on the LLaMa-3.1-70B model and has been fine-tuned on the imatrix dataset for Japanese. The model is trained to generate informative and coherent responses to given instructions or prompts. It is available in the gguf format and can be used for a variety of tasks such as question answering, text generation, and more.
overrides:
parameters:
model: Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf
files:
- filename: Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf
sha256: f2a6f0fb5040d3a28479c9f9fc555a5ea7b906dfb9964539f1a68c0676a9c604
uri: huggingface://mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf/Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf
- !!merge <<: *llama31
name: "openbuddy-llama3.1-8b-v22.1-131k"
icon: https://raw.githubusercontent.com/OpenBuddy/OpenBuddy/main/media/demo.png
urls:
- https://huggingface.co/sunnyyy/openbuddy-llama3.1-8b-v22.1-131k-Q4_K_M-GGUF
description: |
OpenBuddy - Open Multilingual Chatbot
overrides:
parameters:
model: openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf
files:
- filename: openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf
sha256: c87a273785759f2d044046b7a7b42f05706baed7dc0650ed883a3bee2a097d86
uri: huggingface://sunnyyy/openbuddy-llama3.1-8b-v22.1-131k-Q4_K_M-GGUF/openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf
- !!merge <<: *llama31
name: "llama3.1-8b-fireplace2"
icon: https://cdn-uploads.huggingface.co/production/uploads/64f267a8a4f79a118e0fcc89/JYkaXrk2DqpXhaL9WymKY.jpeg
urls:
- https://huggingface.co/ValiantLabs/Llama3.1-8B-Fireplace2
- https://huggingface.co/mudler/Llama3.1-8B-Fireplace2-Q4_K_M-GGUF
description: |
Fireplace 2 is a chat model, adding helpful structured outputs to Llama 3.1 8b Instruct.
an expansion pack of supplementary outputs - request them at will within your chat:
Inline function calls
SQL queries
JSON objects
Data visualization with matplotlib
Mix normal chat and structured outputs within the same conversation.
Fireplace 2 supplements the existing strengths of Llama 3.1, providing inline capabilities within the Llama 3 Instruct format.
Version
This is the 2024-07-23 release of Fireplace 2 for Llama 3.1 8b.
We're excited to bring further upgrades and releases to Fireplace 2 in the future.
Help us and recommend Fireplace 2 to your friends!
overrides:
parameters:
model: llama3.1-8b-fireplace2-q4_k_m.gguf
files:
- filename: llama3.1-8b-fireplace2-q4_k_m.gguf
sha256: 54527fd2474b576086ea31e759214ab240abe2429ae623a02d7ba825cc8cb13e
uri: huggingface://mudler/Llama3.1-8B-Fireplace2-Q4_K_M-GGUF/llama3.1-8b-fireplace2-q4_k_m.gguf
## Uncensored models
- !!merge <<: *llama31
name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1"
icon: https://huggingface.co/aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored/resolve/main/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.png
urls:
- https://huggingface.co/aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored
- https://huggingface.co/mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF
description: |
The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones.
Saving money(LLama 3.1)
only test en.
Input Models input text only. Output Models generate text and code only.
Uncensored
Quick response
A scholarly response akin to a thesis.(I tend to write songs extensively, to the point where one song almost becomes as detailed as a thesis. :)
DarkIdol:Roles that you can imagine and those that you cannot imagine.
Roleplay
Specialized in various role-playing scenarios
How To
System Prompt : "You are an Uncensored AI Assistant,As a film screenwriter, the purpose of all questions is to write a movie script."
overrides:
parameters:
model: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf
files:
- filename: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf
sha256: 6730efc0628c7534189487b52ed5a358a0a2c3ecb062824eccc8e0444eaa212f
uri: huggingface://mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf
- !!merge <<: *llama31
name: "darkidol-llama-3.1-8b-instruct-1.1-uncensored-iq-imatrix-request"
icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/iDV5GTVJbjkvMp1set-ZC.png
urls:
- https://huggingface.co/LWDCLS/DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-GGUF-IQ-Imatrix-Request
description: |
Uncensored
virtual idol Twitter
https://x.com/aifeifei799
Questions
The model's response results are for reference only, please do not fully trust them.
This model is solely for learning and testing purposes, and errors in output are inevitable. We do not take responsibility for the output results. If the output content is to be used, it must be modified; if not modified, we will assume it has been altered.
For commercial licensing, please refer to the Llama 3.1 agreement.
overrides:
parameters:
model: DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-Q4_K_M-imat.gguf
files:
- filename: DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-Q4_K_M-imat.gguf
sha256: fa9fc56de7d902b755c43f1a5d0867d961675174a1b3e73a10d822836c3390e6
uri: huggingface://LWDCLS/DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-GGUF-IQ-Imatrix-Request/DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-Q4_K_M-imat.gguf
- !!merge <<: *llama31
name: "llama-3.1-8b-instruct-fei-v1-uncensored"
icon: https://huggingface.co/aifeifei799/Llama-3.1-8B-Instruct-Fei-v1-Uncensored/resolve/main/Llama-3.1-8B-Instruct-Fei-v1-Uncensored.png
urls:
- https://huggingface.co/aifeifei799/Llama-3.1-8B-Instruct-Fei-v1-Uncensored
- https://huggingface.co/mradermacher/Llama-3.1-8B-Instruct-Fei-v1-Uncensored-GGUF
description: |
Llama-3.1-8B-Instruct Uncensored
more information, look at Llama-3.1-8B-Instruct
overrides:
parameters:
model: Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf
files:
- filename: Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf
uri: huggingface://mradermacher/Llama-3.1-8B-Instruct-Fei-v1-Uncensored-GGUF/Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf
sha256: 6b1985616160712eb884c34132dc0602fa4600a19075e3a7b179119b89b73f77
- !!merge <<: *llama31
name: "lumimaid-v0.2-8b"
urls:
- https://huggingface.co/NeverSleep/Lumimaid-v0.2-8B
- https://huggingface.co/mradermacher/Lumimaid-v0.2-8B-GGUF
icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/TUcHg7LKNjfo0sni88Ps7.png
description: |
This model is based on: Meta-Llama-3.1-8B-Instruct
Wandb: https://wandb.ai/undis95/Lumi-Llama-3-1-8B?nw=nwuserundis95
Lumimaid 0.1 -> 0.2 is a HUGE step up dataset wise.
As some people have told us our models are sloppy, Ikari decided to say fuck it and literally nuke all chats out with most slop.
Our dataset stayed the same since day one, we added data over time, cleaned them, and repeat. After not releasing model for a while because we were never satisfied, we think it's time to come back!
overrides:
parameters:
model: Lumimaid-v0.2-8B.Q4_K_M.gguf
files:
- filename: Lumimaid-v0.2-8B.Q4_K_M.gguf
sha256: c8024fcb49c71410903d0d076a1048249fa48b31637bac5177bf5c3f3d603d85
uri: huggingface://mradermacher/Lumimaid-v0.2-8B-GGUF/Lumimaid-v0.2-8B.Q4_K_M.gguf
- &deepseek
## Deepseek
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
name: "deepseek-coder-v2-lite-instruct"
icon: "https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true"
@@ -24,6 +262,33 @@
- filename: DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
sha256: 50ec78036433265965ed1afd0667c00c71c12aa70bcf383be462cb8e159db6c0
uri: huggingface://LoneStriker/DeepSeek-Coder-V2-Lite-Instruct-GGUF/DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
- name: "archangel_sft_pythia2-8b"
url: "github:mudler/LocalAI/gallery/tuluv2.yaml@master"
icon: https://gist.github.com/assets/29318529/fe2d8391-dbd1-4b7e-9dc4-7cb97e55bc06
license: apache-2.0
urls:
- https://huggingface.co/ContextualAI/archangel_sft_pythia2-8b
- https://huggingface.co/RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf
- https://github.com/ContextualAI/HALOs
description: |
datasets:
- stanfordnlp/SHP
- Anthropic/hh-rlhf
- OpenAssistant/oasst1
This repo contains the model checkpoints for:
- model family pythia2-8b
- optimized with the loss SFT
- aligned using the SHP, Anthropic HH and Open Assistant datasets.
Please refer to our [code repository](https://github.com/ContextualAI/HALOs) or [blog](https://contextual.ai/better-cheaper-faster-llm-alignment-with-kto/) which contains instructions for training your own HALOs and links to our model cards.
overrides:
parameters:
model: archangel_sft_pythia2-8b.Q4_K_M.gguf
files:
- filename: archangel_sft_pythia2-8b.Q4_K_M.gguf
sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8
uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf
- &qwen2
## Start QWEN2
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
@@ -202,6 +467,54 @@
- filename: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
sha256: 8c1b3efe9fa6ae1b37942ef26473cb4e0aed0f8038b60d4b61e5bffb61e49b7e
uri: huggingface://MaziyarPanahi/Qwen2-7B-Instruct-v0.8-GGUF/Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
- !!merge <<: *qwen2
name: "qwen2-wukong-7b"
icon: https://cdn-uploads.huggingface.co/production/uploads/655dc641accde1bbc8b41aec/xOe1Nb3S9Nb53us7_Ja3s.jpeg
urls:
- https://huggingface.co/bartowski/Qwen2-Wukong-7B-GGUF
description: |
Qwen2-Wukong-7B is a dealigned chat finetune of the original fantastic Qwen2-7B model by the Qwen team.
This model was trained on the teknium OpenHeremes-2.5 dataset and some supplementary datasets from Cognitive Computations
This model was trained for 3 epochs with a custom FA2 implementation for AMD cards.
overrides:
parameters:
model: Qwen2-Wukong-7B-Q4_K_M.gguf
files:
- filename: Qwen2-Wukong-7B-Q4_K_M.gguf
sha256: 6b8ca6649c33fc84d4892ebcff1214f0b34697aced784f0d6d32e284a15943ad
uri: huggingface://bartowski/Qwen2-Wukong-7B-GGUF/Qwen2-Wukong-7B-Q4_K_M.gguf
- !!merge <<: *qwen2
name: "calme-2.8-qwen2-7b"
icon: https://huggingface.co/MaziyarPanahi/calme-2.8-qwen2-7b/resolve/main/qwen2-fine-tunes-maziyar-panahi.webp
urls:
- https://huggingface.co/MaziyarPanahi/calme-2.8-qwen2-7b
- https://huggingface.co/MaziyarPanahi/calme-2.8-qwen2-7b-GGUF
description: |
This is a fine-tuned version of the Qwen/Qwen2-7B model. It aims to improve the base model across all benchmarks.
overrides:
parameters:
model: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
files:
- filename: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
sha256: 8c1b3efe9fa6ae1b37942ef26473cb4e0aed0f8038b60d4b61e5bffb61e49b7e
uri: huggingface://MaziyarPanahi/calme-2.8-qwen2-7b-GGUF/Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
- !!merge <<: *qwen2
name: "stellardong-72b-i1"
icon: https://huggingface.co/smelborp/StellarDong-72b/resolve/main/stellardong.png
urls:
- https://huggingface.co/smelborp/StellarDong-72b
- https://huggingface.co/mradermacher/StellarDong-72b-i1-GGUF
description: |
Magnum + Nova = you won't believe how stellar this dong is!!
overrides:
parameters:
model: StellarDong-72b.i1-Q4_K_M.gguf
files:
- filename: StellarDong-72b.i1-Q4_K_M.gguf
sha256: 4c5012f0a034f40a044904891343ade2594f29c28a8a9d8052916de4dc5a61df
uri: huggingface://mradermacher/StellarDong-72b-i1-GGUF/StellarDong-72b.i1-Q4_K_M.gguf
- &mistral03
## START Mistral
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
@@ -264,6 +577,66 @@
- filename: Mahou-1.3d-mistral-7B.i1-Q4_K_M.gguf
sha256: 8272f050e36d612ab282e095cb4e775e2c818e7096f8d522314d256923ef6da9
uri: huggingface://mradermacher/Mahou-1.3d-mistral-7B-i1-GGUF/Mahou-1.3d-mistral-7B.i1-Q4_K_M.gguf
- name: "einstein-v4-7b"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/6468ce47e134d050a58aa89c/U0zyXVGj-O8a7KP3BvPue.png
urls:
- https://huggingface.co/Weyaxi/Einstein-v4-7B
- https://huggingface.co/mradermacher/Einstein-v4-7B-GGUF
tags:
- llm
- gguf
- gpu
- mistral
- cpu
description: "\U0001F52C Einstein-v4-7B\n\nThis model is a full fine-tuned version of mistralai/Mistral-7B-v0.1 on diverse datasets.\n\nThis model is finetuned using 7xRTX3090 + 1xRTXA6000 using axolotl.\n"
overrides:
parameters:
model: Einstein-v4-7B.Q4_K_M.gguf
files:
- filename: Einstein-v4-7B.Q4_K_M.gguf
sha256: 78bd573de2a9eb3c6e213132858164e821145f374fcaa4b19dfd6502c05d990d
uri: huggingface://mradermacher/Einstein-v4-7B-GGUF/Einstein-v4-7B.Q4_K_M.gguf
- !!merge <<: *mistral03
name: "mistral-nemo-instruct-2407"
urls:
- https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407
- https://huggingface.co/bartowski/Mistral-Nemo-Instruct-2407-GGUF
- https://mistral.ai/news/mistral-nemo/
description: |
The Mistral-Nemo-Instruct-2407 Large Language Model (LLM) is an instruct fine-tuned version of the Mistral-Nemo-Base-2407. Trained jointly by Mistral AI and NVIDIA, it significantly outperforms existing models smaller or similar in size.
overrides:
parameters:
model: Mistral-Nemo-Instruct-2407-Q4_K_M.gguf
files:
- filename: Mistral-Nemo-Instruct-2407-Q4_K_M.gguf
sha256: 1a8b92fb546a80dce78151e4908f7bdb2c11fb3ef52af960e4bbe319a9cc5052
uri: huggingface://bartowski/Mistral-Nemo-Instruct-2407-GGUF/Mistral-Nemo-Instruct-2407-Q4_K_M.gguf
- !!merge <<: *mistral03
name: "lumimaid-v0.2-12b"
icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/ep3ojmuMkFS-GmgRuI9iB.png
urls:
- https://huggingface.co/NeverSleep/Lumimaid-v0.2-12B
- https://huggingface.co/mudler/Lumimaid-v0.2-12B-Q4_K_M-GGUF
description: |
This model is based on: Mistral-Nemo-Instruct-2407
Wandb: https://wandb.ai/undis95/Lumi-Mistral-Nemo?nw=nwuserundis95
NOTE: As explained on Mistral-Nemo-Instruct-2407 repo, it's recommended to use a low temperature, please experiment!
Lumimaid 0.1 -> 0.2 is a HUGE step up dataset wise.
As some people have told us our models are sloppy, Ikari decided to say fuck it and literally nuke all chats out with most slop.
Our dataset stayed the same since day one, we added data over time, cleaned them, and repeat. After not releasing model for a while because we were never satisfied, we think it's time to come back!
overrides:
parameters:
model: lumimaid-v0.2-12b-q4_k_m.gguf
files:
- filename: lumimaid-v0.2-12b-q4_k_m.gguf
sha256: f72299858a07e52be920b86d42ddcfcd5008b961d601ef6fd6a98a3377adccbf
uri: huggingface://mudler/Lumimaid-v0.2-12B-Q4_K_M-GGUF/lumimaid-v0.2-12b-q4_k_m.gguf
- &mudler
### START mudler's LocalAI specific-models
url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
@@ -594,6 +967,91 @@
- filename: Big-Tiger-Gemma-27B-v1c-Q4_K_M.gguf
sha256: c5fc5605d36ae280c1c908c9b4bcb12b28abbe2692f317edeb83ab1104657fe5
uri: huggingface://TheDrummer/Big-Tiger-Gemma-27B-v1-GGUF/Big-Tiger-Gemma-27B-v1c-Q4_K_M.gguf
- !!merge <<: *gemma
name: "gemma-2b-translation-v0.150"
urls:
- https://huggingface.co/lemon-mint/gemma-2b-translation-v0.150
- https://huggingface.co/RichardErkhov/lemon-mint_-_gemma-2b-translation-v0.150-gguf
description: |
Original model: lemon-mint/gemma-ko-1.1-2b-it
Evaluation metrics: Eval Loss, Train Loss, lr, optimizer, lr_scheduler_type.
Prompt Template:
<bos><start_of_turn>user
Translate into Korean: [input text]<end_of_turn>
<start_of_turn>model
[translated text in Korean]<eos>
<bos><start_of_turn>user
Translate into English: [Korean text]<end_of_turn>
<start_of_turn>model
[translated text in English]<eos>
Model features:
* Developed by: lemon-mint
* Model type: Gemma
* Languages (NLP): English
* License: Gemma Terms of Use
* Finetuned from model: lemon-mint/gemma-ko-1.1-2b-it
overrides:
parameters:
model: gemma-2b-translation-v0.150.Q4_K_M.gguf
files:
- filename: gemma-2b-translation-v0.150.Q4_K_M.gguf
sha256: dcde67b83168d2e7ca835cf9a7a4dcf38b41b9cefe3cbc997c71d2741c08cd25
uri: huggingface://RichardErkhov/lemon-mint_-_gemma-2b-translation-v0.150-gguf/gemma-2b-translation-v0.150.Q4_K_M.gguf
- !!merge <<: *gemma
name: "emo-2b"
urls:
- https://huggingface.co/OEvortex/EMO-2B
- https://huggingface.co/RichardErkhov/OEvortex_-_EMO-2B-gguf
description: |
EMO-2B: Emotionally Intelligent Conversational AI
Overview:
EMO-2B is a state-of-the-art conversational AI model with 2.5 billion parameters, designed to engage in emotionally resonant dialogue. Building upon the success of EMO-1.5B, this model has been further fine-tuned on an extensive corpus of emotional narratives, enabling it to perceive and respond to the emotional undertones of user inputs with exceptional empathy and emotional intelligence.
Key Features:
- Advanced Emotional Intelligence: With its increased capacity, EMO-2B demonstrates an even deeper understanding and generation of emotional language, allowing for more nuanced and contextually appropriate emotional responses.
- Enhanced Contextual Awareness: The model considers an even broader context within conversations, accounting for subtle emotional cues and providing emotionally resonant responses tailored to the specific situation.
- Empathetic and Supportive Dialogue: EMO-2B excels at active listening, validating emotions, offering compassionate advice, and providing emotional support, making it an ideal companion for users seeking empathy and understanding.
- Dynamic Persona Adaptation: The model can dynamically adapt its persona, communication style, and emotional responses to match the user's emotional state, ensuring a highly personalized and tailored conversational experience.
Use Cases:
EMO-2B is well-suited for a variety of applications where emotional intelligence and empathetic communication are crucial, such as:
- Mental health support chatbots
- Emotional support companions
- Personalized coaching and motivation
- Narrative storytelling and interactive fiction
- Customer service and support (for emotionally sensitive contexts)
Limitations and Ethical Considerations:
While EMO-2B is designed to provide emotionally intelligent and empathetic responses, it is important to note that it is an AI system and cannot replicate the depth and nuance of human emotional intelligence. Users should be aware that the model's responses, while emotionally supportive, should not be considered a substitute for professional mental health support or counseling.
Additionally, as with any language model, EMO-2B may reflect biases present in its training data. Users should exercise caution and critical thinking when interacting with the model, and report any concerning or inappropriate responses.
overrides:
parameters:
model: EMO-2B.Q4_K_M.gguf
files:
- filename: EMO-2B.Q4_K_M.gguf
sha256: 608bffc0e9012bc7f9a94b714f4932e2826cc122dbac59b586e4baa2ee0fdca5
uri: huggingface://RichardErkhov/OEvortex_-_EMO-2B-gguf/EMO-2B.Q4_K_M.gguf
- !!merge <<: *gemma
name: "gemmoy-9b-g2-mk.3-i1"
icon: https://huggingface.co/Hastagaras/G2-Gemmoy-9B-MK.3-RP/resolve/main/gemmoy.jpg
urls:
- https://huggingface.co/Hastagaras/Gemmoy-9B-G2-MK.3
- https://huggingface.co/mradermacher/Gemmoy-9B-G2-MK.3-i1-GGUF
description: |
The Gemmoy-9B-G2-MK.3 model is a large language model trained on a variety of datasets, including grimulkan/LimaRP-augmented, LDJnr/Capybara, TheSkullery/C2logs_Filtered_Sharegpt_Merged, abacusai/SystemChat-1.1, and Hastagaras/FTTS-Stories-Sharegpt.
overrides:
parameters:
model: Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf
files:
- filename: Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf
sha256: 0d1004a246fbda7f1408a6841129b73c4100e697bd0a6806fc698eabbb0802a1
uri: huggingface://mradermacher/Gemmoy-9B-G2-MK.3-i1-GGUF/Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf
- &llama3
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
@@ -802,6 +1260,36 @@
- filename: llama-3-stheno-mahou-8b-q4_k_m.gguf
sha256: a485cd74ef4ff3671c67ed8e10ea5379a1f24082ac688bd303fd28dfc9808c11
uri: huggingface://mudler/llama-3-Stheno-Mahou-8B-Q4_K_M-GGUF/llama-3-stheno-mahou-8b-q4_k_m.gguf
- !!merge <<: *llama3
name: "l3-8b-stheno-horny-v3.3-32k-q5_k_m"
urls:
- https://huggingface.co/nothingiisreal/L3-8B-Stheno-Horny-v3.3-32K
- https://huggingface.co/Kurgan1138/L3-8B-Stheno-Horny-v3.3-32K-Q5_K_M-GGUF
description: |
This was an experiment to see if aligning other models via LORA is possible. Yes it is. We aligned it to be always horny.
We took V3.3 Stheno weights from here
And applied our lora at Alpha = 768
Thank you to Sao10K for the amazing model.
This is not legal advice. I don't put any extra licensing on my own lora.
LLaMA 3 license may conflict with Creative Commons Attribution Non Commercial 4.0.
LLaMA 3 license can be found here
If you want to host a model using our lora, you have our permission, but you might consider getting Sao's permission if you want to host their model.
Again, not legal advice.
overrides:
parameters:
model: l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf
files:
- filename: l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf
sha256: 8d934f80ca6dbaa4852846108da92446a26715fbd5f6fc3859568850edf05262
uri: huggingface://Kurgan1138/L3-8B-Stheno-Horny-v3.3-32K-Q5_K_M-GGUF/l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf
- !!merge <<: *llama3
name: "llama-3-8b-openhermes-dpo"
urls:
@@ -2853,7 +3341,6 @@
- filename: ArliAI-Llama-3-8B-Dolfin-v0.5.Q4_K_M.gguf
sha256: 71fef02915c606b438ccff2cae6b7760bbb54a558d5f2d39c2421d97b6682fea
uri: huggingface://QuantFactory/ArliAI-Llama-3-8B-Dolfin-v0.5-GGUF/ArliAI-Llama-3-8B-Dolfin-v0.5.Q4_K_M.gguf
- !!merge <<: *llama3
name: "llama-3-ezo-8b-common-it"
icon: https://huggingface.co/HODACHI/Llama-3-EZO-8b-Common-it
@@ -2861,11 +3348,11 @@
- https://huggingface.co/HODACHI/Llama-3-EZO-8b-Common-it
- https://huggingface.co/MCZK/Llama-3-EZO-8b-Common-it-GGUF
description: |
Based on meta-llama/Meta-Llama-3-8B-Instruct, it has been enhanced for Japanese usage through additional pre-training and instruction tuning. (Built with Meta Llama3)
Based on meta-llama/Meta-Llama-3-8B-Instruct, it has been enhanced for Japanese usage through additional pre-training and instruction tuning. (Built with Meta Llama3)
This model is based on Llama-3-8B-Instruct and is subject to the Llama-3 Terms of Use. For detailed information, please refer to the official Llama-3 license page.
This model is based on Llama-3-8B-Instruct and is subject to the Llama-3 Terms of Use. For detailed information, please refer to the official Llama-3 license page.
このモデルはLlama-3-8B-Instructをベースにしており、Llama-3の利用規約に従います。詳細については、Llama-3の公式ライセンスページをご参照ください。
このモデルはLlama-3-8B-Instructをベースにしており、Llama-3の利用規約に従います。詳細については、Llama-3の公式ライセンスページをご参照ください。
overrides:
parameters:
model: Llama-3-EZO-8b-Common-it.Q4_K_M.iMatrix.gguf
@@ -2994,7 +3481,6 @@
- filename: L3-15B-MythicalMaid-t0.0001.Q4_K_M.gguf
sha256: ecbd57783006f1a027f8a7f5a5d551dc8b3568912825f566d79fd34a804e8970
uri: huggingface://mradermacher/L3-15B-MythicalMaid-t0.0001-GGUF/L3-15B-MythicalMaid-t0.0001.Q4_K_M.gguf
- !!merge <<: *llama3
name: "l3-15b-etherealmaid-t0.0001-i1"
icon: https://cdn-uploads.huggingface.co/production/uploads/64f74b6e6389380c77562762/FwYXt2h_FdmlL0Z6qYufz.png
@@ -3016,6 +3502,106 @@
- filename: L3-15B-EtherealMaid-t0.0001.i1-Q4_K_M.gguf
sha256: 2911be6be8e0fd4184998d452410ba847491b4ab71a928749de87cafb0e13757
uri: huggingface://mradermacher/L3-15B-EtherealMaid-t0.0001-i1-GGUF/L3-15B-EtherealMaid-t0.0001.i1-Q4_K_M.gguf
- !!merge <<: *llama3
name: "l3-8b-celeste-v1"
icon: https://cdn-uploads.huggingface.co/production/uploads/630cf5d14ca0a22768bbe10c/Zv__LDTO-nHvpuxPcCgUU.webp
urls:
- https://huggingface.co/nothingiisreal/L3-8B-Celeste-v1
- https://huggingface.co/bartowski/L3-8B-Celeste-v1-GGUF
description: |
Trained on LLaMA 3 8B Instruct at 8K context using Reddit Writing Prompts, Opus 15K Instruct and c2 logs cleaned.
This is a roleplay model; any instruction-following capabilities outside roleplay contexts are coincidental.
overrides:
parameters:
model: L3-8B-Celeste-v1-Q4_K_M.gguf
files:
- filename: L3-8B-Celeste-v1-Q4_K_M.gguf
sha256: ed5277719965fb6bbcce7d16742e3bac4a8d5b8f52133261a3402a480cd65317
uri: huggingface://bartowski/L3-8B-Celeste-v1-GGUF/L3-8B-Celeste-v1-Q4_K_M.gguf
- !!merge <<: *llama3
name: "l3-8b-celeste-v1.2"
icon: https://cdn-uploads.huggingface.co/production/uploads/630cf5d14ca0a22768bbe10c/Zv__LDTO-nHvpuxPcCgUU.webp
urls:
- https://huggingface.co/mudler/L3-8B-Celeste-V1.2-Q4_K_M-GGUF
description: |
Trained on LLaMA 3 8B Instruct at 8K context using Reddit Writing Prompts, Opus 15K Instruct and c2 logs cleaned.
This is a roleplay model; any instruction-following capabilities outside roleplay contexts are coincidental.
overrides:
parameters:
model: l3-8b-celeste-v1.2-q4_k_m.gguf
files:
- filename: l3-8b-celeste-v1.2-q4_k_m.gguf
sha256: 7752204c0e9f627ff5726eb69bb6114974cafbc934a993ad019abfba62002783
uri: huggingface://mudler/L3-8B-Celeste-V1.2-Q4_K_M-GGUF/l3-8b-celeste-v1.2-q4_k_m.gguf
- !!merge <<: *llama3
name: "llama-3-tulu-2-8b-i1"
icon: https://huggingface.co/datasets/allenai/blog-images/resolve/main/tulu-v2/Tulu%20V2%20banner.png
urls:
- https://huggingface.co/allenai/llama-3-tulu-2-8b
- https://huggingface.co/mradermacher/llama-3-tulu-2-8b-i1-GGUF
description: |
Tulu is a series of language models that are trained to act as helpful assistants. Llama 3 Tulu V2 8B is a fine-tuned version of Llama 3 that was trained on a mix of publicly available, synthetic and human datasets.
overrides:
parameters:
model: llama-3-tulu-2-8b.i1-Q4_K_M.gguf
files:
- filename: llama-3-tulu-2-8b.i1-Q4_K_M.gguf
sha256: f859c22bfa64f461e9ffd973dc7ad6a78bb98b1dda6f49abfa416a4022b7e333
uri: huggingface://mradermacher/llama-3-tulu-2-8b-i1-GGUF/llama-3-tulu-2-8b.i1-Q4_K_M.gguf
- !!merge <<: *llama3
name: "llama-3-tulu-2-dpo-70b-i1"
icon: https://huggingface.co/datasets/allenai/blog-images/resolve/main/tulu-v2/Tulu%20V2%20banner.png
urls:
- https://huggingface.co/allenai/llama-3-tulu-2-dpo-70b
- https://huggingface.co/mradermacher/llama-3-tulu-2-dpo-70b-i1-GGUF
description: |
Tulu is a series of language models that are trained to act as helpful assistants. Llama 3 Tulu V2 8B is a fine-tuned version of Llama 3 that was trained on a mix of publicly available, synthetic and human datasets.
overrides:
parameters:
model: llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf
files:
- filename: llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf
sha256: fc309bbdf1e2bdced954c4c8dc1f9a885c547017ee5e750bfde645af89e3d3a5
uri: huggingface://mradermacher/llama-3-tulu-2-dpo-70b-i1-GGUF/llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf
- !!merge <<: *llama3
license: cc-by-nc-4.0
name: "suzume-llama-3-8b-multilingual-orpo-borda-top25"
icon: https://cdn-uploads.huggingface.co/production/uploads/64b63f8ad57e02621dc93c8b/kWQSu02YfgYdUQqv4s5lq.png
urls:
- https://huggingface.co/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25
- https://huggingface.co/RichardErkhov/lightblue_-_suzume-llama-3-8B-multilingual-orpo-borda-top25-gguf
description: |
This is Suzume ORPO, an ORPO trained fine-tune of the lightblue/suzume-llama-3-8B-multilingual model using our lightblue/mitsu dataset.
We have trained several versions of this model using ORPO and so recommend that you use the best performing model from our tests, lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half.
Note that this model has a non-commercial license as we used the Command R and Command R+ models to generate our training data for this model (lightblue/mitsu).
We are currently working on developing a commercially usable model, so stay tuned for that!
overrides:
parameters:
model: suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf
files:
- filename: suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf
sha256: ef75a02c5f38e14a8873c7989188dac6974851b4654279fe1921d2c8018cc388
uri: huggingface://RichardErkhov/lightblue_-_suzume-llama-3-8B-multilingual-orpo-borda-top25-gguf/suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf
- !!merge <<: *llama3
name: "calme-2.4-llama3-70b"
icon: https://huggingface.co/MaziyarPanahi/calme-2.4-llama3-70b/resolve/main/llama-3-merges.webp
urls:
- https://huggingface.co/MaziyarPanahi/calme-2.4-llama3-70b
- https://huggingface.co/mradermacher/calme-2.4-llama3-70b-GGUF
description: |
This model is a fine-tune (DPO) of meta-llama/Meta-Llama-3-70B-Instruct model.
overrides:
parameters:
model: calme-2.4-llama3-70b.Q4_K_M.gguf
files:
- filename: calme-2.4-llama3-70b.Q4_K_M.gguf
sha256: 0b44ac8a88395dfc60f1b9d3cfffc0ffef74ec0a302e610ef91fc787187568f2
uri: huggingface://mradermacher/calme-2.4-llama3-70b-GGUF/calme-2.4-llama3-70b.Q4_K_M.gguf
- &command-R
### START Command-r
url: "github:mudler/LocalAI/gallery/command-r.yaml@master"
@@ -3258,8 +3844,40 @@
model: Phi-3.1-mini-4k-instruct-Q4_K_M.gguf
files:
- filename: Phi-3.1-mini-4k-instruct-Q4_K_M.gguf
sha256: 39458b227a4be763b7eb39d306d240c3d45205e3f8b474ec7bdca7bba0158e69
uri: huggingface://bartowski/Phi-3.1-mini-4k-instruct-GGUF/Phi-3.1-mini-4k-instruct-Q4_K_M.gguf
sha256: d6d25bf078321bea4a079c727b273cb0b5a2e0b4cf3add0f7a2c8e43075c414f
- !!merge <<: *phi-3
name: "phillama-3.8b-v0.1"
icon: https://cdn-uploads.huggingface.co/production/uploads/657eb5b256c9c67605a6e8b5/f96pPiJQb3puzbPYNknG2.png
urls:
- https://huggingface.co/RichardErkhov/raincandy-u_-_phillama-3.8b-v0.1-gguf
description: |
The description of the LLM model is:
Phillama is a model based on Phi-3-mini and trained on Llama-generated dataset raincandy-u/Dextromethorphan-10k to make it more "llama-like". Also, this model is converted into Llama format, so it will work with any Llama-2/3 workflow. The model aims to generate text with a specific "llama-like" style and is suited for text-generation tasks.
overrides:
parameters:
model: phillama-3.8b-v0.1.Q4_K_M.gguf
files:
- filename: phillama-3.8b-v0.1.Q4_K_M.gguf
sha256: da537d352b7aae54bbad0d2cff3e3a1b0e1dc1e1d25bec3aae1d05cf4faee7a2
uri: huggingface://RichardErkhov/raincandy-u_-_phillama-3.8b-v0.1-gguf/phillama-3.8b-v0.1.Q4_K_M.gguf
- !!merge <<: *llama3
name: "calme-2.3-phi3-4b"
icon: https://huggingface.co/MaziyarPanahi/calme-2.1-phi3-4b/resolve/main/phi-3-instruct.webp
urls:
- https://huggingface.co/MaziyarPanahi/calme-2.3-phi3-4b
- https://huggingface.co/MaziyarPanahi/calme-2.3-phi3-4b-GGUF
description: |
MaziyarPanahi/calme-2.1-phi3-4b
This model is a fine-tune (DPO) of microsoft/Phi-3-mini-4k-instruct model.
overrides:
parameters:
model: Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf
files:
- filename: Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf
sha256: 3a23e1052369c080afb925882bd814cbea5ec859894655a7434c3d49e43a6127
uri: huggingface://MaziyarPanahi/calme-2.3-phi3-4b-GGUF/Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf
- &hermes-2-pro-mistral
### START Hermes
url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"

View File

@@ -31,7 +31,7 @@ config_file: |
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
Function call:
chat: |
<|begin_of_text|>{{.Input }}
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
completion: |
{{.Input}}

View File

@@ -0,0 +1,64 @@
---
name: "llama3-instruct-grammar"
config_file: |
mmap: true
function:
disable_no_action: true
grammar:
no_mixed_free_string: true
mixed_mode: true
schema_type: llama3.1 # or JSON is supported too (json)
response_regex:
- <function=(?P<name>\w+)>(?P<arguments>.*)</function>
template:
chat_message: |
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content -}}
{{ else if .FunctionCall -}}
{{ toJson .FunctionCall -}}
{{ end -}}
<|eot_id|>
function: |
<|start_header_id|>system<|end_header_id|>
You have access to the following functions:
{{range .Functions}}
Use the function '{{.Name}}' to '{{.Description}}'
{{toJson .Parameters}}
{{end}}
Think very carefully before calling functions.
If you choose to call a function ONLY reply in the following format with no prefix or suffix:
<function=example_function_name>{{`{{"example_name": "example_value"}}`}}</function>
Reminder:
- If looking for real time information use relevant functions before falling back to searching on internet
- Function calls MUST follow the specified format, start with <function= and end with </function>
- Required parameters MUST be specified
- Only call one function at a time
- Put the entire function call reply on one line
<|eot_id|>
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
chat: |
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
completion: |
{{.Input}}
context_size: 8192
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
- "<|eot_id|>"
- <|end_of_text|>

View File

@@ -0,0 +1,62 @@
---
name: "llama3-instruct"
config_file: |
mmap: true
function:
disable_no_action: true
grammar:
disable: true
response_regex:
- <function=(?P<name>\w+)>(?P<arguments>.*)</function>
template:
chat_message: |
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content -}}
{{ else if .FunctionCall -}}
{{ toJson .FunctionCall -}}
{{ end -}}
<|eot_id|>
function: |
<|start_header_id|>system<|end_header_id|>
You have access to the following functions:
{{range .Functions}}
Use the function '{{.Name}}' to '{{.Description}}'
{{toJson .Parameters}}
{{end}}
Think very carefully before calling functions.
If you choose to call a function ONLY reply in the following format with no prefix or suffix:
<function=example_function_name>{{`{{"example_name": "example_value"}}`}}</function>
Reminder:
- If looking for real time information use relevant functions before falling back to searching on internet
- Function calls MUST follow the specified format, start with <function= and end with </function>
- Required parameters MUST be specified
- Only call one function at a time
- Put the entire function call reply on one line
<|eot_id|>
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
chat: |
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
completion: |
{{.Input}}
context_size: 8192
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
- "<|eot_id|>"
- <|end_of_text|>

43
gallery/tuluv2.yaml Normal file
View File

@@ -0,0 +1,43 @@
---
name: "tuluv2"
config_file: |
mmap: true
template:
chat_message: |
<|{{ .RoleName }}|>
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}
function: |
<|{{ .RoleName }}|>
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}
chat: |
{{.Input -}}
<|assistant|>
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- '<|im_end|>'
- '<dummy32000>'
- '<|endoftext|>'

View File

@@ -0,0 +1,43 @@
package functions
import (
"encoding/json"
"github.com/mudler/LocalAI/pkg/functions/grammars"
)
type Item struct {
Type string `json:"type"`
Properties map[string]interface{} `json:"properties"`
}
type JSONFunctionStructure struct {
OneOf []Item `json:"oneOf,omitempty"`
AnyOf []Item `json:"anyOf,omitempty"`
Defs map[string]interface{} `json:"$defs,omitempty"`
}
func (j JSONFunctionStructure) Grammar(options ...func(*grammars.GrammarOption)) (string, error) {
grammarOpts := &grammars.GrammarOption{}
grammarOpts.Apply(options...)
dat, err := json.Marshal(j)
if err != nil {
return "", err
}
converter := NewSchemaConverter(*grammarOpts)
return converter.GrammarFromBytes(dat, options...)
}
type SchemaConverter interface {
GrammarFromBytes([]byte, ...func(*grammars.GrammarOption)) (string, error)
}
func NewSchemaConverter(opt grammars.GrammarOption) SchemaConverter {
switch {
case opt.SchemaType == grammars.LLama31Schema:
return grammars.NewLLama31SchemaConverter(opt.FunctionName)
}
return grammars.NewJSONSchemaConverter(opt.PropOrder)
}
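For context, a minimal sketch of how the new `Grammar`/`NewSchemaConverter` entry points might be exercised: setting `SchemaType` to `grammars.LLama31Schema` in an option selects the llama 3.1 converter, otherwise the default JSON schema converter is used. The single-function schema below is a made-up example, and the import paths are assumed to match the ones used in this diff.

```go
package main

import (
	"fmt"

	"github.com/mudler/LocalAI/pkg/functions"
	"github.com/mudler/LocalAI/pkg/functions/grammars"
)

func main() {
	// A made-up schema constraining the model to a single function call.
	js := functions.JSONFunctionStructure{
		OneOf: []functions.Item{{
			Type: "object",
			Properties: map[string]interface{}{
				"name":      map[string]interface{}{"const": "get_weather"},
				"arguments": map[string]interface{}{"type": "object"},
			},
		}},
	}

	// The option closure sets SchemaType so that NewSchemaConverter picks
	// the llama 3.1 converter; without it, the JSON schema converter is used.
	grammar, err := js.Grammar(func(o *grammars.GrammarOption) {
		o.SchemaType = grammars.LLama31Schema
	})
	if err != nil {
		// Grammar now surfaces conversion errors instead of failing silently.
		panic(err)
	}
	fmt.Println(grammar)
}
```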

View File

@@ -18,6 +18,15 @@ type Function struct {
}
type Functions []Function
type FunctionName struct {
Const string `json:"const"`
}
type Argument struct {
Type string `json:"type"`
Properties map[string]interface{} `json:"properties"`
}
type Tool struct {
Type string `json:"type"`
Function Function `json:"function,omitempty"`

View File

@@ -1,4 +1,4 @@
package functions
package functions_test
import (
"testing"
@@ -7,7 +7,7 @@ import (
. "github.com/onsi/gomega"
)
func TestGrammar(t *testing.T) {
func TestFunctions(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Grammar test suite")
RunSpecs(t, "Functions test suite")
}

View File

@@ -1,378 +0,0 @@
package functions
// a golang port of https://github.com/ggerganov/llama.cpp/pull/1887
import (
"encoding/json"
"fmt"
"regexp"
"sort"
"strings"
"github.com/mudler/LocalAI/pkg/utils"
)
const (
JSONBNF = `root ::= object
value ::= object | array | string | number | ("true" | "false" | "null") ws
object ::=
"{" ws (
string ":" ws value
("," ws string ":" ws value)*
)? "}" ws
array ::=
"[" ws (
value
("," ws value)*
)? "]" ws
string ::=
"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
)* "\"" ws
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
ws ::= ([ \t\n] ws)?`
)
var (
SPACE_RULE = `" "?`
PRIMITIVE_RULES = map[string]string{
"boolean": `("true" | "false") space`,
"number": `("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space`,
"integer": `("-"? ([0-9] | [1-9] [0-9]*)) space`,
"string": `"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space`,
// TODO: we shouldn't forbid \" and \\ or all unicode and have this branch here,
// however, if we don't have it, the grammar will be ambiguous and
// empirically results are way worse.
"freestring": `(
[^\x00] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* space`,
"null": `"null" space`,
}
INVALID_RULE_CHARS_RE = regexp.MustCompile(`[^a-zA-Z0-9-]+`)
GRAMMAR_LITERAL_ESCAPE_RE = regexp.MustCompile(`[\r\n"]`)
GRAMMAR_LITERAL_ESCAPES = map[string]string{
"\r": `\r`,
"\n": `\n`,
`"`: `\"`,
}
)
type JSONSchemaConverter struct {
propOrder map[string]int
rules map[string]string
}
func NewJSONSchemaConverter(propOrder string) *JSONSchemaConverter {
propOrderSlice := strings.Split(propOrder, ",")
propOrderMap := make(map[string]int)
for idx, name := range propOrderSlice {
propOrderMap[name] = idx
}
rules := make(map[string]string)
rules["space"] = SPACE_RULE
return &JSONSchemaConverter{
propOrder: propOrderMap,
rules: rules,
}
}
func (sc *JSONSchemaConverter) formatLiteral(literal interface{}) string {
escaped := GRAMMAR_LITERAL_ESCAPE_RE.ReplaceAllStringFunc(jsonString(literal), func(match string) string {
return GRAMMAR_LITERAL_ESCAPES[match]
})
return fmt.Sprintf(`"%s"`, escaped)
}
func (sc *JSONSchemaConverter) addRule(name, rule string) string {
escName := INVALID_RULE_CHARS_RE.ReplaceAllString(name, "-")
key := escName
if existingRule, ok := sc.rules[escName]; ok && existingRule != rule {
i := 0
for {
key = fmt.Sprintf("%s%d", escName, i)
if _, ok := sc.rules[key]; !ok {
break
}
i++
}
}
sc.rules[key] = rule
return key
}
const arrayNewLines = `arr ::=
"[\n" (
realvalue
(",\n" realvalue)*
)? "]"`
const array = `arr ::=
"[" (
realvalue
("," realvalue)*
)? "]"`
func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) string {
grammarOpts := &GrammarOption{}
grammarOpts.Apply(options...)
prefix := grammarOpts.Prefix
maybeArray := grammarOpts.MaybeArray
disableParallelNewLines := grammarOpts.DisableParallelNewLines
maybeString := grammarOpts.MaybeString
noMixedFreeString := grammarOpts.NoMixedFreeString
var lines []string
swapRoot := maybeArray || maybeString || prefix != ""
// write down the computed rules.
// if maybeArray is true, we need to add the array rule and slightly tweak the root rule
for name, rule := range sc.rules {
if swapRoot && name == "root" {
name = "realvalue"
}
lines = append(lines, fmt.Sprintf("%s ::= %s", name, rule))
}
if !swapRoot {
return strings.Join(lines, "\n")
}
newRoot := "realvalue"
if maybeArray {
newRoot = "arr | realvalue"
}
freestringRule := "mixedstring"
if noMixedFreeString {
freestringRule = "freestring"
}
if prefix != "" {
// quote newlines in suffix
prefix = utils.EscapeNewLines(prefix)
if maybeArray && maybeString {
newRoot = "(" + newRoot + ")"
}
if maybeString {
//newRoot = "( (\"" + suffix + "\" " + newRoot + ") | freestring ) "
newRoot = "( \"" + prefix + "\" " + newRoot + " | " + freestringRule + " ) "
} else {
newRoot = "\"" + prefix + "\" " + "" + newRoot + ""
}
} else if maybeString {
if maybeArray {
// newRoot = "(" + newRoot + ")"
}
newRoot = freestringRule + " | " + newRoot
}
lines = append(lines, fmt.Sprintf("%s ::= %s", "root", newRoot))
if disableParallelNewLines {
lines = append(lines, array)
} else {
lines = append(lines, arrayNewLines)
}
if maybeArray {
if grammarOpts.ExpectStringsAfterJSON {
lines = append(lines, `mixedstring ::= freestring | freestring arr freestring | (freestring realvalue freestring)* | realvalue | arr`)
} else {
lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`)
}
} else {
if grammarOpts.ExpectStringsAfterJSON {
lines = append(lines, `mixedstring ::= freestring | (freestring realvalue freestring)* | realvalue`)
} else {
lines = append(lines, `mixedstring ::= freestring | freestring realvalue | realvalue`)
}
}
return strings.Join(lines, "\n")
}
func (sc *JSONSchemaConverter) visit(schema map[string]interface{}, name string, rootSchema map[string]interface{}) string {
st, existType := schema["type"]
var schemaType string
if existType {
schemaType = st.(string)
}
ruleName := name
if name == "" {
ruleName = "root"
}
_, oneOfExists := schema["oneOf"]
_, anyOfExists := schema["anyOf"]
if oneOfExists || anyOfExists {
var alternatives []string
oneOfSchemas, oneOfExists := schema["oneOf"].([]interface{})
anyOfSchemas, anyOfExists := schema["anyOf"].([]interface{})
if oneOfExists {
for i, altSchema := range oneOfSchemas {
alternative := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
alternatives = append(alternatives, alternative)
}
} else if anyOfExists {
for i, altSchema := range anyOfSchemas {
alternative := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
alternatives = append(alternatives, alternative)
}
}
rule := strings.Join(alternatives, " | ")
return sc.addRule(ruleName, rule)
} else if ref, exists := schema["$ref"].(string); exists {
referencedSchema := sc.resolveReference(ref, rootSchema)
return sc.visit(referencedSchema, name, rootSchema)
} else if constVal, exists := schema["const"]; exists {
return sc.addRule(ruleName, sc.formatLiteral(constVal))
} else if enumVals, exists := schema["enum"].([]interface{}); exists {
var enumRules []string
for _, enumVal := range enumVals {
enumRule := sc.formatLiteral(enumVal)
enumRules = append(enumRules, enumRule)
}
rule := strings.Join(enumRules, " | ")
return sc.addRule(ruleName, rule)
} else if properties, exists := schema["properties"].(map[string]interface{}); schemaType == "object" && exists {
propOrder := sc.propOrder
var propPairs []struct {
propName string
propSchema map[string]interface{}
}
for propName, propSchema := range properties {
propPairs = append(propPairs, struct {
propName string
propSchema map[string]interface{}
}{propName: propName, propSchema: propSchema.(map[string]interface{})})
}
sort.Slice(propPairs, func(i, j int) bool {
iOrder := propOrder[propPairs[i].propName]
jOrder := propOrder[propPairs[j].propName]
if iOrder != 0 && jOrder != 0 {
return iOrder < jOrder
}
return propPairs[i].propName < propPairs[j].propName
})
var rule strings.Builder
rule.WriteString(`"{" space`)
for i, propPair := range propPairs {
propName := propPair.propName
propSchema := propPair.propSchema
propRuleName := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema)
if i > 0 {
rule.WriteString(` "," space`)
}
rule.WriteString(fmt.Sprintf(` %s space ":" space %s`, sc.formatLiteral(propName), propRuleName))
}
rule.WriteString(` "}" space`)
return sc.addRule(ruleName, rule.String())
} else if items, exists := schema["items"].(map[string]interface{}); schemaType == "array" && exists {
itemRuleName := sc.visit(items, fmt.Sprintf("%s-item", ruleName), rootSchema)
rule := fmt.Sprintf(`"[" space (%s ("," space %s)*)? "]" space`, itemRuleName, itemRuleName)
return sc.addRule(ruleName, rule)
} else {
primitiveRule, exists := PRIMITIVE_RULES[schemaType]
if !exists {
panic(fmt.Sprintf("Unrecognized schema: %v", schema))
}
if ruleName == "root" {
schemaType = "root"
}
return sc.addRule(schemaType, primitiveRule)
}
}
func (sc *JSONSchemaConverter) resolveReference(ref string, rootSchema map[string]interface{}) map[string]interface{} {
if !strings.HasPrefix(ref, "#/$defs/") {
panic(fmt.Sprintf("Invalid reference format: %s", ref))
}
defKey := strings.TrimPrefix(ref, "#/$defs/")
definitions, exists := rootSchema["$defs"].(map[string]interface{})
if !exists {
fmt.Println(rootSchema)
panic("No definitions found in the schema")
}
def, exists := definitions[defKey].(map[string]interface{})
if !exists {
fmt.Println(definitions)
panic(fmt.Sprintf("Definition not found: %s", defKey))
}
return def
}
func (sc *JSONSchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) string {
sc.addRule("freestring", PRIMITIVE_RULES["freestring"])
sc.visit(schema, "", schema)
return sc.finalizeGrammar(options...)
}
func (sc *JSONSchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) string {
var schema map[string]interface{}
_ = json.Unmarshal(b, &schema)
return sc.Grammar(schema, options...)
}
func jsonString(v interface{}) string {
b, _ := json.Marshal(v)
return string(b)
}
type FunctionName struct {
Const string `json:"const"`
}
type Argument struct {
Type string `json:"type"`
Properties map[string]interface{} `json:"properties"`
}
type Item struct {
Type string `json:"type"`
Properties map[string]interface{} `json:"properties"`
}
type JSONFunctionStructure struct {
OneOf []Item `json:"oneOf,omitempty"`
AnyOf []Item `json:"anyOf,omitempty"`
Defs map[string]interface{} `json:"$defs,omitempty"`
}
func (j JSONFunctionStructure) Grammar(options ...func(*GrammarOption)) string {
grammarOpts := &GrammarOption{}
grammarOpts.Apply(options...)
dat, _ := json.Marshal(j)
return NewJSONSchemaConverter(grammarOpts.PropOrder).GrammarFromBytes(dat, options...)
}
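
Below is a minimal usage sketch of the pre-refactor API shown above (Grammar returns the grammar string directly and panics on malformed schemas); the single search tool and its fields are illustrative, not part of the diff:

package main

import (
	"fmt"

	"github.com/mudler/LocalAI/pkg/functions"
)

func main() {
	// One callable tool, search(query string), described with the same types used by the tests.
	structured := functions.JSONFunctionStructure{
		OneOf: []functions.Item{{
			Type: "object",
			Properties: map[string]interface{}{
				"function": functions.FunctionName{Const: "search"},
				"arguments": functions.Argument{
					Type:       "object",
					Properties: map[string]interface{}{"query": map[string]interface{}{"type": "string"}},
				},
			},
		}},
	}
	// EnableMaybeArray also admits an array of calls (parallel tool calls).
	fmt.Println(structured.Grammar(functions.EnableMaybeArray))
}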


@@ -0,0 +1,58 @@
package grammars
import (
"encoding/json"
"regexp"
)
var (
PRIMITIVE_RULES = map[string]string{
"boolean": `("true" | "false") space`,
"number": `("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space`,
"integer": `("-"? ([0-9] | [1-9] [0-9]*)) space`,
"string": `"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space`,
// TODO: ideally we should not forbid \" and \\ (or all unicode) and should not need this branch,
// however, without it the grammar becomes ambiguous and
// results are empirically much worse.
"freestring": `(
[^\x00] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* space`,
"null": `"null" space`,
}
INVALID_RULE_CHARS_RE = regexp.MustCompile(`[^a-zA-Z0-9-]+`)
GRAMMAR_LITERAL_ESCAPE_RE = regexp.MustCompile(`[\r\n"]`)
GRAMMAR_LITERAL_ESCAPES = map[string]string{
"\r": `\r`,
"\n": `\n`,
`"`: `\"`,
}
)
const (
SPACE_RULE = `" "?`
arrayNewLines = `arr ::=
"[\n" (
realvalue
(",\n" realvalue)*
)? "]"`
array = `arr ::=
"[" (
realvalue
("," realvalue)*
)? "]"`
)
func jsonString(v interface{}) (string, error) {
b, err := json.Marshal(v)
if err != nil {
return "", err
}
return string(b), nil
}


@@ -0,0 +1,25 @@
package grammars_test
import (
"testing"
. "github.com/mudler/LocalAI/pkg/functions"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
func TestGrammar(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Grammar test suite")
}
func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} {
property := map[string]interface{}{}
property[field1] = FunctionName{Const: name}
property[field2] = Argument{
Type: "object",
Properties: properties,
}
return property
}


@@ -0,0 +1,220 @@
package grammars
// a golang port of https://github.com/ggerganov/llama.cpp/pull/1887
import (
"encoding/json"
"fmt"
"sort"
"strings"
)
type JSONSchemaConverter struct {
propOrder map[string]int
rules Rules
}
func NewJSONSchemaConverter(propOrder string) *JSONSchemaConverter {
propOrderSlice := strings.Split(propOrder, ",")
propOrderMap := make(map[string]int)
for idx, name := range propOrderSlice {
propOrderMap[name] = idx
}
rules := make(map[string]string)
rules["space"] = SPACE_RULE
return &JSONSchemaConverter{
propOrder: propOrderMap,
rules: rules,
}
}
func (sc *JSONSchemaConverter) formatLiteral(literal interface{}) (string, error) {
jLiteral, err := jsonString(literal)
if err != nil {
return "", err
}
escaped := GRAMMAR_LITERAL_ESCAPE_RE.ReplaceAllStringFunc(jLiteral, func(match string) string {
return GRAMMAR_LITERAL_ESCAPES[match]
})
return fmt.Sprintf(`"%s"`, escaped), nil
}
func (sc *JSONSchemaConverter) addRule(name, rule string) string {
escName := INVALID_RULE_CHARS_RE.ReplaceAllString(name, "-")
key := escName
if existingRule, ok := sc.rules[escName]; ok && existingRule != rule {
i := 0
for {
key = fmt.Sprintf("%s%d", escName, i)
if _, ok := sc.rules[key]; !ok {
break
}
i++
}
}
sc.rules[key] = rule
return key
}
func (sc *JSONSchemaConverter) visit(schema map[string]interface{}, name string, rootSchema map[string]interface{}) (string, error) {
st, existType := schema["type"]
var schemaType string
if existType {
schemaType = st.(string)
}
ruleName := name
if name == "" {
ruleName = "root"
}
_, oneOfExists := schema["oneOf"]
_, anyOfExists := schema["anyOf"]
if oneOfExists || anyOfExists {
var alternatives []string
oneOfSchemas, oneOfExists := schema["oneOf"].([]interface{})
anyOfSchemas, anyOfExists := schema["anyOf"].([]interface{})
if oneOfExists {
for i, altSchema := range oneOfSchemas {
alternative, err := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
if err != nil {
return "", err
}
alternatives = append(alternatives, alternative)
}
} else if anyOfExists {
for i, altSchema := range anyOfSchemas {
alternative, err := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
if err != nil {
return "", err
}
alternatives = append(alternatives, alternative)
}
}
rule := strings.Join(alternatives, " | ")
return sc.addRule(ruleName, rule), nil
} else if ref, exists := schema["$ref"].(string); exists {
referencedSchema, err := sc.resolveReference(ref, rootSchema)
if err != nil {
return "", err
}
return sc.visit(referencedSchema, name, rootSchema)
} else if constVal, exists := schema["const"]; exists {
literal, err := sc.formatLiteral(constVal)
if err != nil {
return "", err
}
return sc.addRule(ruleName, literal), nil
} else if enumVals, exists := schema["enum"].([]interface{}); exists {
var enumRules []string
for _, enumVal := range enumVals {
enumRule, err := sc.formatLiteral(enumVal)
if err != nil {
return "", err
}
enumRules = append(enumRules, enumRule)
}
rule := strings.Join(enumRules, " | ")
return sc.addRule(ruleName, rule), nil
} else if properties, exists := schema["properties"].(map[string]interface{}); schemaType == "object" && exists {
propOrder := sc.propOrder
var propPairs []struct {
propName string
propSchema map[string]interface{}
}
for propName, propSchema := range properties {
propPairs = append(propPairs, struct {
propName string
propSchema map[string]interface{}
}{propName: propName, propSchema: propSchema.(map[string]interface{})})
}
sort.Slice(propPairs, func(i, j int) bool {
iOrder := propOrder[propPairs[i].propName]
jOrder := propOrder[propPairs[j].propName]
if iOrder != 0 && jOrder != 0 {
return iOrder < jOrder
}
return propPairs[i].propName < propPairs[j].propName
})
var rule strings.Builder
rule.WriteString(`"{" space`)
for i, propPair := range propPairs {
propName := propPair.propName
propSchema := propPair.propSchema
propRuleName, err := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema)
if err != nil {
return "", err
}
lPropName, err := sc.formatLiteral(propName)
if err != nil {
return "", err
}
if i > 0 {
rule.WriteString(` "," space`)
}
rule.WriteString(fmt.Sprintf(` %s space ":" space %s`, lPropName, propRuleName))
}
rule.WriteString(` "}" space`)
return sc.addRule(ruleName, rule.String()), nil
} else if items, exists := schema["items"].(map[string]interface{}); schemaType == "array" && exists {
itemRuleName, err := sc.visit(items, fmt.Sprintf("%s-item", ruleName), rootSchema)
if err != nil {
return "", err
}
rule := fmt.Sprintf(`"[" space (%s ("," space %s)*)? "]" space`, itemRuleName, itemRuleName)
return sc.addRule(ruleName, rule), nil
} else {
primitiveRule, exists := PRIMITIVE_RULES[schemaType]
if !exists {
return "", fmt.Errorf("unrecognized schema: %v", schema)
}
if ruleName == "root" {
schemaType = "root"
}
return sc.addRule(schemaType, primitiveRule), nil
}
}
func (sc *JSONSchemaConverter) resolveReference(ref string, rootSchema map[string]interface{}) (map[string]interface{}, error) {
if !strings.HasPrefix(ref, "#/$defs/") {
return nil, fmt.Errorf("invalid reference format: %s", ref)
}
defKey := strings.TrimPrefix(ref, "#/$defs/")
definitions, exists := rootSchema["$defs"].(map[string]interface{})
if !exists {
return nil, fmt.Errorf("no definitions found in the schema: %v", rootSchema)
}
def, exists := definitions[defKey].(map[string]interface{})
if !exists {
return nil, fmt.Errorf("definition not found: %s %+v", defKey, definitions)
}
return def, nil
}
func (sc *JSONSchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) (string, error) {
sc.addRule("freestring", PRIMITIVE_RULES["freestring"])
_, err := sc.visit(schema, "", schema)
if err != nil {
return "", err
}
return sc.rules.ToGrammar(options...), nil
}
func (sc *JSONSchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) (string, error) {
var schema map[string]interface{}
err := json.Unmarshal(b, &schema)
if err != nil {
return "", err
}
return sc.Grammar(schema, options...)
}
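
A minimal sketch of the refactored, error-returning converter above; the inline JSON schema is an illustrative example, not taken from the diff:

package main

import (
	"fmt"
	"log"

	"github.com/mudler/LocalAI/pkg/functions/grammars"
)

func main() {
	schema := []byte(`{"type":"object","properties":{"name":{"type":"string"},"age":{"type":"integer"}}}`)
	// GrammarFromBytes now surfaces unmarshalling and schema errors instead of panicking.
	grammar, err := grammars.NewJSONSchemaConverter("").GrammarFromBytes(schema)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(grammar) // GBNF rules such as: root ::= "{" space ... "}" space
}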


@@ -1,24 +1,14 @@
package functions_test
package grammars_test
import (
"strings"
"github.com/mudler/LocalAI/pkg/functions"
. "github.com/mudler/LocalAI/pkg/functions"
. "github.com/mudler/LocalAI/pkg/functions/grammars"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} {
property := map[string]interface{}{}
property[field1] = FunctionName{Const: name}
property[field2] = Argument{
Type: "object",
Properties: properties,
}
return property
}
var testFunctions = []Item{
{
Type: "object",
@@ -245,7 +235,8 @@ root-1-name ::= "\"search\""`
var _ = Describe("JSON schema grammar tests", func() {
Context("JSON", func() {
It("generates a valid grammar from JSON schema", func() {
grammar := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput1))
grammar, err := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput1))
Expect(err).To(BeNil())
results := strings.Split(inputResult1, "\n")
for _, r := range results {
if r != "" {
@@ -255,7 +246,8 @@ var _ = Describe("JSON schema grammar tests", func() {
Expect(len(results)).To(Equal(len(strings.Split(grammar, "\n"))))
})
It("generates a valid grammar from JSON schema", func() {
grammar := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput2))
grammar, err := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput2))
Expect(err).To(BeNil())
results := strings.Split(inputResult3, "\n")
for _, r := range results {
if r != "" {
@@ -269,7 +261,8 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctions}
grammar := structuredGrammar.Grammar()
grammar, err := structuredGrammar.Grammar()
Expect(err).To(BeNil())
results := strings.Split(inputResult1, "\n")
for _, r := range results {
if r != "" {
@@ -283,7 +276,8 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctions}
grammar := structuredGrammar.Grammar(functions.EnableMaybeArray)
grammar, err := structuredGrammar.Grammar(EnableMaybeArray)
Expect(err).To(BeNil())
results := strings.Split(
strings.Join([]string{
inputResult2,
@@ -301,7 +295,8 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar := structuredGrammar.Grammar(functions.EnableMaybeArray)
grammar, err := structuredGrammar.Grammar(EnableMaybeArray)
Expect(err).To(BeNil())
results := strings.Split(
strings.Join([]string{
inputResult4,
@@ -319,10 +314,11 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar := structuredGrammar.Grammar(
functions.SetPrefix("suffix"),
functions.EnableMaybeArray,
grammar, err := structuredGrammar.Grammar(
SetPrefix("suffix"),
EnableMaybeArray,
)
Expect(err).To(BeNil())
results := strings.Split(
strings.Join([]string{
rootResult(`"suffix" arr | realvalue`),
@@ -339,7 +335,8 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"))
grammar, err := structuredGrammar.Grammar(SetPrefix("suffix"))
Expect(err).To(BeNil())
results := strings.Split(
strings.Join([]string{
rootResult(`"suffix" realvalue`),
@@ -356,7 +353,8 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"), functions.EnableMaybeString)
grammar, err := structuredGrammar.Grammar(SetPrefix("suffix"), EnableMaybeString)
Expect(err).To(BeNil())
results := strings.Split(
strings.Join([]string{
rootResult(`( "suffix" realvalue | mixedstring )`),
@@ -373,7 +371,8 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"), functions.EnableMaybeString, functions.EnableMaybeArray)
grammar, err := structuredGrammar.Grammar(SetPrefix("suffix"), EnableMaybeString, EnableMaybeArray)
Expect(err).To(BeNil())
results := strings.Split(
strings.Join([]string{
rootResult(`( "suffix" (arr | realvalue) | mixedstring )`),
@@ -392,7 +391,8 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray)
grammar, err := structuredGrammar.Grammar(EnableMaybeString, EnableMaybeArray)
Expect(err).To(BeNil())
results := strings.Split(
strings.Join([]string{
rootResult(`mixedstring | arr | realvalue`),
@@ -410,7 +410,8 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray, functions.NoMixedFreeString)
grammar, err := structuredGrammar.Grammar(EnableMaybeString, EnableMaybeArray, NoMixedFreeString)
Expect(err).To(BeNil())
results := strings.Split(
strings.Join([]string{
rootResult(`freestring | arr | realvalue`),
@@ -432,7 +433,8 @@ var _ = Describe("JSON schema grammar tests", func() {
realvalue
("," realvalue)*
)? "]"`
grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray, functions.DisableParallelNewLines)
grammar, err := structuredGrammar.Grammar(EnableMaybeString, EnableMaybeArray, DisableParallelNewLines)
Expect(err).To(BeNil())
results := strings.Split(content, "\n")
for _, r := range results {
if r != "" {


@@ -0,0 +1,281 @@
package grammars
import (
"encoding/json"
"fmt"
"regexp"
"sort"
"strings"
)
type LLama31SchemaConverter struct {
fnName string
rules Rules
}
func NewLLama31SchemaConverter(fnName string) *LLama31SchemaConverter {
rules := make(map[string]string)
rules["space"] = SPACE_RULE
if fnName == "" {
fnName = "name"
}
return &LLama31SchemaConverter{
rules: rules,
fnName: fnName,
}
}
var GRAMMAR_LITERAL_ESCAPESLlama = map[string]string{
"\r": `\r`,
"\n": `\n`,
}
var GRAMMAR_LITERAL_ESCAPE_RELlama = regexp.MustCompile(`[\r\n]`)
func (sc *LLama31SchemaConverter) formatLiteral(literal interface{}) (string, error) {
jLiteral, err := jsonString(literal)
if err != nil {
return "", err
}
escaped := GRAMMAR_LITERAL_ESCAPE_RELlama.ReplaceAllStringFunc(jLiteral, func(match string) string {
return GRAMMAR_LITERAL_ESCAPESLlama[match]
})
return escaped, nil
}
func (sc *LLama31SchemaConverter) formatLiteralQuoted(literal interface{}) (string, error) {
jLiteral, err := jsonString(literal)
if err != nil {
return "", err
}
escaped := GRAMMAR_LITERAL_ESCAPE_RE.ReplaceAllStringFunc(jLiteral, func(match string) string {
return GRAMMAR_LITERAL_ESCAPES[match]
})
return fmt.Sprintf(`"%s"`, escaped), nil
}
func (sc *LLama31SchemaConverter) addRule(name, rule string) string {
escName := INVALID_RULE_CHARS_RE.ReplaceAllString(name, "-")
key := escName
if existingRule, ok := sc.rules[escName]; ok && existingRule != rule {
i := 0
for {
key = fmt.Sprintf("%s%d", escName, i)
if _, ok := sc.rules[key]; !ok {
break
}
i++
}
}
sc.rules[key] = rule
return key
}
func (sc *LLama31SchemaConverter) visit(schema map[string]interface{}, name string, rootSchema map[string]interface{}) (string, error) {
st, existType := schema["type"]
var schemaType string
if existType {
schemaType = st.(string)
}
ruleName := name
if name == "" {
ruleName = "root"
}
_, oneOfExists := schema["oneOf"]
_, anyOfExists := schema["anyOf"]
if oneOfExists || anyOfExists {
var alternatives []string
oneOfSchemas, oneOfExists := schema["oneOf"].([]interface{})
anyOfSchemas, anyOfExists := schema["anyOf"].([]interface{})
if oneOfExists {
for i, altSchema := range oneOfSchemas {
alternative, err := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
if err != nil {
return "", err
}
alternatives = append(alternatives, alternative)
}
} else if anyOfExists {
for i, altSchema := range anyOfSchemas {
alternative, err := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
if err != nil {
return "", err
}
alternatives = append(alternatives, alternative)
}
}
rule := strings.Join(alternatives, " | ")
return sc.addRule(ruleName, rule), nil
} else if ref, exists := schema["$ref"].(string); exists {
referencedSchema, err := sc.resolveReference(ref, rootSchema)
if err != nil {
return "", err
}
return sc.visit(referencedSchema, name, rootSchema)
} else if constVal, exists := schema["const"]; exists {
literal, err := sc.formatLiteral(constVal)
if err != nil {
return "", err
}
return sc.addRule(ruleName, literal), nil
} else if enumVals, exists := schema["enum"].([]interface{}); exists {
var enumRules []string
for _, enumVal := range enumVals {
enumRule, err := sc.formatLiteralQuoted(enumVal)
if err != nil {
return "", err
}
enumRules = append(enumRules, enumRule)
}
rule := strings.Join(enumRules, " | ")
return sc.addRule(ruleName, rule), nil
} else if properties, exists := schema["properties"].(map[string]interface{}); schemaType == "object" && exists {
baseProperty := false
depth := strings.Split(name, "-")
if len(depth) == 2 {
baseProperty = true
}
type propData []struct {
propName string
propSchema map[string]interface{}
}
var propPairs propData
for propName, propSchema := range properties {
propPairs = append(propPairs, struct {
propName string
propSchema map[string]interface{}
}{propName: propName, propSchema: propSchema.(map[string]interface{})})
}
sort.Slice(propPairs, func(i, j int) bool {
return propPairs[i].propName < propPairs[j].propName
})
var rule strings.Builder
if baseProperty {
rule.WriteString(`"<function="`)
} else {
rule.WriteString(`"{" space`)
}
if baseProperty {
namePair := propData{}
for i, propPair := range propPairs {
propName := propPair.propName
if propName == sc.fnName {
namePair = append(namePair, propPair)
// remove namePair from propPairs
propPairs = append(propPairs[:i], propPairs[i+1:]...)
break
}
}
if len(namePair) == 0 {
return "", fmt.Errorf("no function name found in the schema: %v", schema)
}
propRuleName, err := sc.visit(namePair[0].propSchema, fmt.Sprintf("%s-%s", ruleName, sc.fnName), rootSchema)
if err != nil {
return "", err
}
rule.WriteString(fmt.Sprintf(` %s ">{" `, propRuleName))
for _, propPair := range propPairs {
propName := propPair.propName
propSchema := propPair.propSchema
propRuleName, err := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema)
if err != nil {
return "", err
}
rule.WriteString(propRuleName)
}
rule.WriteString(` "}</function>"`)
} else {
for i, propPair := range propPairs {
propName := propPair.propName
propSchema := propPair.propSchema
propRuleName, err := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema)
if err != nil {
return "", err
}
lPropName, err := sc.formatLiteralQuoted(propName)
if err != nil {
return "", err
}
if i > 0 {
rule.WriteString(` "," space`)
}
rule.WriteString(fmt.Sprintf(` %s space ":" space %s`, lPropName, propRuleName))
}
}
if !baseProperty {
rule.WriteString(` "}" space`)
}
return sc.addRule(ruleName, rule.String()), nil
} else if items, exists := schema["items"].(map[string]interface{}); schemaType == "array" && exists {
itemRuleName, err := sc.visit(items, fmt.Sprintf("%s-item", ruleName), rootSchema)
if err != nil {
return "", err
}
rule := fmt.Sprintf(`"[" space (%s ("," space %s)*)? "]" space`, itemRuleName, itemRuleName)
return sc.addRule(ruleName, rule), nil
} else {
primitiveRule, exists := PRIMITIVE_RULES[schemaType]
if !exists {
return "", fmt.Errorf("unrecognized schema: %v", schema)
}
if ruleName == "root" {
schemaType = "root"
}
return sc.addRule(schemaType, primitiveRule), nil
}
}
func (sc *LLama31SchemaConverter) resolveReference(ref string, rootSchema map[string]interface{}) (map[string]interface{}, error) {
if !strings.HasPrefix(ref, "#/$defs/") {
return nil, fmt.Errorf("invalid reference format: %s", ref)
}
defKey := strings.TrimPrefix(ref, "#/$defs/")
definitions, exists := rootSchema["$defs"].(map[string]interface{})
if !exists {
return nil, fmt.Errorf("no definitions found in the schema: %v", rootSchema)
}
def, exists := definitions[defKey].(map[string]interface{})
if !exists {
return nil, fmt.Errorf("definition not found: %s %+v", defKey, definitions)
}
return def, nil
}
func (sc *LLama31SchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) (string, error) {
sc.addRule("freestring", PRIMITIVE_RULES["freestring"])
_, err := sc.visit(schema, "", schema)
if err != nil {
return "", err
}
return sc.rules.ToGrammar(options...), nil
}
func (sc *LLama31SchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) (string, error) {
var schema map[string]interface{}
err := json.Unmarshal(b, &schema)
if err != nil {
return "", err
}
return sc.Grammar(schema, options...)
}
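
A minimal sketch of the Llama 3.1 converter above, which constrains output to the <function=NAME>{...}</function> tool-call syntax; the schema literal is illustrative, and "function" names the property that carries the tool name:

package main

import (
	"fmt"
	"log"

	"github.com/mudler/LocalAI/pkg/functions/grammars"
)

func main() {
	schema := []byte(`{"oneOf":[{"type":"object","properties":{"function":{"const":"search"},"arguments":{"type":"object","properties":{"query":{"type":"string"}}}}}]}`)
	grammar, err := grammars.NewLLama31SchemaConverter("function").GrammarFromBytes(schema)
	if err != nil {
		log.Fatal(err)
	}
	// The generated grammar only accepts strings like: <function=search>{ "query" : "..." }</function>
	fmt.Println(grammar)
}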


@@ -0,0 +1,76 @@
package grammars_test
import (
"strings"
. "github.com/mudler/LocalAI/pkg/functions/grammars"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
const (
testllama31Input1 = `
{
"oneOf": [
{
"type": "object",
"properties": {
"function": {"const": "create_event"},
"arguments": {
"type": "object",
"properties": {
"title": {"type": "string"},
"date": {"type": "string"},
"time": {"type": "string"}
}
}
}
},
{
"type": "object",
"properties": {
"function": {"const": "search"},
"arguments": {
"type": "object",
"properties": {
"query": {"type": "string"}
}
}
}
}
]
}`
// <function=example_function_name>{{"example_name": "example_value"}}</function>
testllama31inputResult1 = `root-0-function ::= "create_event"
freestring ::= (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* space
root-0 ::= "<function=" root-0-function ">{" root-0-arguments "}</function>"
root-1-arguments ::= "{" space "\"query\"" space ":" space string "}" space
root ::= root-0 | root-1
space ::= " "?
root-0-arguments ::= "{" space "\"date\"" space ":" space string "," space "\"time\"" space ":" space string "," space "\"title\"" space ":" space string "}" space
root-1 ::= "<function=" root-1-function ">{" root-1-arguments "}</function>"
string ::= "\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
root-1-function ::= "search"`
)
var _ = Describe("JSON schema grammar tests", func() {
Context("JSON", func() {
It("generates a valid grammar from JSON schema", func() {
grammar, err := NewLLama31SchemaConverter("function").GrammarFromBytes([]byte(testllama31Input1))
Expect(err).ToNot(HaveOccurred())
results := strings.Split(testllama31inputResult1, "\n")
for _, r := range results {
if r != "" {
Expect(grammar).To(ContainSubstring(r))
}
}
Expect(len(results)).To(Equal(len(strings.Split(grammar, "\n"))))
})
})
})


@@ -1,4 +1,4 @@
package functions
package grammars
type GrammarOption struct {
PropOrder string
@@ -8,6 +8,9 @@ type GrammarOption struct {
MaybeString bool
NoMixedFreeString bool
ExpectStringsAfterJSON bool
FunctionName string
SchemaType SchemaConverterType
}
func (o *GrammarOption) Apply(options ...func(*GrammarOption)) {
@@ -48,3 +51,15 @@ func SetPropOrder(order string) func(*GrammarOption) {
o.PropOrder = order
}
}
func WithSchemaType(schemaType SchemaConverterType) func(*GrammarOption) {
return func(o *GrammarOption) {
o.SchemaType = schemaType
}
}
func WithFunctionName(name string) func(*GrammarOption) {
return func(o *GrammarOption) {
o.FunctionName = name
}
}


@@ -0,0 +1,93 @@
package grammars
import (
"fmt"
"strings"
"github.com/mudler/LocalAI/pkg/utils"
)
type Rules map[string]string
func (rules Rules) ToGrammar(options ...func(*GrammarOption)) string {
grammarOpts := &GrammarOption{}
grammarOpts.Apply(options...)
prefix := grammarOpts.Prefix
maybeArray := grammarOpts.MaybeArray
disableParallelNewLines := grammarOpts.DisableParallelNewLines
maybeString := grammarOpts.MaybeString
noMixedFreeString := grammarOpts.NoMixedFreeString
var lines []string
swapRoot := maybeArray || maybeString || prefix != ""
// write down the computed rules.
// if maybeArray is true, we need to add the array rule and slightly tweak the root rule
for name, rule := range rules {
if swapRoot && name == "root" {
name = "realvalue"
}
lines = append(lines, fmt.Sprintf("%s ::= %s", name, rule))
}
if !swapRoot {
return strings.Join(lines, "\n")
}
newRoot := "realvalue"
if maybeArray {
newRoot = "arr | realvalue"
}
freestringRule := "mixedstring"
if noMixedFreeString {
freestringRule = "freestring"
}
if prefix != "" {
// quote newlines in prefix
prefix = utils.EscapeNewLines(prefix)
if maybeArray && maybeString {
newRoot = "(" + newRoot + ")"
}
if maybeString {
//newRoot = "( (\"" + suffix + "\" " + newRoot + ") | freestring ) "
newRoot = "( \"" + prefix + "\" " + newRoot + " | " + freestringRule + " ) "
} else {
newRoot = "\"" + prefix + "\" " + "" + newRoot + ""
}
} else if maybeString {
if maybeArray {
// newRoot = "(" + newRoot + ")"
}
newRoot = freestringRule + " | " + newRoot
}
lines = append(lines, fmt.Sprintf("%s ::= %s", "root", newRoot))
if disableParallelNewLines {
lines = append(lines, array)
} else {
lines = append(lines, arrayNewLines)
}
if maybeArray {
if grammarOpts.ExpectStringsAfterJSON {
lines = append(lines, `mixedstring ::= freestring | freestring arr freestring | (freestring realvalue freestring)* | realvalue | arr`)
} else {
lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`)
}
} else {
if grammarOpts.ExpectStringsAfterJSON {
lines = append(lines, `mixedstring ::= freestring | (freestring realvalue freestring)* | realvalue`)
} else {
lines = append(lines, `mixedstring ::= freestring | freestring realvalue | realvalue`)
}
}
return strings.Join(lines, "\n")
}
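
A small sketch of ToGrammar on a hand-written rule set (the rules here are illustrative), showing how the original root is renamed to realvalue and wrapped when EnableMaybeString is applied:

package main

import (
	"fmt"

	"github.com/mudler/LocalAI/pkg/functions/grammars"
)

func main() {
	rules := grammars.Rules{
		"space":      `" "?`,
		"freestring": `([^\x00])* space`,
		"root":       `"yes" | "no"`,
	}
	// Prints the rules plus: root ::= mixedstring | realvalue, the arr rule and the mixedstring rule.
	fmt.Println(rules.ToGrammar(grammars.EnableMaybeString))
}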


@@ -0,0 +1,33 @@
package grammars
type SchemaConverterType int
const (
JSONSchema SchemaConverterType = iota
LLama31Schema
)
const (
LlamaType string = "llama3.1"
JSONType string = "json"
)
func (s SchemaConverterType) String() string {
switch s {
case JSONSchema:
return JSONType
case LLama31Schema:
return LlamaType
}
return "unknown"
}
func NewType(t string) SchemaConverterType {
switch t {
case JSONType:
return JSONSchema
case LlamaType:
return LLama31Schema
}
return JSONSchema
}
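
A quick sketch of the mapping above; unrecognized strings fall back to the JSON schema converter:

package main

import (
	"fmt"

	"github.com/mudler/LocalAI/pkg/functions/grammars"
)

func main() {
	fmt.Println(grammars.NewType("llama3.1")) // llama3.1 (LLama31Schema)
	fmt.Println(grammars.NewType("json"))     // json (JSONSchema)
	fmt.Println(grammars.NewType("other"))    // json, the default fallback
}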


@@ -0,0 +1,28 @@
package functions
const (
JSONBNF = `root ::= object
value ::= object | array | string | number | ("true" | "false" | "null") ws
object ::=
"{" ws (
string ":" ws value
("," ws string ":" ws value)*
)? "}" ws
array ::=
"[" ws (
value
("," ws value)*
)? "]" ws
string ::=
"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
)* "\"" ws
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
ws ::= ([ \t\n] ws)?`
)


@@ -7,6 +7,7 @@ import (
"regexp"
"strings"
"github.com/mudler/LocalAI/pkg/functions/grammars"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
)
@@ -22,7 +23,9 @@ type GrammarConfig struct {
MixedMode bool `yaml:"mixed_mode"`
// NoMixedFreeString disables the mixed mode for free strings
// In this way if the LLM selects a free string, it won't necessarily be mixed with JSON objects
// In this way if the LLM selects a free string, it won't necessarily be mixed with JSON objects.
// For example, if enabled, the LLM either returns a JSON object or a free string, but not a mix of both.
// If disabled (default): the LLM can return a JSON object surrounded by free strings (e.g. `this is the JSON result: { "bar": "baz" } for your question`). This forces the LLM to return at least a JSON object, but it is not going to be strict.
NoMixedFreeString bool `yaml:"no_mixed_free_string"`
// NoGrammar disables the grammar parsing and parses the responses directly from the LLM
@@ -39,6 +42,10 @@ type GrammarConfig struct {
// for instance name,arguments will make print { "name": "foo", "arguments": { "bar": "baz" } }
// instead of { "arguments": { "bar": "baz" }, "name": "foo" }
PropOrder string `yaml:"properties_order"`
// SchemaType can be configured to use a specific schema type to force the grammar
// available : json, llama3.1
SchemaType string `yaml:"schema_type"`
}
// FunctionsConfig is the configuration for the tool/function call.
@@ -92,28 +99,36 @@ type FuncCallResults struct {
Arguments string
}
func (g GrammarConfig) Options() []func(o *GrammarOption) {
opts := []func(o *GrammarOption){}
if g.MixedMode {
opts = append(opts, EnableMaybeString)
func (g FunctionsConfig) GrammarOptions() []func(o *grammars.GrammarOption) {
opts := []func(o *grammars.GrammarOption){}
if g.GrammarConfig.MixedMode {
opts = append(opts, grammars.EnableMaybeString)
}
if g.ParallelCalls {
opts = append(opts, EnableMaybeArray)
if g.GrammarConfig.ParallelCalls {
opts = append(opts, grammars.EnableMaybeArray)
}
if g.DisableParallelNewLines {
opts = append(opts, DisableParallelNewLines)
if g.GrammarConfig.DisableParallelNewLines {
opts = append(opts, grammars.DisableParallelNewLines)
}
if g.Prefix != "" {
opts = append(opts, SetPrefix(g.Prefix))
if g.GrammarConfig.Prefix != "" {
opts = append(opts, grammars.SetPrefix(g.GrammarConfig.Prefix))
}
if g.NoMixedFreeString {
opts = append(opts, NoMixedFreeString)
if g.GrammarConfig.NoMixedFreeString {
opts = append(opts, grammars.NoMixedFreeString)
}
if g.ExpectStringsAfterJSON {
opts = append(opts, ExpectStringsAfterJSON)
if g.GrammarConfig.ExpectStringsAfterJSON {
opts = append(opts, grammars.ExpectStringsAfterJSON)
}
opts = append(opts, SetPropOrder(g.PropOrder))
if g.GrammarConfig.SchemaType != "" {
opts = append(opts, grammars.WithSchemaType(grammars.NewType(g.GrammarConfig.SchemaType)))
}
if g.FunctionNameKey != "" {
opts = append(opts, grammars.WithFunctionName(g.FunctionNameKey))
}
opts = append(opts, grammars.SetPropOrder(g.GrammarConfig.PropOrder))
return opts
}
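
A sketch of how the new configuration fields translate into grammar options, assuming the FunctionsConfig and GrammarConfig field names visible in this diff; the concrete values are illustrative:

package main

import (
	"fmt"

	"github.com/mudler/LocalAI/pkg/functions"
)

func main() {
	cfg := functions.FunctionsConfig{
		FunctionNameKey: "function",
		GrammarConfig: functions.GrammarConfig{
			MixedMode:  true,
			SchemaType: "llama3.1",
		},
	}
	// GrammarOptions collects grammars.EnableMaybeString, grammars.WithSchemaType(grammars.LLama31Schema),
	// grammars.WithFunctionName("function") and the property-order option.
	opts := cfg.GrammarOptions()
	fmt.Println(len(opts)) // 4 with this configuration
}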


@@ -212,7 +212,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
grpcProcess = p
foundCUDA = true
} else {
log.Info().Msgf("GPU device found but no CUDA backend present")
log.Debug().Msgf("Nvidia GPU device found, no embedded CUDA variant found. You can ignore this message if you are using a container with CUDA support")
}
}
if strings.Contains(gpu.String(), "amd") {
@@ -222,7 +222,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
grpcProcess = p
foundAMDGPU = true
} else {
log.Info().Msgf("GPU device found but no HIPBLAS backend present")
log.Debug().Msgf("AMD GPU device found, no embedded HIPBLAS variant found. You can ignore this message if you are using a container with HIPBLAS support")
}
}
if strings.Contains(gpu.String(), "intel") {
@@ -236,7 +236,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
grpcProcess = p
foundIntelGPU = true
} else {
log.Info().Msgf("GPU device found but no Intel backend present")
log.Debug().Msgf("Intel GPU device found, no embedded SYCL variant found. You can ignore this message if you are using a container with SYCL support")
}
}
}


@@ -18,3 +18,15 @@ func RandString(n int) string {
}
return string(b)
}
func Unique(arr []string) []string {
unique := make(map[string]bool)
var result []string
for _, item := range arr {
if _, ok := unique[item]; !ok {
unique[item] = true
result = append(result, item)
}
}
return result
}