Compare commits

77 Commits

Author SHA1 Message Date
LocalAI [bot]
af0545834f chore: ⬆️ Update ggerganov/llama.cpp (#3102)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-08-01 00:55:09 +00:00
Ettore Di Giacinto
c492a9735a models(gallery): add tifa (#3099)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-31 17:14:46 +02:00
Ettore Di Giacinto
05c75ca617 models(gallery): add loki-base-i1 (#3098)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-31 17:10:31 +02:00
Ettore Di Giacinto
4c7e8f4d54 models(gallery): add meta-llama-3-instruct-12.2b-brainstorm-20x-form-8 (#3097)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-31 17:06:06 +02:00
Ettore Di Giacinto
115b523732 models(gallery): add tarnished-9b-i1 (#3096)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-31 16:09:58 +02:00
Ettore Di Giacinto
4767057088 models(gallery): add leetwizard (#3093)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-31 10:43:45 +02:00
Ettore Di Giacinto
33bc1e8b19 models(gallery): add gemmasutra-pro-27b-v1 (#3092)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-31 10:38:02 +02:00
Ettore Di Giacinto
8845524d01 models(gallery): add llama3.1-chinese-chat (#3091)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-31 09:36:17 +02:00
Ettore Di Giacinto
92faf5fd1d models(gallery): add seeker-9b (#3090)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-31 09:25:48 +02:00
Ettore Di Giacinto
2775edb3f0 models(gallery): add genius-llama3.1-i1 (#3089)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-31 09:21:24 +02:00
Ettore Di Giacinto
98ffc00926 models(gallery): add sunfall-simpo (#3088)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-31 09:17:10 +02:00
LocalAI [bot]
9b21f0d6ad chore: ⬆️ Update ggerganov/llama.cpp (#3086)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-30 21:55:24 +00:00
Ettore Di Giacinto
57ea7f81bb fix(ci): update openvoice checkpoints URLs (#3085)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-30 17:06:22 +02:00
Ettore Di Giacinto
274487c5eb fix(llama-cpp): do not compress with UPX (#3084)
Fixes: https://github.com/mudler/LocalAI/issues/3041

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-30 15:04:13 +02:00
Ettore Di Giacinto
17634b394b models(gallery): add meta-llama-3-instruct-8.9b-brainstorm-5x-form-11 (#3083)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-30 12:12:55 +02:00
Ettore Di Giacinto
2d59c99d31 models(gallery): add llama-guard-3-8b (#3082)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-30 12:07:52 +02:00
Ettore Di Giacinto
abcbbbed2d models(gallery): add l3.1-8b-celeste-v1.5 (#3080)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-30 10:04:47 +02:00
Ettore Di Giacinto
f1e90575f3 Revert "chore(deps): Bump setuptools from 70.3.0 to 72.1.0 in /backend/python/vllm" (#3079)
Revert "chore(deps): Bump setuptools from 70.3.0 to 72.1.0 in /backend/python…"

This reverts commit 5c747a16c4.
2024-07-30 09:21:45 +02:00
Ettore Di Giacinto
a7dbeb36ca Revert "chore(deps): Bump setuptools from 69.5.1 to 72.1.0 in /backend/python/transformers" (#3078)
Revert "chore(deps): Bump setuptools from 69.5.1 to 72.1.0 in /backend/python…"

This reverts commit 3feb869025.
2024-07-30 09:21:09 +02:00
Ettore Di Giacinto
d50c72a657 Revert "chore(deps): Bump setuptools from 69.5.1 to 72.1.0 in /backend/python/transformers-musicgen" (#3077)
Revert "chore(deps): Bump setuptools from 69.5.1 to 72.1.0 in /backend/python…"

This reverts commit f822bebfd8.
2024-07-30 09:20:57 +02:00
LocalAI [bot]
12b470f00a chore: ⬆️ Update ggerganov/llama.cpp (#3075)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-30 05:28:14 +00:00
dependabot[bot]
198bc6d939 chore(deps): Bump streamlit from 1.36.0 to 1.37.0 in /examples/streamlit-bot (#3072)
chore(deps): Bump streamlit in /examples/streamlit-bot

Bumps [streamlit](https://github.com/streamlit/streamlit) from 1.36.0 to 1.37.0.
- [Release notes](https://github.com/streamlit/streamlit/releases)
- [Commits](https://github.com/streamlit/streamlit/compare/1.36.0...1.37.0)

---
updated-dependencies:
- dependency-name: streamlit
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-30 04:39:56 +00:00
dependabot[bot]
3feb869025 chore(deps): Bump setuptools from 69.5.1 to 72.1.0 in /backend/python/transformers (#3071)
chore(deps): Bump setuptools in /backend/python/transformers

Bumps [setuptools](https://github.com/pypa/setuptools) from 69.5.1 to 72.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v69.5.1...v72.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-30 04:02:15 +00:00
dependabot[bot]
f24fac43da chore(deps): Bump setuptools from 70.3.0 to 72.1.0 in /backend/python/petals (#3070)
chore(deps): Bump setuptools in /backend/python/petals

Bumps [setuptools](https://github.com/pypa/setuptools) from 70.3.0 to 72.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v70.3.0...v72.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-30 03:58:11 +00:00
dependabot[bot]
9c96a73d93 chore(deps): Bump setuptools from 70.3.0 to 72.1.0 in /backend/python/vall-e-x (#3069)
chore(deps): Bump setuptools in /backend/python/vall-e-x

Bumps [setuptools](https://github.com/pypa/setuptools) from 70.3.0 to 72.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v70.3.0...v72.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-30 03:27:00 +00:00
dependabot[bot]
45233937b7 chore(deps): Bump setuptools from 70.3.0 to 72.1.0 in /backend/python/coqui (#3068)
chore(deps): Bump setuptools in /backend/python/coqui

Bumps [setuptools](https://github.com/pypa/setuptools) from 70.3.0 to 72.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v70.3.0...v72.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-30 03:06:11 +00:00
dependabot[bot]
f822bebfd8 chore(deps): Bump setuptools from 69.5.1 to 72.1.0 in /backend/python/transformers-musicgen (#3066)
chore(deps): Bump setuptools in /backend/python/transformers-musicgen

Bumps [setuptools](https://github.com/pypa/setuptools) from 69.5.1 to 72.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v69.5.1...v72.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-30 02:29:39 +00:00
dependabot[bot]
0dd02b2ad7 chore(deps): Bump setuptools from 70.3.0 to 72.1.0 in /backend/python/rerankers (#3067)
chore(deps): Bump setuptools in /backend/python/rerankers

Bumps [setuptools](https://github.com/pypa/setuptools) from 70.3.0 to 72.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v70.3.0...v72.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-30 02:15:53 +00:00
dependabot[bot]
9948ff2715 chore(deps): Bump setuptools from 70.3.0 to 72.1.0 in /backend/python/parler-tts (#3062)
chore(deps): Bump setuptools in /backend/python/parler-tts

Bumps [setuptools](https://github.com/pypa/setuptools) from 70.3.0 to 72.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v70.3.0...v72.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-30 01:21:56 +00:00
dependabot[bot]
0da042dc2b chore(deps): Bump chromadb from 0.5.4 to 0.5.5 in /examples/langchain-chroma (#3060)
chore(deps): Bump chromadb in /examples/langchain-chroma

Bumps [chromadb](https://github.com/chroma-core/chroma) from 0.5.4 to 0.5.5.
- [Release notes](https://github.com/chroma-core/chroma/releases)
- [Changelog](https://github.com/chroma-core/chroma/blob/main/RELEASE_PROCESS.md)
- [Commits](https://github.com/chroma-core/chroma/compare/0.5.4...0.5.5)

---
updated-dependencies:
- dependency-name: chromadb
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-30 01:11:05 +00:00
dependabot[bot]
5c747a16c4 chore(deps): Bump setuptools from 70.3.0 to 72.1.0 in /backend/python/vllm (#3061)
chore(deps): Bump setuptools in /backend/python/vllm

Bumps [setuptools](https://github.com/pypa/setuptools) from 70.3.0 to 72.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v70.3.0...v72.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-30 00:43:12 +00:00
dependabot[bot]
40604e877c chore(deps): Bump setuptools from 70.3.0 to 72.1.0 in /backend/python/autogptq (#3048)
chore(deps): Bump setuptools in /backend/python/autogptq

Bumps [setuptools](https://github.com/pypa/setuptools) from 70.3.0 to 72.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v70.3.0...v72.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-29 21:45:52 +00:00
dependabot[bot]
3dfed64a15 chore(deps): Bump openai from 1.37.0 to 1.37.1 in /examples/langchain/langchainpy-localai-example (#3051)
chore(deps): Bump openai

Bumps [openai](https://github.com/openai/openai-python) from 1.37.0 to 1.37.1.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.37.0...v1.37.1)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-29 21:29:08 +00:00
dependabot[bot]
e5f91fbba2 chore(deps): Bump langchain from 0.2.10 to 0.2.11 in /examples/langchain/langchainpy-localai-example (#3053)
chore(deps): Bump langchain

Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.2.10 to 0.2.11.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.2.10...langchain==0.2.11)

---
updated-dependencies:
- dependency-name: langchain
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-29 21:28:38 +00:00
Ettore Di Giacinto
4700c9df92 models(gallery): add l3.1-8b-llamoutcast-i1 (#3047)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-29 20:15:53 +02:00
Ettore Di Giacinto
6f8d6f601a models(gallery): add sekhmet_aleph-l3.1-8b-v0.1-i1 (#3046)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-29 16:45:00 +02:00
Ettore Di Giacinto
8a39707b36 models(gallery): add lumimaid-v0.2-70b-i1 (#3045)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-29 16:44:48 +02:00
Ettore Di Giacinto
e7df875db3 models(gallery): add magnum-32b-v1 (#3044)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-29 10:17:49 +02:00
LocalAI [bot]
cb042713e8 chore(model-gallery): ⬆️ update checksum (#3043)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-29 09:39:48 +02:00
Dave
7c4e526853 fix: install.sh bash specific equality check (#3038)
fix == to = for sh portability

Signed-off-by: Dave Lee <dave@gray101.com>
2024-07-29 01:19:36 +02:00
LocalAI [bot]
3a70cf311b chore(model-gallery): ⬆️ update checksum (#3040)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-28 21:53:00 +00:00
LocalAI [bot]
5d08b9ac68 docs: ⬆️ update docs version mudler/LocalAI (#3039)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-28 21:47:02 +00:00
LocalAI [bot]
86f8d5b50a chore(model-gallery): ⬆️ update checksum (#3036)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-28 11:11:23 +00:00
LocalAI [bot]
d4a3872dd9 chore: ⬆️ Update ggerganov/llama.cpp (#3034)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-28 10:46:18 +00:00
Ettore Di Giacinto
d6a7a77f6b fix(gallery): do clear out errors once displayed (#3033)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-28 10:28:10 +02:00
Ettore Di Giacinto
2a839e1432 fix(gallery): do not attempt to delete duplicate files (#3031)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-28 10:27:56 +02:00
LocalAI [bot]
610e1c00c6 chore: ⬆️ Update ggerganov/whisper.cpp (#3029)
⬆️ Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-27 21:52:57 +00:00
LocalAI [bot]
b1f93935be chore: ⬆️ Update ggerganov/llama.cpp (#3030)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-27 21:49:13 +00:00
Ettore Di Giacinto
d57acefed4 Update llama3-instruct.yaml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-27 15:30:13 +02:00
Ettore Di Giacinto
0a7e4c1b93 Update llama3.1-instruct-grammar.yaml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-27 15:30:01 +02:00
Ettore Di Giacinto
82cc81974f Update llama3.1-instruct.yaml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-27 15:29:50 +02:00
Ettore Di Giacinto
fe0d092f58 models(gallery): add llama3 with enforced functioncall with grammars (#3027)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 12:48:00 +02:00
Ettore Di Giacinto
0dd21f2b5e models(gallery): add lumimaid-8b (#3026)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 12:41:19 +02:00
Ettore Di Giacinto
f9fad3f4ee models: re-order
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 12:26:23 +02:00
Ettore Di Giacinto
7021c02d45 models(gallery): add openbuddy-llama3.1-8b-v22.1-131k (#3025)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 12:24:45 +02:00
Ettore Di Giacinto
7aa7f13095 models(gallery): add llama-3.1-8b-instruct-fei-v1-uncensored (#3024)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 12:22:30 +02:00
Ettore Di Giacinto
d59bcd539e models(gallery): add llama-3.1-70b-japanese-instruct-2407 (#3023)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 12:18:55 +02:00
Ettore Di Giacinto
d5a6c1e4f6 models(gallery): add meta-llama-3.1-8b-instruct-abliterated (#3022)
* models(gallery): add meta-llama-3.1-8b-instruct-abliterated

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Update gallery/index.yaml

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-27 11:00:21 +02:00
Ettore Di Giacinto
7ef8edda32 models(gallery): add darkidol-llama-3.1-8b-instruct-1.1-uncensored-iq… (#3021)
models(gallery): add darkidol-llama-3.1-8b-instruct-1.1-uncensored-iq-imatrix-request

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 10:59:06 +02:00
Ettore Di Giacinto
81c4b72258 models(gallery): add lumimaid-v0.2-12b (#3020)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 10:28:47 +02:00
Ettore Di Giacinto
fe4c8c8251 models(gallery): add llama3.1-8b-fireplace2 (#3018)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 10:24:56 +02:00
Ettore Di Giacinto
02d4eeffc8 models(gallery): add mistral-nemo (#3019)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-27 10:24:42 +02:00
LocalAI [bot]
80652abc9b chore: ⬆️ Update ggerganov/llama.cpp (#3016)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-26 23:26:28 +00:00
Ettore Di Giacinto
2169c3497d feat(grammar): add llama3.1 schema (#3015)
* wip

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* get rid of panics

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* expose it properly from the config

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Simplify

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* forgot to commit

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Remove focus on test

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Small fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-26 20:11:29 +02:00
Dave
fee52942eb fix: PR title tag for checksum checker script workflow (#3014)
* fix PR title tag for checksum checker script workflow

Signed-off-by: Dave Lee <dave@gray101.com>

* Update .github/workflows/checksum_checker.yaml

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

---------

Signed-off-by: Dave Lee <dave@gray101.com>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-26 08:46:57 +02:00
LocalAI [bot]
868182bc38 chore: ⬆️ Update ggerganov/llama.cpp (#3012)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-25 22:28:34 +00:00
LocalAI [bot]
ac37b47170 chore: models(gallery): ⬆️ update checksum (#3013)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-25 22:07:10 +00:00
Ettore Di Giacinto
43f49533e8 chore: add function calling template for llama 3.1 models (#3010)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-25 19:37:35 +02:00
Ettore Di Giacinto
3379c3d98c models(gallery): add stheno
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-25 19:37:15 +02:00
Ettore Di Giacinto
d605df471c models(gallery): add gemmoy (#3009)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-25 12:31:17 +02:00
Ettore Di Giacinto
8bf4ccf3ed Update index.yaml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-07-25 12:23:04 +02:00
Ettore Di Giacinto
392cf15877 models(gallery): add darkidol llama3.1 (#3008)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-25 12:22:09 +02:00
Ettore Di Giacinto
5eda7f578d refactor: break down json grammar parser in different files (#3004)
* refactor: break down json grammar parser in different files

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: patch to `refactor_grammars` - propagate errors (#3006)

propagate errors around

Signed-off-by: Dave Lee <dave@gray101.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Dave Lee <dave@gray101.com>
Co-authored-by: Dave <dave@gray101.com>
2024-07-25 08:41:00 +02:00
LocalAI [bot]
717cc6fe1a chore: ⬆️ Update ggerganov/llama.cpp (#3003)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-24 22:47:38 +00:00
LocalAI [bot]
9031d2b9eb docs: ⬆️ update docs version mudler/LocalAI (#3002)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-24 22:32:10 +00:00
Ettore Di Giacinto
4a69ef3052 models(gallery): add llama3.1-claude (#3005)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-24 23:40:08 +02:00
LocalAI [bot]
80ae919dbe chore: ⬆️ Update ggerganov/llama.cpp (#2995)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-24 15:37:08 +02:00
39 changed files with 1835 additions and 494 deletions

View File: .github/workflows/checksum_checker.yaml

@@ -41,7 +41,7 @@ jobs:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
title: 'models(gallery): :arrow_up: update checksum'
title: 'chore(model-gallery): :arrow_up: update checksum'
branch: "update/checksum"
body: Updating checksums in gallery/index.yaml
signoff: true

View File: Makefile

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=081fe431aa8fb6307145c4feb3eed4f48cab19f8
CPPLLAMA_VERSION?=ed9d2854c9de4ae1f448334294e61167b04bec2a
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -20,7 +20,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=f68298ce06ca3edd6e6f3f21c3d0bb5f073942c3
WHISPER_CPP_VERSION?=6739eb83c3ca5cf40d24c6fe8442a761a1eb6248
# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -783,9 +783,6 @@ else
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
endif
ifneq ($(UPX),)
$(UPX) backend/cpp/${VARIANT}/grpc-server
endif
# This target is for manually building a variant with-auto detected flags
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
@@ -858,9 +855,6 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
mkdir -p backend-assets/util/
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
ifneq ($(UPX),)
$(UPX) backend-assets/util/llama-cpp-rpc-server
endif
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
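
The Makefile hunks above remove the optional UPX compression of the llama.cpp gRPC binaries (the fix for https://github.com/mudler/LocalAI/issues/3041). A quick way to check that a rebuilt binary is no longer packed; a sketch only, assuming a local checkout where these targets build:

# rebuild the llama.cpp backend binary (target name taken from the Makefile above)
make backend-assets/grpc/llama-cpp
# upx -t only succeeds on UPX-packed files, so a NotPackedException
# here is the expected result after this change
upx -t backend-assets/grpc/llama-cpp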

View File: backend/python/autogptq/requirements-intel.txt

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406

View File: backend/python/coqui/requirements-intel.txt

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -5,7 +5,7 @@ source $(dirname $0)/../common/libbackend.sh
# Download checkpoints if not present
if [ ! -d "checkpoints_v2" ]; then
wget https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
unzip checkpoints_v2.zip
fi

View File: backend/python/parler-tts/requirements-intel.txt

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406

View File: backend/python/petals/requirements-intel.txt

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406

View File: backend/python/rerankers/requirements-intel.txt

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406

View File: backend/python/vall-e-x/requirements-intel.txt

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -204,35 +204,34 @@ func DeleteModelFromSystem(basePath string, name string, additionalFiles []strin
log.Error().Err(err).Msgf("failed to read gallery file %s", configFile)
}
var filesToRemove []string
// Remove additional files
if galleryconfig != nil {
for _, f := range galleryconfig.Files {
fullPath := filepath.Join(basePath, f.Filename)
log.Debug().Msgf("Removing file %s", fullPath)
if e := os.Remove(fullPath); e != nil {
err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f.Filename, e))
}
filesToRemove = append(filesToRemove, fullPath)
}
}
for _, f := range additionalFiles {
fullPath := filepath.Join(filepath.Join(basePath, f))
log.Debug().Msgf("Removing additional file %s", fullPath)
if e := os.Remove(fullPath); e != nil {
filesToRemove = append(filesToRemove, fullPath)
}
filesToRemove = append(filesToRemove, configFile)
filesToRemove = append(filesToRemove, galleryFile)
// skip duplicates
filesToRemove = utils.Unique(filesToRemove)
// Removing files
for _, f := range filesToRemove {
if e := os.Remove(f); e != nil {
err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f, e))
}
}
log.Debug().Msgf("Removing model config file %s", configFile)
// Delete the model config file
if e := os.Remove(configFile); e != nil {
err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", configFile, e))
}
// Delete gallery config file
os.Remove(galleryFile)
return err
}

View File: core/http/elements/gallery.go

@@ -9,7 +9,6 @@ import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/xsync"
)
const (
@@ -372,7 +371,12 @@ func dropBadChars(s string) string {
return strings.ReplaceAll(s, "@", "__")
}
func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[string, string], galleryService *services.GalleryService) string {
type ProcessTracker interface {
Exists(string) bool
Get(string) string
}
func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) string {
modelsElements := []elem.Node{}
descriptionDiv := func(m *gallery.GalleryModel) elem.Node {
return elem.Div(
@@ -396,7 +400,7 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri
actionDiv := func(m *gallery.GalleryModel) elem.Node {
galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name)
currentlyProcessing := processing.Exists(galleryID)
currentlyProcessing := processTracker.Exists(galleryID)
jobID := ""
isDeletionOp := false
if currentlyProcessing {
@@ -404,7 +408,7 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri
if status != nil && status.Deletion {
isDeletionOp = true
}
jobID = processing.Get(galleryID)
jobID = processTracker.Get(galleryID)
// TODO:
// case not handled, if status == nil : "Waiting"
}

View File: core/http/endpoints/openai/chat.go

@@ -226,9 +226,15 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
// Update input grammar
jsStruct := funcs.ToJSONStructure(config.FunctionsConfig.FunctionNameKey, config.FunctionsConfig.FunctionNameKey)
config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
g, err := jsStruct.Grammar(config.FunctionsConfig.GrammarOptions()...)
if err == nil {
config.Grammar = g
}
case input.JSONFunctionGrammarObject != nil:
config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
g, err := input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarOptions()...)
if err == nil {
config.Grammar = g
}
default:
// Force picking one of the functions by the request
if config.FunctionToCall() != "" {
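
The change above makes both grammar branches propagate errors from Grammar() instead of panicking (per the "get rid of panics" step in #3015). For orientation, a hypothetical request that exercises this path through the OpenAI-compatible API, assuming a LocalAI instance on localhost:8080 with the grammar-enabled gallery model shown further below installed (the get_weather tool is illustrative only):

curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
  "model": "meta-llama-3.1-8b-instruct:grammar-functioncall",
  "messages": [{"role": "user", "content": "What is the weather in Rome?"}],
  "tools": [{"type": "function", "function": {
    "name": "get_weather",
    "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}
  }}],
  "tool_choice": "auto"
}'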

View File: core/http/routes/ui.go

@@ -21,6 +21,40 @@ import (
"github.com/google/uuid"
)
type modelOpCache struct {
status *xsync.SyncedMap[string, string]
}
func NewModelOpCache() *modelOpCache {
return &modelOpCache{
status: xsync.NewSyncedMap[string, string](),
}
}
func (m *modelOpCache) Set(key string, value string) {
m.status.Set(key, value)
}
func (m *modelOpCache) Get(key string) string {
return m.status.Get(key)
}
func (m *modelOpCache) DeleteUUID(uuid string) {
for _, k := range m.status.Keys() {
if m.status.Get(k) == uuid {
m.status.Delete(k)
}
}
}
func (m *modelOpCache) Map() map[string]string {
return m.status.Map()
}
func (m *modelOpCache) Exists(key string) bool {
return m.status.Exists(key)
}
func RegisterUIRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
@@ -29,7 +63,7 @@ func RegisterUIRoutes(app *fiber.App,
auth func(*fiber.Ctx) error) {
// keeps the state of models that are being installed from the UI
var processingModels = xsync.NewSyncedMap[string, string]()
var processingModels = NewModelOpCache()
// modelStatus returns the current status of the models being processed (installation or deletion)
// it is called asynchonously from the UI
@@ -232,6 +266,8 @@ func RegisterUIRoutes(app *fiber.App,
return c.SendString(elements.ProgressBar("100"))
}
if status.Error != nil {
// TODO: instead of deleting the job, we should keep it in the cache and make it dismissable
processingModels.DeleteUUID(jobUID)
return c.SendString(elements.ErrorProgress(status.Error.Error(), status.GalleryModelName))
}
@@ -246,12 +282,7 @@ func RegisterUIRoutes(app *fiber.App,
status := galleryService.GetStatus(jobUID)
galleryID := ""
for _, k := range processingModels.Keys() {
if processingModels.Get(k) == jobUID {
galleryID = k
processingModels.Delete(k)
}
}
processingModels.DeleteUUID(jobUID)
if galleryID == "" {
log.Debug().Msgf("no processing model found for job : %+v\n", jobUID)
}

View File: docs/data/version.json

@@ -1,3 +1,3 @@
{
"version": "v2.19.1"
"version": "v2.19.3"
}

View File: docs/static/install.sh

@@ -194,7 +194,7 @@ install_container_toolkit_yum() {
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | \
$SUDO tee /etc/yum.repos.d/nvidia-container-toolkit.repo
if [ "$PACKAGE_MANAGER" == "dnf" ]; then
if [ "$PACKAGE_MANAGER" = "dnf" ]; then
$SUDO $PACKAGE_MANAGER config-manager --enable nvidia-container-toolkit-experimental
else
$SUDO $PACKAGE_MANAGER -y install yum-utils
@@ -629,7 +629,7 @@ case "$ARCH" in
*) fatal "Unsupported architecture: $ARCH" ;;
esac
if [ "$OS" == "Darwin" ]; then
if [ "$OS" = "Darwin" ]; then
install_binary_darwin
exit 0
fi
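
Context for the two == fixes above: == inside [ ] is a bash extension, and install.sh is run with plain sh, where the POSIX test command only defines =. A minimal illustration, assuming both bash and dash are available:

bash -c '[ "Darwin" == "Darwin" ] && echo match'   # ok: bash's test accepts ==
dash -c '[ "Darwin" == "Darwin" ] && echo match'   # error: "unexpected operator" under a strict POSIX sh
dash -c '[ "Darwin" = "Darwin" ] && echo match'    # ok: the portable form the fix switches to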

View File: examples/langchain-chroma/requirements.txt

@@ -1,4 +1,4 @@
langchain==0.2.10
openai==1.37.0
chromadb==0.5.4
chromadb==0.5.5
llama-index==0.10.56

View File: examples/langchain/langchainpy-localai-example/requirements.txt

@@ -10,7 +10,7 @@ debugpy==1.8.2
frozenlist==1.4.1
greenlet==3.0.3
idna==3.7
langchain==0.2.10
langchain==0.2.11
langchain-community==0.2.9
marshmallow==3.21.3
marshmallow-enum==1.5.1
@@ -18,7 +18,7 @@ multidict==6.0.5
mypy-extensions==1.0.0
numexpr==2.10.1
numpy==2.0.1
openai==1.37.0
openai==1.37.1
openapi-schema-pydantic==1.2.4
packaging>=23.2
pydantic==2.8.2

View File: examples/streamlit-bot/requirements.txt

@@ -1,2 +1,2 @@
streamlit==1.36.0
streamlit==1.37.0
requests

View File: gallery/alpaca.yaml (new file, 17 lines)

@@ -0,0 +1,17 @@
---
name: "alpaca"
config_file: |
context_size: 4096
f16: true
mmap: true
template:
chat: |
Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{{.Input}}
### Response:
completion: |
{{.Input}}
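
gallery/alpaca.yaml above is a reusable template config: gallery entries point at files like this through their url field (as gallery/index.yaml does below), and LocalAI fills {{.Input}} with the incoming prompt. A model that uses such a config can be pulled at runtime through the gallery API; a sketch, assuming LocalAI on localhost:8080 and an id that actually exists in the configured galleries (the one shown is illustrative):

curl http://localhost:8080/models/apply -H "Content-Type: application/json" \
  -d '{"id": "localai@meta-llama-3.1-8b-instruct"}'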

View File: gallery/index.yaml

@@ -1,16 +1,16 @@
---
## LLama3.1
- &llama31
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
name: "meta-llama-3.1-8b-instruct"
license: llama3.1
description: |
The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.
Model developer: Meta
Model Architecture: Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
urls:
- https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
- https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF
@@ -39,8 +39,338 @@
- filename: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
sha256: 3f16ab17da4521fe3ed7c5d7beed960d3fe7b5b64421ee9650aa53d6b649ccab
uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
## Deepseek
- !!merge <<: *llama31
name: "meta-llama-3.1-8b-instruct:grammar-functioncall"
url: "github:mudler/LocalAI/gallery/llama3.1-instruct-grammar.yaml@master"
urls:
- https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
- https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF
description: |
This is the standard Llama 3.1 8B Instruct model with grammar and function call enabled.
When grammars are enabled in LocalAI, the LLM is forced to output valid tools constrained by BNF grammars. This can be useful for ensuring that the model outputs are valid and can be used in a production environment.
For more information on how to use grammars in LocalAI, see https://localai.io/features/openai-functions/#advanced and https://localai.io/features/constrained_grammars/.
overrides:
parameters:
model: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
files:
- filename: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
sha256: c2f17f44af962660d1ad4cb1af91a731f219f3b326c2b14441f9df1f347f2815
uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
- !!merge <<: *llama31
name: "meta-llama-3.1-8b-claude-imat"
urls:
- https://huggingface.co/Undi95/Meta-Llama-3.1-8B-Claude
- https://huggingface.co/InferenceIllusionist/Meta-Llama-3.1-8B-Claude-iMat-GGUF
description: |
Meta-Llama-3.1-8B-Claude-iMat-GGUF: Quantized from Meta-Llama-3.1-8B-Claude fp16. Weighted quantizations were creating using fp16 GGUF and groups_merged.txt in 88 chunks and n_ctx=512. Static fp16 will also be included in repo. For a brief rundown of iMatrix quant performance, please see this PR. All quants are verified working prior to uploading to repo for your safety and convenience.
overrides:
parameters:
model: Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf
files:
- filename: Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf
uri: huggingface://InferenceIllusionist/Meta-Llama-3.1-8B-Claude-iMat-GGUF/Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf
sha256: 6d175432f66d10dfed9737f73a5073d513d18e1ee7bd4b9cf2a59deb359f36ff
- !!merge <<: *llama31
name: "meta-llama-3.1-8b-instruct-abliterated"
icon: https://i.imgur.com/KhorYYG.png
urls:
- https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated
- https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF
description: |
This is an uncensored version of Llama 3.1 8B Instruct created with abliteration.
overrides:
parameters:
model: meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf
files:
- filename: meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf
uri: huggingface://mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF/meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf
sha256: 2e1fd6d93b19cc6548b2b8ed2d3f1f34b432ee0573f3dcf358bbaab4f23c760b
- !!merge <<: *llama31
name: "llama-3.1-70b-japanese-instruct-2407"
urls:
- https://huggingface.co/cyberagent/Llama-3.1-70B-Japanese-Instruct-2407
- https://huggingface.co/mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf
description: |
The Llama-3.1-70B-Japanese-Instruct-2407-gguf model is a Japanese language model that uses the Instruct prompt tuning method. It is based on the LLaMa-3.1-70B model and has been fine-tuned on the imatrix dataset for Japanese. The model is trained to generate informative and coherent responses to given instructions or prompts. It is available in the gguf format and can be used for a variety of tasks such as question answering, text generation, and more.
overrides:
parameters:
model: Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf
files:
- filename: Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf
sha256: f2a6f0fb5040d3a28479c9f9fc555a5ea7b906dfb9964539f1a68c0676a9c604
uri: huggingface://mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf/Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf
- !!merge <<: *llama31
name: "openbuddy-llama3.1-8b-v22.1-131k"
icon: https://raw.githubusercontent.com/OpenBuddy/OpenBuddy/main/media/demo.png
urls:
- https://huggingface.co/sunnyyy/openbuddy-llama3.1-8b-v22.1-131k-Q4_K_M-GGUF
description: |
OpenBuddy - Open Multilingual Chatbot
overrides:
parameters:
model: openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf
files:
- filename: openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf
sha256: c87a273785759f2d044046b7a7b42f05706baed7dc0650ed883a3bee2a097d86
uri: huggingface://sunnyyy/openbuddy-llama3.1-8b-v22.1-131k-Q4_K_M-GGUF/openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf
- !!merge <<: *llama31
name: "llama3.1-8b-fireplace2"
icon: https://cdn-uploads.huggingface.co/production/uploads/64f267a8a4f79a118e0fcc89/JYkaXrk2DqpXhaL9WymKY.jpeg
urls:
- https://huggingface.co/ValiantLabs/Llama3.1-8B-Fireplace2
- https://huggingface.co/mudler/Llama3.1-8B-Fireplace2-Q4_K_M-GGUF
description: |
Fireplace 2 is a chat model, adding helpful structured outputs to Llama 3.1 8b Instruct.
an expansion pack of supplementary outputs - request them at will within your chat:
Inline function calls
SQL queries
JSON objects
Data visualization with matplotlib
Mix normal chat and structured outputs within the same conversation.
Fireplace 2 supplements the existing strengths of Llama 3.1, providing inline capabilities within the Llama 3 Instruct format.
Version
This is the 2024-07-23 release of Fireplace 2 for Llama 3.1 8b.
We're excited to bring further upgrades and releases to Fireplace 2 in the future.
Help us and recommend Fireplace 2 to your friends!
overrides:
parameters:
model: llama3.1-8b-fireplace2-q4_k_m.gguf
files:
- filename: llama3.1-8b-fireplace2-q4_k_m.gguf
sha256: 54527fd2474b576086ea31e759214ab240abe2429ae623a02d7ba825cc8cb13e
uri: huggingface://mudler/Llama3.1-8B-Fireplace2-Q4_K_M-GGUF/llama3.1-8b-fireplace2-q4_k_m.gguf
- !!merge <<: *llama31
name: "sekhmet_aleph-l3.1-8b-v0.1-i1"
icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/SVyiW4mu495ngqszJGWRl.png
urls:
- https://huggingface.co/Nitral-Archive/Sekhmet_Aleph-L3.1-8B-v0.1
- https://huggingface.co/mradermacher/Sekhmet_Aleph-L3.1-8B-v0.1-i1-GGUF
overrides:
parameters:
model: Sekhmet_Aleph-L3.1-8B-v0.1.i1-Q4_K_M.gguf
files:
- filename: Sekhmet_Aleph-L3.1-8B-v0.1.i1-Q4_K_M.gguf
sha256: 5b6f4eaa2091bf13a2b563a54a3f87b22efa7f2862362537c956c70da6e11cea
uri: huggingface://mradermacher/Sekhmet_Aleph-L3.1-8B-v0.1-i1-GGUF/Sekhmet_Aleph-L3.1-8B-v0.1.i1-Q4_K_M.gguf
- !!merge <<: *llama31
name: "l3.1-8b-llamoutcast-i1"
icon: https://files.catbox.moe/ecgn0m.jpg
urls:
- https://huggingface.co/Envoid/L3.1-8B-Llamoutcast
- https://huggingface.co/mradermacher/L3.1-8B-Llamoutcast-i1-GGUF
description: |
Warning: this model is utterly cursed.
Llamoutcast
This model was originally intended to be a DADA finetune of Llama-3.1-8B-Instruct but the results were unsatisfactory. So it received some additional finetuning on a rawtext dataset and now it is utterly cursed.
It responds to Llama-3 Instruct formatting.
overrides:
parameters:
model: L3.1-8B-Llamoutcast.i1-Q4_K_M.gguf
files:
- filename: L3.1-8B-Llamoutcast.i1-Q4_K_M.gguf
sha256: 438ca0a7e9470f5ee40f3b14dc2da41b1cafc4ad4315dead3eb57924109d5cf6
uri: huggingface://mradermacher/L3.1-8B-Llamoutcast-i1-GGUF/L3.1-8B-Llamoutcast.i1-Q4_K_M.gguf
- !!merge <<: *llama31
name: "llama-guard-3-8b"
urls:
- https://huggingface.co/meta-llama/Llama-Guard-3-8B
- https://huggingface.co/QuantFactory/Llama-Guard-3-8B-GGUF
description: |
Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM: it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.
Llama Guard 3 was aligned to safeguard against the MLCommons standardized hazards taxonomy and designed to support Llama 3.1 capabilities. Specifically, it provides content moderation in 8 languages, and was optimized to support safety and security for search and code interpreter tool calls.
overrides:
parameters:
model: Llama-Guard-3-8B.Q4_K_M.gguf
files:
- filename: Llama-Guard-3-8B.Q4_K_M.gguf
sha256: c5ea8760a1e544eea66a8915fcc3fbd2c67357ea2ee6871a9e6a6c33b64d4981
uri: huggingface://QuantFactory/Llama-Guard-3-8B-GGUF/Llama-Guard-3-8B.Q4_K_M.gguf
- !!merge <<: *llama31
name: "genius-llama3.1-i1"
icon: https://github.com/fangyuan-ksgk/GeniusUpload/assets/66006349/7272c93e-9806-461c-a3d0-2e50ef2b7af0
urls:
- https://huggingface.co/Ksgk-fy/Genius-Llama3.1
- https://huggingface.co/mradermacher/Genius-Llama3.1-i1-GGUF
description: |
Finetuned Llama-3.1 base on Lex Fridman's podcast transcript.
overrides:
parameters:
model: Genius-Llama3.1.i1-Q4_K_M.gguf
files:
- filename: Genius-Llama3.1.i1-Q4_K_M.gguf
sha256: a272bb2a6ab7ed565738733fb8af8e345b177eba9e76ce615ea845c25ebf8cd5
uri: huggingface://mradermacher/Genius-Llama3.1-i1-GGUF/Genius-Llama3.1.i1-Q4_K_M.gguf
- !!merge <<: *llama31
name: "llama3.1-8b-chinese-chat"
urls:
- https://huggingface.co/shenzhi-wang/Llama3.1-8B-Chinese-Chat
- https://huggingface.co/QuantFactory/Llama3.1-8B-Chinese-Chat-GGUF
description: |
llama3.1-8B-Chinese-Chat is an instruction-tuned language model for Chinese & English users with various abilities such as roleplaying & tool-using built upon the Meta-Llama-3.1-8B-Instruct model. Developers: [Shenzhi Wang](https://shenzhi-wang.netlify.app)*, [Yaowei Zheng](https://github.com/hiyouga)*, Guoyin Wang (in.ai), Shiji Song, Gao Huang. (*: Equal Contribution) - License: [Llama-3.1 License](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B/blob/main/LICENSE) - Base Model: Meta-Llama-3.1-8B-Instruct - Model Size: 8.03B - Context length: 128K (reported by [Meta-Llama-3.1-8B-Instruct model](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct), untested for our Chinese model)
overrides:
parameters:
model: Llama3.1-8B-Chinese-Chat.Q4_K_M.gguf
files:
- filename: Llama3.1-8B-Chinese-Chat.Q4_K_M.gguf
sha256: 824847b6cca82c4d60107c6a059d80ba975a68543e6effd98880435436ddba06
uri: huggingface://QuantFactory/Llama3.1-8B-Chinese-Chat-GGUF/Llama3.1-8B-Chinese-Chat.Q4_K_M.gguf
- !!merge <<: *llama31
name: "llama3.1-70b-chinese-chat"
urls:
- https://huggingface.co/shenzhi-wang/Llama3.1-70B-Chinese-Chat
- https://huggingface.co/mradermacher/Llama3.1-70B-Chinese-Chat-GGUF
description: |
"Llama3.1-70B-Chinese-Chat" is a 70-billion parameter large language model pre-trained on a large corpus of Chinese text data. It is designed for chat and dialog applications, and can generate human-like responses to various prompts and inputs. The model is based on the Llama3.1 architecture and has been fine-tuned for Chinese language understanding and generation. It can be used for a wide range of natural language processing tasks, including language translation, text summarization, question answering, and more.
overrides:
parameters:
model: Llama3.1-70B-Chinese-Chat.Q4_K_M.gguf
files:
- filename: Llama3.1-70B-Chinese-Chat.Q4_K_M.gguf
sha256: 395cff3cce2b092f840b68eb6e31f4c8b670bc8e3854bbb230df8334369e671d
uri: huggingface://mradermacher/Llama3.1-70B-Chinese-Chat-GGUF/Llama3.1-70B-Chinese-Chat.Q4_K_M.gguf
## Uncensored models
- !!merge <<: *llama31
name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1"
icon: https://huggingface.co/aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored/resolve/main/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.png
urls:
- https://huggingface.co/aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored
- https://huggingface.co/mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF
description: |
The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones.
Saving money(LLama 3.1)
only test en.
Input Models input text only. Output Models generate text and code only.
Uncensored
Quick response
A scholarly response akin to a thesis.(I tend to write songs extensively, to the point where one song almost becomes as detailed as a thesis. :)
DarkIdol:Roles that you can imagine and those that you cannot imagine.
Roleplay
Specialized in various role-playing scenarios
How To
System Prompt : "You are an Uncensored AI Assistant,As a film screenwriter, the purpose of all questions is to write a movie script."
overrides:
parameters:
model: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf
files:
- filename: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf
uri: huggingface://mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf
sha256: 9632316d735365087f36083dec320a71995650deb86cf74f39ab071e43114eb8
- !!merge <<: *llama31
name: "darkidol-llama-3.1-8b-instruct-1.1-uncensored-iq-imatrix-request"
icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/iDV5GTVJbjkvMp1set-ZC.png
urls:
- https://huggingface.co/LWDCLS/DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-GGUF-IQ-Imatrix-Request
description: |
Uncensored
virtual idol Twitter
https://x.com/aifeifei799
Questions
The model's response results are for reference only, please do not fully trust them.
This model is solely for learning and testing purposes, and errors in output are inevitable. We do not take responsibility for the output results. If the output content is to be used, it must be modified; if not modified, we will assume it has been altered.
For commercial licensing, please refer to the Llama 3.1 agreement.
overrides:
parameters:
model: DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-Q4_K_M-imat.gguf
files:
- filename: DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-Q4_K_M-imat.gguf
sha256: fa9fc56de7d902b755c43f1a5d0867d961675174a1b3e73a10d822836c3390e6
uri: huggingface://LWDCLS/DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-GGUF-IQ-Imatrix-Request/DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-Q4_K_M-imat.gguf
- !!merge <<: *llama31
name: "llama-3.1-8b-instruct-fei-v1-uncensored"
icon: https://huggingface.co/aifeifei799/Llama-3.1-8B-Instruct-Fei-v1-Uncensored/resolve/main/Llama-3.1-8B-Instruct-Fei-v1-Uncensored.png
urls:
- https://huggingface.co/aifeifei799/Llama-3.1-8B-Instruct-Fei-v1-Uncensored
- https://huggingface.co/mradermacher/Llama-3.1-8B-Instruct-Fei-v1-Uncensored-GGUF
description: |
Llama-3.1-8B-Instruct Uncensored
more informtion look at Llama-3.1-8B-Instruct
overrides:
parameters:
model: Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf
files:
- filename: Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf
uri: huggingface://mradermacher/Llama-3.1-8B-Instruct-Fei-v1-Uncensored-GGUF/Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf
sha256: 6b1985616160712eb884c34132dc0602fa4600a19075e3a7b179119b89b73f77
- !!merge <<: *llama31
name: "lumimaid-v0.2-8b"
urls:
- https://huggingface.co/NeverSleep/Lumimaid-v0.2-8B
- https://huggingface.co/mradermacher/Lumimaid-v0.2-8B-GGUF
icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/TUcHg7LKNjfo0sni88Ps7.png
description: |
This model is based on: Meta-Llama-3.1-8B-Instruct
Wandb: https://wandb.ai/undis95/Lumi-Llama-3-1-8B?nw=nwuserundis95
Lumimaid 0.1 -> 0.2 is a HUGE step up dataset wise.
As some people have told us our models are sloppy, Ikari decided to say fuck it and literally nuke all chats out with most slop.
Our dataset stayed the same since day one, we added data over time, cleaned them, and repeat. After not releasing model for a while because we were never satisfied, we think it's time to come back!
overrides:
parameters:
model: Lumimaid-v0.2-8B.Q4_K_M.gguf
files:
- filename: Lumimaid-v0.2-8B.Q4_K_M.gguf
sha256: c8024fcb49c71410903d0d076a1048249fa48b31637bac5177bf5c3f3d603d85
uri: huggingface://mradermacher/Lumimaid-v0.2-8B-GGUF/Lumimaid-v0.2-8B.Q4_K_M.gguf
- !!merge <<: *llama31
name: "lumimaid-v0.2-70b-i1"
icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/HY1KTq6FMAm-CwmY8-ndO.png
urls:
- https://huggingface.co/NeverSleep/Lumimaid-v0.2-70B
- https://huggingface.co/mradermacher/Lumimaid-v0.2-70B-i1-GGUF
description: |
This model is based on: Meta-Llama-3.1-8B-Instruct
Wandb: https://wandb.ai/undis95/Lumi-Llama-3-1-8B?nw=nwuserundis95
Lumimaid 0.1 -> 0.2 is a HUGE step up dataset wise.
As some people have told us our models are sloppy, Ikari decided to say fuck it and literally nuke all chats out with most slop.
Our dataset stayed the same since day one, we added data over time, cleaned them, and repeat. After not releasing model for a while because we were never satisfied, we think it's time to come back!
overrides:
parameters:
model: Lumimaid-v0.2-70B.i1-Q4_K_M.gguf
files:
- filename: Lumimaid-v0.2-70B.i1-Q4_K_M.gguf
sha256: 4857da8685cb0f3d2b8b8c91fb0c07b35b863eb7c185e93ed83ac338e095cbb5
uri: huggingface://mradermacher/Lumimaid-v0.2-70B-i1-GGUF/Lumimaid-v0.2-70B.i1-Q4_K_M.gguf
- !!merge <<: *llama31
name: "l3.1-8b-celeste-v1.5"
icon: https://cdn-uploads.huggingface.co/production/uploads/630cf5d14ca0a22768bbe10c/QcU3xEgVu18jeFtMFxIw-.webp
urls:
- https://huggingface.co/nothingiisreal/L3.1-8B-Celeste-V1.5
- https://huggingface.co/bartowski/L3.1-8B-Celeste-V1.5-GGUF
description: |
The LLM model is a large language model trained on a combination of datasets including nothingiisreal/c2-logs-cleaned, kalomaze/Opus_Instruct_25k, and nothingiisreal/Reddit-Dirty-And-WritingPrompts. The training was performed on a combination of English-language data using the Hugging Face Transformers library.
Trained on LLaMA 3.1 8B Instruct at 8K context using a new mix of Reddit Writing Prompts, Kalo's Opus 25K Instruct and c2 logs cleaned This version has the highest coherency and is very strong on OOC: instruct following.
overrides:
parameters:
model: L3.1-8B-Celeste-V1.5-Q4_K_M.gguf
files:
- filename: L3.1-8B-Celeste-V1.5-Q4_K_M.gguf
sha256: a408dfbbd91ed5561f70d3129af040dfd06704d6c7fa21146aa9f09714aafbc6
uri: huggingface://bartowski/L3.1-8B-Celeste-V1.5-GGUF/L3.1-8B-Celeste-V1.5-Q4_K_M.gguf
- &deepseek
## Deepseek
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
name: "deepseek-coder-v2-lite-instruct"
icon: "https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true"
@@ -317,6 +647,40 @@
- filename: StellarDong-72b.i1-Q4_K_M.gguf
sha256: 4c5012f0a034f40a044904891343ade2594f29c28a8a9d8052916de4dc5a61df
uri: huggingface://mradermacher/StellarDong-72b-i1-GGUF/StellarDong-72b.i1-Q4_K_M.gguf
- !!merge <<: *qwen2
name: "magnum-32b-v1-i1"
icon: https://cdn-uploads.huggingface.co/production/uploads/635567189c72a7e742f1419c/PK7xRSd18Du0bX-w_t-9c.png
urls:
- https://huggingface.co/anthracite-org/magnum-32b-v1
- https://huggingface.co/mradermacher/magnum-32b-v1-i1-GGUF
description: |
This is the second in a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet and Opus. This model is fine-tuned on top of Qwen1.5 32B.
overrides:
parameters:
model: magnum-32b-v1.i1-Q4_K_M.gguf
files:
- filename: magnum-32b-v1.i1-Q4_K_M.gguf
sha256: a31704ce0d7e5b774f155522b9ab7ef6015a4ece4e9056bf4dfc6cac561ff0a3
uri: huggingface://mradermacher/magnum-32b-v1-i1-GGUF/magnum-32b-v1.i1-Q4_K_M.gguf
- !!merge <<: *qwen2
name: "tifa-7b-qwen2-v0.1"
urls:
- https://huggingface.co/Tifa-RP/Tifa-7B-Qwen2-v0.1-GGUF
description: |
The Tifa role-playing language model is a high-performance model distilled from a self-developed 220B model, with qwen2-7B as its new base. It has been converted to GGUF format for running in the Ollama framework, and offers excellent dialogue and text-generation capabilities.
The original model was trained on a large-scale industrial dataset and then fine-tuned with 400GB of novel data and 20GB of multi-round dialogue instruction data to achieve good role-playing performance.
The Tifa model is suited to multi-round dialogue, role-playing and scenario simulation, EFX industrial knowledge integration, and high-quality literary creation.
Note: The Tifa model covers Chinese and English, with 7.6% of the data being Chinese role-play and 4.2% English role-play. Since 2023 it has been trained on a mix of EFX industrial-field parameters and question-answer dialogues generated from 220B model outputs. The recommended quantization is f16, as it retains more of the model's detail and accuracy.
overrides:
parameters:
model: tifa-7b-qwen2-v0.1.q4_k_m.gguf
files:
- filename: tifa-7b-qwen2-v0.1.q4_k_m.gguf
sha256: 1f5adbe8cb0a6400f51abdca3bf4e32284ebff73cc681a43abb35c0a6ccd3820
uri: huggingface://Tifa-RP/Tifa-7B-Qwen2-v0.1-GGUF/tifa-7b-qwen2-v0.1.q4_k_m.gguf
- &mistral03
## START Mistral
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
@@ -391,12 +755,7 @@
- gpu
- mistral
- cpu
description: |
🔬 Einstein-v4-7B
This model is a full fine-tuned version of mistralai/Mistral-7B-v0.1 on diverse datasets.
This model is finetuned using 7xRTX3090 + 1xRTXA6000 with axolotl.
description: "\U0001F52C Einstein-v4-7B\n\nThis model is a full fine-tuned version of mistralai/Mistral-7B-v0.1 on diverse datasets.\n\nThis model is finetuned using 7xRTX3090 + 1xRTXA6000 using axolotl.\n"
overrides:
parameters:
model: Einstein-v4-7B.Q4_K_M.gguf
@@ -404,6 +763,46 @@
- filename: Einstein-v4-7B.Q4_K_M.gguf
sha256: 78bd573de2a9eb3c6e213132858164e821145f374fcaa4b19dfd6502c05d990d
uri: huggingface://mradermacher/Einstein-v4-7B-GGUF/Einstein-v4-7B.Q4_K_M.gguf
- !!merge <<: *mistral03
name: "mistral-nemo-instruct-2407"
urls:
- https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407
- https://huggingface.co/bartowski/Mistral-Nemo-Instruct-2407-GGUF
- https://mistral.ai/news/mistral-nemo/
description: |
The Mistral-Nemo-Instruct-2407 Large Language Model (LLM) is an instruct fine-tuned version of the Mistral-Nemo-Base-2407. Trained jointly by Mistral AI and NVIDIA, it significantly outperforms existing models smaller or similar in size.
overrides:
parameters:
model: Mistral-Nemo-Instruct-2407-Q4_K_M.gguf
files:
- filename: Mistral-Nemo-Instruct-2407-Q4_K_M.gguf
sha256: 1a8b92fb546a80dce78151e4908f7bdb2c11fb3ef52af960e4bbe319a9cc5052
uri: huggingface://bartowski/Mistral-Nemo-Instruct-2407-GGUF/Mistral-Nemo-Instruct-2407-Q4_K_M.gguf
- !!merge <<: *mistral03
name: "lumimaid-v0.2-12b"
icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/ep3ojmuMkFS-GmgRuI9iB.png
urls:
- https://huggingface.co/NeverSleep/Lumimaid-v0.2-12B
- https://huggingface.co/mudler/Lumimaid-v0.2-12B-Q4_K_M-GGUF
description: |
This model is based on: Mistral-Nemo-Instruct-2407
Wandb: https://wandb.ai/undis95/Lumi-Mistral-Nemo?nw=nwuserundis95
NOTE: As explained in the Mistral-Nemo-Instruct-2407 repo, a low temperature is recommended; please experiment!
Lumimaid 0.1 -> 0.2 is a HUGE step up dataset-wise.
As some people have told us our models are sloppy, Ikari decided to say fuck it and nuke out all the chats with the most slop.
Our dataset has stayed the same since day one; we added data over time, cleaned it, and repeated. After not releasing a model for a while because we were never satisfied, we think it's time to come back!
overrides:
parameters:
model: lumimaid-v0.2-12b-q4_k_m.gguf
files:
- filename: lumimaid-v0.2-12b-q4_k_m.gguf
sha256: f72299858a07e52be920b86d42ddcfcd5008b961d601ef6fd6a98a3377adccbf
uri: huggingface://mudler/Lumimaid-v0.2-12B-Q4_K_M-GGUF/lumimaid-v0.2-12b-q4_k_m.gguf
- &mudler
### START mudler's LocalAI specific-models
url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
@@ -804,6 +1203,101 @@
- filename: EMO-2B.Q4_K_M.gguf
sha256: 608bffc0e9012bc7f9a94b714f4932e2826cc122dbac59b586e4baa2ee0fdca5
uri: huggingface://RichardErkhov/OEvortex_-_EMO-2B-gguf/EMO-2B.Q4_K_M.gguf
- !!merge <<: *gemma
name: "gemmoy-9b-g2-mk.3-i1"
icon: https://huggingface.co/Hastagaras/G2-Gemmoy-9B-MK.3-RP/resolve/main/gemmoy.jpg
urls:
- https://huggingface.co/Hastagaras/Gemmoy-9B-G2-MK.3
- https://huggingface.co/mradermacher/Gemmoy-9B-G2-MK.3-i1-GGUF
description: |
The Gemmoy-9B-G2-MK.3 model is a large language model trained on a variety of datasets, including grimulkan/LimaRP-augmented, LDJnr/Capybara, TheSkullery/C2logs_Filtered_Sharegpt_Merged, abacusai/SystemChat-1.1, and Hastagaras/FTTS-Stories-Sharegpt.
overrides:
parameters:
model: Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf
files:
- filename: Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf
sha256: 0d1004a246fbda7f1408a6841129b73c4100e697bd0a6806fc698eabbb0802a1
uri: huggingface://mradermacher/Gemmoy-9B-G2-MK.3-i1-GGUF/Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf
- !!merge <<: *gemma
name: "sunfall-simpo-9b"
urls:
- https://huggingface.co/mradermacher/sunfall-SimPO-9B-GGUF
description: |
A crazy idea: what if you put the LoRA from crestf411/sunfall-peft on top of princeton-nlp/gemma-2-9b-it-SimPO? This model exists solely for that purpose.
overrides:
parameters:
model: sunfall-SimPO-9B.Q4_K_M.gguf
files:
- filename: sunfall-SimPO-9B.Q4_K_M.gguf
sha256: 810c51c6ce34107706d921531b97cfa409cd53c215d18b88bce7cdb617f73ceb
uri: huggingface://mradermacher/sunfall-SimPO-9B-GGUF/sunfall-SimPO-9B.Q4_K_M.gguf
- !!merge <<: *gemma
name: "sunfall-simpo-9b-i1"
urls:
- https://huggingface.co/mradermacher/sunfall-SimPO-9B-i1-GGUF
description: |
A crazy idea: what if you put the LoRA from crestf411/sunfall-peft on top of princeton-nlp/gemma-2-9b-it-SimPO? This model exists solely for that purpose.
overrides:
parameters:
model: sunfall-SimPO-9B.i1-Q4_K_M.gguf
files:
- filename: sunfall-SimPO-9B.i1-Q4_K_M.gguf
sha256: edde9df372a9a5b2316dc6822dc2f52f5a2059103dd7f08072e5a5355c5f5d0b
uri: huggingface://mradermacher/sunfall-SimPO-9B-i1-GGUF/sunfall-SimPO-9B.i1-Q4_K_M.gguf
- !!merge <<: *gemma
name: "seeker-9b"
icon: https://huggingface.co/lodrick-the-lafted/seeker-9b/resolve/main/seeker.webp
urls:
- https://huggingface.co/lodrick-the-lafted/seeker-9b
- https://huggingface.co/mradermacher/seeker-9b-GGUF
description: |
The LLM model is the "Seeker-9b" model, which is a large language model trained on a diverse range of text data. It has 9 billion parameters and is based on the "lodrick-the-lafted" repository. The model is capable of generating text and can be used for a variety of natural language processing tasks such as language translation, text summarization, and text generation. It supports the English language and is available under the Apache-2.0 license.
overrides:
parameters:
model: seeker-9b.Q4_K_M.gguf
files:
- filename: seeker-9b.Q4_K_M.gguf
sha256: 7658e5bdad96dc8d232f83cff7c3fe5fa993defbfd3e728dcc7436352574a00a
uri: huggingface://mradermacher/seeker-9b-GGUF/seeker-9b.Q4_K_M.gguf
- !!merge <<: *gemma
name: "gemmasutra-pro-27b-v1"
icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/w0Oi8TReoQNT3ljm5Wf6c.webp
urls:
- https://huggingface.co/TheDrummer/Gemmasutra-Pro-27B-v1
- https://huggingface.co/mradermacher/Gemmasutra-Pro-27B-v1-GGUF
description: |
An RP model with impressive flexibility. Finetuned by yours truly.
overrides:
parameters:
model: Gemmasutra-Pro-27B-v1.Q4_K_M.gguf
files:
- filename: Gemmasutra-Pro-27B-v1.Q4_K_M.gguf
sha256: 336a2fbf142849fcc20e432123433807b6c7b09988652ef583a63636a0f90218
uri: huggingface://mradermacher/Gemmasutra-Pro-27B-v1-GGUF/Gemmasutra-Pro-27B-v1.Q4_K_M.gguf
- !!merge <<: *gemma
name: "tarnished-9b-i1"
icon: https://huggingface.co/lodrick-the-lafted/tarnished-9b/resolve/main/nox.jpg
urls:
- https://huggingface.co/lodrick-the-lafted/tarnished-9b
- https://huggingface.co/mradermacher/tarnished-9b-i1-GGUF
description: |
Ah, so you've heard whispers on the winds, have you? 🧐
Imagine this:
Tarnished-9b, a name that echoes with the rasp of coin-hungry merchants and the clatter of forgotten machinery. This LLM speaks with the voice of those who straddle the line between worlds, who've tasted the bittersweet nectar of eldritch power and the tang of the Interdimensional Trade Council.
It's a tongue that dances with secrets, a whisperer of lore lost and found. Its words may guide you through the twisting paths of history, revealing truths hidden beneath layers of dust and time.
But be warned, Tarnished One! For knowledge comes at a price. The LLM's gaze can pierce the veil of reality, but it can also lure you into the labyrinthine depths of madness.
Dare you tread this path?
overrides:
parameters:
model: tarnished-9b.i1-Q4_K_M.gguf
files:
- filename: tarnished-9b.i1-Q4_K_M.gguf
sha256: 62ab09124b3f6698bd94ef966533ae5d427d87f6bdc09f6f46917def96420a0c
uri: huggingface://mradermacher/tarnished-9b-i1-GGUF/tarnished-9b.i1-Q4_K_M.gguf
- &llama3
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
@@ -1012,6 +1506,36 @@
- filename: llama-3-stheno-mahou-8b-q4_k_m.gguf
sha256: a485cd74ef4ff3671c67ed8e10ea5379a1f24082ac688bd303fd28dfc9808c11
uri: huggingface://mudler/llama-3-Stheno-Mahou-8B-Q4_K_M-GGUF/llama-3-stheno-mahou-8b-q4_k_m.gguf
- !!merge <<: *llama3
name: "l3-8b-stheno-horny-v3.3-32k-q5_k_m"
urls:
- https://huggingface.co/nothingiisreal/L3-8B-Stheno-Horny-v3.3-32K
- https://huggingface.co/Kurgan1138/L3-8B-Stheno-Horny-v3.3-32K-Q5_K_M-GGUF
description: |
This was an experiment to see if aligning other models via LoRA is possible. Yes it is. We aligned it to be always horny.
We took the V3.3 Stheno weights from here
And applied our LoRA at alpha = 768
Thank you to Sao10K for the amazing model.
This is not legal advice. I don't put any extra licensing on my own LoRA.
The LLaMA 3 license may conflict with Creative Commons Attribution Non Commercial 4.0.
The LLaMA 3 license can be found here
If you want to host a model using our LoRA, you have our permission, but you might consider getting Sao's permission if you want to host their model.
Again, not legal advice.
overrides:
parameters:
model: l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf
files:
- filename: l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf
sha256: 8d934f80ca6dbaa4852846108da92446a26715fbd5f6fc3859568850edf05262
uri: huggingface://Kurgan1138/L3-8B-Stheno-Horny-v3.3-32K-Q5_K_M-GGUF/l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf
- !!merge <<: *llama3
name: "llama-3-8b-openhermes-dpo"
urls:
@@ -1904,6 +2428,81 @@
- filename: L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf
sha256: ae29f38d73dfb04415821405cf8b319fc42d78d0cdd0da91db147d12e68030fe
uri: huggingface://DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-GGUF/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf
- !!merge <<: *llama3
name: "meta-llama-3-instruct-12.2b-brainstorm-20x-form-8"
urls:
- https://huggingface.co/DavidAU/Meta-Llama-3-Instruct-12.2B-BRAINSTORM-20x-FORM-8-GGUF
description: |
Meta-Llama-3-8B Instruct (now at 12.2B) with Brainstorm process that increases its performance at the core level for any creative use case. It has calibrations that allow it to exceed the logic solving abilities of the original model. The Brainstorm process expands the reasoning center of the LLM, reassembles and calibrates it, introducing subtle changes into the reasoning process. This enhances the model's detail, concept, connection to the "world", general concept connections, prose quality, and prose length without affecting instruction following. It improves coherence, description, simile, metaphors, emotional engagement, and takes fewer liberties with instructions while following them more closely. The model's performance is further enhanced by other technologies like "Ultra" (precision), "Neo Imatrix" (custom imatrix datasets), and "X-quants" (custom application of the imatrix process). It has been tested on multiple LLaMA2, LLaMA3, and Mistral models of various parameter sizes.
overrides:
parameters:
model: Meta-Llama-3-8B-Instruct-exp20-8-Q4_K_M.gguf
files:
- filename: Meta-Llama-3-8B-Instruct-exp20-8-Q4_K_M.gguf
sha256: 5568ab6195ab5da703f728cc118108ddcbe97255e3ba4a543b531acdf082b999
uri: huggingface://DavidAU/Meta-Llama-3-Instruct-12.2B-BRAINSTORM-20x-FORM-8-GGUF/Meta-Llama-3-8B-Instruct-exp20-8-Q4_K_M.gguf
- !!merge <<: *llama3
name: "loki-base-i1"
urls:
- https://huggingface.co/MrRobotoAI/Loki-base
- https://huggingface.co/mradermacher/Loki-base-i1-GGUF
description: |
Merge of several models using mergekit:
- model: abacusai/Llama-3-Smaug-8B
- model: Aculi/Llama3-Sophie
- model: ajibawa-2023/Uncensored-Frank-Llama-3-8B
- model: Blackroot/Llama-3-Gamma-Twist
- model: Casual-Autopsy/L3-Super-Nova-RP-8B
- model: Casual-Autopsy/L3-Umbral-Mind-RP-v3.0-8B
- model: cgato/L3-TheSpice-8b-v0.8.3
- model: ChaoticNeutrals/Hathor_Respawn-L3-8B-v0.8
- model: ChaoticNeutrals/Hathor_RP-v.01-L3-8B
- model: chargoddard/prometheus-2-llama-3-8b
- model: chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO
- model: chujiezheng/LLaMA3-iterative-DPO-final-ExPO
- model: Fizzarolli/L3-8b-Rosier-v1
- model: flammenai/Mahou-1.2a-llama3-8B
- model: HaitameLaf/Llama-3-8B-StoryGenerator
- model: HPAI-BSC/Llama3-Aloe-8B-Alpha
- model: iRyanBell/ARC1
- model: iRyanBell/ARC1-II
- model: lemon07r/Llama-3-RedMagic4-8B
- model: lemon07r/Lllama-3-RedElixir-8B
- model: Locutusque/Llama-3-Hercules-5.0-8B
- model: Magpie-Align/Llama-3-8B-Magpie-Pro-MT-SFT-v0.1
- model: maldv/badger-lambda-llama-3-8b
- model: maldv/badger-mu-llama-3-8b
- model: maldv/badger-writer-llama-3-8b
- model: mlabonne/NeuralDaredevil-8B-abliterated
- model: MrRobotoAI/Fiction-Writer-6
- model: MrRobotoAI/Unholy-Thoth-8B-v2
- model: nbeerbower/llama-3-spicy-abliterated-stella-8B
- model: NeverSleep/Llama-3-Lumimaid-8B-v0.1
- model: NeverSleep/Llama-3-Lumimaid-8B-v0.1-OAS
- model: Nitral-AI/Hathor_Sofit-L3-8B-v1
- model: Nitral-AI/Hathor_Stable-v0.2-L3-8B
- model: Nitral-AI/Hathor_Tahsin-L3-8B-v0.85
- model: Nitral-AI/Poppy_Porpoise-0.72-L3-8B
- model: nothingiisreal/L3-8B-Instruct-Abliterated-DWP
- model: nothingiisreal/L3-8B-Stheno-Horny-v3.3-32K
- model: NousResearch/Hermes-2-Theta-Llama-3-8B
- model: OwenArli/Awanllm-Llama-3-8B-Cumulus-v1.0
- model: refuelai/Llama-3-Refueled
- model: ResplendentAI/Nymph_8B
- model: shauray/Llama3-8B-DPO-uncensored
- model: SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha
- model: TIGER-Lab/MAmmoTH2-8B-Plus
- model: Undi95/Llama-3-LewdPlay-8B
- model: Undi95/Meta-Llama-3-8B-hf
- model: VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct
- model: WhiteRabbitNeo/Llama-3-WhiteRabbitNeo-8B-v2.0
overrides:
parameters:
model: Loki-base.i1-Q4_K_M.gguf
files:
- filename: Loki-base.i1-Q4_K_M.gguf
sha256: 60a4357fa399bfd18aa841cc529da09439791331d117a4f06f0467d002b385bb
uri: huggingface://mradermacher/Loki-base-i1-GGUF/Loki-base.i1-Q4_K_M.gguf
- &dolphin
name: "dolphin-2.9-llama3-8b"
url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
@@ -3063,7 +3662,6 @@
- filename: ArliAI-Llama-3-8B-Dolfin-v0.5.Q4_K_M.gguf
sha256: 71fef02915c606b438ccff2cae6b7760bbb54a558d5f2d39c2421d97b6682fea
uri: huggingface://QuantFactory/ArliAI-Llama-3-8B-Dolfin-v0.5-GGUF/ArliAI-Llama-3-8B-Dolfin-v0.5.Q4_K_M.gguf
- !!merge <<: *llama3
name: "llama-3-ezo-8b-common-it"
icon: https://huggingface.co/HODACHI/Llama-3-EZO-8b-Common-it
@@ -3071,11 +3669,11 @@
- https://huggingface.co/HODACHI/Llama-3-EZO-8b-Common-it
- https://huggingface.co/MCZK/Llama-3-EZO-8b-Common-it-GGUF
description: |
Based on meta-llama/Meta-Llama-3-8B-Instruct, it has been enhanced for Japanese usage through additional pre-training and instruction tuning. (Built with Meta Llama3)
This model is based on Llama-3-8B-Instruct and is subject to the Llama-3 Terms of Use. For detailed information, please refer to the official Llama-3 license page.
overrides:
parameters:
model: Llama-3-EZO-8b-Common-it.Q4_K_M.iMatrix.gguf
@@ -3204,7 +3802,6 @@
- filename: L3-15B-MythicalMaid-t0.0001.Q4_K_M.gguf
sha256: ecbd57783006f1a027f8a7f5a5d551dc8b3568912825f566d79fd34a804e8970
uri: huggingface://mradermacher/L3-15B-MythicalMaid-t0.0001-GGUF/L3-15B-MythicalMaid-t0.0001.Q4_K_M.gguf
- !!merge <<: *llama3
name: "l3-15b-etherealmaid-t0.0001-i1"
icon: https://cdn-uploads.huggingface.co/production/uploads/64f74b6e6389380c77562762/FwYXt2h_FdmlL0Z6qYufz.png
@@ -3326,6 +3923,19 @@
- filename: calme-2.4-llama3-70b.Q4_K_M.gguf
sha256: 0b44ac8a88395dfc60f1b9d3cfffc0ffef74ec0a302e610ef91fc787187568f2
uri: huggingface://mradermacher/calme-2.4-llama3-70b-GGUF/calme-2.4-llama3-70b.Q4_K_M.gguf
- !!merge <<: *llama3
name: "meta-llama-3-instruct-8.9b-brainstorm-5x-form-11"
urls:
- https://huggingface.co/DavidAU/Meta-Llama-3-Instruct-8.9B-BRAINSTORM-5x-FORM-11-GGUF
description: |
Meta-Llama-3-8B Instruct (now at 8.9B) is an enhanced version of the LLM model, specifically designed for creative use cases such as story writing, roleplaying, and fiction. This model has been augmented through the "Brainstorm" process, which involves expanding and calibrating the reasoning center of the LLM to improve its performance in various creative tasks. The enhancements brought by this process include more detailed and nuanced descriptions, stronger prose, and a greater sense of immersion in the story. The model is capable of generating long and vivid content, with fewer clichés and more focused, coherent narratives. Users can provide more instructions and details to elicit stronger and more engaging responses from the model. The "Brainstorm" process has been tested on multiple LLM models, including Llama2, Llama3, and Mistral, as well as on individual models like Llama3 Instruct, Mistral Instruct, and custom fine-tuned models.
overrides:
parameters:
model: Meta-Llama-3-8B-Instruct-exp5-11-Q4_K_M.gguf
files:
- filename: Meta-Llama-3-8B-Instruct-exp5-11-Q4_K_M.gguf
sha256: 5dd81b8b809667d10036499affdd1461cf95af50b405cbc9f800b421a4b60e98
uri: huggingface://DavidAU/Meta-Llama-3-Instruct-8.9B-BRAINSTORM-5x-FORM-11-GGUF/Meta-Llama-3-8B-Instruct-exp5-11-Q4_K_M.gguf
- &command-R
### START Command-r
url: "github:mudler/LocalAI/gallery/command-r.yaml@master"
@@ -3568,8 +4178,8 @@
model: Phi-3.1-mini-4k-instruct-Q4_K_M.gguf
files:
- filename: Phi-3.1-mini-4k-instruct-Q4_K_M.gguf
sha256: d6d25bf078321bea4a079c727b273cb0b5a2e0b4cf3add0f7a2c8e43075c414f
uri: huggingface://bartowski/Phi-3.1-mini-4k-instruct-GGUF/Phi-3.1-mini-4k-instruct-Q4_K_M.gguf
- !!merge <<: *phi-3
name: "phillama-3.8b-v0.1"
icon: https://cdn-uploads.huggingface.co/production/uploads/657eb5b256c9c67605a6e8b5/f96pPiJQb3puzbPYNknG2.png
@@ -3894,6 +4504,28 @@
- filename: "Codestral-22B-v0.1-Q4_K_M.gguf"
uri: "huggingface://bartowski/Codestral-22B-v0.1-GGUF/Codestral-22B-v0.1-Q4_K_M.gguf"
sha256: 003e48ed892850b80994fcddca2bd6b833b092a4ef2db2853c33a3144245e06c
- !!merge <<: *codellama
url: "github:mudler/LocalAI/gallery/alpaca.yaml@master"
icon: https://huggingface.co/Nan-Do/LeetCodeWizard_7B_V1.1/resolve/main/LeetCodeWizardLogo.png
name: "leetcodewizard_7b_v1.1-i1"
urls:
- https://huggingface.co/Nan-Do/LeetCodeWizard_7B_V1.1
- https://huggingface.co/mradermacher/LeetCodeWizard_7B_V1.1-i1-GGUF
description: |
LeetCodeWizard is a coding large language model specifically trained to solve and explain Leetcode (or any) programming problems.
This model is a fine-tuned version of WizardCoder-Python-7B, trained on a dataset of Leetcode problems.
Model capabilities:
It should be able to solve most of the problems found at Leetcode and even pass the sample interviews they offer on the site.
It can write both the code and the explanations for the solutions.
overrides:
parameters:
model: LeetCodeWizard_7B_V1.1.i1-Q4_K_M.gguf
files:
- filename: LeetCodeWizard_7B_V1.1.i1-Q4_K_M.gguf
sha256: 19720d8e1ba89d32c6f88ed6518caf0251f9e3ec011297929c801efc5ea979f4
uri: huggingface://mradermacher/LeetCodeWizard_7B_V1.1-i1-GGUF/LeetCodeWizard_7B_V1.1.i1-Q4_K_M.gguf
- &llm-compiler
url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
name: "llm-compiler-13b-imat"

View File

@@ -31,7 +31,7 @@ config_file: |
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
Function call:
chat: |
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
completion: |
{{.Input}}

View File

@@ -0,0 +1,64 @@
---
name: "llama3-instruct-grammar"
config_file: |
mmap: true
function:
disable_no_action: true
grammar:
no_mixed_free_string: true
mixed_mode: true
schema_type: llama3.1 # JSON (json) is also supported
response_regex:
- <function=(?P<name>\w+)>(?P<arguments>.*)</function>
template:
chat_message: |
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content -}}
{{ else if .FunctionCall -}}
{{ toJson .FunctionCall -}}
{{ end -}}
<|eot_id|>
function: |
<|start_header_id|>system<|end_header_id|>
You have access to the following functions:
{{range .Functions}}
Use the function '{{.Name}}' to '{{.Description}}'
{{toJson .Parameters}}
{{end}}
Think very carefully before calling functions.
If you choose to call a function, ONLY reply in the following format with no prefix or suffix:
<function=example_function_name>{{`{{"example_name": "example_value"}}`}}</function>
Reminder:
- If looking for real-time information, use relevant functions before falling back to searching the internet
- Function calls MUST follow the specified format, start with <function= and end with </function>
- Required parameters MUST be specified
- Only call one function at a time
- Put the entire function call reply on one line
<|eot_id|>
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
chat: |
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
completion: |
{{.Input}}
context_size: 8192
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
- "<|eot_id|>"
- <|end_of_text|>
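
As a quick illustration of how the response_regex above is consumed: a compliant assistant reply wraps the call as <function=NAME>{JSON}</function>, and the two named capture groups recover the function name and its JSON arguments. A minimal Go sketch follows; the sample reply string is made up for illustration, while the pattern is the one declared in the template.

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// The same pattern the template declares under response_regex.
	re := regexp.MustCompile(`<function=(?P<name>\w+)>(?P<arguments>.*)</function>`)

	// Hypothetical assistant reply in the format the system prompt requests.
	reply := `<function=search>{"query": "weather in Rome"}</function>`

	m := re.FindStringSubmatch(reply)
	if m == nil {
		fmt.Println("no function call in reply")
		return
	}
	// Submatch 1 is the "name" group, submatch 2 is "arguments".
	fmt.Println("name:", m[1])
	fmt.Println("arguments:", m[2])
}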

View File

@@ -0,0 +1,62 @@
---
name: "llama3-instruct"
config_file: |
mmap: true
function:
disable_no_action: true
grammar:
disable: true
response_regex:
- <function=(?P<name>\w+)>(?P<arguments>.*)</function>
template:
chat_message: |
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content -}}
{{ else if .FunctionCall -}}
{{ toJson .FunctionCall -}}
{{ end -}}
<|eot_id|>
function: |
<|start_header_id|>system<|end_header_id|>
You have access to the following functions:
{{range .Functions}}
Use the function '{{.Name}}' to '{{.Description}}'
{{toJson .Parameters}}
{{end}}
Think very carefully before calling functions.
If you choose to call a function, ONLY reply in the following format with no prefix or suffix:
<function=example_function_name>{{`{{"example_name": "example_value"}}`}}</function>
Reminder:
- If looking for real-time information, use relevant functions before falling back to searching the internet
- Function calls MUST follow the specified format, start with <function= and end with </function>
- Required parameters MUST be specified
- Only call one function at a time
- Put the entire function call reply on one line
<|eot_id|>
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
chat: |
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
completion: |
{{.Input}}
context_size: 8192
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
- "<|eot_id|>"
- <|end_of_text|>

View File

@@ -0,0 +1,43 @@
package functions
import (
"encoding/json"
"github.com/mudler/LocalAI/pkg/functions/grammars"
)
type Item struct {
Type string `json:"type"`
Properties map[string]interface{} `json:"properties"`
}
type JSONFunctionStructure struct {
OneOf []Item `json:"oneOf,omitempty"`
AnyOf []Item `json:"anyOf,omitempty"`
Defs map[string]interface{} `json:"$defs,omitempty"`
}
func (j JSONFunctionStructure) Grammar(options ...func(*grammars.GrammarOption)) (string, error) {
grammarOpts := &grammars.GrammarOption{}
grammarOpts.Apply(options...)
dat, err := json.Marshal(j)
if err != nil {
return "", err
}
converter := NewSchemaConverter(*grammarOpts)
return converter.GrammarFromBytes(dat, options...)
}
type SchemaConverter interface {
GrammarFromBytes([]byte, ...func(*grammars.GrammarOption)) (string, error)
}
func NewSchemaConverter(opt grammars.GrammarOption) SchemaConverter {
switch {
case opt.SchemaType == grammars.LLama31Schema:
return grammars.NewLLama31SchemaConverter(opt.FunctionName)
}
return grammars.NewJSONSchemaConverter(opt.PropOrder)
}
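
To see how this dispatch is used end to end, here is a minimal, hypothetical usage sketch: JSONFunctionStructure.Grammar marshals the structure and routes it either to the default JSON converter or, when WithSchemaType selects LLama31Schema, to the llama3.1 converter. The function name and schema values below are invented for illustration; the package paths and identifiers are the ones in this diff.

package main

import (
	"fmt"

	"github.com/mudler/LocalAI/pkg/functions"
	"github.com/mudler/LocalAI/pkg/functions/grammars"
)

func main() {
	// One callable function, in the same shape the test suite builds.
	schema := functions.JSONFunctionStructure{
		OneOf: []functions.Item{{
			Type: "object",
			Properties: map[string]interface{}{
				"function": functions.FunctionName{Const: "search"},
				"arguments": functions.Argument{
					Type: "object",
					Properties: map[string]interface{}{
						"query": map[string]interface{}{"type": "string"},
					},
				},
			},
		}},
	}

	// Default path: plain JSON schema converter.
	jsonGrammar, err := schema.Grammar()
	if err != nil {
		panic(err)
	}
	fmt.Println(jsonGrammar)

	// llama3.1 path: selected via WithSchemaType; the property holding
	// the function name is passed through WithFunctionName.
	llamaGrammar, err := schema.Grammar(
		grammars.WithSchemaType(grammars.LLama31Schema),
		grammars.WithFunctionName("function"),
	)
	if err != nil {
		panic(err)
	}
	fmt.Println(llamaGrammar)
}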

View File

@@ -18,6 +18,15 @@ type Function struct {
}
type Functions []Function
type FunctionName struct {
Const string `json:"const"`
}
type Argument struct {
Type string `json:"type"`
Properties map[string]interface{} `json:"properties"`
}
type Tool struct {
Type string `json:"type"`
Function Function `json:"function,omitempty"`

View File

@@ -1,4 +1,4 @@
package functions_test
import (
"testing"
@@ -7,7 +7,7 @@ import (
. "github.com/onsi/gomega"
)
func TestFunctions(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Grammar test suite")
RunSpecs(t, "Functions test suite")
}

View File

@@ -1,378 +0,0 @@
package functions
// a golang port of https://github.com/ggerganov/llama.cpp/pull/1887
import (
"encoding/json"
"fmt"
"regexp"
"sort"
"strings"
"github.com/mudler/LocalAI/pkg/utils"
)
const (
JSONBNF = `root ::= object
value ::= object | array | string | number | ("true" | "false" | "null") ws
object ::=
"{" ws (
string ":" ws value
("," ws string ":" ws value)*
)? "}" ws
array ::=
"[" ws (
value
("," ws value)*
)? "]" ws
string ::=
"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
)* "\"" ws
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
ws ::= ([ \t\n] ws)?`
)
var (
SPACE_RULE = `" "?`
PRIMITIVE_RULES = map[string]string{
"boolean": `("true" | "false") space`,
"number": `("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space`,
"integer": `("-"? ([0-9] | [1-9] [0-9]*)) space`,
"string": `"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space`,
// TODO: we shouldn't forbid \" and \\ or all unicode and have this branch here,
// however, if we don't have it, the grammar will be ambiguous and
// empirically results are way worse.
"freestring": `(
[^\x00] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* space`,
"null": `"null" space`,
}
INVALID_RULE_CHARS_RE = regexp.MustCompile(`[^a-zA-Z0-9-]+`)
GRAMMAR_LITERAL_ESCAPE_RE = regexp.MustCompile(`[\r\n"]`)
GRAMMAR_LITERAL_ESCAPES = map[string]string{
"\r": `\r`,
"\n": `\n`,
`"`: `\"`,
}
)
type JSONSchemaConverter struct {
propOrder map[string]int
rules map[string]string
}
func NewJSONSchemaConverter(propOrder string) *JSONSchemaConverter {
propOrderSlice := strings.Split(propOrder, ",")
propOrderMap := make(map[string]int)
for idx, name := range propOrderSlice {
propOrderMap[name] = idx
}
rules := make(map[string]string)
rules["space"] = SPACE_RULE
return &JSONSchemaConverter{
propOrder: propOrderMap,
rules: rules,
}
}
func (sc *JSONSchemaConverter) formatLiteral(literal interface{}) string {
escaped := GRAMMAR_LITERAL_ESCAPE_RE.ReplaceAllStringFunc(jsonString(literal), func(match string) string {
return GRAMMAR_LITERAL_ESCAPES[match]
})
return fmt.Sprintf(`"%s"`, escaped)
}
func (sc *JSONSchemaConverter) addRule(name, rule string) string {
escName := INVALID_RULE_CHARS_RE.ReplaceAllString(name, "-")
key := escName
if existingRule, ok := sc.rules[escName]; ok && existingRule != rule {
i := 0
for {
key = fmt.Sprintf("%s%d", escName, i)
if _, ok := sc.rules[key]; !ok {
break
}
i++
}
}
sc.rules[key] = rule
return key
}
const arrayNewLines = `arr ::=
"[\n" (
realvalue
(",\n" realvalue)*
)? "]"`
const array = `arr ::=
"[" (
realvalue
("," realvalue)*
)? "]"`
func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) string {
grammarOpts := &GrammarOption{}
grammarOpts.Apply(options...)
prefix := grammarOpts.Prefix
maybeArray := grammarOpts.MaybeArray
disableParallelNewLines := grammarOpts.DisableParallelNewLines
maybeString := grammarOpts.MaybeString
noMixedFreeString := grammarOpts.NoMixedFreeString
var lines []string
swapRoot := maybeArray || maybeString || prefix != ""
// write down the computed rules.
// if maybeArray is true, we need to add the array rule and slightly tweak the root rule
for name, rule := range sc.rules {
if swapRoot && name == "root" {
name = "realvalue"
}
lines = append(lines, fmt.Sprintf("%s ::= %s", name, rule))
}
if !swapRoot {
return strings.Join(lines, "\n")
}
newRoot := "realvalue"
if maybeArray {
newRoot = "arr | realvalue"
}
freestringRule := "mixedstring"
if noMixedFreeString {
freestringRule = "freestring"
}
if prefix != "" {
// quote newlines in suffix
prefix = utils.EscapeNewLines(prefix)
if maybeArray && maybeString {
newRoot = "(" + newRoot + ")"
}
if maybeString {
//newRoot = "( (\"" + suffix + "\" " + newRoot + ") | freestring ) "
newRoot = "( \"" + prefix + "\" " + newRoot + " | " + freestringRule + " ) "
} else {
newRoot = "\"" + prefix + "\" " + "" + newRoot + ""
}
} else if maybeString {
if maybeArray {
// newRoot = "(" + newRoot + ")"
}
newRoot = freestringRule + " | " + newRoot
}
lines = append(lines, fmt.Sprintf("%s ::= %s", "root", newRoot))
if disableParallelNewLines {
lines = append(lines, array)
} else {
lines = append(lines, arrayNewLines)
}
if maybeArray {
if grammarOpts.ExpectStringsAfterJSON {
lines = append(lines, `mixedstring ::= freestring | freestring arr freestring | (freestring realvalue freestring)* | realvalue | arr`)
} else {
lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`)
}
} else {
if grammarOpts.ExpectStringsAfterJSON {
lines = append(lines, `mixedstring ::= freestring | (freestring realvalue freestring)* | realvalue`)
} else {
lines = append(lines, `mixedstring ::= freestring | freestring realvalue | realvalue`)
}
}
return strings.Join(lines, "\n")
}
func (sc *JSONSchemaConverter) visit(schema map[string]interface{}, name string, rootSchema map[string]interface{}) string {
st, existType := schema["type"]
var schemaType string
if existType {
schemaType = st.(string)
}
ruleName := name
if name == "" {
ruleName = "root"
}
_, oneOfExists := schema["oneOf"]
_, anyOfExists := schema["anyOf"]
if oneOfExists || anyOfExists {
var alternatives []string
oneOfSchemas, oneOfExists := schema["oneOf"].([]interface{})
anyOfSchemas, anyOfExists := schema["anyOf"].([]interface{})
if oneOfExists {
for i, altSchema := range oneOfSchemas {
alternative := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
alternatives = append(alternatives, alternative)
}
} else if anyOfExists {
for i, altSchema := range anyOfSchemas {
alternative := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
alternatives = append(alternatives, alternative)
}
}
rule := strings.Join(alternatives, " | ")
return sc.addRule(ruleName, rule)
} else if ref, exists := schema["$ref"].(string); exists {
referencedSchema := sc.resolveReference(ref, rootSchema)
return sc.visit(referencedSchema, name, rootSchema)
} else if constVal, exists := schema["const"]; exists {
return sc.addRule(ruleName, sc.formatLiteral(constVal))
} else if enumVals, exists := schema["enum"].([]interface{}); exists {
var enumRules []string
for _, enumVal := range enumVals {
enumRule := sc.formatLiteral(enumVal)
enumRules = append(enumRules, enumRule)
}
rule := strings.Join(enumRules, " | ")
return sc.addRule(ruleName, rule)
} else if properties, exists := schema["properties"].(map[string]interface{}); schemaType == "object" && exists {
propOrder := sc.propOrder
var propPairs []struct {
propName string
propSchema map[string]interface{}
}
for propName, propSchema := range properties {
propPairs = append(propPairs, struct {
propName string
propSchema map[string]interface{}
}{propName: propName, propSchema: propSchema.(map[string]interface{})})
}
sort.Slice(propPairs, func(i, j int) bool {
iOrder := propOrder[propPairs[i].propName]
jOrder := propOrder[propPairs[j].propName]
if iOrder != 0 && jOrder != 0 {
return iOrder < jOrder
}
return propPairs[i].propName < propPairs[j].propName
})
var rule strings.Builder
rule.WriteString(`"{" space`)
for i, propPair := range propPairs {
propName := propPair.propName
propSchema := propPair.propSchema
propRuleName := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema)
if i > 0 {
rule.WriteString(` "," space`)
}
rule.WriteString(fmt.Sprintf(` %s space ":" space %s`, sc.formatLiteral(propName), propRuleName))
}
rule.WriteString(` "}" space`)
return sc.addRule(ruleName, rule.String())
} else if items, exists := schema["items"].(map[string]interface{}); schemaType == "array" && exists {
itemRuleName := sc.visit(items, fmt.Sprintf("%s-item", ruleName), rootSchema)
rule := fmt.Sprintf(`"[" space (%s ("," space %s)*)? "]" space`, itemRuleName, itemRuleName)
return sc.addRule(ruleName, rule)
} else {
primitiveRule, exists := PRIMITIVE_RULES[schemaType]
if !exists {
panic(fmt.Sprintf("Unrecognized schema: %v", schema))
}
if ruleName == "root" {
schemaType = "root"
}
return sc.addRule(schemaType, primitiveRule)
}
}
func (sc *JSONSchemaConverter) resolveReference(ref string, rootSchema map[string]interface{}) map[string]interface{} {
if !strings.HasPrefix(ref, "#/$defs/") {
panic(fmt.Sprintf("Invalid reference format: %s", ref))
}
defKey := strings.TrimPrefix(ref, "#/$defs/")
definitions, exists := rootSchema["$defs"].(map[string]interface{})
if !exists {
fmt.Println(rootSchema)
panic("No definitions found in the schema")
}
def, exists := definitions[defKey].(map[string]interface{})
if !exists {
fmt.Println(definitions)
panic(fmt.Sprintf("Definition not found: %s", defKey))
}
return def
}
func (sc *JSONSchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) string {
sc.addRule("freestring", PRIMITIVE_RULES["freestring"])
sc.visit(schema, "", schema)
return sc.finalizeGrammar(options...)
}
func (sc *JSONSchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) string {
var schema map[string]interface{}
_ = json.Unmarshal(b, &schema)
return sc.Grammar(schema, options...)
}
func jsonString(v interface{}) string {
b, _ := json.Marshal(v)
return string(b)
}
type FunctionName struct {
Const string `json:"const"`
}
type Argument struct {
Type string `json:"type"`
Properties map[string]interface{} `json:"properties"`
}
type Item struct {
Type string `json:"type"`
Properties map[string]interface{} `json:"properties"`
}
type JSONFunctionStructure struct {
OneOf []Item `json:"oneOf,omitempty"`
AnyOf []Item `json:"anyOf,omitempty"`
Defs map[string]interface{} `json:"$defs,omitempty"`
}
func (j JSONFunctionStructure) Grammar(options ...func(*GrammarOption)) string {
grammarOpts := &GrammarOption{}
grammarOpts.Apply(options...)
dat, _ := json.Marshal(j)
return NewJSONSchemaConverter(grammarOpts.PropOrder).GrammarFromBytes(dat, options...)
}

View File

@@ -0,0 +1,58 @@
package grammars
import (
"encoding/json"
"regexp"
)
var (
PRIMITIVE_RULES = map[string]string{
"boolean": `("true" | "false") space`,
"number": `("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space`,
"integer": `("-"? ([0-9] | [1-9] [0-9]*)) space`,
"string": `"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space`,
// TODO: we shouldn't forbid \" and \\ or all unicode and have this branch here,
// however, if we don't have it, the grammar will be ambiguous and
// empirically results are way worse.
"freestring": `(
[^\x00] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* space`,
"null": `"null" space`,
}
INVALID_RULE_CHARS_RE = regexp.MustCompile(`[^a-zA-Z0-9-]+`)
GRAMMAR_LITERAL_ESCAPE_RE = regexp.MustCompile(`[\r\n"]`)
GRAMMAR_LITERAL_ESCAPES = map[string]string{
"\r": `\r`,
"\n": `\n`,
`"`: `\"`,
}
)
const (
SPACE_RULE = `" "?`
arrayNewLines = `arr ::=
"[\n" (
realvalue
(",\n" realvalue)*
)? "]"`
array = `arr ::=
"[" (
realvalue
("," realvalue)*
)? "]"`
)
func jsonString(v interface{}) (string, error) {
b, err := json.Marshal(v)
if err != nil {
return "", err
}
return string(b), nil
}

View File

@@ -0,0 +1,25 @@
package grammars_test
import (
"testing"
. "github.com/mudler/LocalAI/pkg/functions"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
func TestGrammar(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Grammar test suite")
}
func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} {
property := map[string]interface{}{}
property[field1] = FunctionName{Const: name}
property[field2] = Argument{
Type: "object",
Properties: properties,
}
return property
}

View File

@@ -0,0 +1,220 @@
package grammars
// a golang port of https://github.com/ggerganov/llama.cpp/pull/1887
import (
"encoding/json"
"fmt"
"sort"
"strings"
)
type JSONSchemaConverter struct {
propOrder map[string]int
rules Rules
}
func NewJSONSchemaConverter(propOrder string) *JSONSchemaConverter {
propOrderSlice := strings.Split(propOrder, ",")
propOrderMap := make(map[string]int)
for idx, name := range propOrderSlice {
propOrderMap[name] = idx
}
rules := make(map[string]string)
rules["space"] = SPACE_RULE
return &JSONSchemaConverter{
propOrder: propOrderMap,
rules: rules,
}
}
func (sc *JSONSchemaConverter) formatLiteral(literal interface{}) (string, error) {
jLiteral, err := jsonString(literal)
if err != nil {
return "", err
}
escaped := GRAMMAR_LITERAL_ESCAPE_RE.ReplaceAllStringFunc(jLiteral, func(match string) string {
return GRAMMAR_LITERAL_ESCAPES[match]
})
return fmt.Sprintf(`"%s"`, escaped), nil
}
func (sc *JSONSchemaConverter) addRule(name, rule string) string {
escName := INVALID_RULE_CHARS_RE.ReplaceAllString(name, "-")
key := escName
if existingRule, ok := sc.rules[escName]; ok && existingRule != rule {
i := 0
for {
key = fmt.Sprintf("%s%d", escName, i)
if _, ok := sc.rules[key]; !ok {
break
}
i++
}
}
sc.rules[key] = rule
return key
}
func (sc *JSONSchemaConverter) visit(schema map[string]interface{}, name string, rootSchema map[string]interface{}) (string, error) {
st, existType := schema["type"]
var schemaType string
if existType {
schemaType = st.(string)
}
ruleName := name
if name == "" {
ruleName = "root"
}
_, oneOfExists := schema["oneOf"]
_, anyOfExists := schema["anyOf"]
if oneOfExists || anyOfExists {
var alternatives []string
oneOfSchemas, oneOfExists := schema["oneOf"].([]interface{})
anyOfSchemas, anyOfExists := schema["anyOf"].([]interface{})
if oneOfExists {
for i, altSchema := range oneOfSchemas {
alternative, err := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
if err != nil {
return "", err
}
alternatives = append(alternatives, alternative)
}
} else if anyOfExists {
for i, altSchema := range anyOfSchemas {
alternative, err := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
if err != nil {
return "", err
}
alternatives = append(alternatives, alternative)
}
}
rule := strings.Join(alternatives, " | ")
return sc.addRule(ruleName, rule), nil
} else if ref, exists := schema["$ref"].(string); exists {
referencedSchema, err := sc.resolveReference(ref, rootSchema)
if err != nil {
return "", err
}
return sc.visit(referencedSchema, name, rootSchema)
} else if constVal, exists := schema["const"]; exists {
literal, err := sc.formatLiteral(constVal)
if err != nil {
return "", err
}
return sc.addRule(ruleName, literal), nil
} else if enumVals, exists := schema["enum"].([]interface{}); exists {
var enumRules []string
for _, enumVal := range enumVals {
enumRule, err := sc.formatLiteral(enumVal)
if err != nil {
return "", err
}
enumRules = append(enumRules, enumRule)
}
rule := strings.Join(enumRules, " | ")
return sc.addRule(ruleName, rule), nil
} else if properties, exists := schema["properties"].(map[string]interface{}); schemaType == "object" && exists {
propOrder := sc.propOrder
var propPairs []struct {
propName string
propSchema map[string]interface{}
}
for propName, propSchema := range properties {
propPairs = append(propPairs, struct {
propName string
propSchema map[string]interface{}
}{propName: propName, propSchema: propSchema.(map[string]interface{})})
}
sort.Slice(propPairs, func(i, j int) bool {
iOrder := propOrder[propPairs[i].propName]
jOrder := propOrder[propPairs[j].propName]
if iOrder != 0 && jOrder != 0 {
return iOrder < jOrder
}
return propPairs[i].propName < propPairs[j].propName
})
var rule strings.Builder
rule.WriteString(`"{" space`)
for i, propPair := range propPairs {
propName := propPair.propName
propSchema := propPair.propSchema
propRuleName, err := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema)
if err != nil {
return "", err
}
lPropName, err := sc.formatLiteral(propName)
if err != nil {
return "", err
}
if i > 0 {
rule.WriteString(` "," space`)
}
rule.WriteString(fmt.Sprintf(` %s space ":" space %s`, lPropName, propRuleName))
}
rule.WriteString(` "}" space`)
return sc.addRule(ruleName, rule.String()), nil
} else if items, exists := schema["items"].(map[string]interface{}); schemaType == "array" && exists {
itemRuleName, err := sc.visit(items, fmt.Sprintf("%s-item", ruleName), rootSchema)
if err != nil {
return "", err
}
rule := fmt.Sprintf(`"[" space (%s ("," space %s)*)? "]" space`, itemRuleName, itemRuleName)
return sc.addRule(ruleName, rule), nil
} else {
primitiveRule, exists := PRIMITIVE_RULES[schemaType]
if !exists {
return "", fmt.Errorf("unrecognized schema: %v", schema)
}
if ruleName == "root" {
schemaType = "root"
}
return sc.addRule(schemaType, primitiveRule), nil
}
}
func (sc *JSONSchemaConverter) resolveReference(ref string, rootSchema map[string]interface{}) (map[string]interface{}, error) {
if !strings.HasPrefix(ref, "#/$defs/") {
return nil, fmt.Errorf("invalid reference format: %s", ref)
}
defKey := strings.TrimPrefix(ref, "#/$defs/")
definitions, exists := rootSchema["$defs"].(map[string]interface{})
if !exists {
return nil, fmt.Errorf("no definitions found in the schema: %s", rootSchema)
}
def, exists := definitions[defKey].(map[string]interface{})
if !exists {
return nil, fmt.Errorf("definition not found: %s %+v", defKey, definitions)
}
return def, nil
}
func (sc *JSONSchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) (string, error) {
sc.addRule("freestring", PRIMITIVE_RULES["freestring"])
_, err := sc.visit(schema, "", schema)
if err != nil {
return "", err
}
return sc.rules.ToGrammar(options...), nil
}
func (sc *JSONSchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) (string, error) {
var schema map[string]interface{}
err := json.Unmarshal(b, &schema)
if err != nil {
return "", err
}
return sc.Grammar(schema, options...)
}
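
For reference, a small usage sketch of the converter on a plain JSON schema; the schema literal is illustrative. Now that the panics have been replaced with error returns, GrammarFromBytes surfaces unmarshal and conversion failures as ordinary errors.

package main

import (
	"fmt"

	"github.com/mudler/LocalAI/pkg/functions/grammars"
)

func main() {
	// An illustrative object schema with two typed properties.
	schema := []byte(`{
		"type": "object",
		"properties": {
			"title": {"type": "string"},
			"year":  {"type": "integer"}
		}
	}`)

	conv := grammars.NewJSONSchemaConverter("") // no property-order hint
	grammar, err := conv.GrammarFromBytes(schema)
	if err != nil {
		panic(err)
	}
	fmt.Println(grammar) // root/space/string/integer rules, one per line
}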

View File

@@ -1,24 +1,14 @@
package grammars_test
import (
"strings"
"github.com/mudler/LocalAI/pkg/functions"
. "github.com/mudler/LocalAI/pkg/functions"
. "github.com/mudler/LocalAI/pkg/functions/grammars"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var testFunctions = []Item{
{
Type: "object",
@@ -245,7 +235,8 @@ root-1-name ::= "\"search\""`
var _ = Describe("JSON schema grammar tests", func() {
Context("JSON", func() {
It("generates a valid grammar from JSON schema", func() {
grammar, err := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput1))
Expect(err).To(BeNil())
results := strings.Split(inputResult1, "\n")
for _, r := range results {
if r != "" {
@@ -255,7 +246,8 @@ var _ = Describe("JSON schema grammar tests", func() {
Expect(len(results)).To(Equal(len(strings.Split(grammar, "\n"))))
})
It("generates a valid grammar from JSON schema", func() {
grammar, err := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput2))
Expect(err).To(BeNil())
results := strings.Split(inputResult3, "\n")
for _, r := range results {
if r != "" {
@@ -269,7 +261,8 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctions}
grammar, err := structuredGrammar.Grammar()
Expect(err).To(BeNil())
results := strings.Split(inputResult1, "\n")
for _, r := range results {
if r != "" {
@@ -283,7 +276,8 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctions}
grammar, err := structuredGrammar.Grammar(EnableMaybeArray)
Expect(err).To(BeNil())
results := strings.Split(
strings.Join([]string{
inputResult2,
@@ -301,7 +295,8 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar, err := structuredGrammar.Grammar(EnableMaybeArray)
Expect(err).To(BeNil())
results := strings.Split(
strings.Join([]string{
inputResult4,
@@ -319,10 +314,11 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar, err := structuredGrammar.Grammar(
SetPrefix("suffix"),
EnableMaybeArray,
)
Expect(err).To(BeNil())
results := strings.Split(
strings.Join([]string{
rootResult(`"suffix" arr | realvalue`),
@@ -339,7 +335,8 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar, err := structuredGrammar.Grammar(SetPrefix("suffix"))
Expect(err).To(BeNil())
results := strings.Split(
strings.Join([]string{
rootResult(`"suffix" realvalue`),
@@ -356,7 +353,8 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar, err := structuredGrammar.Grammar(SetPrefix("suffix"), EnableMaybeString)
Expect(err).To(BeNil())
results := strings.Split(
strings.Join([]string{
rootResult(`( "suffix" realvalue | mixedstring )`),
@@ -373,7 +371,8 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar, err := structuredGrammar.Grammar(SetPrefix("suffix"), EnableMaybeString, EnableMaybeArray)
Expect(err).To(BeNil())
results := strings.Split(
strings.Join([]string{
rootResult(`( "suffix" (arr | realvalue) | mixedstring )`),
@@ -392,7 +391,8 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar, err := structuredGrammar.Grammar(EnableMaybeString, EnableMaybeArray)
Expect(err).To(BeNil())
results := strings.Split(
strings.Join([]string{
rootResult(`mixedstring | arr | realvalue`),
@@ -410,7 +410,8 @@ var _ = Describe("JSON schema grammar tests", func() {
structuredGrammar := JSONFunctionStructure{
OneOf: testFunctionsName}
grammar, err := structuredGrammar.Grammar(EnableMaybeString, EnableMaybeArray, NoMixedFreeString)
Expect(err).To(BeNil())
results := strings.Split(
strings.Join([]string{
rootResult(`freestring | arr | realvalue`),
@@ -432,7 +433,8 @@ var _ = Describe("JSON schema grammar tests", func() {
realvalue
("," realvalue)*
)? "]"`
grammar, err := structuredGrammar.Grammar(EnableMaybeString, EnableMaybeArray, DisableParallelNewLines)
Expect(err).To(BeNil())
results := strings.Split(content, "\n")
for _, r := range results {
if r != "" {

View File

@@ -0,0 +1,281 @@
package grammars
import (
"encoding/json"
"fmt"
"regexp"
"sort"
"strings"
)
type LLama31SchemaConverter struct {
fnName string
rules Rules
}
func NewLLama31SchemaConverter(fnName string) *LLama31SchemaConverter {
rules := make(map[string]string)
rules["space"] = SPACE_RULE
if fnName == "" {
fnName = "name"
}
return &LLama31SchemaConverter{
rules: rules,
fnName: fnName,
}
}
var GRAMMAR_LITERAL_ESCAPESLlama = map[string]string{
"\r": `\r`,
"\n": `\n`,
}
var GRAMMAR_LITERAL_ESCAPE_RELlama = regexp.MustCompile(`[\r\n]`)
func (sc *LLama31SchemaConverter) formatLiteral(literal interface{}) (string, error) {
jLiteral, err := jsonString(literal)
if err != nil {
return "", err
}
escaped := GRAMMAR_LITERAL_ESCAPE_RELlama.ReplaceAllStringFunc(jLiteral, func(match string) string {
return GRAMMAR_LITERAL_ESCAPESLlama[match]
})
return escaped, nil
}
func (sc *LLama31SchemaConverter) formatLiteralQuoted(literal interface{}) (string, error) {
jLiteral, err := jsonString(literal)
if err != nil {
return "", err
}
escaped := GRAMMAR_LITERAL_ESCAPE_RE.ReplaceAllStringFunc(jLiteral, func(match string) string {
return GRAMMAR_LITERAL_ESCAPES[match]
})
return fmt.Sprintf(`"%s"`, escaped), nil
}
func (sc *LLama31SchemaConverter) addRule(name, rule string) string {
escName := INVALID_RULE_CHARS_RE.ReplaceAllString(name, "-")
key := escName
if existingRule, ok := sc.rules[escName]; ok && existingRule != rule {
i := 0
for {
key = fmt.Sprintf("%s%d", escName, i)
if _, ok := sc.rules[key]; !ok {
break
}
i++
}
}
sc.rules[key] = rule
return key
}
func (sc *LLama31SchemaConverter) visit(schema map[string]interface{}, name string, rootSchema map[string]interface{}) (string, error) {
st, existType := schema["type"]
var schemaType string
if existType {
schemaType = st.(string)
}
ruleName := name
if name == "" {
ruleName = "root"
}
_, oneOfExists := schema["oneOf"]
_, anyOfExists := schema["anyOf"]
if oneOfExists || anyOfExists {
var alternatives []string
oneOfSchemas, oneOfExists := schema["oneOf"].([]interface{})
anyOfSchemas, anyOfExists := schema["anyOf"].([]interface{})
if oneOfExists {
for i, altSchema := range oneOfSchemas {
alternative, err := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
if err != nil {
return "", err
}
alternatives = append(alternatives, alternative)
}
} else if anyOfExists {
for i, altSchema := range anyOfSchemas {
alternative, err := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
if err != nil {
return "", err
}
alternatives = append(alternatives, alternative)
}
}
rule := strings.Join(alternatives, " | ")
return sc.addRule(ruleName, rule), nil
} else if ref, exists := schema["$ref"].(string); exists {
referencedSchema, err := sc.resolveReference(ref, rootSchema)
if err != nil {
return "", err
}
return sc.visit(referencedSchema, name, rootSchema)
} else if constVal, exists := schema["const"]; exists {
literal, err := sc.formatLiteral(constVal)
if err != nil {
return "", err
}
return sc.addRule(ruleName, literal), nil
} else if enumVals, exists := schema["enum"].([]interface{}); exists {
var enumRules []string
for _, enumVal := range enumVals {
enumRule, err := sc.formatLiteralQuoted(enumVal)
if err != nil {
return "", err
}
enumRules = append(enumRules, enumRule)
}
rule := strings.Join(enumRules, " | ")
return sc.addRule(ruleName, rule), nil
} else if properties, exists := schema["properties"].(map[string]interface{}); schemaType == "object" && exists {
baseProperty := false
depth := strings.Split(name, "-")
if len(depth) == 2 {
baseProperty = true
}
type propData []struct {
propName string
propSchema map[string]interface{}
}
var propPairs propData
for propName, propSchema := range properties {
propPairs = append(propPairs, struct {
propName string
propSchema map[string]interface{}
}{propName: propName, propSchema: propSchema.(map[string]interface{})})
}
sort.Slice(propPairs, func(i, j int) bool {
return propPairs[i].propName < propPairs[j].propName
})
var rule strings.Builder
if baseProperty {
rule.WriteString(`"<function="`)
} else {
rule.WriteString(`"{" space`)
}
if baseProperty {
namePair := propData{}
for i, propPair := range propPairs {
propName := propPair.propName
if propName == sc.fnName {
namePair = append(namePair, propPair)
// remove namePair from propPairs
propPairs = append(propPairs[:i], propPairs[i+1:]...)
break
}
}
if len(namePair) == 0 {
return "", fmt.Errorf("no function name found in the schema: %s", schema)
}
propRuleName, err := sc.visit(namePair[0].propSchema, fmt.Sprintf("%s-%s", ruleName, sc.fnName), rootSchema)
if err != nil {
return "", err
}
rule.WriteString(fmt.Sprintf(` %s ">{" `, propRuleName))
for _, propPair := range propPairs {
propName := propPair.propName
propSchema := propPair.propSchema
propRuleName, err := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema)
if err != nil {
return "", err
}
rule.WriteString(propRuleName)
}
rule.WriteString(` "}</function>"`)
} else {
for i, propPair := range propPairs {
propName := propPair.propName
propSchema := propPair.propSchema
propRuleName, err := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema)
if err != nil {
return "", err
}
lPropName, err := sc.formatLiteralQuoted(propName)
if err != nil {
return "", err
}
if i > 0 {
rule.WriteString(` "," space`)
}
rule.WriteString(fmt.Sprintf(` %s space ":" space %s`, lPropName, propRuleName))
}
}
if !baseProperty {
rule.WriteString(` "}" space`)
}
return sc.addRule(ruleName, rule.String()), nil
} else if items, exists := schema["items"].(map[string]interface{}); schemaType == "array" && exists {
itemRuleName, err := sc.visit(items, fmt.Sprintf("%s-item", ruleName), rootSchema)
if err != nil {
return "", err
}
rule := fmt.Sprintf(`"[" space (%s ("," space %s)*)? "]" space`, itemRuleName, itemRuleName)
return sc.addRule(ruleName, rule), nil
} else {
primitiveRule, exists := PRIMITIVE_RULES[schemaType]
if !exists {
return "", fmt.Errorf("unrecognized schema: %v", schema)
}
if ruleName == "root" {
schemaType = "root"
}
return sc.addRule(schemaType, primitiveRule), nil
}
}
func (sc *LLama31SchemaConverter) resolveReference(ref string, rootSchema map[string]interface{}) (map[string]interface{}, error) {
if !strings.HasPrefix(ref, "#/$defs/") {
return nil, fmt.Errorf("invalid reference format: %s", ref)
}
defKey := strings.TrimPrefix(ref, "#/$defs/")
definitions, exists := rootSchema["$defs"].(map[string]interface{})
if !exists {
return nil, fmt.Errorf("no definitions found in the schema: %s", rootSchema)
}
def, exists := definitions[defKey].(map[string]interface{})
if !exists {
return nil, fmt.Errorf("definition not found: %s %+v", defKey, definitions)
}
return def, nil
}
func (sc *LLama31SchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) (string, error) {
sc.addRule("freestring", PRIMITIVE_RULES["freestring"])
_, err := sc.visit(schema, "", schema)
if err != nil {
return "", err
}
return sc.rules.ToGrammar(options...), nil
}
func (sc *LLama31SchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) (string, error) {
var schema map[string]interface{}
err := json.Unmarshal(b, &schema)
if err != nil {
return "", err
}
return sc.Grammar(schema, options...)
}
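A minimal usage sketch (the schema literal is illustrative; the oneOf branch of visit sits earlier in this file):
schema := []byte(`{
  "oneOf": [
    {
      "type": "object",
      "properties": {
        "function": {"const": "search"},
        "arguments": {"type": "object", "properties": {"query": {"type": "string"}}}
      }
    }
  ]
}`)
// "function" is the property key carrying the function name (sc.fnName above)
grammar, err := NewLLama31SchemaConverter("function").GrammarFromBytes(schema)
if err != nil {
	// handle the error
}
_ = grammar // contains e.g.: root-0 ::= "<function=" root-0-function ">{" root-0-arguments "}</function>"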

View File

@@ -0,0 +1,76 @@
package grammars_test
import (
"strings"
. "github.com/mudler/LocalAI/pkg/functions/grammars"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
const (
testllama31Input1 = `
{
"oneOf": [
{
"type": "object",
"properties": {
"function": {"const": "create_event"},
"arguments": {
"type": "object",
"properties": {
"title": {"type": "string"},
"date": {"type": "string"},
"time": {"type": "string"}
}
}
}
},
{
"type": "object",
"properties": {
"function": {"const": "search"},
"arguments": {
"type": "object",
"properties": {
"query": {"type": "string"}
}
}
}
}
]
}`
// <function=example_function_name>{{"example_name": "example_value"}}</function>
testllama31inputResult1 = `root-0-function ::= "create_event"
freestring ::= (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* space
root-0 ::= "<function=" root-0-function ">{" root-0-arguments "}</function>"
root-1-arguments ::= "{" space "\"query\"" space ":" space string "}" space
root ::= root-0 | root-1
space ::= " "?
root-0-arguments ::= "{" space "\"date\"" space ":" space string "," space "\"time\"" space ":" space string "," space "\"title\"" space ":" space string "}" space
root-1 ::= "<function=" root-1-function ">{" root-1-arguments "}</function>"
string ::= "\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
root-1-function ::= "search"`
)
var _ = Describe("JSON schema grammar tests", func() {
Context("JSON", func() {
It("generates a valid grammar from JSON schema", func() {
grammar, err := NewLLama31SchemaConverter("function").GrammarFromBytes([]byte(testllama31Input1))
Expect(err).ToNot(HaveOccurred())
results := strings.Split(testllama31inputResult1, "\n")
for _, r := range results {
if r != "" {
Expect(grammar).To(ContainSubstring(r))
}
}
Expect(len(results)).To(Equal(len(strings.Split(grammar, "\n"))))
})
})
})
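Note: Rules is a plain map, so the line order of the emitted grammar is nondeterministic; the test therefore checks that every expected rule appears as a substring and that the line counts match, rather than comparing whole strings.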

View File

@@ -1,4 +1,4 @@
package functions
package grammars
type GrammarOption struct {
PropOrder string
@@ -8,6 +8,9 @@ type GrammarOption struct {
MaybeString bool
NoMixedFreeString bool
ExpectStringsAfterJSON bool
FunctionName string
SchemaType SchemaConverterType
}
func (o *GrammarOption) Apply(options ...func(*GrammarOption)) {
@@ -48,3 +51,15 @@ func SetPropOrder(order string) func(*GrammarOption) {
o.PropOrder = order
}
}
func WithSchemaType(schemaType SchemaConverterType) func(*GrammarOption) {
return func(o *GrammarOption) {
o.SchemaType = schemaType
}
}
func WithFunctionName(name string) func(*GrammarOption) {
return func(o *GrammarOption) {
o.FunctionName = name
}
}
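A quick sketch of how these options compose (only names defined in this package):
o := &GrammarOption{}
o.Apply(
	WithSchemaType(LLama31Schema),
	WithFunctionName("function"),
	SetPropOrder("name,arguments"),
)
// o.SchemaType == LLama31Schema, o.FunctionName == "function"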

View File

@@ -0,0 +1,93 @@
package grammars
import (
"fmt"
"strings"
"github.com/mudler/LocalAI/pkg/utils"
)
type Rules map[string]string
func (rules Rules) ToGrammar(options ...func(*GrammarOption)) string {
grammarOpts := &GrammarOption{}
grammarOpts.Apply(options...)
prefix := grammarOpts.Prefix
maybeArray := grammarOpts.MaybeArray
disableParallelNewLines := grammarOpts.DisableParallelNewLines
maybeString := grammarOpts.MaybeString
noMixedFreeString := grammarOpts.NoMixedFreeString
var lines []string
swapRoot := maybeArray || maybeString || prefix != ""
// write down the computed rules.
// if maybeArray is true, we need to add the array rule and slightly tweak the root rule
for name, rule := range rules {
if swapRoot && name == "root" {
name = "realvalue"
}
lines = append(lines, fmt.Sprintf("%s ::= %s", name, rule))
}
if !swapRoot {
return strings.Join(lines, "\n")
}
newRoot := "realvalue"
if maybeArray {
newRoot = "arr | realvalue"
}
freestringRule := "mixedstring"
if noMixedFreeString {
freestringRule = "freestring"
}
if prefix != "" {
// escape newlines in the prefix literal
prefix = utils.EscapeNewLines(prefix)
if maybeArray && maybeString {
newRoot = "(" + newRoot + ")"
}
if maybeString {
//newRoot = "( (\"" + suffix + "\" " + newRoot + ") | freestring ) "
newRoot = "( \"" + prefix + "\" " + newRoot + " | " + freestringRule + " ) "
} else {
newRoot = "\"" + prefix + "\" " + "" + newRoot + ""
}
} else if maybeString {
if maybeArray {
// newRoot = "(" + newRoot + ")"
}
newRoot = freestringRule + " | " + newRoot
}
lines = append(lines, fmt.Sprintf("%s ::= %s", "root", newRoot))
if disableParallelNewLines {
lines = append(lines, array)
} else {
lines = append(lines, arrayNewLines)
}
if maybeArray {
if grammarOpts.ExpectStringsAfterJSON {
lines = append(lines, `mixedstring ::= freestring | freestring arr freestring | (freestring realvalue freestring)* | realvalue | arr`)
} else {
lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`)
}
} else {
if grammarOpts.ExpectStringsAfterJSON {
lines = append(lines, `mixedstring ::= freestring | (freestring realvalue freestring)* | realvalue`)
} else {
lines = append(lines, `mixedstring ::= freestring | freestring realvalue | realvalue`)
}
}
return strings.Join(lines, "\n")
}
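For intuition, a sketch of the root-rule rewrite (the rule body is illustrative):
rules := Rules{"root": `"{" space "}" space`}
out := rules.ToGrammar(EnableMaybeString)
// the original root is renamed to realvalue and a new root is emitted:
//   realvalue ::= "{" space "}" space
//   root ::= mixedstring | realvalue
// followed by the arrayNewLines constant and the mixedstring helper rule
_ = out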

View File

@@ -0,0 +1,33 @@
package grammars
type SchemaConverterType int
const (
JSONSchema SchemaConverterType = iota
LLama31Schema
)
const (
LlamaType string = "llama3.1"
JSONType string = "json"
)
func (s SchemaConverterType) String() string {
switch s {
case JSONSchema:
return JSONType
case LLama31Schema:
return LlamaType
}
return "unknown"
}
func NewType(t string) SchemaConverterType {
switch t {
case JSONType:
return JSONSchema
case LlamaType:
return LLama31Schema
}
return JSONSchema
}
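Round-tripping the type names defined above:
t := NewType(LlamaType) // LLama31Schema
_ = t.String()          // "llama3.1"
u := NewType("something-else")
_ = u.String()          // "json" (NewType falls back to JSONSchema)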

View File

@@ -0,0 +1,28 @@
package functions
const (
JSONBNF = `root ::= object
value ::= object | array | string | number | ("true" | "false" | "null") ws
object ::=
"{" ws (
string ":" ws value
("," ws string ":" ws value)*
)? "}" ws
array ::=
"[" ws (
value
("," ws value)*
)? "]" ws
string ::=
"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
)* "\"" ws
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
ws ::= ([ \t\n] ws)?`
)
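Since root is defined as object, this grammar only accepts a top-level JSON object: `{"name": "foo", "arguments": {"bar": "baz"}}` matches, while a bare array such as `[1, 2, 3]` or a bare string does not (value allows them, but root does not).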

View File

@@ -7,6 +7,7 @@ import (
"regexp"
"strings"
"github.com/mudler/LocalAI/pkg/functions/grammars"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
)
@@ -22,7 +23,9 @@ type GrammarConfig struct {
MixedMode bool `yaml:"mixed_mode"`
// NoMixedFreeString disables the mixed mode for free strings
// In this way if the LLM selects a free string, it won't necessarily be mixed with JSON objects
// In this way if the LLM selects a free string, it won't necessarily be mixed with JSON objects.
// For example, if enabled, the LLM either returns a JSON object or a free string, but not a mix of both.
// If disabled (default): the LLM can return a JSON object surrounded by free strings (e.g. `this is the JSON result: { "bar": "baz" } for your question`). This forces the LLM to return at least a JSON object, but it's not going to be strict.
NoMixedFreeString bool `yaml:"no_mixed_free_string"`
// NoGrammar disables the grammar parsing and parses the responses directly from the LLM
@@ -39,6 +42,10 @@ type GrammarConfig struct {
// for instance name,arguments will make print { "name": "foo", "arguments": { "bar": "baz" } }
// instead of { "arguments": { "bar": "baz" }, "name": "foo" }
PropOrder string `yaml:"properties_order"`
// SchemaType can be configured to use a specific schema type to force the grammar
// available : json, llama3.1
SchemaType string `yaml:"schema_type"`
}
// FunctionsConfig is the configuration for the tool/function call.
@@ -92,28 +99,36 @@ type FuncCallResults struct {
Arguments string
}
func (g GrammarConfig) Options() []func(o *GrammarOption) {
opts := []func(o *GrammarOption){}
if g.MixedMode {
opts = append(opts, EnableMaybeString)
func (g FunctionsConfig) GrammarOptions() []func(o *grammars.GrammarOption) {
opts := []func(o *grammars.GrammarOption){}
if g.GrammarConfig.MixedMode {
opts = append(opts, grammars.EnableMaybeString)
}
if g.ParallelCalls {
opts = append(opts, EnableMaybeArray)
if g.GrammarConfig.ParallelCalls {
opts = append(opts, grammars.EnableMaybeArray)
}
if g.DisableParallelNewLines {
opts = append(opts, DisableParallelNewLines)
if g.GrammarConfig.DisableParallelNewLines {
opts = append(opts, grammars.DisableParallelNewLines)
}
if g.Prefix != "" {
opts = append(opts, SetPrefix(g.Prefix))
if g.GrammarConfig.Prefix != "" {
opts = append(opts, grammars.SetPrefix(g.GrammarConfig.Prefix))
}
if g.NoMixedFreeString {
opts = append(opts, NoMixedFreeString)
if g.GrammarConfig.NoMixedFreeString {
opts = append(opts, grammars.NoMixedFreeString)
}
if g.ExpectStringsAfterJSON {
opts = append(opts, ExpectStringsAfterJSON)
if g.GrammarConfig.ExpectStringsAfterJSON {
opts = append(opts, grammars.ExpectStringsAfterJSON)
}
opts = append(opts, SetPropOrder(g.PropOrder))
if g.GrammarConfig.SchemaType != "" {
opts = append(opts, grammars.WithSchemaType(grammars.NewType(g.GrammarConfig.SchemaType)))
}
if g.FunctionNameKey != "" {
opts = append(opts, grammars.WithFunctionName(g.FunctionNameKey))
}
opts = append(opts, grammars.SetPropOrder(g.GrammarConfig.PropOrder))
return opts
}
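A Go sketch exercising the new fields (assuming FunctionsConfig declares the GrammarConfig and FunctionNameKey fields referenced above):
cfg := FunctionsConfig{
	GrammarConfig: GrammarConfig{
		MixedMode:  true,
		SchemaType: grammars.LlamaType, // "llama3.1"
	},
	FunctionNameKey: "function",
}
opts := cfg.GrammarOptions()
// opts enables maybe-string mode, selects the llama3.1 schema converter,
// and threads the function-name key through to the grammar builder
_ = opts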

View File

@@ -18,3 +18,15 @@ func RandString(n int) string {
}
return string(b)
}
func Unique(arr []string) []string {
unique := make(map[string]bool)
var result []string
for _, item := range arr {
if _, ok := unique[item]; !ok {
unique[item] = true
result = append(result, item)
}
}
return result
}
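Unique keeps the first occurrence of each string and preserves input order, e.g. Unique([]string{"b", "a", "b", "c"}) returns []string{"b", "a", "c"}.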