Compare commits

..

31 Commits

Author SHA1 Message Date
LocalAI [bot]
902e47f0b0 chore: ⬆️ Update ggml-org/llama.cpp to 0320ac5264279d74f8ee91bafa6c90e9ab9bbb91 (#6306)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-09-18 09:27:18 +02:00
Ettore Di Giacinto
50bb78fd24 Add permissions for issues and actions
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-09-18 09:26:10 +02:00
LocalAI [bot]
542f07ab2d docs: ⬆️ update docs version mudler/LocalAI (#6305)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-09-17 21:06:50 +00:00
Ettore Di Giacinto
77c5acb9db Revert "feat(nvidia-gpu): bump images to cuda 12.8" (#6303)
Revert "feat(nvidia-gpu): bump images to cuda 12.8 (#6239)"

This reverts commit d9e25af7b5.
2025-09-17 19:31:43 +02:00
Ettore Di Giacinto
44bbf4d778 chore(model gallery): add websailor-7b (#6300)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-09-17 09:49:58 +02:00
Ettore Di Giacinto
633c12f93d chore(model gallery): add websailor-32b (#6299)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-09-17 09:48:16 +02:00
Ettore Di Giacinto
6f24135f1d chore(model gallery): add webwatcher-32b (#6298)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-09-17 09:42:54 +02:00
Ettore Di Giacinto
b72aa7b4fa chore(model gallery): add webwatcher-7b (#6297)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-09-17 09:36:25 +02:00
Ettore Di Giacinto
e94e725479 chore(model gallery): add alibaba-nlp_tongyi-deepresearch-30b-a3b (#6295)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-09-17 09:22:19 +02:00
LocalAI [bot]
e4ac7b14a3 chore: ⬆️ Update ggml-org/llama.cpp to 8ff206097c2bf3ca1c7aa95f9d6db779fc7bdd68 (#6292)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-09-16 21:09:47 +00:00
Ettore Di Giacinto
ddb39c73f2 chore(model gallery): add holo1.5-3b (#6291)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-09-16 18:13:11 +02:00
Ettore Di Giacinto
264b09fb1e chore(model gallery): add holo1.5-7b (#6290)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-09-16 18:10:27 +02:00
Ettore Di Giacinto
36dd45df51 chore(model gallery): add holo1.5-72b (#6289)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-09-16 18:07:50 +02:00
Ettore Di Giacinto
e5599f87b8 chore(model gallery): add k2-think-i1 (#6288)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-09-16 18:05:01 +02:00
LocalAI [bot]
e89b5cc0e3 chore: ⬆️ Update ggml-org/llama.cpp to b907255f4bd169b0dc7dca9553b4c54af5170865 (#6287)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-09-16 08:10:37 +02:00
Richard Palethorpe
10bf1084cc chore: ⬆️ Update leejet/stable-diffusion.cpp to 0ebe6fe118f125665939b27c89f34ed38716bff8 (#6271)
* ⬆️ Update leejet/stable-diffusion.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* fix(stablediffusion-ggml): Move parameters and start refactor of passing params

Signed-off-by: Richard Palethorpe <io@richiejp.com>

* fix(stablediffusion-ggml): Add default sampler option

Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Signed-off-by: Richard Palethorpe <io@richiejp.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-09-15 21:42:46 +02:00
Ettore Di Giacinto
b08ae559b3 chore(model gallery): add qwen3-stargate-sg1-uncensored-abliterated-8b-i1 (#6270)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-09-15 11:03:26 +02:00
Ettore Di Giacinto
aa7cb7e18c chore(model gallery): add aquif-ai_aquif-3.5-8b-think (#6269)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-09-15 10:42:42 +02:00
Ettore Di Giacinto
eadd3d4e46 chore(model gallery): add baidu_ernie-4.5-21b-a3b-thinking (#6267)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-09-15 10:27:02 +02:00
LocalAI [bot]
2a18206033 chore: ⬆️ Update ggml-org/llama.cpp to 6c019cb04e86e2dacfe62ce7666c64e9717dde1f (#6265)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-09-14 21:19:41 +00:00
LocalAI [bot]
39798d734e chore: ⬆️ Update ggml-org/llama.cpp to 0fa154e3502e940df914f03b41475a2b80b985b0 (#6263)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-09-14 19:59:58 +00:00
Gianluca Boiano
d0e99562af chore(aio): upgrade minicpm-v model to latest 4.5 (#6262)
chore(aio): upgrade vision model to MiniCPM-V 4.5

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
2025-09-14 15:04:58 +02:00
Ettore Di Giacinto
6410c99bf2 fix(llama-cpp): correctly calculate embeddings (#6259)
* chore(tests): check embeddings differs in llama.cpp

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(llama.cpp): use the correct field for embedding

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(llama.cpp): use embedding type none

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(tests): add test-cases in aio-e2e suite

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-09-13 23:11:54 +02:00
LocalAI [bot]
55766d269b chore: ⬆️ Update ggml-org/llama.cpp to aa0c461efe3603639af1a1defed2438d9c16ca0f (#6261)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-09-13 21:11:18 +00:00
Ettore Di Giacinto
ffa0ad1eac Fix formatting issues in README.md links
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-09-13 09:16:17 +02:00
LocalAI [bot]
623789a29e chore: ⬆️ Update ggml-org/llama.cpp to 40be51152d4dc2d47444a4ed378285139859895b (#6260)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-09-12 21:10:39 +00:00
Richard Palethorpe
2b9a3d32c9 chore: ⬆️ Update leejet/stable-diffusion.cpp to fce6afcc6a3250a8e17923608922d2a99b339b47 (#6256)
* ⬆️ Update leejet/stable-diffusion.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* fix(stablediffusion-ggml): Add SMOOTHSTEP scheduler and assert sampler and scheduler counts

Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Signed-off-by: Richard Palethorpe <io@richiejp.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-09-12 12:28:20 +02:00
LocalAI [bot]
f8b71dc5d0 chore: ⬆️ Update ggml-org/llama.cpp to 0e6ff0046f4a2983b2c77950aa75960fe4b4f0e2 (#6235)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-09-11 21:21:49 +00:00
KingJ
1d3331b5cb fix(rocm): Rename tag suffix for hipblas whisper build to match backend config (#6247)
Rename tag suffix for hipblas whisper to match backend config

hipblas images generally have the suffix `-gpu-rocm-hipblas-X`. One exception to this currently is the hipblas build of Whisper which has the suffix `gpu-hipblas-whisper.

However, as `backend/index.yaml` references the image tag for Whisper using the more consistent form (i.e. `latest-gpu-rocm-hipblas-whisper`), it is not possible to add the backend as raised in #6114.

Therefore, rename the suffix for hipblas whisper images to use the more consistent form, aligning with other hipblas builds as well as the expected image name in `backend/index.yaml`.

Signed-off-by: Kingsley Jarrett <kj@kingj.net>
2025-09-11 21:19:09 +02:00
Mário Freitas
2c0b9c6349 fix(chat): use proper finish_reason for tool/function calling (#6243)
Signed-off-by: Mário Freitas <imkira@gmail.com>
2025-09-11 21:13:23 +02:00
qxo
3c6c976755 feat: support HF_ENDPOINT env for the HuggingFace endpoint (#6220)
ie: `HF_ENDPOINT=https://hf-mirror.com`
2025-09-11 21:04:57 +02:00
28 changed files with 569 additions and 125 deletions

View File

@@ -89,7 +89,7 @@ jobs:
context: "./backend"
- build-type: 'l4t'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-l4t-diffusers'
@@ -187,7 +187,7 @@ jobs:
# CUDA 12 builds
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
@@ -199,7 +199,7 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp'
@@ -211,7 +211,7 @@ jobs:
context: "./"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-vllm'
@@ -223,7 +223,7 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-transformers'
@@ -235,7 +235,7 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
@@ -248,7 +248,7 @@ jobs:
# CUDA 12 additional backends
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
@@ -260,7 +260,7 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
@@ -272,7 +272,7 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-coqui'
@@ -284,7 +284,7 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-bark'
@@ -296,7 +296,7 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
@@ -578,7 +578,7 @@ jobs:
context: "./"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'true'
tag-latest: 'auto'
@@ -615,7 +615,7 @@ jobs:
context: "./"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml'
@@ -675,7 +675,7 @@ jobs:
context: "./"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'true'
tag-latest: 'auto'
@@ -700,7 +700,7 @@ jobs:
context: "./"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-whisper'
@@ -760,7 +760,7 @@ jobs:
context: "./"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'true'
tag-latest: 'auto'
@@ -775,7 +775,7 @@ jobs:
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-hipblas-whisper'
tag-suffix: '-gpu-rocm-hipblas-whisper'
base-image: "rocm/dev-ubuntu-22.04:6.4.3"
runs-on: 'ubuntu-latest'
skip-drivers: 'false'
@@ -836,7 +836,7 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-rfdetr'
@@ -872,7 +872,7 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'true'
tag-latest: 'auto'
@@ -897,7 +897,7 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-exllama2'

View File

@@ -36,7 +36,7 @@ jobs:
include:
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-gpu-nvidia-cuda-12'

View File

@@ -91,7 +91,7 @@ jobs:
aio: "-aio-gpu-nvidia-cuda-11"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12'
@@ -144,7 +144,7 @@ jobs:
include:
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64'

View File

@@ -6,7 +6,8 @@ permissions:
contents: write
pull-requests: write
packages: read
issues: write # for Homebrew/actions/post-comment
actions: write # to dispatch publish workflow
jobs:
dependabot:
runs-on: ubuntu-latest

View File

@@ -18,7 +18,7 @@ FROM requirements AS requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=8
ARG CUDA_MINOR_VERSION=0
ARG SKIP_DRIVERS=false
ARG TARGETARCH
ARG TARGETVARIANT

View File

@@ -117,8 +117,8 @@ run: ## run local-ai
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./
test-models/testmodel.ggml:
mkdir test-models
mkdir test-dir
mkdir -p test-models
mkdir -p test-dir
wget -q https://huggingface.co/mradermacher/gpt2-alpaca-gpt4-GGUF/resolve/main/gpt2-alpaca-gpt4.Q4_K_M.gguf -O test-models/testmodel.ggml
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
@@ -170,7 +170,7 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=8 -t localai-tests .
docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 -t localai-tests .
run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)

View File

@@ -43,7 +43,7 @@
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
>
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on
[![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white)](https://t.me/localaiofficial_bot)
[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)

View File

@@ -2,10 +2,10 @@ context_size: 4096
f16: true
backend: llama-cpp
mmap: true
mmproj: minicpm-v-2_6-mmproj-f16.gguf
mmproj: minicpm-v-4_5-mmproj-f16.gguf
name: gpt-4o
parameters:
model: minicpm-v-2_6-Q4_K_M.gguf
model: minicpm-v-4_5-Q4_K_M.gguf
stopwords:
- <|im_end|>
- <dummy32000>
@@ -42,9 +42,9 @@ template:
<|im_start|>assistant
download_files:
- filename: minicpm-v-2_6-Q4_K_M.gguf
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
- filename: minicpm-v-2_6-mmproj-f16.gguf
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
- filename: minicpm-v-4_5-Q4_K_M.gguf
sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
- filename: minicpm-v-4_5-mmproj-f16.gguf
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8

View File

@@ -2,10 +2,10 @@ context_size: 4096
backend: llama-cpp
f16: true
mmap: true
mmproj: minicpm-v-2_6-mmproj-f16.gguf
mmproj: minicpm-v-4_5-mmproj-f16.gguf
name: gpt-4o
parameters:
model: minicpm-v-2_6-Q4_K_M.gguf
model: minicpm-v-4_5-Q4_K_M.gguf
stopwords:
- <|im_end|>
- <dummy32000>
@@ -42,9 +42,9 @@ template:
<|im_start|>assistant
download_files:
- filename: minicpm-v-2_6-Q4_K_M.gguf
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
- filename: minicpm-v-2_6-mmproj-f16.gguf
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
- filename: minicpm-v-4_5-Q4_K_M.gguf
sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
- filename: minicpm-v-4_5-mmproj-f16.gguf
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8

View File

@@ -2,10 +2,10 @@ context_size: 4096
backend: llama-cpp
f16: true
mmap: true
mmproj: minicpm-v-2_6-mmproj-f16.gguf
mmproj: minicpm-v-4_5-mmproj-f16.gguf
name: gpt-4o
parameters:
model: minicpm-v-2_6-Q4_K_M.gguf
model: minicpm-v-4_5-Q4_K_M.gguf
stopwords:
- <|im_end|>
- <dummy32000>
@@ -43,9 +43,9 @@ template:
download_files:
- filename: minicpm-v-2_6-Q4_K_M.gguf
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
- filename: minicpm-v-2_6-mmproj-f16.gguf
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
- filename: minicpm-v-4_5-Q4_K_M.gguf
sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
- filename: minicpm-v-4_5-mmproj-f16.gguf
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8

View File

@@ -111,7 +111,7 @@ docker build -f backend/Dockerfile.python \
--build-arg BACKEND=transformers \
--build-arg BUILD_TYPE=cublas12 \
--build-arg CUDA_MAJOR_VERSION=12 \
--build-arg CUDA_MINOR_VERSION=8 \
--build-arg CUDA_MINOR_VERSION=0 \
-t localai-backend-transformers .
# Build Go backend

View File

@@ -1,6 +1,6 @@
LLAMA_VERSION?=fe4eb4f8ec25a1239b0923f1c7f87adf5730c3e5
LLAMA_REPO?=https://github.com/JohannesGaessler/llama.cpp
LLAMA_VERSION?=0320ac5264279d74f8ee91bafa6c90e9ab9bbb91
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=
BUILD_TYPE?=

View File

@@ -701,7 +701,7 @@ public:
*/
// for the shape of input/content, see tokenize_input_prompts()
json prompt = body.at("prompt");
json prompt = body.at("embeddings");
auto tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, prompt, true, true);
@@ -712,6 +712,7 @@ public:
}
}
int embd_normalize = 2; // default to Euclidean/L2 norm
// create and queue the task
json responses = json::array();
bool error = false;
@@ -725,9 +726,8 @@ public:
task.index = i;
task.prompt_tokens = std::move(tokenized_prompts[i]);
// OAI-compat
task.params.oaicompat = OAICOMPAT_TYPE_EMBEDDING;
task.params.oaicompat = OAICOMPAT_TYPE_NONE;
task.params.embd_normalize = embd_normalize;
tasks.push_back(std::move(task));
}
@@ -743,9 +743,8 @@ public:
responses.push_back(res->to_json());
}
}, [&](const json & error_data) {
return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, error_data.value("content", ""));
error = true;
}, [&]() {
// NOTE: we should try to check when the writer is closed here
return false;
});
@@ -755,12 +754,36 @@ public:
return grpc::Status(grpc::StatusCode::INTERNAL, "Error in receiving results");
}
std::vector<float> embeddings = responses[0].value("embedding", std::vector<float>());
// loop the vector and set the embeddings results
for (int i = 0; i < embeddings.size(); i++) {
embeddingResult->add_embeddings(embeddings[i]);
std::cout << "[DEBUG] Responses size: " << responses.size() << std::endl;
// Process the responses and extract embeddings
for (const auto & response_elem : responses) {
// Check if the response has an "embedding" field
if (response_elem.contains("embedding")) {
json embedding_data = json_value(response_elem, "embedding", json::array());
if (embedding_data.is_array() && !embedding_data.empty()) {
for (const auto & embedding_vector : embedding_data) {
if (embedding_vector.is_array()) {
for (const auto & embedding_value : embedding_vector) {
embeddingResult->add_embeddings(embedding_value.get<float>());
}
}
}
}
} else {
// Check if the response itself contains the embedding data directly
if (response_elem.is_array()) {
for (const auto & embedding_value : response_elem) {
embeddingResult->add_embeddings(embedding_value.get<float>());
}
}
}
}
return grpc::Status::OK;
}

View File

@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=b0179181069254389ccad604e44f17a2c25b4094
STABLEDIFFUSION_GGML_VERSION?=0ebe6fe118f125665939b27c89f34ed38716bff8
CMAKE_ARGS+=-DGGML_MAX_NAME=128

View File

@@ -4,17 +4,11 @@
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <iostream>
#include <random>
#include <string>
#include <vector>
#include <filesystem>
#include "gosd.h"
// #include "preprocessing.hpp"
#include "flux.hpp"
#include "stable-diffusion.h"
#define STB_IMAGE_IMPLEMENTATION
#define STB_IMAGE_STATIC
#include "stb_image.h"
@@ -29,7 +23,7 @@
// Names of the sampler method, same order as enum sample_method in stable-diffusion.h
const char* sample_method_str[] = {
"euler_a",
"default",
"euler",
"heun",
"dpm2",
@@ -41,8 +35,11 @@ const char* sample_method_str[] = {
"lcm",
"ddim_trailing",
"tcd",
"euler_a",
};
static_assert(std::size(sample_method_str) == SAMPLE_METHOD_COUNT, "sample method mismatch");
// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
const char* schedulers[] = {
"default",
@@ -51,8 +48,11 @@ const char* schedulers[] = {
"exponential",
"ays",
"gits",
"smoothstep",
};
static_assert(std::size(schedulers) == SCHEDULE_COUNT, "schedulers mismatch");
sd_ctx_t* sd_c;
// Moved from the context (load time) to generation time params
scheduler_t scheduler = scheduler_t::DEFAULT;
@@ -168,7 +168,7 @@ int load_model(const char *model, char *model_path, char* options[], int threads
}
if (sample_method_found == -1) {
fprintf(stderr, "Invalid sample method, default to EULER_A!\n");
sample_method_found = EULER_A;
sample_method_found = sample_method_t::SAMPLE_METHOD_DEFAULT;
}
sample_method = (sample_method_t)sample_method_found;
@@ -192,9 +192,7 @@ int load_model(const char *model, char *model_path, char* options[], int threads
ctx_params.control_net_path = "";
ctx_params.lora_model_dir = lora_dir;
ctx_params.embedding_dir = "";
ctx_params.stacked_id_embed_dir = "";
ctx_params.vae_decode_only = false;
ctx_params.vae_tiling = false;
ctx_params.free_params_immediately = false;
ctx_params.n_threads = threads;
ctx_params.rng_type = STD_DEFAULT_RNG;
@@ -220,7 +218,49 @@ int load_model(const char *model, char *model_path, char* options[], int threads
return 0;
}
int gen_image(char *text, char *negativeText, int width, int height, int steps, int64_t seed, char *dst, float cfg_scale, char *src_image, float strength, char *mask_image, char **ref_images, int ref_images_count) {
void sd_tiling_params_set_enabled(sd_tiling_params_t *params, bool enabled) {
params->enabled = enabled;
}
void sd_tiling_params_set_tile_sizes(sd_tiling_params_t *params, int tile_size_x, int tile_size_y) {
params->tile_size_x = tile_size_x;
params->tile_size_y = tile_size_y;
}
void sd_tiling_params_set_rel_sizes(sd_tiling_params_t *params, float rel_size_x, float rel_size_y) {
params->rel_size_x = rel_size_x;
params->rel_size_y = rel_size_y;
}
void sd_tiling_params_set_target_overlap(sd_tiling_params_t *params, float target_overlap) {
params->target_overlap = target_overlap;
}
sd_tiling_params_t* sd_img_gen_params_get_vae_tiling_params(sd_img_gen_params_t *params) {
return &params->vae_tiling_params;
}
sd_img_gen_params_t* sd_img_gen_params_new(void) {
sd_img_gen_params_t *params = (sd_img_gen_params_t *)std::malloc(sizeof(sd_img_gen_params_t));
sd_img_gen_params_init(params);
return params;
}
void sd_img_gen_params_set_prompts(sd_img_gen_params_t *params, const char *prompt, const char *negative_prompt) {
params->prompt = prompt;
params->negative_prompt = negative_prompt;
}
void sd_img_gen_params_set_dimensions(sd_img_gen_params_t *params, int width, int height) {
params->width = width;
params->height = height;
}
void sd_img_gen_params_set_seed(sd_img_gen_params_t *params, int64_t seed) {
params->seed = seed;
}
int gen_image(sd_img_gen_params_t *p, int steps, char *dst, float cfg_scale, char *src_image, float strength, char *mask_image, char **ref_images, int ref_images_count) {
sd_image_t* results;
@@ -228,21 +268,15 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
fprintf (stderr, "Generating image\n");
sd_img_gen_params_t p;
sd_img_gen_params_init(&p);
p->sample_params.guidance.txt_cfg = cfg_scale;
p->sample_params.guidance.slg.layers = skip_layers.data();
p->sample_params.guidance.slg.layer_count = skip_layers.size();
p->sample_params.sample_method = sample_method;
p->sample_params.sample_steps = steps;
p->sample_params.scheduler = scheduler;
p.prompt = text;
p.negative_prompt = negativeText;
p.sample_params.guidance.txt_cfg = cfg_scale;
p.sample_params.guidance.slg.layers = skip_layers.data();
p.sample_params.guidance.slg.layer_count = skip_layers.size();
p.width = width;
p.height = height;
p.sample_params.sample_method = sample_method;
p.sample_params.sample_steps = steps;
p.seed = seed;
p.input_id_images_path = "";
p.sample_params.scheduler = scheduler;
int width = p->width;
int height = p->height;
// Handle input image for img2img
bool has_input_image = (src_image != NULL && strlen(src_image) > 0);
@@ -291,13 +325,13 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
input_image_buffer = resized_image_buffer;
}
p.init_image = {(uint32_t)width, (uint32_t)height, 3, input_image_buffer};
p.strength = strength;
p->init_image = {(uint32_t)width, (uint32_t)height, 3, input_image_buffer};
p->strength = strength;
fprintf(stderr, "Using img2img with strength: %.2f\n", strength);
} else {
// No input image, use empty image for text-to-image
p.init_image = {(uint32_t)width, (uint32_t)height, 3, NULL};
p.strength = 0.0f;
p->init_image = {(uint32_t)width, (uint32_t)height, 3, NULL};
p->strength = 0.0f;
}
// Handle mask image for inpainting
@@ -337,12 +371,12 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
mask_image_buffer = resized_mask_buffer;
}
p.mask_image = {(uint32_t)width, (uint32_t)height, 1, mask_image_buffer};
p->mask_image = {(uint32_t)width, (uint32_t)height, 1, mask_image_buffer};
fprintf(stderr, "Using inpainting with mask\n");
} else {
// No mask image, create default full mask
default_mask_image_vec.resize(width * height, 255);
p.mask_image = {(uint32_t)width, (uint32_t)height, 1, default_mask_image_vec.data()};
p->mask_image = {(uint32_t)width, (uint32_t)height, 1, default_mask_image_vec.data()};
}
// Handle reference images
@@ -400,13 +434,15 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
}
if (!ref_images_vec.empty()) {
p.ref_images = ref_images_vec.data();
p.ref_images_count = ref_images_vec.size();
p->ref_images = ref_images_vec.data();
p->ref_images_count = ref_images_vec.size();
fprintf(stderr, "Using %zu reference images\n", ref_images_vec.size());
}
}
results = generate_image(sd_c, &p);
results = generate_image(sd_c, p);
std::free(p);
if (results == NULL) {
fprintf (stderr, "NO results\n");

View File

@@ -22,7 +22,18 @@ type SDGGML struct {
var (
LoadModel func(model, model_apth string, options []uintptr, threads int32, diff int) int
GenImage func(text, negativeText string, width, height, steps int, seed int64, dst string, cfgScale float32, srcImage string, strength float32, maskImage string, refImages []string, refImagesCount int) int
GenImage func(params uintptr, steps int, dst string, cfgScale float32, srcImage string, strength float32, maskImage string, refImages []string, refImagesCount int) int
TilingParamsSetEnabled func(params uintptr, enabled bool)
TilingParamsSetTileSizes func(params uintptr, tileSizeX int, tileSizeY int)
TilingParamsSetRelSizes func(params uintptr, relSizeX float32, relSizeY float32)
TilingParamsSetTargetOverlap func(params uintptr, targetOverlap float32)
ImgGenParamsNew func() uintptr
ImgGenParamsSetPrompts func(params uintptr, prompt string, negativePrompt string)
ImgGenParamsSetDimensions func(params uintptr, width int, height int)
ImgGenParamsSetSeed func(params uintptr, seed int64)
ImgGenParamsGetVaeTilingParams func(params uintptr) uintptr
)
// Copied from Purego internal/strings
@@ -120,7 +131,15 @@ func (sd *SDGGML) GenerateImage(opts *pb.GenerateImageRequest) error {
// Default strength for img2img (0.75 is a good default)
strength := float32(0.75)
ret := GenImage(t, negative, int(opts.Width), int(opts.Height), int(opts.Step), int64(opts.Seed), dst, sd.cfgScale, srcImage, strength, maskImage, refImages, refImagesCount)
// free'd by GenImage
p := ImgGenParamsNew()
ImgGenParamsSetPrompts(p, t, negative)
ImgGenParamsSetDimensions(p, int(opts.Width), int(opts.Height))
ImgGenParamsSetSeed(p, int64(opts.Seed))
vaep := ImgGenParamsGetVaeTilingParams(p)
TilingParamsSetEnabled(vaep, false)
ret := GenImage(p, int(opts.Step), dst, sd.cfgScale, srcImage, strength, maskImage, refImages, refImagesCount)
if ret != 0 {
return fmt.Errorf("inference failed")
}

View File

@@ -1,8 +1,23 @@
#include <cstdint>
#include "stable-diffusion.h"
#ifdef __cplusplus
extern "C" {
#endif
void sd_tiling_params_set_enabled(sd_tiling_params_t *params, bool enabled);
void sd_tiling_params_set_tile_sizes(sd_tiling_params_t *params, int tile_size_x, int tile_size_y);
void sd_tiling_params_set_rel_sizes(sd_tiling_params_t *params, float rel_size_x, float rel_size_y);
void sd_tiling_params_set_target_overlap(sd_tiling_params_t *params, float target_overlap);
sd_tiling_params_t* sd_img_gen_params_get_vae_tiling_params(sd_img_gen_params_t *params);
sd_img_gen_params_t* sd_img_gen_params_new(void);
void sd_img_gen_params_set_prompts(sd_img_gen_params_t *params, const char *prompt, const char *negative_prompt);
void sd_img_gen_params_set_dimensions(sd_img_gen_params_t *params, int width, int height);
void sd_img_gen_params_set_seed(sd_img_gen_params_t *params, int64_t seed);
int load_model(const char *model, char *model_path, char* options[], int threads, int diffusionModel);
int gen_image(char *text, char *negativeText, int width, int height, int steps, int64_t seed, char *dst, float cfg_scale, char *src_image, float strength, char *mask_image, char **ref_images, int ref_images_count);
int gen_image(sd_img_gen_params_t *p, int steps, char *dst, float cfg_scale, char *src_image, float strength, char *mask_image, char **ref_images, int ref_images_count);
#ifdef __cplusplus
}
#endif

View File

@@ -11,14 +11,35 @@ var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
type LibFuncs struct {
FuncPtr any
Name string
}
func main() {
gosd, err := purego.Dlopen("./libgosd.so", purego.RTLD_NOW|purego.RTLD_GLOBAL)
if err != nil {
panic(err)
}
purego.RegisterLibFunc(&LoadModel, gosd, "load_model")
purego.RegisterLibFunc(&GenImage, gosd, "gen_image")
libFuncs := []LibFuncs{
{&LoadModel, "load_model"},
{&GenImage, "gen_image"},
{&TilingParamsSetEnabled, "sd_tiling_params_set_enabled"},
{&TilingParamsSetTileSizes, "sd_tiling_params_set_tile_sizes"},
{&TilingParamsSetRelSizes, "sd_tiling_params_set_rel_sizes"},
{&TilingParamsSetTargetOverlap, "sd_tiling_params_set_target_overlap"},
{&ImgGenParamsNew, "sd_img_gen_params_new"},
{&ImgGenParamsSetPrompts, "sd_img_gen_params_set_prompts"},
{&ImgGenParamsSetDimensions, "sd_img_gen_params_set_dimensions"},
{&ImgGenParamsSetSeed, "sd_img_gen_params_set_seed"},
{&ImgGenParamsGetVaeTilingParams, "sd_img_gen_params_get_vae_tiling_params"},
}
for _, lf := range libFuncs {
purego.RegisterLibFunc(lf.FuncPtr, gosd, lf.Name)
}
flag.Parse()

View File

@@ -836,27 +836,40 @@ var _ = Describe("API test", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
embeddingModel := openai.AdaEmbeddingV2
resp, err := client.CreateEmbeddings(
context.Background(),
openai.EmbeddingRequest{
Model: openai.AdaEmbeddingV2,
Model: embeddingModel,
Input: []string{"sun", "cat"},
},
)
Expect(err).ToNot(HaveOccurred(), err)
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 2048))
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 2048))
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 4096))
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 4096))
sunEmbedding := resp.Data[0].Embedding
resp2, err := client.CreateEmbeddings(
context.Background(),
openai.EmbeddingRequest{
Model: openai.AdaEmbeddingV2,
Model: embeddingModel,
Input: []string{"sun"},
},
)
Expect(err).ToNot(HaveOccurred())
Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
Expect(resp2.Data[0].Embedding).ToNot(Equal(resp.Data[1].Embedding))
resp3, err := client.CreateEmbeddings(
context.Background(),
openai.EmbeddingRequest{
Model: embeddingModel,
Input: []string{"cat"},
},
)
Expect(err).ToNot(HaveOccurred())
Expect(resp3.Data[0].Embedding).To(Equal(resp.Data[1].Embedding))
Expect(resp3.Data[0].Embedding).ToNot(Equal(sunEmbedding))
})
Context("External gRPC calls", func() {

View File

@@ -398,9 +398,9 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
}
finishReason := "stop"
if toolsCalled {
if toolsCalled && len(input.Tools) > 0 {
finishReason = "tool_calls"
} else if toolsCalled && len(input.Tools) == 0 {
} else if toolsCalled {
finishReason = "function_call"
}
@@ -443,11 +443,6 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
log.Debug().Msgf("Text content to return: %s", textContentToReturn)
noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0
finishReason := "stop"
if len(input.Tools) > 0 {
finishReason = "tool_calls"
}
switch {
case noActionsToRun:
result, err := handleQuestion(config, cl, input, ml, startupOptions, results, s, predInput)
@@ -457,11 +452,11 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
}
*c = append(*c, schema.Choice{
FinishReason: finishReason,
FinishReason: "stop",
Message: &schema.Message{Role: "assistant", Content: &result}})
default:
toolChoice := schema.Choice{
FinishReason: finishReason,
FinishReason: "tool_calls",
Message: &schema.Message{
Role: "assistant",
},

View File

@@ -9,5 +9,5 @@ import (
func TestLocalAI(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "LocalAI test suite")
RunSpecs(t, "LocalAI HTTP test suite")
}

View File

@@ -182,7 +182,7 @@ MODEL_NAME=gemma-3-12b-it docker compose up
# NVIDIA GPU setup with custom multimodal and image models
MODEL_NAME=gemma-3-12b-it \
MULTIMODAL_MODEL=minicpm-v-2_6 \
MULTIMODAL_MODEL=minicpm-v-4_5 \
IMAGE_MODEL=flux.1-dev-ggml \
docker compose -f docker-compose.nvidia.yaml up
```

View File

@@ -1,3 +1,3 @@
{
"version": "v3.5.0"
"version": "v3.5.1"
}

View File

@@ -1,4 +1,30 @@
---
- &ernie
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
name: "baidu_ernie-4.5-21b-a3b-thinking"
license: apache-2.0
tags:
- gguf
- GPU
- CPU
- text-to-text
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/64f187a2cc1c03340ac30498/TYYUxK8xD1AxExFMWqbZD.png
urls:
- https://huggingface.co/baidu/ERNIE-4.5-21B-A3B-Thinking
- https://huggingface.co/bartowski/baidu_ERNIE-4.5-21B-A3B-Thinking-GGUF
description: |
Over the past three months, we have continued to scale the thinking capability of ERNIE-4.5-21B-A3B, improving both the quality and depth of reasoning, thereby advancing the competitiveness of ERNIE lightweight models in complex reasoning tasks. We are pleased to introduce ERNIE-4.5-21B-A3B-Thinking, featuring the following key enhancements:
Significantly improved performance on reasoning tasks, including logical reasoning, mathematics, science, coding, text generation, and academic benchmarks that typically require human expertise.
Efficient tool usage capabilities.
Enhanced 128K long-context understanding capabilities.
Note: This version has an increased thinking length. We strongly recommend its use in highly complex reasoning tasks. ERNIE-4.5-21B-A3B-Thinking is a text MoE post-trained model, with 21B total parameters and 3B activated parameters for each token.
overrides:
parameters:
model: baidu_ERNIE-4.5-21B-A3B-Thinking-Q4_K_M.gguf
files:
- filename: baidu_ERNIE-4.5-21B-A3B-Thinking-Q4_K_M.gguf
sha256: f309f225c413324c585e74ce28c55e76dec25340156374551d39707fc2966840
uri: huggingface://bartowski/baidu_ERNIE-4.5-21B-A3B-Thinking-GGUF/baidu_ERNIE-4.5-21B-A3B-Thinking-Q4_K_M.gguf
- &mimo
license: mit
tags:
@@ -2544,6 +2570,74 @@
- filename: minicpm-v-4_5-mmproj-f16.gguf
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8
- !!merge <<: *qwen3
name: "aquif-ai_aquif-3.5-8b-think"
urls:
- https://huggingface.co/aquif-ai/aquif-3.5-8B-Think
- https://huggingface.co/bartowski/aquif-ai_aquif-3.5-8B-Think-GGUF
description: |
The aquif-3.5 series is the successor to aquif-3, featuring a simplified naming scheme, expanded Mixture of Experts (MoE) options, and across-the-board performance improvements. This release streamlines model selection while delivering enhanced capabilities across reasoning, multilingual support, and general intelligence tasks.
An experimental small-scale Mixture of Experts model designed for multilingual applications with minimal computational overhead. Despite its compact active parameter count, it demonstrates competitive performance against larger dense models.
overrides:
parameters:
model: aquif-ai_aquif-3.5-8B-Think-Q4_K_M.gguf
files:
- filename: aquif-ai_aquif-3.5-8B-Think-Q4_K_M.gguf
sha256: 9e49b9c840de23bb3eb181ba7a102706c120b3e3d006983c3f14ebae307ff02e
uri: huggingface://bartowski/aquif-ai_aquif-3.5-8B-Think-GGUF/aquif-ai_aquif-3.5-8B-Think-Q4_K_M.gguf
- !!merge <<: *qwen3
name: "qwen3-stargate-sg1-uncensored-abliterated-8b-i1"
icon: https://huggingface.co/DavidAU/Qwen3-Stargate-SG1-Uncensored-Abliterated-8B/resolve/main/sg1.jpg
urls:
- https://huggingface.co/DavidAU/Qwen3-Stargate-SG1-Uncensored-Abliterated-8B
- https://huggingface.co/mradermacher/Qwen3-Stargate-SG1-Uncensored-Abliterated-8B-i1-GGUF
description: |
This repo contains the full precision source code, in "safe tensors" format to generate GGUFs, GPTQ, EXL2, AWQ, HQQ and other formats. The source code can also be used directly.
This model is specifically for SG1 (Stargate Series), science fiction, story generation (all genres) but also does coding and general tasks too.
This model can also be used for Role play.
This model will produce uncensored content (see notes below).
Fine tune (6 epochs, using Unsloth for Win 11) on an inhouse generated dataset to simulate / explore the Stargate SG1 Universe.
This version has the "canon" of all 10 seasons of SG1.
Model also contains, but not trained, on content from Stargate Atlantis, and Universe.
Fine tune process adds knowledge to the model, and alter all aspects of its operations.
Float32 (32 bit precision) was used to further increase the model's quality.
This model is based on "Goekdeniz-Guelmez/Josiefied-Qwen3-8B-abliterated-v1".
Example generations at the bottom of this page.
This is a Stargate (SG1) fine tune (1,331,953,664 of 9,522,689,024 (13.99% trained)), SIX epochs on this model.
As this is an instruct model, it will also benefit from a detailed system prompt too.
overrides:
parameters:
model: Qwen3-Stargate-SG1-Uncensored-Abliterated-8B.i1-Q4_K_M.gguf
files:
- filename: Qwen3-Stargate-SG1-Uncensored-Abliterated-8B.i1-Q4_K_M.gguf
sha256: 31ec697ccebbd7928c49714b8a0ec8be747be0f7c1ad71627967d2f8fe376990
uri: huggingface://mradermacher/Qwen3-Stargate-SG1-Uncensored-Abliterated-8B-i1-GGUF/Qwen3-Stargate-SG1-Uncensored-Abliterated-8B.i1-Q4_K_M.gguf
- !!merge <<: *qwen3
url: "github:mudler/LocalAI/gallery/qwen3-deepresearch.yaml@master"
name: "alibaba-nlp_tongyi-deepresearch-30b-a3b"
urls:
- https://huggingface.co/Alibaba-NLP/Tongyi-DeepResearch-30B-A3B
- https://huggingface.co/bartowski/Alibaba-NLP_Tongyi-DeepResearch-30B-A3B-GGUF
description: |
We present Tongyi DeepResearch, an agentic large language model featuring 30 billion total parameters, with only 3 billion activated per token. Developed by Tongyi Lab, the model is specifically designed for long-horizon, deep information-seeking tasks. Tongyi-DeepResearch demonstrates state-of-the-art performance across a range of agentic search benchmarks, including Humanity's Last Exam, BrowserComp, BrowserComp-ZH, WebWalkerQA, GAIA, xbench-DeepSearch and FRAMES.
overrides:
parameters:
model: Alibaba-NLP_Tongyi-DeepResearch-30B-A3B-Q4_K_M.gguf
files:
- filename: Alibaba-NLP_Tongyi-DeepResearch-30B-A3B-Q4_K_M.gguf
sha256: 1afefb3b369ea2de191f24fe8ea22cbbb7b412357902f27bd81d693dde35c2d9
uri: huggingface://bartowski/Alibaba-NLP_Tongyi-DeepResearch-30B-A3B-GGUF/Alibaba-NLP_Tongyi-DeepResearch-30B-A3B-Q4_K_M.gguf
- &gemma3
url: "github:mudler/LocalAI/gallery/gemma.yaml@master"
name: "gemma-3-27b-it"
@@ -7485,6 +7579,40 @@
- filename: Qwentile2.5-32B-Instruct-Q4_K_M.gguf
sha256: e476d6e3c15c78fc3f986d7ae8fa35c16116843827f2e6243c05767cef2f3615
uri: huggingface://bartowski/Qwentile2.5-32B-Instruct-GGUF/Qwentile2.5-32B-Instruct-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "websailor-32b"
urls:
- https://huggingface.co/Alibaba-NLP/WebSailor-32B
- https://huggingface.co/mradermacher/WebSailor-32B-GGUF
description: |
WebSailor is a complete post-training methodology designed to teach LLM agents sophisticated reasoning for complex web navigation and information-seeking tasks. It addresses the challenge of extreme uncertainty in vast information landscapes, a capability where previous open-source models lagged behind proprietary systems.
We classify information-seeking tasks into three difficulty levels, where Level 3 represents problems with both high uncertainty and a complex, non-linear path to a solution. To generate these challenging tasks, we introduce SailorFog-QA, a novel data synthesis pipeline that constructs intricate knowledge graphs and then applies information obfuscation. This process creates questions with high initial uncertainty that demand creative exploration and transcend simple, structured reasoning patterns.
Our training process begins by generating expert trajectories and then reconstructing the reasoning to create concise, action-oriented supervision signals, avoiding the stylistic and verbosity issues of teacher models. The agent is first given a "cold start" using rejection sampling fine-tuning (RFT) on a small set of high-quality examples to establish a baseline capability. This is followed by an efficient agentic reinforcement learning stage using our Duplicating Sampling Policy Optimization (DUPO) algorithm, which refines the agent's exploratory strategies.
WebSailor establishes a new state-of-the-art for open-source agents, achieving outstanding results on difficult benchmarks like BrowseComp-en and BrowseComp-zh. Notably, our smaller models like WebSailor-7B outperform agents built on much larger backbones, highlighting the efficacy of our training paradigm. Ultimately, WebSailor closes the performance gap to proprietary systems, achieving results on par with agents like Doubao-Search.
overrides:
parameters:
model: WebSailor-32B.Q4_K_M.gguf
files:
- filename: WebSailor-32B.Q4_K_M.gguf
sha256: 60cea732b8314cedf1807530857b4ebd9f6c41431b3223384eb7f94fbff7b5bc
uri: huggingface://mradermacher/WebSailor-32B-GGUF/WebSailor-32B.Q4_K_M.gguf
- !!merge <<: *qwen25
name: "websailor-7b"
urls:
- https://huggingface.co/Alibaba-NLP/WebSailor-7B
- https://huggingface.co/mradermacher/WebSailor-7B-GGUF
description: |
WebSailor is a complete post-training methodology designed to teach LLM agents sophisticated reasoning for complex web navigation and information-seeking tasks. It addresses the challenge of extreme uncertainty in vast information landscapes, a capability where previous open-source models lagged behind proprietary systems.
We classify information-seeking tasks into three difficulty levels, where Level 3 represents problems with both high uncertainty and a complex, non-linear path to a solution. To generate these challenging tasks, we introduce SailorFog-QA, a novel data synthesis pipeline that constructs intricate knowledge graphs and then applies information obfuscation. This process creates questions with high initial uncertainty that demand creative exploration and transcend simple, structured reasoning patterns.
Our training process begins by generating expert trajectories and then reconstructing the reasoning to create concise, action-oriented supervision signals, avoiding the stylistic and verbosity issues of teacher models. The agent is first given a "cold start" using rejection sampling fine-tuning (RFT) on a small set of high-quality examples to establish a baseline capability. This is followed by an efficient agentic reinforcement learning stage using our Duplicating Sampling Policy Optimization (DUPO) algorithm, which refines the agent's exploratory strategies.
WebSailor establishes a new state-of-the-art for open-source agents, achieving outstanding results on difficult benchmarks like BrowseComp-en and BrowseComp-zh. Notably, our smaller models like WebSailor-7B outperform agents built on much larger backbones, highlighting the efficacy of our training paradigm. Ultimately, WebSailor closes the performance gap to proprietary systems, achieving results on par with agents like Doubao-Search.
overrides:
parameters:
model: WebSailor-7B.Q4_K_M.gguf
files:
- filename: WebSailor-7B.Q4_K_M.gguf
sha256: 6ede884af5d82176606c3af19a5cc90da6fdf81a520f54284084f5e012217a56
uri: huggingface://mradermacher/WebSailor-7B-GGUF/WebSailor-7B.Q4_K_M.gguf
- &archfunct
license: apache-2.0
tags:
@@ -9884,6 +10012,119 @@
- filename: baichuan-inc_Baichuan-M2-32B-Q4_K_M.gguf
sha256: 51907419518e6f79c28f75e4097518e54c2efecd85cb4c714334395fa2d591c2
uri: huggingface://bartowski/baichuan-inc_Baichuan-M2-32B-GGUF/baichuan-inc_Baichuan-M2-32B-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "k2-think-i1"
icon: https://huggingface.co/LLM360/K2-Think/resolve/main/banner.png
urls:
- https://huggingface.co/LLM360/K2-Think
- https://huggingface.co/mradermacher/K2-Think-i1-GGUF
description: |
K2-Think is a 32 billion parameter open-weights general reasoning model with strong performance in competitive mathematical problem solving.
overrides:
parameters:
model: K2-Think.i1-Q4_K_M.gguf
files:
- filename: K2-Think.i1-Q4_K_M.gguf
sha256: 510fad18b0cf58059437338c1b5b982996ef89456a8d88da52eb3d50fe78b9fd
uri: huggingface://mradermacher/K2-Think-i1-GGUF/K2-Think.i1-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "holo1.5-72b"
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/677d3f355f847864bb644112/OQyAJ33sssiTDIQEQ7oH_.png
urls:
- https://huggingface.co/Hcompany/Holo1.5-72B
- https://huggingface.co/mradermacher/Holo1.5-72B-GGUF
description: |
Computer Use (CU) agents are AI systems that can interact with real applications—web, desktop, and mobile—on behalf of a user. They can navigate interfaces, manipulate elements, and answer questions about content, enabling powerful automation and productivity tools. CU agents are becoming increasingly important as they allow humans to delegate complex digital tasks safely and efficiently.
The Holo1.5 series provides state-of-the-art foundational models for building such agents. Holo1.5 models excel at user interface (UI) localization and UI-based question answering (QA) across web, computer, and mobile environments, with strong performance on benchmarks including Screenspot-V2, Screenspot-Pro, GroundUI-Web, Showdown, and our newly introduced WebClick.
overrides:
mmproj: Holo1.5-72B.mmproj-Q8_0.gguf
parameters:
model: Holo1.5-72B.Q4_K_M.gguf
files:
- filename: Holo1.5-72B.Q4_K_M.gguf
sha256: 3404347c245fefa352a3dc16134b5870f594ab8bff11e50582205b5538201a23
uri: huggingface://mradermacher/Holo1.5-72B-GGUF/Holo1.5-72B.Q4_K_M.gguf
- filename: Holo1.5-72B.mmproj-Q8_0.gguf
sha256: f172cffc96a00d4f885eecffbc798912d37105f4191ba16a9947a5776b0f8a02
uri: huggingface://mradermacher/Holo1.5-72B-GGUF/Holo1.5-72B.mmproj-Q8_0.gguf
- !!merge <<: *qwen25
name: "holo1.5-7b"
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/677d3f355f847864bb644112/OQyAJ33sssiTDIQEQ7oH_.png
urls:
- https://huggingface.co/Hcompany/Holo1.5-7B
- https://huggingface.co/mradermacher/Holo1.5-7B-GGUF
description: |
Computer Use (CU) agents are AI systems that can interact with real applications—web, desktop, and mobile—on behalf of a user. They can navigate interfaces, manipulate elements, and answer questions about content, enabling powerful automation and productivity tools. CU agents are becoming increasingly important as they allow humans to delegate complex digital tasks safely and efficiently.
The Holo1.5 series provides state-of-the-art foundational models for building such agents. Holo1.5 models excel at user interface (UI) localization and UI-based question answering (QA) across web, computer, and mobile environments, with strong performance on benchmarks including Screenspot-V2, Screenspot-Pro, GroundUI-Web, Showdown, and our newly introduced WebClick.
overrides:
mmproj: Holo1.5-7B.mmproj-Q8_0.gguf
parameters:
model: Holo1.5-7B.Q4_K_M.gguf
files:
- filename: Holo1.5-7B.Q4_K_M.gguf
sha256: 37d1c060b73b783ffdab8d70fa47a6cff46cd34b1cf44b5bfbf4f20ff99eacdd
uri: huggingface://mradermacher/Holo1.5-7B-GGUF/Holo1.5-7B.Q4_K_M.gguf
- filename: Holo1.5-7B.mmproj-Q8_0.gguf
sha256: a9bad2d3d9241251b8753d9be4ea737c03197077d96153c1365a62db709489f6
uri: huggingface://mradermacher/Holo1.5-7B-GGUF/Holo1.5-7B.mmproj-Q8_0.gguf
- !!merge <<: *qwen25
name: "holo1.5-3b"
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/677d3f355f847864bb644112/OQyAJ33sssiTDIQEQ7oH_.png
urls:
- https://huggingface.co/Hcompany/Holo1.5-3B
- https://huggingface.co/mradermacher/Holo1.5-3B-GGUF
description: |
Computer Use (CU) agents are AI systems that can interact with real applications—web, desktop, and mobile—on behalf of a user. They can navigate interfaces, manipulate elements, and answer questions about content, enabling powerful automation and productivity tools. CU agents are becoming increasingly important as they allow humans to delegate complex digital tasks safely and efficiently.
The Holo1.5 series provides state-of-the-art foundational models for building such agents. Holo1.5 models excel at user interface (UI) localization and UI-based question answering (QA) across web, computer, and mobile environments, with strong performance on benchmarks including Screenspot-V2, Screenspot-Pro, GroundUI-Web, Showdown, and our newly introduced WebClick.
overrides:
mmproj: Holo1.5-3B.mmproj-Q8_0.gguf
parameters:
model: Holo1.5-3B.Q4_K_M.gguf
files:
- filename: Holo1.5-3B.Q4_K_M.gguf
sha256: 5efb1318d439fe1f71e38825a17203c48ced7de4a5d0796427c8c638e817622a
uri: huggingface://mradermacher/Holo1.5-3B-GGUF/Holo1.5-3B.Q4_K_M.gguf
- filename: Holo1.5-3B.mmproj-Q8_0.gguf
sha256: fb5cc798b386a4b680c306f061457cb16cc627c7d9ed401d660b8b940463142b
uri: huggingface://mradermacher/Holo1.5-3B-GGUF/Holo1.5-3B.mmproj-Q8_0.gguf
- !!merge <<: *qwen25
name: "webwatcher-7b"
icon: https://huggingface.co/Alibaba-NLP/WebWatcher-7B/resolve/main/assets/webwatcher_logo.png
urls:
- https://huggingface.co/Alibaba-NLP/WebWatcher-7B
- https://huggingface.co/mradermacher/WebWatcher-7B-GGUF
description: |
WebWatcher is a multimodal agent for deep research that possesses enhanced visual-language reasoning capabilities. Our work presents a unified framework that combines complex vision-language reasoning with multi-tool interaction.
overrides:
mmproj: WebWatcher-7B.mmproj-Q8_0.gguf
parameters:
model: WebWatcher-7B.Q4_K_M.gguf
files:
- filename: WebWatcher-7B.Q4_K_M.gguf
sha256: 300c76a51de59552f997ee7ee78ec519620931dea15c655111633b96de1a47f2
uri: huggingface://mradermacher/WebWatcher-7B-GGUF/WebWatcher-7B.Q4_K_M.gguf
- filename: WebWatcher-7B.mmproj-Q8_0.gguf
sha256: 841dc1bcc4f69ca864518d2c9a9a37b1815169d9bd061b054e091061124e4e62
uri: huggingface://mradermacher/WebWatcher-7B-GGUF/WebWatcher-7B.mmproj-Q8_0.gguf
- !!merge <<: *qwen25
name: "webwatcher-32b"
icon: https://huggingface.co/Alibaba-NLP/WebWatcher-32B/resolve/main/assets/webwatcher_logo.png
urls:
- https://huggingface.co/Alibaba-NLP/WebWatcher-32B
- https://huggingface.co/mradermacher/WebWatcher-32B-GGUF
description: |
WebWatcher is a multimodal agent for deep research that possesses enhanced visual-language reasoning capabilities. Our work presents a unified framework that combines complex vision-language reasoning with multi-tool interaction.
overrides:
mmproj: WebWatcher-32B.mmproj-Q8_0.gguf
parameters:
model: WebWatcher-32B.Q4_K_M.gguf
files:
- filename: WebWatcher-32B.Q4_K_M.gguf
sha256: 6cd51d97b9451759a4ce4ec0c2048b36ff99fd9f83bb32cd9f06af6c5438c69b
uri: huggingface://mradermacher/WebWatcher-32B-GGUF/WebWatcher-32B.Q4_K_M.gguf
- filename: WebWatcher-32B.mmproj-Q8_0.gguf
sha256: e8815515f71a959465cc62e08e0ef45d7d8592215139b34efece848552cb2327
uri: huggingface://mradermacher/WebWatcher-32B-GGUF/WebWatcher-32B.mmproj-Q8_0.gguf
- &llama31
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" ## LLama3.1
icon: https://avatars.githubusercontent.com/u/153379578

View File

@@ -0,0 +1,45 @@
---
name: "qwen3"
config_file: |
mmap: true
backend: "llama-cpp"
template:
chat_message: |
<|im_start|>{{if eq .RoleName "tool" }}user{{else}}{{ .RoleName }}{{end}}
{{ if eq .RoleName "tool" -}}
<tool_response>
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if eq .RoleName "tool" -}}
</tool_response>
{{ end -}}
{{ if .FunctionCall -}}
<tool_call>
{{toJson .FunctionCall}}
</tool_call>
{{ end -}}<|im_end|>
function: |
<|im_start|>system
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
For each function call return a json object with function name and arguments
<|im_end|>
{{.Input -}}
<|im_start|>assistant
chat: |
{{.Input -}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 8192
f16: true
stopwords:
- '<|im_end|>'
- '<dummy32000>'
- '</s>'
- '<|endoftext|>'

View File

@@ -23,10 +23,10 @@ var ErrUnsafeFilesFound = errors.New("unsafe files found")
func HuggingFaceScan(uri URI) (*HuggingFaceScanResult, error) {
cleanParts := strings.Split(uri.ResolveURL(), "/")
if len(cleanParts) <= 4 || cleanParts[2] != "huggingface.co" {
if len(cleanParts) <= 4 || cleanParts[2] != "huggingface.co" && cleanParts[2] != HF_ENDPOINT {
return nil, ErrNonHuggingFaceFile
}
results, err := http.Get(fmt.Sprintf("https://huggingface.co/api/models/%s/%s/scan", cleanParts[3], cleanParts[4]))
results, err := http.Get(fmt.Sprintf("%s/api/models/%s/%s/scan", HF_ENDPOINT, cleanParts[3], cleanParts[4]))
if err != nil {
return nil, err
}

View File

@@ -37,6 +37,17 @@ const (
type URI string
// HF_ENDPOINT is the HuggingFace endpoint, can be overridden by setting the HF_ENDPOINT environment variable.
var HF_ENDPOINT string = loadConfig()
func loadConfig() string {
HF_ENDPOINT := os.Getenv("HF_ENDPOINT")
if HF_ENDPOINT == "" {
HF_ENDPOINT = "https://huggingface.co"
}
return HF_ENDPOINT
}
func (uri URI) DownloadWithCallback(basePath string, f func(url string, i []byte) error) error {
return uri.DownloadWithAuthorizationAndCallback(basePath, "", f)
}
@@ -213,7 +224,7 @@ func (s URI) ResolveURL() string {
filepath = strings.Split(filepath, "@")[0]
}
return fmt.Sprintf("https://huggingface.co/%s/%s/resolve/%s/%s", owner, repo, branch, filepath)
return fmt.Sprintf("%s/%s/%s/resolve/%s/%s", HF_ENDPOINT, owner, repo, branch, filepath)
}
return string(s)

View File

@@ -169,6 +169,30 @@ var _ = Describe("E2E test", func() {
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Data)).To(Equal(1), fmt.Sprint(resp))
Expect(resp.Data[0].Embedding).ToNot(BeEmpty())
resp2, err := client.CreateEmbeddings(context.TODO(),
openai.EmbeddingRequestStrings{
Input: []string{"cat"},
Model: openai.AdaEmbeddingV2,
},
)
Expect(err).ToNot(HaveOccurred())
Expect(len(resp2.Data)).To(Equal(1), fmt.Sprint(resp))
Expect(resp2.Data[0].Embedding).ToNot(BeEmpty())
Expect(resp2.Data[0].Embedding).ToNot(Equal(resp.Data[0].Embedding))
resp3, err := client.CreateEmbeddings(context.TODO(),
openai.EmbeddingRequestStrings{
Input: []string{"doc", "cat"},
Model: openai.AdaEmbeddingV2,
},
)
Expect(err).ToNot(HaveOccurred())
Expect(len(resp3.Data)).To(Equal(2), fmt.Sprint(resp))
Expect(resp3.Data[0].Embedding).ToNot(BeEmpty())
Expect(resp3.Data[0].Embedding).To(Equal(resp.Data[0].Embedding))
Expect(resp3.Data[1].Embedding).To(Equal(resp2.Data[0].Embedding))
Expect(resp3.Data[0].Embedding).ToNot(Equal(resp3.Data[1].Embedding))
})
})
Context("vision", func() {