mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-24 16:51:44 -04:00
Compare commits
1 Commits
v2.18.0
...
timeout_te
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f41a519a2c |
6
.github/workflows/image.yml
vendored
6
.github/workflows/image.yml
vendored
@@ -39,7 +39,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
# Pushing with all jobs in parallel
|
# Pushing with all jobs in parallel
|
||||||
# eats the bandwidth of all the nodes
|
# eats the bandwidth of all the nodes
|
||||||
max-parallel: ${{ github.event_name != 'pull_request' && 6 || 10 }}
|
max-parallel: ${{ github.event_name != 'pull_request' && 6 || 12 }}
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
# Extra images
|
# Extra images
|
||||||
@@ -257,7 +257,6 @@ jobs:
|
|||||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||||
strategy:
|
strategy:
|
||||||
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
|
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- build-type: ''
|
- build-type: ''
|
||||||
@@ -317,10 +316,9 @@ jobs:
|
|||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
- build-type: 'vulkan'
|
- build-type: 'vulkan'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64,linux/arm64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-vulkan-ffmpeg-core'
|
tag-suffix: '-vulkan-ffmpeg-core'
|
||||||
latest-image: 'latest-vulkan-ffmpeg-core'
|
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
|
|||||||
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
@@ -220,7 +220,7 @@ jobs:
|
|||||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||||
# Used to run the newer GNUMake version from brew that supports --output-sync
|
# Used to run the newer GNUMake version from brew that supports --output-sync
|
||||||
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
|
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
|
||||||
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
|
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
uses: mxschmitt/action-tmate@v3.18
|
uses: mxschmitt/action-tmate@v3.18
|
||||||
|
|||||||
28
Makefile
28
Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
|
|||||||
|
|
||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||||
CPPLLAMA_VERSION?=e57dc62057d41211ac018056c19c02cd544694df
|
CPPLLAMA_VERSION?=e112b610a1a75cb7fa8351e1a933e2e7a755a5ce
|
||||||
|
|
||||||
# gpt4all version
|
# gpt4all version
|
||||||
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
||||||
@@ -54,7 +54,7 @@ override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell gi
|
|||||||
|
|
||||||
OPTIONAL_TARGETS?=
|
OPTIONAL_TARGETS?=
|
||||||
|
|
||||||
export OS := $(shell uname -s)
|
OS := $(shell uname -s)
|
||||||
ARCH := $(shell uname -m)
|
ARCH := $(shell uname -m)
|
||||||
GREEN := $(shell tput -Txterm setaf 2)
|
GREEN := $(shell tput -Txterm setaf 2)
|
||||||
YELLOW := $(shell tput -Txterm setaf 3)
|
YELLOW := $(shell tput -Txterm setaf 3)
|
||||||
@@ -80,8 +80,8 @@ ifeq ($(OS),Darwin)
|
|||||||
BUILD_TYPE=metal
|
BUILD_TYPE=metal
|
||||||
# disable metal if on Darwin and any other value is explicitly passed.
|
# disable metal if on Darwin and any other value is explicitly passed.
|
||||||
else ifneq ($(BUILD_TYPE),metal)
|
else ifneq ($(BUILD_TYPE),metal)
|
||||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
CMAKE_ARGS+=-DLLAMA_METAL=OFF
|
||||||
export GGML_NO_ACCELERATE=1
|
export LLAMA_NO_ACCELERATE=1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),metal)
|
ifeq ($(BUILD_TYPE),metal)
|
||||||
@@ -98,13 +98,13 @@ endif
|
|||||||
|
|
||||||
ifeq ($(BUILD_TYPE),cublas)
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
|
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
|
||||||
export GGML_CUDA=1
|
export LLAMA_CUBLAS=1
|
||||||
export WHISPER_CUDA=1
|
export WHISPER_CUDA=1
|
||||||
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
|
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),vulkan)
|
ifeq ($(BUILD_TYPE),vulkan)
|
||||||
CMAKE_ARGS+=-DGGML_VULKAN=1
|
CMAKE_ARGS+=-DLLAMA_VULKAN=1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),hipblas)
|
ifeq ($(BUILD_TYPE),hipblas)
|
||||||
@@ -118,13 +118,13 @@ ifeq ($(BUILD_TYPE),hipblas)
|
|||||||
export WHISPER_HIPBLAS=1
|
export WHISPER_HIPBLAS=1
|
||||||
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
||||||
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
||||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
||||||
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
|
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),metal)
|
ifeq ($(BUILD_TYPE),metal)
|
||||||
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
|
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
|
||||||
export GGML_METAL=1
|
export LLAMA_METAL=1
|
||||||
export WHISPER_METAL=1
|
export WHISPER_METAL=1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
@@ -354,7 +354,7 @@ else
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
dist-cross-linux-arm64:
|
dist-cross-linux-arm64:
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
|
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
|
||||||
STATIC=true $(MAKE) build
|
STATIC=true $(MAKE) build
|
||||||
mkdir -p release
|
mkdir -p release
|
||||||
# if BUILD_ID is empty, then we don't append it to the binary name
|
# if BUILD_ID is empty, then we don't append it to the binary name
|
||||||
@@ -711,21 +711,21 @@ backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc
|
|||||||
cp -rf backend/cpp/llama backend/cpp/llama-avx2
|
cp -rf backend/cpp/llama backend/cpp/llama-avx2
|
||||||
$(MAKE) -C backend/cpp/llama-avx2 purge
|
$(MAKE) -C backend/cpp/llama-avx2 purge
|
||||||
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
|
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
|
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc
|
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc
|
||||||
cp -rf backend/cpp/llama backend/cpp/llama-avx
|
cp -rf backend/cpp/llama backend/cpp/llama-avx
|
||||||
$(MAKE) -C backend/cpp/llama-avx purge
|
$(MAKE) -C backend/cpp/llama-avx purge
|
||||||
$(info ${GREEN}I llama-cpp build info:avx${RESET})
|
$(info ${GREEN}I llama-cpp build info:avx${RESET})
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
|
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
|
cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
|
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
|
||||||
cp -rf backend/cpp/llama backend/cpp/llama-fallback
|
cp -rf backend/cpp/llama backend/cpp/llama-fallback
|
||||||
$(MAKE) -C backend/cpp/llama-fallback purge
|
$(MAKE) -C backend/cpp/llama-fallback purge
|
||||||
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
|
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
|
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
|
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
|
||||||
# TODO: every binary should have its own folder instead, so can have different metal implementations
|
# TODO: every binary should have its own folder instead, so can have different metal implementations
|
||||||
ifeq ($(BUILD_TYPE),metal)
|
ifeq ($(BUILD_TYPE),metal)
|
||||||
@@ -736,7 +736,7 @@ backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
|
|||||||
cp -rf backend/cpp/llama backend/cpp/llama-cuda
|
cp -rf backend/cpp/llama backend/cpp/llama-cuda
|
||||||
$(MAKE) -C backend/cpp/llama-cuda purge
|
$(MAKE) -C backend/cpp/llama-cuda purge
|
||||||
$(info ${GREEN}I llama-cpp build info:cuda${RESET})
|
$(info ${GREEN}I llama-cpp build info:cuda${RESET})
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
|
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
|
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
|
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
|
||||||
@@ -764,7 +764,7 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
|
|||||||
cp -rf backend/cpp/llama backend/cpp/llama-grpc
|
cp -rf backend/cpp/llama backend/cpp/llama-grpc
|
||||||
$(MAKE) -C backend/cpp/llama-grpc purge
|
$(MAKE) -C backend/cpp/llama-grpc purge
|
||||||
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
|
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
|
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_RPC=ON -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
|
cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
|
||||||
|
|
||||||
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
|
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
|
||||||
|
|||||||
@@ -4,44 +4,34 @@ LLAMA_VERSION?=
|
|||||||
CMAKE_ARGS?=
|
CMAKE_ARGS?=
|
||||||
BUILD_TYPE?=
|
BUILD_TYPE?=
|
||||||
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
||||||
TARGET?=--target grpc-server
|
|
||||||
|
|
||||||
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
|
||||||
|
|
||||||
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
|
||||||
ifeq ($(BUILD_TYPE),cublas)
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
CMAKE_ARGS+=-DGGML_CUDA=ON
|
CMAKE_ARGS+=-DLLAMA_CUBLAS=ON
|
||||||
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
# If build type is openblas then we set -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
|
||||||
# to CMAKE_ARGS automatically
|
# to CMAKE_ARGS automatically
|
||||||
else ifeq ($(BUILD_TYPE),openblas)
|
else ifeq ($(BUILD_TYPE),openblas)
|
||||||
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
CMAKE_ARGS+=-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
|
||||||
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
# If build type is clblas (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||||
else ifeq ($(BUILD_TYPE),clblas)
|
else ifeq ($(BUILD_TYPE),clblas)
|
||||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
CMAKE_ARGS+=-DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||||
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
||||||
else ifeq ($(BUILD_TYPE),hipblas)
|
else ifeq ($(BUILD_TYPE),hipblas)
|
||||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON
|
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
|
||||||
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
|
# If it's OSX, DO NOT embed the metal library - -DLLAMA_METAL_EMBED_LIBRARY=ON requires further investigation
|
||||||
# But if it's OSX without metal, disable it here
|
# But if it's OSX without metal, disable it here
|
||||||
else ifeq ($(OS),Darwin)
|
else ifeq ($(OS),darwin)
|
||||||
ifneq ($(BUILD_TYPE),metal)
|
ifneq ($(BUILD_TYPE),metal)
|
||||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
CMAKE_ARGS+=-DLLAMA_METAL=OFF
|
||||||
else
|
|
||||||
CMAKE_ARGS+=-DGGML_METAL=ON
|
|
||||||
# Until this is tested properly, we disable embedded metal file
|
|
||||||
# as we already embed it as part of the LocalAI assets
|
|
||||||
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=OFF
|
|
||||||
TARGET+=--target ggml-metal
|
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),sycl_f16)
|
ifeq ($(BUILD_TYPE),sycl_f16)
|
||||||
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
|
CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),sycl_f32)
|
ifeq ($(BUILD_TYPE),sycl_f32)
|
||||||
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||||
endif
|
endif
|
||||||
|
|
||||||
llama.cpp:
|
llama.cpp:
|
||||||
@@ -72,8 +62,8 @@ grpc-server: llama.cpp llama.cpp/examples/grpc-server
|
|||||||
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
||||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||||
bash -c "source $(ONEAPI_VARS); \
|
bash -c "source $(ONEAPI_VARS); \
|
||||||
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
|
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)"
|
||||||
else
|
else
|
||||||
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
|
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)
|
||||||
endif
|
endif
|
||||||
cp llama.cpp/build/bin/grpc-server .
|
cp llama.cpp/build/bin/grpc-server .
|
||||||
@@ -886,8 +886,6 @@ struct llama_server_context
|
|||||||
{"task_id", slot->task_id},
|
{"task_id", slot->task_id},
|
||||||
});
|
});
|
||||||
|
|
||||||
LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -142,14 +142,12 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption
|
|||||||
MirostatTAU: float32(*c.LLMConfig.MirostatTAU),
|
MirostatTAU: float32(*c.LLMConfig.MirostatTAU),
|
||||||
Debug: *c.Debug,
|
Debug: *c.Debug,
|
||||||
StopPrompts: c.StopWords,
|
StopPrompts: c.StopWords,
|
||||||
Repeat: int32(c.RepeatLastN),
|
Repeat: int32(c.RepeatPenalty),
|
||||||
FrequencyPenalty: float32(c.FrequencyPenalty),
|
|
||||||
PresencePenalty: float32(c.PresencePenalty),
|
|
||||||
Penalty: float32(c.RepeatPenalty),
|
|
||||||
NKeep: int32(c.Keep),
|
NKeep: int32(c.Keep),
|
||||||
Batch: int32(c.Batch),
|
Batch: int32(c.Batch),
|
||||||
IgnoreEOS: c.IgnoreEOS,
|
IgnoreEOS: c.IgnoreEOS,
|
||||||
Seed: getSeed(c),
|
Seed: getSeed(c),
|
||||||
|
FrequencyPenalty: float32(c.FrequencyPenalty),
|
||||||
MLock: *c.MMlock,
|
MLock: *c.MMlock,
|
||||||
MMap: *c.MMap,
|
MMap: *c.MMap,
|
||||||
MainGPU: c.MainGPU,
|
MainGPU: c.MainGPU,
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ type TranscriptCMD struct {
|
|||||||
Backend string `short:"b" default:"whisper" help:"Backend to run the transcription model"`
|
Backend string `short:"b" default:"whisper" help:"Backend to run the transcription model"`
|
||||||
Model string `short:"m" required:"" help:"Model name to run the TTS"`
|
Model string `short:"m" required:"" help:"Model name to run the TTS"`
|
||||||
Language string `short:"l" help:"Language of the audio file"`
|
Language string `short:"l" help:"Language of the audio file"`
|
||||||
Translate bool `short:"c" help:"Translate the transcription to english"`
|
Translate bool `short:"t" help:"Translate the transcription to english"`
|
||||||
Threads int `short:"t" default:"1" help:"Number of threads used for parallel computation"`
|
Threads int `short:"t" default:"1" help:"Number of threads used for parallel computation"`
|
||||||
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
|
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
|
||||||
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
|
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
|
||||||
|
|||||||
@@ -26,7 +26,6 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
appConfig *config.ApplicationConfig,
|
appConfig *config.ApplicationConfig,
|
||||||
galleryService *services.GalleryService,
|
galleryService *services.GalleryService,
|
||||||
auth func(*fiber.Ctx) error) {
|
auth func(*fiber.Ctx) error) {
|
||||||
tmpLMS := services.NewListModelsService(ml, cl, appConfig) // TODO: once createApplication() is fully in use, reference the central instance.
|
|
||||||
|
|
||||||
// keeps the state of models that are being installed from the UI
|
// keeps the state of models that are being installed from the UI
|
||||||
var processingModels = xsync.NewSyncedMap[string, string]()
|
var processingModels = xsync.NewSyncedMap[string, string]()
|
||||||
@@ -236,7 +235,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
|
|
||||||
// Show the Chat page
|
// Show the Chat page
|
||||||
app.Get("/chat/:model", auth, func(c *fiber.Ctx) error {
|
app.Get("/chat/:model", auth, func(c *fiber.Ctx) error {
|
||||||
backendConfigs, _ := tmpLMS.ListModels("", true)
|
backendConfigs := cl.GetAllBackendConfigs()
|
||||||
|
|
||||||
summary := fiber.Map{
|
summary := fiber.Map{
|
||||||
"Title": "LocalAI - Chat with " + c.Params("model"),
|
"Title": "LocalAI - Chat with " + c.Params("model"),
|
||||||
@@ -250,7 +249,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
})
|
})
|
||||||
|
|
||||||
app.Get("/talk/", auth, func(c *fiber.Ctx) error {
|
app.Get("/talk/", auth, func(c *fiber.Ctx) error {
|
||||||
backendConfigs, _ := tmpLMS.ListModels("", true)
|
backendConfigs := cl.GetAllBackendConfigs()
|
||||||
|
|
||||||
if len(backendConfigs) == 0 {
|
if len(backendConfigs) == 0 {
|
||||||
// If no model is available redirect to the index which suggests how to install models
|
// If no model is available redirect to the index which suggests how to install models
|
||||||
@@ -260,7 +259,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
summary := fiber.Map{
|
summary := fiber.Map{
|
||||||
"Title": "LocalAI - Talk",
|
"Title": "LocalAI - Talk",
|
||||||
"ModelsConfig": backendConfigs,
|
"ModelsConfig": backendConfigs,
|
||||||
"Model": backendConfigs[0].ID,
|
"Model": backendConfigs[0].Name,
|
||||||
"Version": internal.PrintableVersion(),
|
"Version": internal.PrintableVersion(),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -270,7 +269,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
|
|
||||||
app.Get("/chat/", auth, func(c *fiber.Ctx) error {
|
app.Get("/chat/", auth, func(c *fiber.Ctx) error {
|
||||||
|
|
||||||
backendConfigs, _ := tmpLMS.ListModels("", true)
|
backendConfigs := cl.GetAllBackendConfigs()
|
||||||
|
|
||||||
if len(backendConfigs) == 0 {
|
if len(backendConfigs) == 0 {
|
||||||
// If no model is available redirect to the index which suggests how to install models
|
// If no model is available redirect to the index which suggests how to install models
|
||||||
@@ -278,9 +277,9 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
}
|
}
|
||||||
|
|
||||||
summary := fiber.Map{
|
summary := fiber.Map{
|
||||||
"Title": "LocalAI - Chat with " + backendConfigs[0].ID,
|
"Title": "LocalAI - Chat with " + backendConfigs[0].Name,
|
||||||
"ModelsConfig": backendConfigs,
|
"ModelsConfig": backendConfigs,
|
||||||
"Model": backendConfigs[0].ID,
|
"Model": backendConfigs[0].Name,
|
||||||
"Version": internal.PrintableVersion(),
|
"Version": internal.PrintableVersion(),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -100,10 +100,10 @@ SOFTWARE.
|
|||||||
<option value="" disabled class="text-gray-400" >Select a model</option>
|
<option value="" disabled class="text-gray-400" >Select a model</option>
|
||||||
{{ $model:=.Model}}
|
{{ $model:=.Model}}
|
||||||
{{ range .ModelsConfig }}
|
{{ range .ModelsConfig }}
|
||||||
{{ if eq .ID $model }}
|
{{ if eq .Name $model }}
|
||||||
<option value="/chat/{{.ID}}" selected class="bg-gray-700 text-white">{{.ID}}</option>
|
<option value="/chat/{{.Name}}" selected class="bg-gray-700 text-white">{{.Name}}</option>
|
||||||
{{ else }}
|
{{ else }}
|
||||||
<option value="/chat/{{.ID}}" class="bg-gray-700 text-white">{{.ID}}</option>
|
<option value="/chat/{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
|
||||||
{{ end }}
|
{{ end }}
|
||||||
{{ end }}
|
{{ end }}
|
||||||
</select>
|
</select>
|
||||||
|
|||||||
@@ -62,7 +62,7 @@
|
|||||||
<option value="" disabled class="text-gray-400" >Select a model</option>
|
<option value="" disabled class="text-gray-400" >Select a model</option>
|
||||||
|
|
||||||
{{ range .ModelsConfig }}
|
{{ range .ModelsConfig }}
|
||||||
<option value="{{.ID}}" class="bg-gray-700 text-white">{{.ID}}</option>
|
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
|
||||||
{{ end }}
|
{{ end }}
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -25,10 +25,7 @@ type PredictionOptions struct {
|
|||||||
Batch int `json:"batch" yaml:"batch"`
|
Batch int `json:"batch" yaml:"batch"`
|
||||||
IgnoreEOS bool `json:"ignore_eos" yaml:"ignore_eos"`
|
IgnoreEOS bool `json:"ignore_eos" yaml:"ignore_eos"`
|
||||||
RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
|
RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
|
||||||
|
Keep int `json:"n_keep" yaml:"n_keep"`
|
||||||
RepeatLastN int `json:"repeat_last_n" yaml:"repeat_last_n"`
|
|
||||||
|
|
||||||
Keep int `json:"n_keep" yaml:"n_keep"`
|
|
||||||
|
|
||||||
FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
|
FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
|
||||||
PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"`
|
PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"`
|
||||||
|
|||||||
@@ -118,7 +118,7 @@ And we convert it to the gguf format that LocalAI can consume:
|
|||||||
|
|
||||||
# Convert to gguf
|
# Convert to gguf
|
||||||
git clone https://github.com/ggerganov/llama.cpp.git
|
git clone https://github.com/ggerganov/llama.cpp.git
|
||||||
pushd llama.cpp && make GGML_CUDA=1 && popd
|
pushd llama.cpp && make LLAMA_CUBLAS=1 && popd
|
||||||
|
|
||||||
# We need to convert the pytorch model into ggml for quantization
|
# We need to convert the pytorch model into ggml for quantization
|
||||||
# It creates 'ggml-model-f16.bin' in the 'merged' directory.
|
# It creates 'ggml-model-f16.bin' in the 'merged' directory.
|
||||||
|
|||||||
@@ -55,4 +55,4 @@ This typically happens when your prompt exceeds the context size. Try to reduce
|
|||||||
|
|
||||||
### I'm getting a 'SIGILL' error, what's wrong?
|
### I'm getting a 'SIGILL' error, what's wrong?
|
||||||
|
|
||||||
Your CPU probably does not have support for certain instructions that are compiled by default in the pre-built binaries. If you are running in a container, try setting `REBUILD=true` and disable the CPU instructions that are not compatible with your CPU. For instance: `CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make build`
|
Your CPU probably does not have support for certain instructions that are compiled by default in the pre-built binaries. If you are running in a container, try setting `REBUILD=true` and disable the CPU instructions that are not compatible with your CPU. For instance: `CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make build`
|
||||||
@@ -101,14 +101,14 @@ Here is the list of the variables available that can be used to customize the bu
|
|||||||
LocalAI uses different backends based on ggml and llama.cpp to run models. If your CPU doesn't support common instruction sets, you can disable them during build:
|
LocalAI uses different backends based on ggml and llama.cpp to run models. If your CPU doesn't support common instruction sets, you can disable them during build:
|
||||||
|
|
||||||
```
|
```
|
||||||
CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_AVX=OFF -DGGML_FMA=OFF" make build
|
CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_AVX=OFF -DLLAMA_FMA=OFF" make build
|
||||||
```
|
```
|
||||||
|
|
||||||
To have effect on the container image, you need to set `REBUILD=true`:
|
To have effect on the container image, you need to set `REBUILD=true`:
|
||||||
|
|
||||||
```
|
```
|
||||||
docker run quay.io/go-skynet/localai
|
docker run quay.io/go-skynet/localai
|
||||||
docker run --rm -ti -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -e REBUILD=true -e CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_AVX=OFF -DGGML_FMA=OFF" -v $PWD/models:/models quay.io/go-skynet/local-ai:latest
|
docker run --rm -ti -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -e REBUILD=true -e CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_AVX=OFF -DLLAMA_FMA=OFF" -v $PWD/models:/models quay.io/go-skynet/local-ai:latest
|
||||||
```
|
```
|
||||||
|
|
||||||
{{% /alert %}}
|
{{% /alert %}}
|
||||||
|
|||||||
@@ -8,16 +8,6 @@ icon = "rocket_launch"
|
|||||||
|
|
||||||
**LocalAI** is a free, open-source alternative to OpenAI (Anthropic, etc.), functioning as a drop-in replacement REST API for local inferencing. It allows you to run [LLMs]({{% relref "docs/features/text-generation" %}}), generate images, and produce audio, all locally or on-premises with consumer-grade hardware, supporting multiple model families and architectures.
|
**LocalAI** is a free, open-source alternative to OpenAI (Anthropic, etc.), functioning as a drop-in replacement REST API for local inferencing. It allows you to run [LLMs]({{% relref "docs/features/text-generation" %}}), generate images, and produce audio, all locally or on-premises with consumer-grade hardware, supporting multiple model families and architectures.
|
||||||
|
|
||||||
{{% alert icon="💡" %}}
|
|
||||||
|
|
||||||
**Security considerations**
|
|
||||||
|
|
||||||
If you are exposing LocalAI remotely, make sure you protect the API endpoints adequately, either with a mechanism that filters incoming traffic or by running LocalAI with `API_KEY` set to gate access behind an API key. The API key grants full access to all features (there is no role separation), so it should be treated as equivalent to an admin credential.
|
|
||||||
|
|
||||||
To access the WebUI with an API_KEY, browser extensions such as [Requestly](https://requestly.com/) can be used (see also https://github.com/mudler/LocalAI/issues/2227#issuecomment-2093333752). See also [API flags]({{% relref "docs/advanced/advanced-usage#api-flags" %}}) for the flags / options available when starting LocalAI.
|
|
||||||
|
|
||||||
{{% /alert %}}
|
|
||||||
|
|
||||||
## Using the Bash Installer
|
## Using the Bash Installer
|
||||||
|
|
||||||
Install LocalAI easily using the bash installer with the following command:
|
Install LocalAI easily using the bash installer with the following command:
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ else
|
|||||||
echo "@@@@@"
|
echo "@@@@@"
|
||||||
echo "If you are experiencing issues with the pre-compiled builds, try setting REBUILD=true"
|
echo "If you are experiencing issues with the pre-compiled builds, try setting REBUILD=true"
|
||||||
echo "If you are still experiencing issues with the build, try setting CMAKE_ARGS and disable the instructions set as needed:"
|
echo "If you are still experiencing issues with the build, try setting CMAKE_ARGS and disable the instructions set as needed:"
|
||||||
echo 'CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF"'
|
echo 'CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF"'
|
||||||
echo "see the documentation at: https://localai.io/basics/build/index.html"
|
echo "see the documentation at: https://localai.io/basics/build/index.html"
|
||||||
echo "Note: See also https://github.com/go-skynet/LocalAI/issues/288"
|
echo "Note: See also https://github.com/go-skynet/LocalAI/issues/288"
|
||||||
echo "@@@@@"
|
echo "@@@@@"
|
||||||
|
|||||||
@@ -65,7 +65,7 @@ And we convert it to the gguf format that LocalAI can consume:
|
|||||||
|
|
||||||
# Convert to gguf
|
# Convert to gguf
|
||||||
git clone https://github.com/ggerganov/llama.cpp.git
|
git clone https://github.com/ggerganov/llama.cpp.git
|
||||||
pushd llama.cpp && make GGML_CUDA=1 && popd
|
pushd llama.cpp && make LLAMA_CUBLAS=1 && popd
|
||||||
|
|
||||||
# We need to convert the pytorch model into ggml for quantization
|
# We need to convert the pytorch model into ggml for quantization
|
||||||
# It creates 'ggml-model-f16.bin' in the 'merged' directory.
|
# It creates 'ggml-model-f16.bin' in the 'merged' directory.
|
||||||
|
|||||||
@@ -1600,7 +1600,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"\n",
|
"\n",
|
||||||
"!git clone https://github.com/ggerganov/llama.cpp.git\n",
|
"!git clone https://github.com/ggerganov/llama.cpp.git\n",
|
||||||
"!cd llama.cpp && make GGML_CUDA=1\n",
|
"!cd llama.cpp && make LLAMA_CUBLAS=1\n",
|
||||||
"\n"
|
"\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ version: "3"
|
|||||||
|
|
||||||
services:
|
services:
|
||||||
api:
|
api:
|
||||||
image: quay.io/go-skynet/local-ai:latest
|
image: quay.io/go-skynet/local-ai:v1.18.0-ffmpeg
|
||||||
# As initially LocalAI will download the models defined in PRELOAD_MODELS
|
# As initially LocalAI will download the models defined in PRELOAD_MODELS
|
||||||
# you might need to tweak the healthcheck values here according to your network connection.
|
# you might need to tweak the healthcheck values here according to your network connection.
|
||||||
# Here we give a timespan of 20m to download all the required files.
|
# Here we give a timespan of 20m to download all the required files.
|
||||||
|
|||||||
@@ -92,41 +92,6 @@
|
|||||||
- filename: qwen2-1.5b-instruct-q8_0.gguf
|
- filename: qwen2-1.5b-instruct-q8_0.gguf
|
||||||
sha256: c9d33989d77f4bd6966084332087921b9613eda01d5f44dc0b4e9a7382a2bfbb
|
sha256: c9d33989d77f4bd6966084332087921b9613eda01d5f44dc0b4e9a7382a2bfbb
|
||||||
uri: huggingface://DeepMount00/Qwen2-1.5B-Ita-GGUF/qwen2-1.5b-instruct-q8_0.gguf
|
uri: huggingface://DeepMount00/Qwen2-1.5B-Ita-GGUF/qwen2-1.5b-instruct-q8_0.gguf
|
||||||
- !!merge <<: *qwen2
|
|
||||||
name: "einstein-v7-qwen2-7b"
|
|
||||||
icon: https://cdn-uploads.huggingface.co/production/uploads/6468ce47e134d050a58aa89c/KLQP1jK-DIzpwHzYRIH-Q.png
|
|
||||||
description: |
|
|
||||||
This model is a full fine-tuned version of Qwen/Qwen2-7B on diverse datasets.
|
|
||||||
urls:
|
|
||||||
- https://huggingface.co/Weyaxi/Einstein-v7-Qwen2-7B
|
|
||||||
- https://huggingface.co/bartowski/Einstein-v7-Qwen2-7B-GGUF
|
|
||||||
overrides:
|
|
||||||
parameters:
|
|
||||||
model: Einstein-v7-Qwen2-7B-Q4_K_M.gguf
|
|
||||||
files:
|
|
||||||
- filename: Einstein-v7-Qwen2-7B-Q4_K_M.gguf
|
|
||||||
sha256: 277b212ea65894723d2b86fb0f689fa5ecb54c9794f0fd2fb643655dc62812ce
|
|
||||||
uri: huggingface://bartowski/Einstein-v7-Qwen2-7B-GGUF/Einstein-v7-Qwen2-7B-Q4_K_M.gguf
|
|
||||||
- !!merge <<: *qwen2
|
|
||||||
name: "arcee-spark"
|
|
||||||
icon: https://i.ibb.co/80ssNWS/o-Vdk-Qx-ARNmzr-Pi1h-Efj-SA.webp
|
|
||||||
description: |
|
|
||||||
Arcee Spark is a powerful 7B parameter language model that punches well above its weight class. Initialized from Qwen2, this model underwent a sophisticated training process:
|
|
||||||
|
|
||||||
Fine-tuned on 1.8 million samples
|
|
||||||
Merged with Qwen2-7B-Instruct using Arcee's mergekit
|
|
||||||
Further refined using Direct Preference Optimization (DPO)
|
|
||||||
|
|
||||||
This meticulous process results in exceptional performance, with Arcee Spark achieving the highest score on MT-Bench for models of its size, outperforming even GPT-3.5 on many tasks.
|
|
||||||
urls:
|
|
||||||
- https://huggingface.co/arcee-ai/Arcee-Spark-GGUF
|
|
||||||
overrides:
|
|
||||||
parameters:
|
|
||||||
model: Arcee-Spark-Q4_K_M.gguf
|
|
||||||
files:
|
|
||||||
- filename: Arcee-Spark-Q4_K_M.gguf
|
|
||||||
sha256: 44123276d7845dc13f73ca4aa431dc4c931104eb7d2186f2a73d076fa0ee2330
|
|
||||||
uri: huggingface://arcee-ai/Arcee-Spark-GGUF/Arcee-Spark-Q4_K_M.gguf
|
|
||||||
- &mistral03
|
- &mistral03
|
||||||
## START Mistral
|
## START Mistral
|
||||||
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
|
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
|
||||||
@@ -312,34 +277,6 @@
|
|||||||
- filename: gemma-1.1-7b-it-Q4_K_M.gguf
|
- filename: gemma-1.1-7b-it-Q4_K_M.gguf
|
||||||
sha256: 47821da72ee9e80b6fd43c6190ad751b485fb61fa5664590f7a73246bcd8332e
|
sha256: 47821da72ee9e80b6fd43c6190ad751b485fb61fa5664590f7a73246bcd8332e
|
||||||
uri: huggingface://bartowski/gemma-1.1-7b-it-GGUF/gemma-1.1-7b-it-Q4_K_M.gguf
|
uri: huggingface://bartowski/gemma-1.1-7b-it-GGUF/gemma-1.1-7b-it-Q4_K_M.gguf
|
||||||
- !!merge <<: *gemma
|
|
||||||
name: "gemma-2-27b-it"
|
|
||||||
urls:
|
|
||||||
- https://huggingface.co/google/gemma-2-27b-it
|
|
||||||
- https://huggingface.co/bartowski/gemma-2-27b-it-GGUF
|
|
||||||
description: |
|
|
||||||
Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights for both pre-trained variants and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as a laptop, desktop or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone.
|
|
||||||
overrides:
|
|
||||||
parameters:
|
|
||||||
model: gemma-2-27b-it-Q4_K_M.gguf
|
|
||||||
files:
|
|
||||||
- filename: gemma-2-27b-it-Q4_K_M.gguf
|
|
||||||
sha256: e54e7b800d464af4fa9966020e4a1b1d386cd9346de2d851a7bfe7d0797c44c4
|
|
||||||
uri: huggingface://bartowski/gemma-2-27b-it-GGUF/gemma-2-27b-it-Q4_K_M.gguf
|
|
||||||
- !!merge <<: *gemma
|
|
||||||
name: "gemma-2-9b-it"
|
|
||||||
urls:
|
|
||||||
- https://huggingface.co/google/gemma-2-9b-it
|
|
||||||
- https://huggingface.co/bartowski/gemma-2-9b-it-GGUF
|
|
||||||
description: |
|
|
||||||
Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights for both pre-trained variants and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as a laptop, desktop or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone.
|
|
||||||
overrides:
|
|
||||||
parameters:
|
|
||||||
model: gemma-2-9b-it-Q4_K_M.gguf
|
|
||||||
files:
|
|
||||||
- filename: gemma-2-9b-it-Q4_K_M.gguf
|
|
||||||
sha256: 0874bf61be2e4b3d0a4a75e58fbd442dc410745d513c1e1e5de0b54ae33e65db
|
|
||||||
uri: huggingface://bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q4_K_M.gguf
|
|
||||||
- &llama3
|
- &llama3
|
||||||
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
|
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
|
||||||
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
|
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
|
||||||
@@ -2045,25 +1982,6 @@
|
|||||||
- filename: Llama-3-Update-3.0-mmproj-model-f16.gguf
|
- filename: Llama-3-Update-3.0-mmproj-model-f16.gguf
|
||||||
sha256: 3d2f36dff61d6157cadf102df86a808eb9f8a230be1bc0bc99039d81a895468a
|
sha256: 3d2f36dff61d6157cadf102df86a808eb9f8a230be1bc0bc99039d81a895468a
|
||||||
uri: huggingface://Nitral-AI/Llama-3-Update-3.0-mmproj-model-f16/Llama-3-Update-3.0-mmproj-model-f16.gguf
|
uri: huggingface://Nitral-AI/Llama-3-Update-3.0-mmproj-model-f16/Llama-3-Update-3.0-mmproj-model-f16.gguf
|
||||||
- !!merge <<: *llama3
|
|
||||||
name: "llama3-8b-darkidol-1.2-iq-imatrix"
|
|
||||||
urls:
|
|
||||||
- https://huggingface.co/LWDCLS/llama3-8B-DarkIdol-1.2-GGUF-IQ-Imatrix-Request
|
|
||||||
- https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-1.2
|
|
||||||
description: |
|
|
||||||
The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones.
|
|
||||||
icon: https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-1.2/resolve/main/llama3-8B-DarkIdol-1.2.png
|
|
||||||
overrides:
|
|
||||||
mmproj: Llama-3-Update-3.0-mmproj-model-f16.gguf
|
|
||||||
parameters:
|
|
||||||
model: llama3-8B-DarkIdol-1.2-Q4_K_M-imat.gguf
|
|
||||||
files:
|
|
||||||
- filename: llama3-8B-DarkIdol-1.2-Q4_K_M-imat.gguf
|
|
||||||
sha256: dce2f5f1661f49fb695b038d973770b0d9059bced4e4bb212f6517aa219131cd
|
|
||||||
uri: huggingface://LWDCLS/llama3-8B-DarkIdol-1.2-GGUF-IQ-Imatrix-Request/llama3-8B-DarkIdol-1.2-Q4_K_M-imat.gguf
|
|
||||||
- filename: Llama-3-Update-3.0-mmproj-model-f16.gguf
|
|
||||||
sha256: 3d2f36dff61d6157cadf102df86a808eb9f8a230be1bc0bc99039d81a895468a
|
|
||||||
uri: huggingface://Nitral-AI/Llama-3-Update-3.0-mmproj-model-f16/Llama-3-Update-3.0-mmproj-model-f16.gguf
|
|
||||||
- &chatml
|
- &chatml
|
||||||
### ChatML
|
### ChatML
|
||||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||||
|
|||||||
@@ -42,9 +42,3 @@ func SetPrefix(suffix string) func(*GrammarOption) {
|
|||||||
o.Prefix = suffix
|
o.Prefix = suffix
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func SetPropOrder(order string) func(*GrammarOption) {
|
|
||||||
return func(o *GrammarOption) {
|
|
||||||
o.PropOrder = order
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -32,11 +32,6 @@ type GrammarConfig struct {
|
|||||||
|
|
||||||
// ExpectStringsAfterJSON enables mixed string suffix
|
// ExpectStringsAfterJSON enables mixed string suffix
|
||||||
ExpectStringsAfterJSON bool `yaml:"expect_strings_after_json"`
|
ExpectStringsAfterJSON bool `yaml:"expect_strings_after_json"`
|
||||||
|
|
||||||
// PropOrder selects what order to print properties
|
|
||||||
// for instance name,arguments will make print { "name": "foo", "arguments": { "bar": "baz" } }
|
|
||||||
// instead of { "arguments": { "bar": "baz" }, "name": "foo" }
|
|
||||||
PropOrder string `yaml:"properties_order"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// FunctionsConfig is the configuration for the tool/function call.
|
// FunctionsConfig is the configuration for the tool/function call.
|
||||||
@@ -109,8 +104,6 @@ func (g GrammarConfig) Options() []func(o *GrammarOption) {
|
|||||||
if g.ExpectStringsAfterJSON {
|
if g.ExpectStringsAfterJSON {
|
||||||
opts = append(opts, ExpectStringsAfterJSON)
|
opts = append(opts, ExpectStringsAfterJSON)
|
||||||
}
|
}
|
||||||
|
|
||||||
opts = append(opts, SetPropOrder(g.PropOrder))
|
|
||||||
return opts
|
return opts
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -701,9 +701,6 @@ const docTemplate = `{
|
|||||||
"prompt": {
|
"prompt": {
|
||||||
"description": "Prompt is read only by completion/image API calls"
|
"description": "Prompt is read only by completion/image API calls"
|
||||||
},
|
},
|
||||||
"repeat_last_n": {
|
|
||||||
"type": "integer"
|
|
||||||
},
|
|
||||||
"repeat_penalty": {
|
"repeat_penalty": {
|
||||||
"type": "number"
|
"type": "number"
|
||||||
},
|
},
|
||||||
@@ -754,10 +751,6 @@ const docTemplate = `{
|
|||||||
"description": "Common options between all the API calls, part of the OpenAI spec",
|
"description": "Common options between all the API calls, part of the OpenAI spec",
|
||||||
"type": "number"
|
"type": "number"
|
||||||
},
|
},
|
||||||
"translate": {
|
|
||||||
"description": "Only for audio transcription",
|
|
||||||
"type": "boolean"
|
|
||||||
},
|
|
||||||
"typical_p": {
|
"typical_p": {
|
||||||
"type": "number"
|
"type": "number"
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -694,9 +694,6 @@
|
|||||||
"prompt": {
|
"prompt": {
|
||||||
"description": "Prompt is read only by completion/image API calls"
|
"description": "Prompt is read only by completion/image API calls"
|
||||||
},
|
},
|
||||||
"repeat_last_n": {
|
|
||||||
"type": "integer"
|
|
||||||
},
|
|
||||||
"repeat_penalty": {
|
"repeat_penalty": {
|
||||||
"type": "number"
|
"type": "number"
|
||||||
},
|
},
|
||||||
@@ -747,10 +744,6 @@
|
|||||||
"description": "Common options between all the API calls, part of the OpenAI spec",
|
"description": "Common options between all the API calls, part of the OpenAI spec",
|
||||||
"type": "number"
|
"type": "number"
|
||||||
},
|
},
|
||||||
"translate": {
|
|
||||||
"description": "Only for audio transcription",
|
|
||||||
"type": "boolean"
|
|
||||||
},
|
|
||||||
"typical_p": {
|
"typical_p": {
|
||||||
"type": "number"
|
"type": "number"
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -292,8 +292,6 @@ definitions:
|
|||||||
type: number
|
type: number
|
||||||
prompt:
|
prompt:
|
||||||
description: Prompt is read only by completion/image API calls
|
description: Prompt is read only by completion/image API calls
|
||||||
repeat_last_n:
|
|
||||||
type: integer
|
|
||||||
repeat_penalty:
|
repeat_penalty:
|
||||||
type: number
|
type: number
|
||||||
response_format:
|
response_format:
|
||||||
@@ -330,9 +328,6 @@ definitions:
|
|||||||
description: Common options between all the API calls, part of the OpenAI
|
description: Common options between all the API calls, part of the OpenAI
|
||||||
spec
|
spec
|
||||||
type: number
|
type: number
|
||||||
translate:
|
|
||||||
description: Only for audio transcription
|
|
||||||
type: boolean
|
|
||||||
typical_p:
|
typical_p:
|
||||||
type: number
|
type: number
|
||||||
use_fast_tokenizer:
|
use_fast_tokenizer:
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package e2e_test
|
package e2e_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
@@ -38,7 +39,7 @@ var _ = BeforeSuite(func() {
|
|||||||
|
|
||||||
var defaultConfig openai.ClientConfig
|
var defaultConfig openai.ClientConfig
|
||||||
if apiEndpoint == "" {
|
if apiEndpoint == "" {
|
||||||
startDockerImage()
|
startDockerImage("")
|
||||||
defaultConfig = openai.DefaultConfig(apiKey)
|
defaultConfig = openai.DefaultConfig(apiKey)
|
||||||
apiEndpoint = "http://localhost:" + apiPort + "/v1" // So that other tests can reference this value safely.
|
apiEndpoint = "http://localhost:" + apiPort + "/v1" // So that other tests can reference this value safely.
|
||||||
defaultConfig.BaseURL = apiEndpoint
|
defaultConfig.BaseURL = apiEndpoint
|
||||||
@@ -58,9 +59,41 @@ var _ = BeforeSuite(func() {
|
|||||||
})
|
})
|
||||||
|
|
||||||
var _ = AfterSuite(func() {
|
var _ = AfterSuite(func() {
|
||||||
|
|
||||||
|
// if the suite failed, logs will be printed
|
||||||
|
// to the console
|
||||||
|
if CurrentGinkgoTestDescription().Failed {
|
||||||
|
if resource != nil {
|
||||||
|
logs := bytes.NewBufferString("")
|
||||||
|
err := pool.Client.Logs(docker.LogsOptions{
|
||||||
|
Container: resource.Container.ID,
|
||||||
|
OutputStream: logs,
|
||||||
|
ErrorStream: logs,
|
||||||
|
Stdout: true,
|
||||||
|
Stderr: true,
|
||||||
|
Timestamps: true,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("Could not take logs for failed suite", err.Error())
|
||||||
|
}
|
||||||
|
fmt.Println("Suite failed, printing logs")
|
||||||
|
fmt.Println(logs.String())
|
||||||
|
|
||||||
|
c, err := pool.Client.InspectContainer(resource.Container.ID)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("Could not inspect container", err.Error())
|
||||||
|
}
|
||||||
|
fmt.Println("Container state")
|
||||||
|
fmt.Println("Running:", c.State.Running)
|
||||||
|
fmt.Println("ExitCode:", c.State.ExitCode)
|
||||||
|
fmt.Println("Error:", c.State.Error)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if resource != nil {
|
if resource != nil {
|
||||||
Expect(pool.Purge(resource)).To(Succeed())
|
Expect(pool.Purge(resource)).To(Succeed())
|
||||||
}
|
}
|
||||||
|
|
||||||
//dat, err := os.ReadFile(resource.Container.LogPath)
|
//dat, err := os.ReadFile(resource.Container.LogPath)
|
||||||
//Expect(err).To(Not(HaveOccurred()))
|
//Expect(err).To(Not(HaveOccurred()))
|
||||||
//Expect(string(dat)).To(ContainSubstring("GRPC Service Ready"))
|
//Expect(string(dat)).To(ContainSubstring("GRPC Service Ready"))
|
||||||
@@ -71,8 +104,8 @@ var _ = AfterEach(func() {
|
|||||||
//Expect(dbClient.Clear()).To(Succeed())
|
//Expect(dbClient.Clear()).To(Succeed())
|
||||||
})
|
})
|
||||||
|
|
||||||
func startDockerImage() {
|
func startDockerImage(endpoint string) {
|
||||||
p, err := dockertest.NewPool("")
|
p, err := dockertest.NewPool(endpoint)
|
||||||
Expect(err).To(Not(HaveOccurred()))
|
Expect(err).To(Not(HaveOccurred()))
|
||||||
Expect(p.Client.Ping()).To(Succeed())
|
Expect(p.Client.Ping()).To(Succeed())
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user