Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 11:13:31 -05:00)

Compare commits (45 commits)
| SHA1 |
|---|
| 6a650e68cb |
| 5e1373877a |
| b5b0ab26e7 |
| 9725bb4bbd |
| 33b4275bbc |
| 6644af10c6 |
| 7c4a2e9b85 |
| bcccee3909 |
| c6f50ddd0c |
| 6613373b1b |
| 1659b3f795 |
| 30600dd5cb |
| 179fcf5541 |
| 9cb75086bb |
| 594bb462ab |
| aa730a7b96 |
| 0a454c527a |
| cf86bcb984 |
| a6d9988e84 |
| f3a114342e |
| 0d275ccc03 |
| 58dba3f01c |
| b68d6e8088 |
| 2352cec7e6 |
| de72ae79b5 |
| 884c07d5f9 |
| cca7cbef1e |
| 32cd0d03d4 |
| ee4d9e83d0 |
| 5547e08a30 |
| ca7385c303 |
| 28759e79d3 |
| 40249b6b84 |
| e09e47bada |
| 3796558aeb |
| cca4f010f8 |
| be3ff482d0 |
| af255cd0be |
| 8000228d1b |
| 79abe0ad77 |
| 8131d11d1f |
| beb01c91f3 |
| 1ccd64ff6a |
| fc7681c68c |
| 49d026a229 |
```yaml
@@ -6,7 +6,6 @@ services:
target: devcontainer
args:
- FFMPEG=true
- IMAGE_TYPE=extras
- GO_TAGS=p2p tts
env_file:
- ../.env
```

`.github/workflows/backend_build.yml` (vendored, 3 changes)

```yaml
@@ -210,6 +210,7 @@ jobs:
cache-from: type=gha
platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }}
load: ${{ github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

@@ -240,10 +241,8 @@ jobs:
- name: Latest tag
if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
run: |
docker pull localai/localai-backends:${{ steps.meta.outputs.version }}
docker tag localai/localai-backends:${{ steps.meta.outputs.version }} localai/localai-backends:${{ inputs.latest-image }}
docker push localai/localai-backends:${{ inputs.latest-image }}
docker pull quay.io/go-skynet/local-ai-backends:${{ steps.meta.outputs.version }}
docker tag quay.io/go-skynet/local-ai-backends:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai-backends:${{ inputs.latest-image }}
docker push quay.io/go-skynet/local-ai-backends:${{ inputs.latest-image }}
```

`.github/workflows/image-pr.yml` (vendored, 2 changes)

```yaml
@@ -40,7 +40,7 @@ jobs:
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg'
tag-suffix: '-gpu-nvidia-cuda12-ffmpeg'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
```

`.github/workflows/image.yml` (vendored, 29 changes)

```yaml
@@ -28,8 +28,6 @@ jobs:
grpc-base-image: ${{ matrix.grpc-base-image }}
aio: ${{ matrix.aio }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}

@@ -47,9 +45,7 @@ jobs:
grpc-base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
latest-image: 'latest-gpu-hipblas'
aio: "-aio-gpu-hipblas"
latest-image-aio: 'latest-aio-gpu-hipblas'

core-image-build:
uses: ./.github/workflows/image_build.yml

@@ -66,8 +62,6 @@ jobs:
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
skip-drivers: ${{ matrix.skip-drivers }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}

@@ -86,8 +80,6 @@ jobs:
base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
aio: "-aio-cpu"
latest-image: 'latest-cpu'
latest-image-aio: 'latest-aio-cpu'
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
- build-type: 'cublas'

@@ -95,29 +87,25 @@ jobs:
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11'
tag-suffix: '-gpu-nvidia-cuda11'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
latest-image: 'latest-gpu-nvidia-cuda-11'
aio: "-aio-gpu-nvidia-cuda-11"
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12'
tag-suffix: '-gpu-nvidia-cuda12'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
latest-image: 'latest-gpu-nvidia-cuda-12'
aio: "-aio-gpu-nvidia-cuda-12"
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
- build-type: 'vulkan'
platforms: 'linux/amd64'
tag-latest: 'false'

@@ -127,33 +115,27 @@ jobs:
base-image: "ubuntu:22.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
latest-image: 'latest-gpu-vulkan'
aio: "-aio-gpu-vulkan"
latest-image-aio: 'latest-aio-gpu-vulkan'
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f16'
tag-suffix: '-gpu-intel-f16'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
latest-image: 'latest-gpu-intel-f16'
aio: "-aio-gpu-intel-f16"
latest-image-aio: 'latest-aio-gpu-intel-f16'
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f32'
tag-suffix: '-gpu-intel-f32'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
latest-image: 'latest-gpu-intel-f32'
aio: "-aio-gpu-intel-f32"
latest-image-aio: 'latest-aio-gpu-intel-f32'

gh-runner:
uses: ./.github/workflows/image_build.yml

@@ -170,8 +152,6 @@ jobs:
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
skip-drivers: ${{ matrix.skip-drivers }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}

@@ -187,7 +167,6 @@ jobs:
platforms: 'linux/arm64'
tag-latest: 'false'
tag-suffix: '-nvidia-l4t-arm64'
latest-image: 'latest-nvidia-l4t-arm64'
ffmpeg: 'true'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm'
```

`.github/workflows/image_build.yml` (vendored, 41 changes)

```yaml
@@ -33,14 +33,6 @@ on:
description: 'Tag latest'
default: ''
type: string
latest-image:
description: 'Tag latest'
default: ''
type: string
latest-image-aio:
description: 'Tag latest'
default: ''
type: string
tag-suffix:
description: 'Tag suffix'
default: ''

@@ -164,7 +156,7 @@ jobs:
type=sha
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.tag-suffix }}
suffix=${{ inputs.tag-suffix }},onlatest=true
- name: Docker meta for PR
id: meta_pull_request
if: github.event_name == 'pull_request'

@@ -191,7 +183,7 @@ jobs:
type=semver,pattern={{raw}}
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.aio }}
suffix=${{ inputs.aio }},onlatest=true

- name: Docker meta AIO (dockerhub)
if: inputs.aio != ''

@@ -204,7 +196,8 @@ jobs:
type=ref,event=branch
type=semver,pattern={{raw}}
flavor: |
suffix=${{ inputs.aio }}
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.aio }},onlatest=true

- name: Set up QEMU
uses: docker/setup-qemu-action@master

@@ -316,32 +309,6 @@ jobs:
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}

- name: Cleanup
run: |
docker builder prune -f
docker system prune --force --volumes --all

- name: Latest tag
# run this on branches, when it is a tag and there is a latest-image defined
if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
run: |
docker pull localai/localai:${{ steps.meta.outputs.version }}
docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }}
docker push localai/localai:${{ inputs.latest-image }}
docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
- name: Latest AIO tag
# run this on branches, when it is a tag and there is a latest-image defined
if: github.event_name != 'pull_request' && inputs.latest-image-aio != '' && github.ref_type == 'tag'
run: |
docker pull localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }}
docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }}
docker push localai/localai:${{ inputs.latest-image-aio }}
docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}

- name: job summary
run: |
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
```

`.github/workflows/stalebot.yml` (vendored, new file, 24 lines)

```yaml
name: 'Close stale issues and PRs'
permissions:
  issues: write
  pull-requests: write
on:
  schedule:
    - cron: '30 1 * * *'

jobs:
  stale:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9
        with:
          stale-issue-message: 'This issue is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 5 days.'
          stale-pr-message: 'This PR is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 10 days.'
          close-issue-message: 'This issue was closed because it has been stalled for 5 days with no activity.'
          close-pr-message: 'This PR was closed because it has been stalled for 10 days with no activity.'
          days-before-issue-stale: 90
          days-before-pr-stale: 90
          days-before-issue-close: 5
          days-before-pr-close: 10
          exempt-issue-labels: 'roadmap'
          exempt-pr-labels: 'roadmap'
```

`Dockerfile` (203 changes)

```dockerfile
@@ -2,82 +2,17 @@ ARG BASE_IMAGE=ubuntu:22.04
ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
ARG INTEL_BASE_IMAGE=${BASE_IMAGE}

# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
FROM ${BASE_IMAGE} AS requirements

USER root

ARG GO_VERSION=1.22.6
ARG CMAKE_VERSION=3.26.4
ARG CMAKE_FROM_SOURCE=false
ARG TARGETARCH
ARG TARGETVARIANT

ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
ccache \
ca-certificates espeak-ng \
curl libssl-dev \
git \
git-lfs \
unzip upx-ucl python3 python-is-python3 && \
ca-certificates curl wget espeak-ng libgomp1 \
python3 python-is-python3 ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Install CMake (the version in 22.04 is too old)
RUN <<EOT bash
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
else
apt-get update && \
apt-get install -y \
cmake && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT

# Install Go
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin

# Install grpc compilers and rice
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af && \
go install github.com/GeertJohan/go.rice/rice@latest

COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
RUN update-ca-certificates

RUN test -n "$TARGETARCH" \
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')

# Use the variables in subsequent instructions
RUN echo "Target Architecture: $TARGETARCH"
RUN echo "Target Variant: $TARGETVARIANT"

# Cuda
ENV PATH=/usr/local/cuda/bin:${PATH}

# HipBLAS requirements
ENV PATH=/opt/rocm/bin:${PATH}

# OpenBLAS requirements and stable diffusion
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libopenblas-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

WORKDIR /build

###################################
###################################

# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
FROM requirements AS requirements-drivers

@@ -85,7 +20,8 @@ ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=0
ARG SKIP_DRIVERS=false

ARG TARGETARCH
ARG TARGETVARIANT
ENV BUILD_TYPE=${BUILD_TYPE}

# Vulkan requirements

@@ -152,6 +88,83 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
ldconfig \
; fi

# Cuda
ENV PATH=/usr/local/cuda/bin:${PATH}

# HipBLAS requirements
ENV PATH=/opt/rocm/bin:${PATH}

###################################
###################################

# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
FROM requirements-drivers AS build-requirements

ARG GO_VERSION=1.22.6
ARG CMAKE_VERSION=3.26.4
ARG CMAKE_FROM_SOURCE=false
ARG TARGETARCH
ARG TARGETVARIANT

RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
ccache \
ca-certificates espeak-ng \
curl libssl-dev \
git \
git-lfs \
unzip upx-ucl python3 python-is-python3 && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Install CMake (the version in 22.04 is too old)
RUN <<EOT bash
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
else
apt-get update && \
apt-get install -y \
cmake && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT

# Install Go
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin

# Install grpc compilers and rice
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af && \
go install github.com/GeertJohan/go.rice/rice@latest

COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
RUN update-ca-certificates

# OpenBLAS requirements and stable diffusion
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libopenblas-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

RUN test -n "$TARGETARCH" \
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')

# Use the variables in subsequent instructions
RUN echo "Target Architecture: $TARGETARCH"
RUN echo "Target Variant: $TARGETVARIANT"

WORKDIR /build

###################################
###################################

@@ -218,13 +231,14 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall

# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer

FROM requirements-drivers AS builder-base
FROM build-requirements AS builder-base

ARG GO_TAGS="tts p2p"
ARG GRPC_BACKENDS
ARG MAKEFLAGS
ARG LD_FLAGS="-s -w"

ARG TARGETARCH
ARG TARGETVARIANT
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
ENV GO_TAGS=${GO_TAGS}
ENV MAKEFLAGS=${MAKEFLAGS}

@@ -259,6 +273,8 @@ EOT

# Compile backends first in a separate stage
FROM builder-base AS builder-backends
ARG TARGETARCH
ARG TARGETVARIANT

COPY --from=grpc /opt/grpc /usr/local

@@ -314,24 +330,13 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \

FROM builder-base AS devcontainer

ARG FFMPEG

COPY --from=grpc /opt/grpc /usr/local

COPY .devcontainer-scripts /.devcontainer-scripts

# Add FFmpeg
RUN if [ "${FFMPEG}" = "true" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
; fi

RUN apt-get update && \
apt-get install -y --no-install-recommends \
ssh less wget
ssh less
# For the devcontainer, leave apt functional in case additional devtools are needed at runtime.

RUN go install github.com/go-delve/delve/cmd/dlv@latest

@@ -345,40 +350,16 @@ RUN go install github.com/mikefarah/yq/v4@latest
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
FROM requirements-drivers

ARG FFMPEG
ARG BUILD_TYPE
ARG TARGETARCH
ARG MAKEFLAGS

ENV BUILD_TYPE=${BUILD_TYPE}
ENV REBUILD=false
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
ENV MAKEFLAGS=${MAKEFLAGS}

ARG CUDA_MAJOR_VERSION=12
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all

# Add FFmpeg
RUN if [ "${FFMPEG}" = "true" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
; fi
WORKDIR /

WORKDIR /build

# we start fresh & re-copy all assets because `make build` does not clean up nicely after itself
# so when `entrypoint.sh` runs `make build` again (which it does by default), the build would fail
# see https://github.com/go-skynet/LocalAI/pull/658#discussion_r1241971626 and
# https://github.com/go-skynet/LocalAI/pull/434
COPY . .

COPY --from=builder /build/sources ./sources/
COPY --from=grpc /opt/grpc /usr/local
COPY ./entrypoint.sh .

# Copy the binary
COPY --from=builder /build/local-ai ./

@@ -387,12 +368,12 @@ COPY --from=builder /build/local-ai ./
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/

# Make sure the models directory exists
RUN mkdir -p /build/models /build/backends
RUN mkdir -p /models /backends

# Define the health check command
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1

VOLUME /build/models /build/backends
VOLUME /models /backends
EXPOSE 8080
ENTRYPOINT [ "/build/entrypoint.sh" ]
ENTRYPOINT [ "/entrypoint.sh" ]
```

`Makefile` (4 changes)

```makefile
@@ -6,11 +6,11 @@ BINARY_NAME=local-ai
DETECT_LIBS?=true

# llama.cpp versions
CPPLLAMA_VERSION?=8d947136546773f6410756f37fcc5d3e65b8135d
CPPLLAMA_VERSION?=8846aace4934ad29651ea61b8c7e3f6b0556e3d2

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
WHISPER_CPP_VERSION?=ecb8f3c2b4e282d5ef416516bcbfb92821f06bf6
WHISPER_CPP_VERSION?=32cf4e2aba799aff069011f37ca025401433cf9f

# go-piper version
PIPER_REPO?=https://github.com/mudler/go-piper
```

`README.md` (21 changes)

````markdown
@@ -121,18 +121,12 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
### NVIDIA GPU Images:

```bash
# CUDA 12.0 with core features
# CUDA 12.0
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12

# CUDA 12.0 with extra Python dependencies
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12-extras

# CUDA 11.7 with core features
# CUDA 11.7
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11

# CUDA 11.7 with extra Python dependencies
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11-extras

# NVIDIA Jetson (L4T) ARM64
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-nvidia-l4t-arm64
```

@@ -140,11 +134,7 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-nv
### AMD GPU Images (ROCm):

```bash
# ROCm with core features
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas

# ROCm with extra Python dependencies
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas-extras
```

### Intel GPU Images (oneAPI):

@@ -153,20 +143,13 @@ docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri
# Intel GPU with FP16 support
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16

# Intel GPU with FP16 support and extra dependencies
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16-extras

# Intel GPU with FP32 support
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32

# Intel GPU with FP32 support and extra dependencies
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32-extras
```

### Vulkan GPU Images:

```bash
# Vulkan with core features
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-vulkan
```
````

```bash
@@ -135,4 +135,4 @@ check_vars

echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS"

exec /build/entrypoint.sh "$@"
exec /entrypoint.sh "$@"
```

```go
@@ -2,10 +2,25 @@ package gallery

import "github.com/mudler/LocalAI/core/config"

// BackendMetadata represents the metadata stored in a JSON file for each installed backend
type BackendMetadata struct {
// Alias is an optional alternative name for the backend
Alias string `json:"alias,omitempty"`
// MetaBackendFor points to the concrete backend if this is a meta backend
MetaBackendFor string `json:"meta_backend_for,omitempty"`
// Name is the original name from the gallery
Name string `json:"name,omitempty"`
// GalleryURL is the URL of the gallery this backend came from
GalleryURL string `json:"gallery_url,omitempty"`
// InstalledAt is the timestamp when the backend was installed
InstalledAt string `json:"installed_at,omitempty"`
}

type GalleryBackend struct {
Metadata `json:",inline" yaml:",inline"`
Alias string `json:"alias,omitempty" yaml:"alias,omitempty"`
URI string `json:"uri,omitempty" yaml:"uri,omitempty"`
Metadata `json:",inline" yaml:",inline"`
Alias string `json:"alias,omitempty" yaml:"alias,omitempty"`
URI string `json:"uri,omitempty" yaml:"uri,omitempty"`
CapabilitiesMap map[string]string `json:"capabilities,omitempty" yaml:"capabilities,omitempty"`
}

type GalleryBackends []*GalleryBackend

@@ -14,6 +29,10 @@ func (m *GalleryBackend) SetGallery(gallery config.Gallery) {
m.Gallery = gallery
}

func (m *GalleryBackend) IsMeta() bool {
return len(m.CapabilitiesMap) > 0
}

func (m *GalleryBackend) SetInstalled(installed bool) {
m.Installed = installed
}
```

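
The new `BackendMetadata` type backs the `metadata.json` file written next to each installed backend. A minimal standalone sketch of what that file ends up looking like once marshaled; the struct mirrors the diff above, and the field values here are placeholders rather than output from a real installation:

```go
package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// Mirrors the BackendMetadata struct introduced in the diff above.
type BackendMetadata struct {
	Alias          string `json:"alias,omitempty"`
	MetaBackendFor string `json:"meta_backend_for,omitempty"`
	Name           string `json:"name,omitempty"`
	GalleryURL     string `json:"gallery_url,omitempty"`
	InstalledAt    string `json:"installed_at,omitempty"`
}

func main() {
	// Placeholder values: a meta backend entry that points at the concrete
	// backend selected for this machine.
	meta := BackendMetadata{
		MetaBackendFor: "nvidia-backend",
		Name:           "meta-backend",
		GalleryURL:     "https://example.test/backend-gallery.yaml",
		InstalledAt:    time.Now().Format(time.RFC3339),
	}
	out, _ := json.MarshalIndent(meta, "", "  ")
	fmt.Println(string(out)) // this is the shape of metadata.json on disk
}
```
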
```go
@@ -1,17 +1,79 @@
package gallery

import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"time"

"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/system"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/oci"
"github.com/rs/zerolog/log"
)

const (
metadataFile = "metadata.json"
runFile = "run.sh"
)

// readBackendMetadata reads the metadata JSON file for a backend
func readBackendMetadata(backendPath string) (*BackendMetadata, error) {
metadataPath := filepath.Join(backendPath, metadataFile)

// If metadata file doesn't exist, return nil (for backward compatibility)
if _, err := os.Stat(metadataPath); os.IsNotExist(err) {
return nil, nil
}

data, err := os.ReadFile(metadataPath)
if err != nil {
return nil, fmt.Errorf("failed to read metadata file %q: %v", metadataPath, err)
}

var metadata BackendMetadata
if err := json.Unmarshal(data, &metadata); err != nil {
return nil, fmt.Errorf("failed to unmarshal metadata file %q: %v", metadataPath, err)
}

return &metadata, nil
}

// writeBackendMetadata writes the metadata JSON file for a backend
func writeBackendMetadata(backendPath string, metadata *BackendMetadata) error {
metadataPath := filepath.Join(backendPath, metadataFile)

data, err := json.MarshalIndent(metadata, "", " ")
if err != nil {
return fmt.Errorf("failed to marshal metadata: %v", err)
}

if err := os.WriteFile(metadataPath, data, 0644); err != nil {
return fmt.Errorf("failed to write metadata file %q: %v", metadataPath, err)
}

return nil
}

func findBestBackendFromMeta(backend *GalleryBackend, systemState *system.SystemState, backends GalleryElements[*GalleryBackend]) *GalleryBackend {
if systemState == nil {
return nil
}

realBackend := backend.CapabilitiesMap[systemState.GPUVendor]
if realBackend == "" {
return nil
}

return backends.FindByName(realBackend)
}

// Installs a model from the gallery
func InstallBackendFromGallery(galleries []config.Gallery, name string, basePath string, downloadStatus func(string, string, string, float64)) error {
func InstallBackendFromGallery(galleries []config.Gallery, systemState *system.SystemState, name string, basePath string, downloadStatus func(string, string, string, float64)) error {
log.Debug().Interface("galleries", galleries).Str("name", name).Msg("Installing backend from gallery")

backends, err := AvailableBackends(galleries, basePath)
if err != nil {
return err

@@ -19,7 +81,44 @@ func InstallBackendFromGallery(galleries []config.Gallery, name string, basePath

backend := FindGalleryElement(backends, name, basePath)
if backend == nil {
return fmt.Errorf("no model found with name %q", name)
return fmt.Errorf("no backend found with name %q", name)
}

if backend.IsMeta() {
log.Debug().Interface("systemState", systemState).Str("name", name).Msg("Backend is a meta backend")

// Then, let's try to find the best backend based on the capabilities map
bestBackend := findBestBackendFromMeta(backend, systemState, backends)
if bestBackend == nil {
return fmt.Errorf("no backend found with capabilities %q", backend.CapabilitiesMap)
}

log.Debug().Str("name", name).Str("bestBackend", bestBackend.Name).Msg("Installing backend from meta backend")

// Then, let's install the best backend
if err := InstallBackend(basePath, bestBackend, downloadStatus); err != nil {
return err
}

// we need now to create a path for the meta backend, with the alias to the installed ones so it can be used to remove it
metaBackendPath := filepath.Join(basePath, name)
if err := os.MkdirAll(metaBackendPath, 0750); err != nil {
return fmt.Errorf("failed to create meta backend path %q: %v", metaBackendPath, err)
}

// Create metadata for the meta backend
metaMetadata := &BackendMetadata{
MetaBackendFor: bestBackend.Name,
Name: name,
GalleryURL: backend.Gallery.URL,
InstalledAt: time.Now().Format(time.RFC3339),
}

if err := writeBackendMetadata(metaBackendPath, metaMetadata); err != nil {
return fmt.Errorf("failed to write metadata for meta backend %q: %v", name, err)
}

return nil
}

return InstallBackend(basePath, backend, downloadStatus)

@@ -32,6 +131,10 @@ func InstallBackend(basePath string, config *GalleryBackend, downloadStatus func
return fmt.Errorf("failed to create base path: %v", err)
}

if config.IsMeta() {
return fmt.Errorf("meta backends cannot be installed directly")
}

name := config.Name

img, err := oci.GetImage(config.URI, "", nil, nil)

@@ -48,21 +151,73 @@ func InstallBackend(basePath string, config *GalleryBackend, downloadStatus func
return fmt.Errorf("failed to extract image %q: %v", config.URI, err)
}

// Create metadata for the backend
metadata := &BackendMetadata{
Name: name,
GalleryURL: config.Gallery.URL,
InstalledAt: time.Now().Format(time.RFC3339),
}

if config.Alias != "" {
// Write an alias file inside
aliasFile := filepath.Join(backendPath, "alias")
if err := os.WriteFile(aliasFile, []byte(config.Alias), 0644); err != nil {
return fmt.Errorf("failed to write alias file %q: %v", aliasFile, err)
}
metadata.Alias = config.Alias
}

if err := writeBackendMetadata(backendPath, metadata); err != nil {
return fmt.Errorf("failed to write metadata for backend %q: %v", name, err)
}

return nil
}

func DeleteBackendFromSystem(basePath string, name string) error {
backendFile := filepath.Join(basePath, name)
backendDirectory := filepath.Join(basePath, name)

return os.RemoveAll(backendFile)
// check if the backend dir exists
if _, err := os.Stat(backendDirectory); os.IsNotExist(err) {
// if doesn't exist, it might be an alias, so we need to check if we have a matching alias in
// all the backends in the basePath
backends, err := os.ReadDir(basePath)
if err != nil {
return err
}
foundBackend := false

for _, backend := range backends {
if backend.IsDir() {
metadata, err := readBackendMetadata(filepath.Join(basePath, backend.Name()))
if err != nil {
return err
}
if metadata != nil && metadata.Alias == name {
backendDirectory = filepath.Join(basePath, backend.Name())
foundBackend = true
break
}
}
}

// If no backend found, return successfully (idempotent behavior)
if !foundBackend {
return fmt.Errorf("no backend found with name %q", name)
}
}

// If it's a meta backend, delete also associated backend
metadata, err := readBackendMetadata(backendDirectory)
if err != nil {
return err
}

if metadata != nil && metadata.MetaBackendFor != "" {
metaBackendDirectory := filepath.Join(basePath, metadata.MetaBackendFor)
log.Debug().Str("backendDirectory", metaBackendDirectory).Msg("Deleting meta backend")
if _, err := os.Stat(metaBackendDirectory); os.IsNotExist(err) {
return fmt.Errorf("meta backend %q not found", metadata.MetaBackendFor)
}
os.RemoveAll(metaBackendDirectory)
}

return os.RemoveAll(backendDirectory)
}

func ListSystemBackends(basePath string) (map[string]string, error) {

@@ -75,17 +230,16 @@ func ListSystemBackends(basePath string) (map[string]string, error) {

for _, backend := range backends {
if backend.IsDir() {
runFile := filepath.Join(basePath, backend.Name(), "run.sh")
runFile := filepath.Join(basePath, backend.Name(), runFile)
backendsNames[backend.Name()] = runFile

aliasFile := filepath.Join(basePath, backend.Name(), "alias")
if _, err := os.Stat(aliasFile); err == nil {
// read the alias file, and use it as key
alias, err := os.ReadFile(aliasFile)
if err != nil {
return nil, err
}
backendsNames[string(alias)] = runFile
// Check for alias in metadata
metadata, err := readBackendMetadata(filepath.Join(basePath, backend.Name()))
if err != nil {
return nil, err
}
if metadata != nil && metadata.Alias != "" {
backendsNames[metadata.Alias] = runFile
}
}
}
```

```go
@@ -1,12 +1,19 @@
package gallery

import (
"encoding/json"
"os"
"path/filepath"

"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/system"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"gopkg.in/yaml.v2"
)

const (
testImage = "quay.io/mudler/tests:localai-backend-test"
)

var _ = Describe("Gallery Backends", func() {

@@ -35,18 +42,209 @@ var _ = Describe("Gallery Backends", func() {

Describe("InstallBackendFromGallery", func() {
It("should return error when backend is not found", func() {
err := InstallBackendFromGallery(galleries, "non-existent", tempDir, nil)
err := InstallBackendFromGallery(galleries, nil, "non-existent", tempDir, nil)
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("no model found with name"))
Expect(err.Error()).To(ContainSubstring("no backend found with name \"non-existent\""))
})

It("should install backend from gallery", func() {
err := InstallBackendFromGallery(galleries, "test-backend", tempDir, nil)
err := InstallBackendFromGallery(galleries, nil, "test-backend", tempDir, nil)
Expect(err).ToNot(HaveOccurred())
Expect(filepath.Join(tempDir, "test-backend", "run.sh")).To(BeARegularFile())
})
})

Describe("Meta Backends", func() {
It("should identify meta backends correctly", func() {
metaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "meta-backend",
},
CapabilitiesMap: map[string]string{
"nvidia": "nvidia-backend",
"amd": "amd-backend",
"intel": "intel-backend",
},
}

Expect(metaBackend.IsMeta()).To(BeTrue())

regularBackend := &GalleryBackend{
Metadata: Metadata{
Name: "regular-backend",
},
URI: testImage,
}

Expect(regularBackend.IsMeta()).To(BeFalse())

emptyMetaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "empty-meta-backend",
},
CapabilitiesMap: map[string]string{},
}

Expect(emptyMetaBackend.IsMeta()).To(BeFalse())

nilMetaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "nil-meta-backend",
},
CapabilitiesMap: nil,
}

Expect(nilMetaBackend.IsMeta()).To(BeFalse())
})

It("should find best backend from meta based on system capabilities", func() {
metaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "meta-backend",
},
CapabilitiesMap: map[string]string{
"nvidia": "nvidia-backend",
"amd": "amd-backend",
"intel": "intel-backend",
},
}

nvidiaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "nvidia-backend",
},
URI: testImage,
}

amdBackend := &GalleryBackend{
Metadata: Metadata{
Name: "amd-backend",
},
URI: testImage,
}

backends := GalleryElements[*GalleryBackend]{nvidiaBackend, amdBackend}

// Test with NVIDIA system state
nvidiaSystemState := &system.SystemState{GPUVendor: "nvidia"}
bestBackend := findBestBackendFromMeta(metaBackend, nvidiaSystemState, backends)
Expect(bestBackend).To(Equal(nvidiaBackend))

// Test with AMD system state
amdSystemState := &system.SystemState{GPUVendor: "amd"}
bestBackend = findBestBackendFromMeta(metaBackend, amdSystemState, backends)
Expect(bestBackend).To(Equal(amdBackend))

// Test with unsupported GPU vendor
unsupportedSystemState := &system.SystemState{GPUVendor: "unsupported"}
bestBackend = findBestBackendFromMeta(metaBackend, unsupportedSystemState, backends)
Expect(bestBackend).To(BeNil())
})

It("should handle meta backend deletion correctly", func() {
metaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "meta-backend",
},
CapabilitiesMap: map[string]string{
"nvidia": "nvidia-backend",
"amd": "amd-backend",
"intel": "intel-backend",
},
}

nvidiaBackend := &GalleryBackend{
Metadata: Metadata{
Name: "nvidia-backend",
},
URI: testImage,
}

amdBackend := &GalleryBackend{
Metadata: Metadata{
Name: "amd-backend",
},
URI: testImage,
}

gallery := config.Gallery{
Name: "test-gallery",
URL: "file://" + filepath.Join(tempDir, "backend-gallery.yaml"),
}

galleryBackend := GalleryBackends{amdBackend, nvidiaBackend, metaBackend}

dat, err := yaml.Marshal(galleryBackend)
Expect(err).NotTo(HaveOccurred())
err = os.WriteFile(filepath.Join(tempDir, "backend-gallery.yaml"), dat, 0644)
Expect(err).NotTo(HaveOccurred())

// Test with NVIDIA system state
nvidiaSystemState := &system.SystemState{GPUVendor: "nvidia"}
err = InstallBackendFromGallery([]config.Gallery{gallery}, nvidiaSystemState, "meta-backend", tempDir, nil)
Expect(err).NotTo(HaveOccurred())

metaBackendPath := filepath.Join(tempDir, "meta-backend")
Expect(metaBackendPath).To(BeADirectory())

concreteBackendPath := filepath.Join(tempDir, "nvidia-backend")
Expect(concreteBackendPath).To(BeADirectory())

allBackends, err := ListSystemBackends(tempDir)
Expect(err).NotTo(HaveOccurred())
Expect(allBackends).To(HaveKey("meta-backend"))
Expect(allBackends).To(HaveKey("nvidia-backend"))

// Delete meta backend by name
err = DeleteBackendFromSystem(tempDir, "meta-backend")
Expect(err).NotTo(HaveOccurred())

// Verify meta backend directory is deleted
Expect(metaBackendPath).NotTo(BeADirectory())

// Verify concrete backend directory is deleted
Expect(concreteBackendPath).NotTo(BeADirectory())
})

It("should list meta backends correctly in system backends", func() {
// Create a meta backend directory with metadata
metaBackendPath := filepath.Join(tempDir, "meta-backend")
err := os.MkdirAll(metaBackendPath, 0750)
Expect(err).NotTo(HaveOccurred())

// Create metadata file pointing to concrete backend
metadata := &BackendMetadata{
MetaBackendFor: "concrete-backend",
Name: "meta-backend",
InstalledAt: "2023-01-01T00:00:00Z",
}
metadataData, err := json.Marshal(metadata)
Expect(err).NotTo(HaveOccurred())
err = os.WriteFile(filepath.Join(metaBackendPath, "metadata.json"), metadataData, 0644)
Expect(err).NotTo(HaveOccurred())

// Create the concrete backend directory with run.sh
concreteBackendPath := filepath.Join(tempDir, "concrete-backend")
err = os.MkdirAll(concreteBackendPath, 0750)
Expect(err).NotTo(HaveOccurred())
err = os.WriteFile(filepath.Join(concreteBackendPath, "run.sh"), []byte("#!/bin/bash"), 0755)
Expect(err).NotTo(HaveOccurred())

// List system backends
backends, err := ListSystemBackends(tempDir)
Expect(err).NotTo(HaveOccurred())

// Should include both the meta backend name and concrete backend name
Expect(backends).To(HaveKey("meta-backend"))
Expect(backends).To(HaveKey("concrete-backend"))

// meta-backend should point to its own run.sh
Expect(backends["meta-backend"]).To(Equal(filepath.Join(tempDir, "meta-backend", "run.sh")))
// concrete-backend should point to its own run.sh
Expect(backends["concrete-backend"]).To(Equal(filepath.Join(tempDir, "concrete-backend", "run.sh")))
})
})

Describe("InstallBackend", func() {
It("should create base path if it doesn't exist", func() {
newPath := filepath.Join(tempDir, "new-path")

@@ -73,10 +271,17 @@ var _ = Describe("Gallery Backends", func() {

err := InstallBackend(tempDir, &backend, nil)
Expect(err).ToNot(HaveOccurred())
Expect(filepath.Join(tempDir, "test-backend", "alias")).To(BeARegularFile())
content, err := os.ReadFile(filepath.Join(tempDir, "test-backend", "alias"))
Expect(filepath.Join(tempDir, "test-backend", "metadata.json")).To(BeARegularFile())

// Read and verify metadata
metadataData, err := os.ReadFile(filepath.Join(tempDir, "test-backend", "metadata.json"))
Expect(err).ToNot(HaveOccurred())
Expect(string(content)).To(ContainSubstring("test-alias"))
var metadata BackendMetadata
err = json.Unmarshal(metadataData, &metadata)
Expect(err).ToNot(HaveOccurred())
Expect(metadata.Alias).To(Equal("test-alias"))
Expect(metadata.Name).To(Equal("test-backend"))

Expect(filepath.Join(tempDir, "test-backend", "run.sh")).To(BeARegularFile())

// Check that the alias was recognized

@@ -103,7 +308,7 @@ var _ = Describe("Gallery Backends", func() {

It("should not error when backend doesn't exist", func() {
err := DeleteBackendFromSystem(tempDir, "non-existent")
Expect(err).NotTo(HaveOccurred())
Expect(err).To(HaveOccurred())
})
})

@@ -134,8 +339,15 @@ var _ = Describe("Gallery Backends", func() {
err := os.MkdirAll(backendPath, 0750)
Expect(err).NotTo(HaveOccurred())

// Create alias file
err = os.WriteFile(filepath.Join(backendPath, "alias"), []byte(alias), 0644)
// Create metadata file with alias
metadata := &BackendMetadata{
Alias: alias,
Name: backendName,
InstalledAt: "2023-01-01T00:00:00Z",
}
metadataData, err := json.Marshal(metadata)
Expect(err).NotTo(HaveOccurred())
err = os.WriteFile(filepath.Join(backendPath, "metadata.json"), metadataData, 0644)
Expect(err).NotTo(HaveOccurred())

backends, err := ListSystemBackends(tempDir)
```

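
Taken together, the changes above make the installer resolve a meta backend through the detected GPU vendor, install the concrete backend, and record the link in `metadata.json`. A minimal usage sketch of the updated signatures, written against the functions shown in the diffs; the gallery URL, backend name, and base path are placeholders:

```go
package main

import (
	"fmt"

	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/gallery"
	"github.com/mudler/LocalAI/core/system"
)

func main() {
	// Placeholder gallery and paths for illustration only.
	galleries := []config.Gallery{{Name: "localai", URL: "https://example.test/backend-gallery.yaml"}}
	basePath := "/backends"

	// Detect the GPU vendor so meta backends can resolve to a concrete backend.
	state, err := system.GetSystemState()
	if err != nil {
		fmt.Println("could not detect system state:", err)
	}

	// New signature: the system state is passed through to the installer.
	if err := gallery.InstallBackendFromGallery(galleries, state, "meta-backend", basePath, nil); err != nil {
		fmt.Println("install failed:", err)
		return
	}

	// Both the meta name and the concrete backend it resolved to show up here,
	// each mapped to the run.sh that launches it.
	backends, err := gallery.ListSystemBackends(basePath)
	if err != nil {
		fmt.Println("listing failed:", err)
		return
	}
	for name, runScript := range backends {
		fmt.Println(name, "->", runScript)
	}
}
```
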
```go
@@ -205,7 +205,10 @@ func API(application *application.Application) (*fiber.App, error) {
utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)

galleryService := services.NewGalleryService(application.ApplicationConfig(), application.ModelLoader())
galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader())
err = galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader())
if err != nil {
return nil, err
}

requestExtractor := middleware.NewRequestExtractor(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
```

```go
@@ -2,12 +2,13 @@ package services

import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/system"

"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
)

func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend]) error {
func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend], systemState *system.SystemState) error {
utils.ResetDownloadTimers()
g.UpdateStatus(op.ID, &GalleryOpStatus{Message: "processing", Progress: 0})

@@ -23,13 +24,17 @@ func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend]) e
g.modelLoader.DeleteExternalBackend(op.GalleryElementName)
} else {
log.Warn().Msgf("installing backend %s", op.GalleryElementName)
err = gallery.InstallBackendFromGallery(g.appConfig.BackendGalleries, op.GalleryElementName, g.appConfig.BackendsPath, progressCallback)
err = gallery.InstallBackendFromGallery(g.appConfig.BackendGalleries, systemState, op.GalleryElementName, g.appConfig.BackendsPath, progressCallback)
if err == nil {
err = gallery.RegisterBackends(g.appConfig.BackendsPath, g.modelLoader)
}
}
if err != nil {
log.Error().Err(err).Msgf("error installing backend %s", op.GalleryElementName)
if !op.Delete {
// If we didn't install the backend, we need to make sure we don't have a leftover directory
gallery.DeleteBackendFromSystem(g.appConfig.BackendsPath, op.GalleryElementName)
}
return err
}
```

```go
@@ -7,7 +7,9 @@ import (

"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/system"
"github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
)

type GalleryService struct {

@@ -50,7 +52,7 @@ func (g *GalleryService) GetAllStatus() map[string]*GalleryOpStatus {
return g.statuses
}

func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader) {
func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader) error {
// updates the status with an error
var updateError func(id string, e error)
if !g.appConfig.OpaqueErrors {

@@ -63,13 +65,18 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader
}
}

systemState, err := system.GetSystemState()
if err != nil {
log.Error().Err(err).Msg("failed to get system state")
}

go func() {
for {
select {
case <-c.Done():
return
case op := <-g.BackendGalleryChannel:
err := g.backendHandler(&op)
err := g.backendHandler(&op, systemState)
if err != nil {
updateError(op.ID, err)
}

@@ -82,4 +89,6 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader
}
}
}()

return nil
}
```

`core/system/capabilities.go` (new file, 49 lines)

```go
package system

import (
	"strings"

	"github.com/mudler/LocalAI/pkg/xsysinfo"
	"github.com/rs/zerolog/log"
)

type SystemState struct {
	GPUVendor string
}

func GetSystemState() (*SystemState, error) {
	gpuVendor, _ := detectGPUVendor()
	log.Debug().Str("gpuVendor", gpuVendor).Msg("GPU vendor")

	return &SystemState{
		GPUVendor: gpuVendor,
	}, nil
}

func detectGPUVendor() (string, error) {
	gpus, err := xsysinfo.GPUs()
	if err != nil {
		return "", err
	}

	for _, gpu := range gpus {
		if gpu.DeviceInfo != nil {
			if gpu.DeviceInfo.Vendor != nil {
				gpuVendorName := strings.ToUpper(gpu.DeviceInfo.Vendor.Name)
				if gpuVendorName == "NVIDIA" {
					return "nvidia", nil
				}
				if gpuVendorName == "AMD" {
					return "amd", nil
				}
				if gpuVendorName == "INTEL" {
					return "intel", nil
				}
				return "nvidia", nil
			}
		}
	}

	return "", nil
}
```

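
The vendor strings this file normalizes to ("nvidia", "amd", "intel", or an empty string when nothing is detected) are used directly as keys into a meta backend's `capabilities` map. A small hypothetical helper illustrating that lookup; the backend names in the map are placeholders, and the import assumes the LocalAI module is available:

```go
package main

import (
	"fmt"

	"github.com/mudler/LocalAI/core/system"
)

// resolveBackend is a hypothetical helper mirroring the lookup the installer
// performs: the detected vendor string is the key into the capabilities map.
func resolveBackend(capabilities map[string]string) (string, bool) {
	state, err := system.GetSystemState()
	if err != nil || state == nil || state.GPUVendor == "" {
		return "", false // detection failed or no discrete GPU found
	}
	name, ok := capabilities[state.GPUVendor] // keys: "nvidia", "amd", "intel"
	return name, ok
}

func main() {
	// Placeholder capability map of a meta backend.
	caps := map[string]string{
		"nvidia": "nvidia-backend",
		"amd":    "amd-backend",
		"intel":  "intel-backend",
	}
	if name, ok := resolveBackend(caps); ok {
		fmt.Println("would install:", name)
	} else {
		fmt.Println("no matching backend for this system")
	}
}
```
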
```markdown
@@ -23,7 +23,6 @@ List of the Environment Variables:
|----------------------|--------------------------------------------------------------|
| **DOCKER_INSTALL** | Set to "true" to enable the installation of Docker images. |
| **USE_AIO** | Set to "true" to use the all-in-one LocalAI Docker image. |
| **USE_EXTRAS** | Set to "true" to use images with extra Python dependencies. |
| **USE_VULKAN** | Set to "true" to use Vulkan GPU support. |
| **API_KEY** | Specify an API key for accessing LocalAI, if required. |
| **PORT** | Specifies the port on which LocalAI will run (default is 8080). |

@@ -39,7 +38,6 @@ List of the Environment Variables:

The installer will automatically detect your GPU and select the appropriate image. By default, it uses the standard images without extra Python dependencies. You can customize the image selection using the following environment variables:

- `USE_EXTRAS=true`: Use images with extra Python dependencies (larger images, ~17GB)
- `USE_AIO=true`: Use all-in-one images that include all dependencies
- `USE_VULKAN=true`: Use Vulkan GPU support instead of vendor-specific GPU support
```

@@ -71,15 +71,15 @@ To use CUDA, use the images with the `cublas` tag, for example.
|
||||
|
||||
The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags):
|
||||
|
||||
- CUDA `11` tags: `master-cublas-cuda11`, `v1.40.0-cublas-cuda11`, ...
|
||||
- CUDA `12` tags: `master-cublas-cuda12`, `v1.40.0-cublas-cuda12`, ...
|
||||
- CUDA `11` + FFmpeg tags: `master-cublas-cuda11-ffmpeg`, `v1.40.0-cublas-cuda11-ffmpeg`, ...
|
||||
- CUDA `12` + FFmpeg tags: `master-cublas-cuda12-ffmpeg`, `v1.40.0-cublas-cuda12-ffmpeg`, ...
|
||||
- CUDA `11` tags: `master-gpu-nvidia-cuda11`, `v1.40.0-gpu-nvidia-cuda11`, ...
|
||||
- CUDA `12` tags: `master-gpu-nvidia-cuda12`, `v1.40.0-gpu-nvidia-cuda12`, ...
|
||||
- CUDA `11` + FFmpeg tags: `master-gpu-nvidia-cuda11-ffmpeg`, `v1.40.0-gpu-nvidia-cuda11-ffmpeg`, ...
|
||||
- CUDA `12` + FFmpeg tags: `master-gpu-nvidia-cuda12-ffmpeg`, `v1.40.0-gpu-nvidia-cuda12-ffmpeg`, ...
|
||||
|
||||
In addition to the commands to run LocalAI normally, you need to specify `--gpus all` to docker, for example:
|
||||
|
||||
```bash
|
||||
docker run --rm -ti --gpus all -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:v1.40.0-cublas-cuda12
|
||||
docker run --rm -ti --gpus all -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:v1.40.0-gpu-nvidia-cuda12
|
||||
```
|
||||
|
||||
If the GPU inferencing is working, you should be able to see something like:
|
||||
@@ -232,8 +232,8 @@ spec:
|
||||
- env:
|
||||
- name: HIP_VISIBLE_DEVICES
|
||||
value: '0'
|
||||
# This variable indicates the devices availible to container (0:device1 1:device2 2:device3) etc.
|
||||
# For multiple devices (say device 1 and 3) the value would be equivelant to HIP_VISIBLE_DEVICES="0,2"
|
||||
# This variable indicates the devices available to container (0:device1 1:device2 2:device3) etc.
|
||||
# For multiple devices (say device 1 and 3) the value would be equivalent to HIP_VISIBLE_DEVICES="0,2"
|
||||
# Please take note of this when an iGPU is present in host system as compatability is not assured.
|
||||
...
|
||||
resources:
|
||||
@@ -259,7 +259,7 @@ If building from source, you need to install [Intel oneAPI Base Toolkit](https:/

### Container images

To use SYCL, use the images with the `sycl-f16` or `sycl-f32` tag, for example `{{< version >}}-sycl-f32-core`, `{{< version >}}-sycl-f16-ffmpeg-core`, ...
To use SYCL, use the images with the `gpu-intel-f16` or `gpu-intel-f32` tag, for example `{{< version >}}-gpu-intel-f32-core`, `{{< version >}}-gpu-intel-f16-ffmpeg-core`, ...

The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags).

@@ -268,7 +268,7 @@ The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=ta

To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example:

```bash
docker run -e DEBUG=true --privileged -ti -v $PWD/models:/build/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-sycl-f32-ffmpeg-core phi-2
docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32-ffmpeg-core phi-2
```

### Notes

@@ -276,7 +276,7 @@ docker run -e DEBUG=true --privileged -ti -v $PWD/models:/build/models -p 8080:8

In addition to the commands to run LocalAI normally, you need to specify `--device /dev/dri` to docker, for example:

```bash
docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg-core
docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16-ffmpeg-core
```
Note also that SYCL has a known issue where it can hang with `mmap: true`; if it is explicitly enabled, disable it in the model configuration.
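As a sketch of how that might look (the file name and model values here are assumptions for illustration, not a fixed convention), a model configuration that explicitly disables `mmap` could be written as:

```bash
# Hypothetical model config disabling mmap to avoid the SYCL hang.
cat <<'EOF' > models/phi-2.yaml
name: phi-2
mmap: false
parameters:
  model: phi-2.Q4_K_M.gguf
EOF
```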
@@ -296,7 +296,7 @@ To use Vulkan, use the images with the `vulkan` tag, for example `{{< version >}}

To run LocalAI with Docker and Vulkan, you can use the following command as an example:

```bash
docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/build/models localai/localai:latest-vulkan-ffmpeg-core
docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/models localai/localai:latest-vulkan-ffmpeg-core
```

### Notes

@@ -308,7 +308,7 @@ These flags are the same as the sections above, depending on the hardware, for [

If you have mixed hardware, you can pass flags for multiple GPUs, for example:

```bash
docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/build/models \
docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/models \
  --gpus=all \ # nvidia passthrough
  --device /dev/dri --device /dev/kfd \ # AMD/Intel passthrough
  localai/localai:latest-vulkan-ffmpeg-core
@@ -91,6 +91,13 @@ Your backend container should:

5. Have a top level `run.sh` file that will be used to run the backend (a minimal sketch follows below)
6. Be pushed to a registry so it can be used in a gallery

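To make the `run.sh` requirement concrete, here is a minimal, hypothetical sketch; the binary name and the assumption that the gRPC listen address arrives as the first argument are illustrative only — check an existing backend such as `bark-cpp` for the exact contract.

```bash
#!/bin/bash
# Hypothetical top-level run.sh for a backend container.
# Assumption (illustrative): the gRPC address to listen on is passed as "$1".
set -e
cd "$(dirname "$0")"
exec ./my-backend --addr "${1:-127.0.0.1:50051}"
```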
### Getting started

To get started, see the backends already available in LocalAI here: https://github.com/mudler/LocalAI/tree/master/backend .

- For Python based backends there is a template that can be used as a starting point: https://github.com/mudler/LocalAI/tree/master/backend/python/common/template .
- For Golang based backends, you can see the `bark-cpp` backend as an example: https://github.com/mudler/LocalAI/tree/master/backend/go/bark
- For C++ based backends, you can see the `llama-cpp` backend as an example: https://github.com/mudler/LocalAI/tree/master/backend/cpp/llama

### Publishing Your Backend

@@ -116,4 +123,4 @@ LocalAI supports various types of backends:

- **LLM Backends**: For running language models
- **Diffusion Backends**: For image generation
- **TTS Backends**: For text-to-speech conversion
- **Whisper Backends**: For speech-to-text conversion
@@ -40,7 +40,7 @@ curl http://localhost:8080/v1/images/generations -H "Content-Type: application/j

### stablediffusion-ggml

This backend is based on [stable-diffusion.cpp](https://github.com/leejet/stable-diffusion.cpp). Every model supported by that backend is suppoerted indeed with LocalAI.
This backend is based on [stable-diffusion.cpp](https://github.com/leejet/stable-diffusion.cpp). Every model supported by that backend is also supported by LocalAI.

#### Setup
@@ -327,4 +327,4 @@ diffusers:

```bash
(echo -n '{"prompt": "spiderman surfing","size": "512x512","model":"txt2vid"}') |
curl -H "Content-Type: application/json" -X POST -d @- http://localhost:8080/v1/images/generations
```
@@ -127,13 +127,6 @@ docker build -t localai .
docker run localai
```

There are some build arguments that can be used to customize the build:

| Variable | Default | Description |
| ---------------------| ------- | ----------- |
| `IMAGE_TYPE` | `extras` | Build type. Available: `core`, `extras` |

### Example: Build on mac

Building on Mac (M1, M2 or M3) works, but you may need to install some prerequisites using `brew`.
@@ -92,7 +92,7 @@ services:
      - DEBUG=true
      # ...
    volumes:
      - ./models:/build/models:cached
      - ./models:/models:cached
    # uncomment the following piece if running with Nvidia GPUs
    # deploy:
    #   resources:
@@ -105,21 +105,21 @@ services:

{{% alert icon="💡" %}}

**Models caching**: The **AIO** image will download the needed models on the first run if not already present and store those in `/build/models` inside the container. The AIO models will be automatically updated with new versions of AIO images.
**Models caching**: The **AIO** image will download the needed models on the first run if not already present and store those in `/models` inside the container. The AIO models will be automatically updated with new versions of AIO images.

You can change the directory inside the container by specifying a `MODELS_PATH` environment variable (or `--models-path`).

If you want to use a named model or a local directory, you can mount it as a volume to `/build/models`:
If you want to use a named model or a local directory, you can mount it as a volume to `/models`:

```bash
docker run -p 8080:8080 --name local-ai -ti -v $PWD/models:/build/models localai/localai:latest-aio-cpu
docker run -p 8080:8080 --name local-ai -ti -v $PWD/models:/models localai/localai:latest-aio-cpu
```

or associate a volume:

```bash
docker volume create localai-models
docker run -p 8080:8080 --name local-ai -ti -v localai-models:/build/models localai/localai:latest-aio-cpu
docker run -p 8080:8080 --name local-ai -ti -v localai-models:/models localai/localai:latest-aio-cpu
```

{{% /alert %}}
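As a hedged example of the `MODELS_PATH` override mentioned above (the paths are illustrative, not required values):

```bash
# Store models under /data/models inside the container instead of the default path.
docker run -p 8080:8080 --name local-ai -ti \
  -e MODELS_PATH=/data/models \
  -v $PWD/models:/data/models \
  localai/localai:latest-aio-cpu
```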
@@ -150,10 +150,6 @@ The AIO Images are inheriting the same environment variables as the base images

Standard container images do not have pre-installed models.

Images are available with and without python dependencies (images with the `extras` suffix). Note that images with python dependencies are bigger (on the order of 17GB).

Images with `core` in the tag are smaller and do not contain any python dependencies.

{{< tabs tabTotal="8" >}}
{{% tab tabName="Vanilla / CPU Images" %}}
@@ -169,10 +165,9 @@ Images with `core` in the tag are smaller and do not contain any python dependen

| Description | Quay | Docker Hub |
| --- | --- |-------------------------------------------------------------|
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda11` | `localai/localai:master-cublas-cuda11` |
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda11` | `localai/localai:master-gpu-nvidia-cuda11` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-11` | `localai/localai:latest-gpu-nvidia-cuda-11` |
| Latest tag with extras | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-11-extras` | `localai/localai:latest-gpu-nvidia-cuda-11-extras` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11` | `localai/localai:{{< version >}}-cublas-cuda11` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda11` | `localai/localai:{{< version >}}-gpu-nvidia-cuda11` |

{{% /tab %}}
@@ -180,10 +175,9 @@ Images with `core` in the tag are smaller and do not contain any python dependen

| Description | Quay | Docker Hub |
| --- | --- |-------------------------------------------------------------|
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda12` | `localai/localai:master-cublas-cuda12` |
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda12` | `localai/localai:master-gpu-nvidia-cuda12` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-12` | `localai/localai:latest-gpu-nvidia-cuda-12` |
| Latest tag with extras | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-12-extras` | `localai/localai:latest-gpu-nvidia-cuda-12-extras` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12` | `localai/localai:{{< version >}}-cublas-cuda12` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda12` | `localai/localai:{{< version >}}-gpu-nvidia-cuda12` |

{{% /tab %}}
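For example, to pull the development CUDA 12 image listed in the table above:

```bash
docker pull quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda12
```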
@@ -191,10 +185,9 @@ Images with `core` in the tag are smaller and do not contain any python dependen

| Description | Quay | Docker Hub |
| --- | --- |-------------------------------------------------------------|
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f16` | `localai/localai:master-sycl-f16` |
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-intel-f16` | `localai/localai:master-gpu-intel-f16` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f16` | `localai/localai:latest-gpu-intel-f16` |
| Latest tag with extras | `quay.io/go-skynet/local-ai:latest-gpu-intel-f16-extras` | `localai/localai:latest-gpu-intel-f16-extras` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16` | `localai/localai:{{< version >}}-sycl-f16` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16` | `localai/localai:{{< version >}}-gpu-intel-f16` |

{{% /tab %}}
@@ -202,10 +195,9 @@ Images with `core` in the tag are smaller and do not contain any python dependen

| Description | Quay | Docker Hub |
| --- | --- |-------------------------------------------------------------|
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f32` | `localai/localai:master-sycl-f32` |
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-intel-f32` | `localai/localai:master-gpu-intel-f32` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f32` | `localai/localai:latest-gpu-intel-f32` |
| Latest tag with extras | `quay.io/go-skynet/local-ai:latest-gpu-intel-f32-extras` | `localai/localai:latest-gpu-intel-f32-extras` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32` | `localai/localai:{{< version >}}-sycl-f32` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f32` | `localai/localai:{{< version >}}-gpu-intel-f32` |

{{% /tab %}}
@@ -215,7 +207,6 @@ Images with `core` in the tag are smaller and do not contain any python dependen

| --- | --- |-------------------------------------------------------------|
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-hipblas` | `localai/localai:master-hipblas` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-hipblas` | `localai/localai:latest-gpu-hipblas` |
| Latest tag with extras | `quay.io/go-skynet/local-ai:latest-gpu-hipblas-extras` | `localai/localai:latest-gpu-hipblas-extras` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-hipblas` | `localai/localai:{{< version >}}-hipblas` |

{{% /tab %}}
@@ -27,19 +27,68 @@ curl https://localai.io/install.sh | sh

See [Installer]({{% relref "docs/advanced/installer" %}}) for all the supported options

### Run with docker:
```bash
# CPU only image:
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu
### Run with docker

# Nvidia GPU:

#### CPU only image:

```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
```

#### NVIDIA GPU Images:

```bash
# CUDA 12.0
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12

# CPU and GPU image (bigger size):
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
# CUDA 11.7
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11

# AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
# NVIDIA Jetson (L4T) ARM64
# First, you need to have installed the nvidia container toolkit: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-ap
docker run -ti --name local-ai -p 8080:8080 --runtime nvidia --gpus all localai/localai:latest-nvidia-l4t-arm64
```

#### AMD GPU Images (ROCm):

```bash
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas
```

#### Intel GPU Images (oneAPI):

```bash
# Intel GPU with FP16 support
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16

# Intel GPU with FP32 support
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32
```

#### Vulkan GPU Images:

```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-vulkan
```

#### AIO Images (pre-downloaded models):

```bash
# CPU version
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu

# NVIDIA CUDA 12 version
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12

# NVIDIA CUDA 11 version
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11

# Intel GPU version
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel-f16

# AMD GPU version
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
```

### Load models:
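The hunk ends at the heading above; as a minimal illustration of what follows once the server is up (the model name here is an assumption for the example, not a preinstalled default), you can check the OpenAI-compatible endpoints:

```bash
# List the models the running instance currently exposes:
curl http://localhost:8080/v1/models

# Send a chat request; replace the model name with one you have installed:
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
  "model": "llama-3.2-1b-instruct",
  "messages": [{"role": "user", "content": "Hello!"}]
}'
```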
@@ -35,7 +35,7 @@ docker pull quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core

Run the LocalAI container on Nvidia ARM64 devices using the following command, where `/data/models` is the directory containing the models:

```bash
docker run -e DEBUG=true -p 8080:8080 -v /data/models:/build/models -ti --restart=always --name local-ai --runtime nvidia --gpus all quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core
docker run -e DEBUG=true -p 8080:8080 -v /data/models:/models -ti --restart=always --name local-ai --runtime nvidia --gpus all quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core
```

Note: `/data/models` is the directory containing the models. You can replace it with the directory containing your models.
@@ -288,8 +288,8 @@ From this release the default behavior of images has changed. Compilation is not

### Container images
- Standard (GPT + `stablediffusion`): `quay.io/go-skynet/local-ai:v1.20.0`
- FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-ffmpeg`
- CUDA 11+FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-cublas-cuda11-ffmpeg`
- CUDA 12+FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-cublas-cuda12-ffmpeg`
- CUDA 11+FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-gpu-nvidia-cuda11-ffmpeg`
- CUDA 12+FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-gpu-nvidia-cuda12-ffmpeg`

### Updates

@@ -339,8 +339,8 @@ You can check the full changelog in [Github](https://github.com/go-skynet/LocalA

Container images:
- Standard (GPT + `stablediffusion`): `quay.io/go-skynet/local-ai:v1.19.2`
- FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-ffmpeg`
- CUDA 11+FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-cublas-cuda11-ffmpeg`
- CUDA 12+FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-cublas-cuda12-ffmpeg`
- CUDA 11+FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-gpu-nvidia-cuda11-ffmpeg`
- CUDA 12+FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-gpu-nvidia-cuda12-ffmpeg`

---
@@ -1,3 +1,3 @@
{
"version": "v2.29.0"
"version": "v3.0.0"
}
27 docs/static/install.sh (vendored)
@@ -16,7 +16,6 @@
# Environment Variables:
# DOCKER_INSTALL - Set to "true" to install Docker images (default: auto-detected)
# USE_AIO - Set to "true" to use the all-in-one LocalAI image (default: false)
# USE_EXTRAS - Set to "true" to use images with extra Python dependencies (default: false)
# USE_VULKAN - Set to "true" to use Vulkan GPU support (default: false)
# API_KEY - API key for securing LocalAI access (default: none)
# PORT - Port to run LocalAI on (default: 8080)
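As a usage sketch (the values are illustrative only), these variables can be set before invoking the installer:

```bash
# Download the installer once, then run it with explicit options.
curl -fsSL https://localai.io/install.sh -o install.sh
USE_VULKAN=true PORT=9090 API_KEY=my-secret-key sh install.sh
```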
@@ -160,7 +159,6 @@ uninstall_localai() {
|
||||
|
||||
# DOCKER_INSTALL - set to "true" to install Docker images
|
||||
# USE_AIO - set to "true" to install the all-in-one LocalAI image
|
||||
# USE_EXTRAS - set to "true" to use images with extra Python dependencies
|
||||
# USE_VULKAN - set to "true" to use Vulkan GPU support
|
||||
PORT=${PORT:-8080}
|
||||
|
||||
@@ -175,7 +173,6 @@ fi
|
||||
|
||||
DOCKER_INSTALL=${DOCKER_INSTALL:-$docker_found}
|
||||
USE_AIO=${USE_AIO:-false}
|
||||
USE_EXTRAS=${USE_EXTRAS:-false}
|
||||
USE_VULKAN=${USE_VULKAN:-false}
|
||||
API_KEY=${API_KEY:-}
|
||||
CORE_IMAGES=${CORE_IMAGES:-false}
|
||||
@@ -666,7 +663,7 @@ install_docker() {
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-vulkan
|
||||
|
||||
info "Starting LocalAI Docker container..."
|
||||
$SUDO docker run -v local-ai-data:/build/models \
|
||||
$SUDO docker run -v local-ai-data:/models \
|
||||
--device /dev/dri \
|
||||
--restart=always \
|
||||
-e API_KEY=$API_KEY \
|
||||
@@ -675,11 +672,7 @@ install_docker() {
|
||||
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
|
||||
elif [ "$HAS_CUDA" ]; then
|
||||
# Default to CUDA 12
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-cublas-cuda12
|
||||
# EXTRAS
|
||||
if [ "$USE_EXTRAS" = true ]; then
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-cublas-cuda12-extras
|
||||
fi
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-gpu-nvidia-cuda12
|
||||
# AIO
|
||||
if [ "$USE_AIO" = true ]; then
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-nvidia-cuda-12
|
||||
@@ -697,7 +690,7 @@ install_docker() {
|
||||
fi
|
||||
|
||||
info "Starting LocalAI Docker container..."
|
||||
$SUDO docker run -v local-ai-data:/build/models \
|
||||
$SUDO docker run -v local-ai-data:/models \
|
||||
--gpus all \
|
||||
--restart=always \
|
||||
-e API_KEY=$API_KEY \
|
||||
@@ -706,17 +699,13 @@ install_docker() {
|
||||
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
|
||||
elif [ "$HAS_AMD" ]; then
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-hipblas
|
||||
# EXTRAS
|
||||
if [ "$USE_EXTRAS" = true ]; then
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-hipblas-extras
|
||||
fi
|
||||
# AIO
|
||||
if [ "$USE_AIO" = true ]; then
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-hipblas
|
||||
fi
|
||||
|
||||
info "Starting LocalAI Docker container..."
|
||||
$SUDO docker run -v local-ai-data:/build/models \
|
||||
$SUDO docker run -v local-ai-data:/models \
|
||||
--device /dev/dri \
|
||||
--device /dev/kfd \
|
||||
--group-add=video \
|
||||
@@ -727,18 +716,14 @@ install_docker() {
|
||||
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
|
||||
elif [ "$HAS_INTEL" ]; then
|
||||
# Default to FP32 for better compatibility
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-sycl-f32
|
||||
# EXTRAS
|
||||
if [ "$USE_EXTRAS" = true ]; then
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-sycl-f32-extras
|
||||
fi
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-gpu-intel-f32
|
||||
# AIO
|
||||
if [ "$USE_AIO" = true ]; then
|
||||
IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-intel-f32
|
||||
fi
|
||||
|
||||
info "Starting LocalAI Docker container..."
|
||||
$SUDO docker run -v local-ai-data:/build/models \
|
||||
$SUDO docker run -v local-ai-data:/models \
|
||||
--device /dev/dri \
|
||||
--restart=always \
|
||||
-e API_KEY=$API_KEY \
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
cd /build
|
||||
cd /
|
||||
|
||||
# If we have set EXTRA_BACKENDS, then we need to prepare the backends
|
||||
if [ -n "$EXTRA_BACKENDS" ]; then
|
||||
@@ -13,38 +13,23 @@ if [ -n "$EXTRA_BACKENDS" ]; then
|
||||
done
|
||||
fi
|
||||
|
||||
if [ "$REBUILD" != "false" ]; then
|
||||
rm -rf ./local-ai
|
||||
make build -j${BUILD_PARALLELISM:-1}
|
||||
echo "CPU info:"
|
||||
grep -e "model\sname" /proc/cpuinfo | head -1
|
||||
grep -e "flags" /proc/cpuinfo | head -1
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
else
|
||||
echo "@@@@@"
|
||||
echo "Skipping rebuild"
|
||||
echo "@@@@@"
|
||||
echo "If you are experiencing issues with the pre-compiled builds, try setting REBUILD=true"
|
||||
echo "If you are still experiencing issues with the build, try setting CMAKE_ARGS and disable the instructions set as needed:"
|
||||
echo 'CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF"'
|
||||
echo "see the documentation at: https://localai.io/basics/build/index.html"
|
||||
echo "Note: See also https://github.com/go-skynet/LocalAI/issues/288"
|
||||
echo "@@@@@"
|
||||
echo "CPU info:"
|
||||
grep -e "model\sname" /proc/cpuinfo | head -1
|
||||
grep -e "flags" /proc/cpuinfo | head -1
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
else
|
||||
echo "CPU: no AVX found"
|
||||
fi
|
||||
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX2 found OK"
|
||||
else
|
||||
echo "CPU: no AVX2 found"
|
||||
fi
|
||||
if grep -q -e "\savx512" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX512 found OK"
|
||||
else
|
||||
echo "CPU: no AVX512 found"
|
||||
fi
|
||||
echo "@@@@@"
|
||||
echo "CPU: no AVX found"
|
||||
fi
|
||||
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX2 found OK"
|
||||
else
|
||||
echo "CPU: no AVX2 found"
|
||||
fi
|
||||
if grep -q -e "\savx512" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX512 found OK"
|
||||
else
|
||||
echo "CPU: no AVX512 found"
|
||||
fi
|
||||
|
||||
exec ./local-ai "$@"
|
||||
|
||||
@@ -1170,6 +1170,115 @@
|
||||
- filename: Yanfei-v2-Qwen3-32B.Q4_K_M.gguf
|
||||
sha256: b9c87f5816a66e9036b4af013e3d658f8a11f5e987c44e6d4cb6c4f91e82d3df
|
||||
uri: huggingface://mradermacher/Yanfei-v2-Qwen3-32B-GGUF/Yanfei-v2-Qwen3-32B.Q4_K_M.gguf
|
||||
- !!merge <<: *qwen3
|
||||
name: "qwen3-the-josiefied-omega-directive-22b-uncensored-abliterated-i1"
|
||||
icon: https://huggingface.co/DavidAU/Qwen3-The-Josiefied-Omega-Directive-22B-uncensored-abliterated/resolve/main/omega.jpg
|
||||
urls:
|
||||
- https://huggingface.co/DavidAU/Qwen3-The-Josiefied-Omega-Directive-22B-uncensored-abliterated
|
||||
- https://huggingface.co/mradermacher/Qwen3-The-Josiefied-Omega-Directive-22B-uncensored-abliterated-i1-GGUF
|
||||
description: |
|
||||
WARNING: NSFW. Vivid prose. INTENSE. Visceral Details. Violence. HORROR. GORE. Swearing. UNCENSORED... humor, romance, fun.
|
||||
A massive 22B, 62 layer merge of the fantastic "The-Omega-Directive-Qwen3-14B-v1.1" and off the scale "Goekdeniz-Guelmez/Josiefied-Qwen3-14B-abliterated-v3" in Qwen3, with full reasoning (can be turned on or off) and the model is completely uncensored/abliterated too.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen3-The-Josiefied-Omega-Directive-22B-uncensored-abliterated.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-The-Josiefied-Omega-Directive-22B-uncensored-abliterated.i1-Q4_K_M.gguf
|
||||
sha256: 3d43e00b685004688b05f75d77f756a84eaa24e042d536e12e3ce1faa71f8c64
|
||||
uri: huggingface://mradermacher/Qwen3-The-Josiefied-Omega-Directive-22B-uncensored-abliterated-i1-GGUF/Qwen3-The-Josiefied-Omega-Directive-22B-uncensored-abliterated.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen3
|
||||
name: "menlo_jan-nano"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/65713d70f56f9538679e5a56/wC7Xtolp7HOFIdKTOJhVt.png
|
||||
urls:
|
||||
- https://huggingface.co/Menlo/Jan-nano
|
||||
- https://huggingface.co/bartowski/Menlo_Jan-nano-GGUF
|
||||
description: |
|
||||
Jan-Nano is a compact 4-billion parameter language model specifically designed and trained for deep research tasks. This model has been optimized to work seamlessly with Model Context Protocol (MCP) servers, enabling efficient integration with various research tools and data sources.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Menlo_Jan-nano-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Menlo_Jan-nano-Q4_K_M.gguf
|
||||
sha256: b90a30f226e6bce26ef9e0db444cb12530edf90b0eea0defc15b0e361fc698eb
|
||||
uri: huggingface://bartowski/Menlo_Jan-nano-GGUF/Menlo_Jan-nano-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen3
|
||||
name: "qwen3-the-xiaolong-omega-directive-22b-uncensored-abliterated-i1"
|
||||
icon: https://huggingface.co/DavidAU/Qwen3-The-Xiaolong-Omega-Directive-22B-uncensored-abliterated/resolve/main/little-dragon-moon.jpg
|
||||
urls:
|
||||
- https://huggingface.co/DavidAU/Qwen3-The-Xiaolong-Omega-Directive-22B-uncensored-abliterated
|
||||
- https://huggingface.co/mradermacher/Qwen3-The-Xiaolong-Omega-Directive-22B-uncensored-abliterated-i1-GGUF
|
||||
description: |
|
||||
WARNING: NSFW. Vivid prose. INTENSE. Visceral Details. Violence. HORROR. GORE. Swearing. UNCENSORED... humor, romance, fun.
|
||||
A massive 22B, 62 layer merge of the fantastic "The-Omega-Directive-Qwen3-14B-v1.1" (by ReadyArt) and off the scale "Xiaolong-Qwen3-14B" (by nbeerbower) in Qwen3, with full reasoning (can be turned on or off) and the model is completely uncensored/abliterated too.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen3-The-Xiaolong-Omega-Directive-22B-uncensored-abliterated.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen3-The-Xiaolong-Omega-Directive-22B-uncensored-abliterated.i1-Q4_K_M.gguf
|
||||
sha256: ecee2813ab0b9cc6f555aff81dfbfe380f7bdaf15cef475c8ff402462f4ddd41
|
||||
uri: huggingface://mradermacher/Qwen3-The-Xiaolong-Omega-Directive-22B-uncensored-abliterated-i1-GGUF/Qwen3-The-Xiaolong-Omega-Directive-22B-uncensored-abliterated.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen3
|
||||
name: "allura-org_q3-8b-kintsugi"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/634262af8d8089ebaefd410e/o_fhP0riFrKh-5XyPxQyk.png
|
||||
urls:
|
||||
- https://huggingface.co/allura-org/Q3-8B-Kintsugi
|
||||
- https://huggingface.co/allura-quants/allura-org_Q3-8B-Kintsugi-GGUF
|
||||
description: |
|
||||
Q3-8B-Kintsugi is a roleplaying model finetuned from Qwen3-8B-Base.
|
||||
During testing, Kintsugi punched well above its weight class in terms of parameters, especially for 1-on-1 roleplaying and general storywriting.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Q3-8B-Kintsugi-Q4_K_M.GGUF
|
||||
files:
|
||||
- filename: Q3-8B-Kintsugi-Q4_K_M.GGUF
|
||||
sha256: 2eecf44c709ef02794346d84f7d69ee30059c2a71186e4d18a0861958a4a52db
|
||||
uri: huggingface://allura-quants/allura-org_Q3-8B-Kintsugi-GGUF/Q3-8B-Kintsugi-Q4_K_M.GGUF
|
||||
- !!merge <<: *qwen3
|
||||
name: "ds-r1-qwen3-8b-arliai-rpr-v4-small-iq-imatrix"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/6625f4a8a8d1362ebcc3851a/hIZ2ZcaDyfYLT9Yd4pfOs.jpeg
|
||||
urls:
|
||||
- https://huggingface.co/ArliAI/DS-R1-Qwen3-8B-ArliAI-RpR-v4-Small
|
||||
- https://huggingface.co/Lewdiculous/DS-R1-Qwen3-8B-ArliAI-RpR-v4-Small-GGUF-IQ-Imatrix
|
||||
description: |
|
||||
The best RP/creative model series from ArliAI yet again. This time made based on DS-R1-0528-Qwen3-8B-Fast for a smaller memory footprint.
|
||||
|
||||
Reduced repetitions and impersonation
|
||||
|
||||
To add to the creativity and out of the box thinking of RpR v3, a more advanced filtering method was used in order to remove examples where the LLM repeated similar phrases or talked for the user. Any repetition or impersonation cases that happens will be due to how the base QwQ model was trained, and not because of the RpR dataset.
|
||||
|
||||
Increased training sequence length
|
||||
|
||||
The training sequence length was increased to 16K in order to help awareness and memory even on longer chats.
|
||||
overrides:
|
||||
parameters:
|
||||
model: DS-R1-Qwen3-8B-ArliAI-RpR-v4-Small-Q4_K_M-imat.gguf
|
||||
files:
|
||||
- filename: DS-R1-Qwen3-8B-ArliAI-RpR-v4-Small-Q4_K_M-imat.gguf
|
||||
sha256: b40be91d3d2f2497efa849e69f0bb303956b54e658f57bc39c41dba424018d71
|
||||
uri: huggingface://Lewdiculous/DS-R1-Qwen3-8B-ArliAI-RpR-v4-Small-GGUF-IQ-Imatrix/DS-R1-Qwen3-8B-ArliAI-RpR-v4-Small-Q4_K_M-imat.gguf
|
||||
- !!merge <<: *qwen3
|
||||
name: "menlo_jan-nano-128k"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/65713d70f56f9538679e5a56/NP7CvcjOtLX8mST0t7eAM.png
|
||||
urls:
|
||||
- https://huggingface.co/Menlo/Jan-nano-128k
|
||||
- https://huggingface.co/bartowski/Menlo_Jan-nano-128k-GGUF
|
||||
description: |
|
||||
Jan-Nano-128k represents a significant advancement in compact language models for research applications. Building upon the success of Jan-Nano, this enhanced version features a native 128k context window that enables deeper, more comprehensive research capabilities without the performance degradation typically associated with context extension methods.
|
||||
|
||||
Key Improvements:
|
||||
|
||||
🔍 Research Deeper: Extended context allows for processing entire research papers, lengthy documents, and complex multi-turn conversations
|
||||
⚡ Native 128k Window: Built from the ground up to handle long contexts efficiently, maintaining performance across the full context range
|
||||
📈 Enhanced Performance: Unlike traditional context extension methods, Jan-Nano-128k shows improved performance with longer contexts
|
||||
|
||||
This model maintains full compatibility with Model Context Protocol (MCP) servers while dramatically expanding the scope of research tasks it can handle in a single session.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Menlo_Jan-nano-128k-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Menlo_Jan-nano-128k-Q4_K_M.gguf
|
||||
sha256: a864031a138288da427ca176afd61d7fe2b03fd19a84a656b2691aa1f7a12921
|
||||
uri: huggingface://bartowski/Menlo_Jan-nano-128k-GGUF/Menlo_Jan-nano-128k-Q4_K_M.gguf
|
||||
- &gemma3
|
||||
url: "github:mudler/LocalAI/gallery/gemma.yaml@master"
|
||||
name: "gemma-3-27b-it"
|
||||
@@ -1867,6 +1976,36 @@
|
||||
- filename: medgemma-27b-text-it-Q4_K_M.gguf
|
||||
sha256: 383b1c414d3f2f1a9c577a61e623d29a4ed4f7834f60b9e5412f5ff4e8aaf080
|
||||
uri: huggingface://unsloth/medgemma-27b-text-it-GGUF/medgemma-27b-text-it-Q4_K_M.gguf
|
||||
- !!merge <<: *gemma3
|
||||
name: "gemma-3n-e2b-it"
|
||||
urls:
|
||||
- https://huggingface.co/google/gemma-3n-E4B-it
|
||||
- https://huggingface.co/ggml-org/gemma-3n-E2B-it-GGUF
|
||||
description: |
|
||||
Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. Gemma 3n models are designed for efficient execution on low-resource devices. They are capable of multimodal input, handling text, image, video, and audio input, and generating text outputs, with open weights for pre-trained and instruction-tuned variants. These models were trained with data in over 140 spoken languages.
|
||||
Gemma 3n models use selective parameter activation technology to reduce resource requirements. This technique allows the models to operate at an effective size of 2B and 4B parameters, which is lower than the total number of parameters they contain. For more information on Gemma 3n's efficient parameter management technology, see the Gemma 3n page.
|
||||
overrides:
|
||||
parameters:
|
||||
model: gemma-3n-E2B-it-Q8_0.gguf
|
||||
files:
|
||||
- filename: gemma-3n-E2B-it-Q8_0.gguf
|
||||
sha256: 038a47c482e7af3009c462b56a7592e1ade3c7862540717aa1d9dee1760c337b
|
||||
uri: huggingface://ggml-org/gemma-3n-E2B-it-GGUF/gemma-3n-E2B-it-Q8_0.gguf
|
||||
- !!merge <<: *gemma3
|
||||
name: "gemma-3n-e4b-it"
|
||||
urls:
|
||||
- https://huggingface.co/google/gemma-3n-E4B-it
|
||||
- https://huggingface.co/ggml-org/gemma-3n-E4B-it-GGUF
|
||||
description: |
|
||||
Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. Gemma 3n models are designed for efficient execution on low-resource devices. They are capable of multimodal input, handling text, image, video, and audio input, and generating text outputs, with open weights for pre-trained and instruction-tuned variants. These models were trained with data in over 140 spoken languages.
|
||||
Gemma 3n models use selective parameter activation technology to reduce resource requirements. This technique allows the models to operate at an effective size of 2B and 4B parameters, which is lower than the total number of parameters they contain. For more information on Gemma 3n's efficient parameter management technology, see the Gemma 3n page.
|
||||
overrides:
|
||||
parameters:
|
||||
model: gemma-3n-E4B-it-Q8_0.gguf
|
||||
files:
|
||||
- filename: gemma-3n-E4B-it-Q8_0.gguf
|
||||
sha256: 9f74079242c765116bd1f33123aa07160b5e93578c2d0032594b7ed97576f9c3
|
||||
uri: huggingface://ggml-org/gemma-3n-E4B-it-GGUF/gemma-3n-E4B-it-Q8_0.gguf
|
||||
- &llama4
|
||||
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
|
||||
icon: https://avatars.githubusercontent.com/u/153379578
|
||||
@@ -3355,6 +3494,27 @@
|
||||
- filename: Qwen2.5-Coder-32B-Instruct-Uncensored.i1-Q4_K_M.gguf
|
||||
sha256: 86ac8efb86daf241792ac3d5d35b7da92c54901b4208a6f2829bd03d8f273c9c
|
||||
uri: huggingface://mradermacher/Qwen2.5-Coder-32B-Instruct-Uncensored-i1-GGUF/Qwen2.5-Coder-32B-Instruct-Uncensored.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25coder
|
||||
name: "skywork_skywork-swe-32b"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/6665dd2b3a64c70529f7542c/8o-IE7N3GwSFCIH3ntc8E.png
|
||||
urls:
|
||||
- https://huggingface.co/Skywork/Skywork-SWE-32B
|
||||
- https://huggingface.co/bartowski/Skywork_Skywork-SWE-32B-GGUF
|
||||
description: |
|
||||
Skywork-SWE-32B is a code agent model developed by Skywork AI, specifically designed for software engineering (SWE) tasks. It demonstrates strong performance across several key metrics:
|
||||
|
||||
Skywork-SWE-32B attains 38.0% pass@1 accuracy on the SWE-bench Verified benchmark, outperforming previous open-source SoTA Qwen2.5-Coder-32B-based LLMs built on the OpenHands agent framework.
|
||||
When incorporated with test-time scaling techniques, the performance further improves to 47.0% accuracy, surpassing the previous SoTA results for sub-32B parameter models.
|
||||
We clearly demonstrate the data scaling law phenomenon for software engineering capabilities in LLMs, with no signs of saturation at 8209 collected training trajectories.
|
||||
|
||||
We also introduce an efficient and automated pipeline for SWE data collection, culminating in the creation of the Skywork-SWE dataset---a large-scale, high-quality dataset featuring comprehensive executable runtime environments. Detailed descriptions are available on our technical report.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Skywork_Skywork-SWE-32B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Skywork_Skywork-SWE-32B-Q4_K_M.gguf
|
||||
sha256: b5a451fa677159d7ab0aee64eeec5933aa4e5bd598e400501ecec3af0a767fa8
|
||||
uri: huggingface://bartowski/Skywork_Skywork-SWE-32B-GGUF/Skywork_Skywork-SWE-32B-Q4_K_M.gguf
|
||||
- &opencoder
|
||||
name: "opencoder-8b-base"
|
||||
icon: https://avatars.githubusercontent.com/u/186387526
|
||||
@@ -10390,6 +10550,55 @@
|
||||
- filename: mmproj-ultravox-v0_5-llama-3_1-8b-f16.gguf
|
||||
sha256: e6395ed42124303eaa9fca934452aabce14c59d2a56fab2dda65b798442289ff
|
||||
uri: https://huggingface.co/ggml-org/ultravox-v0_5-llama-3_1-8b-GGUF/resolve/main/mmproj-ultravox-v0_5-llama-3_1-8b-f16.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "astrosage-70b"
|
||||
urls:
|
||||
- https://huggingface.co/AstroMLab/AstroSage-70B
|
||||
- https://huggingface.co/mradermacher/AstroSage-70B-GGUF
|
||||
description: |
|
||||
Developed by: AstroMLab (Tijmen de Haan, Yuan-Sen Ting, Tirthankar Ghosal, Tuan Dung Nguyen, Alberto Accomazzi, Emily Herron, Vanessa Lama, Azton Wells, Nesar Ramachandra, Rui Pan)
|
||||
Funded by:
|
||||
Oak Ridge Leadership Computing Facility (OLCF), a DOE Office of Science User Facility at Oak Ridge National Laboratory (U.S. Department of Energy).
|
||||
Microsoft’s Accelerating Foundation Models Research (AFMR) program.
|
||||
World Premier International Research Center Initiative (WPI), MEXT, Japan.
|
||||
National Science Foundation (NSF).
|
||||
UChicago Argonne LLC, Operator of Argonne National Laboratory (U.S. Department of Energy).
|
||||
Reference Paper: Tijmen de Haan et al. (2025). "AstroMLab 4: Benchmark-Topping Performance in Astronomy Q&A with a 70B-Parameter Domain-Specialized Reasoning Model" https://arxiv.org/abs/2505.17592
|
||||
Model Type: Autoregressive transformer-based LLM, specialized in astronomy, astrophysics, space science, astroparticle physics, cosmology, and astronomical instrumentation.
|
||||
Model Architecture: AstroSage-70B is a fine-tuned derivative of the Meta-Llama-3.1-70B architecture, making no architectural changes. The Llama-3.1-70B-Instruct tokenizer is also used without modification.
|
||||
Context Length: Fine-tuned on 8192-token sequences. Base model was trained to 128k context length.
|
||||
AstroSage-70B is a large-scale, domain-specialized language model tailored for research and education in astronomy, astrophysics, space science, cosmology, and astronomical instrumentation. It builds on the Llama-3.1-70B foundation model, enhanced through extensive continued pre-training (CPT) on a vast corpus of astronomical literature, further refined with supervised fine-tuning (SFT) on instruction-following datasets, and finally enhanced via parameter averaging (model merging) with other popular fine tunes. AstroSage-70B aims to achieve state-of-the-art performance on astronomy-specific tasks, providing researchers, students, and enthusiasts with an advanced AI assistant. This 70B parameter model represents a significant scaling up from the AstroSage-8B model. The primary enhancements from the AstroSage-8B model are:
|
||||
|
||||
Stronger base model, higher parameter count for increased capacity
|
||||
Improved datasets
|
||||
Improved learning hyperparameters
|
||||
Reasoning capability (can be enabled or disabled at inference time)
|
||||
Training Lineage
|
||||
Base Model: Meta-Llama-3.1-70B.
|
||||
Continued Pre-Training (CPT): The base model underwent 2.5 epochs of CPT (168k GPU-hours) on a specialized astronomy corpus (details below, largely inherited from AstroSage-8B) to produce AstroSage-70B-CPT. This stage imbues domain-specific knowledge and language nuances.
|
||||
Supervised Fine-Tuning (SFT): AstroSage-70B-CPT was then fine-tuned for 0.6 epochs (13k GPU-hours) using astronomy-relevant and general-purpose instruction-following datasets, resulting in AstroSage-70B-SFT.
|
||||
Final Mixture: The released AstroSage-70B model is created via parameter averaging / model merging:
|
||||
DARE-TIES with rescale: true and lambda: 1.2
|
||||
AstroSage-70B-CPT designated as the "base model"
|
||||
70% AstroSage-70B-SFT (density 0.7)
|
||||
15% Llama-3.1-Nemotron-70B-Instruct (density 0.5)
|
||||
7.5% Llama-3.3-70B-Instruct (density 0.5)
|
||||
7.5% Llama-3.1-70B-Instruct (density 0.5)
|
||||
Intended Use: Like AstroSage-8B, this model can be used for a variety of LLM application, including
|
||||
Providing factual information and explanations in astronomy, astrophysics, cosmology, and instrumentation.
|
||||
Assisting with literature reviews and summarizing scientific papers.
|
||||
Answering domain-specific questions with high accuracy.
|
||||
Brainstorming research ideas and formulating hypotheses.
|
||||
Assisting with programming tasks related to astronomical data analysis.
|
||||
Serving as an educational tool for learning astronomical concepts.
|
||||
Potentially forming the core of future agentic research assistants capable of more autonomous scientific tasks.
|
||||
overrides:
|
||||
parameters:
|
||||
model: AstroSage-70B.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: AstroSage-70B.Q4_K_M.gguf
|
||||
sha256: 1d98dabfa001d358d9f95d2deba93a94ad8baa8839c75a0129cdb6bcf1507f38
|
||||
uri: huggingface://mradermacher/AstroSage-70B-GGUF/AstroSage-70B.Q4_K_M.gguf
|
||||
- &deepseek
|
||||
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" ## Deepseek
|
||||
name: "deepseek-coder-v2-lite-instruct"
|
||||
@@ -12655,6 +12864,47 @@
|
||||
- filename: mistralai_Magistral-Small-2506-Q4_K_M.gguf
|
||||
sha256: b681b81ba30238b7654db77b4b3afa7b0f6226c84d8bbd5a5dfb1a5a3cb95816
|
||||
uri: huggingface://bartowski/mistralai_Magistral-Small-2506-GGUF/mistralai_Magistral-Small-2506-Q4_K_M.gguf
|
||||
- !!merge <<: *mistral03
|
||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/634c17653d11eaedd88b314d/9OgyfKstSZtbmsmuG8MbU.png
|
||||
name: "mistralai_mistral-small-3.2-24b-instruct-2506"
|
||||
urls:
|
||||
- https://huggingface.co/mistralai/Mistral-Small-3.2-24B-Instruct-2506
|
||||
- https://huggingface.co/bartowski/mistralai_Mistral-Small-3.2-24B-Instruct-2506-GGUF
|
||||
description: |
|
||||
Mistral-Small-3.2-24B-Instruct-2506 is a minor update of Mistral-Small-3.1-24B-Instruct-2503.
|
||||
|
||||
Small-3.2 improves in the following categories:
|
||||
|
||||
Instruction following: Small-3.2 is better at following precise instructions
|
||||
Repetition errors: Small-3.2 produces less infinite generations or repetitive answers
|
||||
Function calling: Small-3.2's function calling template is more robust (see here and examples)
|
||||
|
||||
In all other categories Small-3.2 should match or slightly improve compared to Mistral-Small-3.1-24B-Instruct-2503.
|
||||
overrides:
|
||||
parameters:
|
||||
model: mistralai_Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: mistralai_Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf
|
||||
sha256: 2ad86e0934a4d6f021c1dbcf12d81aac75a84edd3a929294c09cb1cb6117627c
|
||||
uri: huggingface://bartowski/mistralai_Mistral-Small-3.2-24B-Instruct-2506-GGUF/mistralai_Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf
|
||||
- !!merge <<: *mistral03
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/66c26b6fb01b19d8c3c2467b/jxUvuFK1bdOdAPiYIcBW5.jpeg
|
||||
name: "delta-vector_austral-24b-winton"
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/Delta-Vector/Austral-24B-Winton
|
||||
- https://huggingface.co/bartowski/Delta-Vector_Austral-24B-Winton-GGUF
|
||||
description: |
|
||||
More than 1.5-metres tall, about six-metres long and up to 1000-kilograms heavy, Australovenator Wintonensis was a fast and agile hunter. The largest known Australian theropod.
|
||||
|
||||
This is a finetune of Harbinger 24B to be a generalist Roleplay/Adventure model. I've removed some of the "slops" that i noticed in an otherwise great model aswell as improving the general writing of the model, This was a multi-stage finetune, all previous checkpoints are released aswell.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Delta-Vector_Austral-24B-Winton-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Delta-Vector_Austral-24B-Winton-Q4_K_M.gguf
|
||||
sha256: feb76e0158d1ebba1809de89d01671b86037f768ebd5f6fb165885ae6338b1b7
|
||||
uri: huggingface://bartowski/Delta-Vector_Austral-24B-Winton-GGUF/Delta-Vector_Austral-24B-Winton-Q4_K_M.gguf
|
||||
- &mudler
|
||||
url: "github:mudler/LocalAI/gallery/mudler.yaml@master" ### START mudler's LocalAI specific-models
|
||||
name: "LocalAI-llama3-8b-function-call-v0.2"
|
||||
|
||||
@@ -7,10 +7,15 @@ import (

	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/gallery"
	"github.com/mudler/LocalAI/core/system"
)

func InstallExternalBackends(galleries []config.Gallery, backendPath string, downloadStatus func(string, string, string, float64), backends ...string) error {
	var errs error
	systemState, err := system.GetSystemState()
	if err != nil {
		return fmt.Errorf("failed to get system state: %w", err)
	}
	for _, backend := range backends {
		switch {
		case strings.HasPrefix(backend, "oci://"):
@@ -22,7 +27,7 @@ func InstallExternalBackends(galleries []config.Gallery, backendPath string, dow
				errs = errors.Join(err, fmt.Errorf("error installing backend %s", backend))
			}
		default:
			err := gallery.InstallBackendFromGallery(galleries, backend, backendPath, downloadStatus)
			err := gallery.InstallBackendFromGallery(galleries, systemState, backend, backendPath, downloadStatus)
			if err != nil {
				errs = errors.Join(err, fmt.Errorf("error installing backend %s", backend))
			}