Mirror of https://github.com/mudler/LocalAI.git
Compare commits: v2.18.0...docker_ima (14 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | 95f773ee4b |  |
|  | ad85c5a1e7 |  |
|  | 421eb8a727 |  |
|  | b7ff441cc0 |  |
|  | 83d867ad46 |  |
|  | 6acba2bcbe |  |
|  | 6a2a10603c |  |
|  | 356907a5cf |  |
|  | 7ab7a188d0 |  |
|  | ff1a5bfc62 |  |
|  | 522f185baf |  |
|  | f7b5a4ca7d |  |
|  | 1d30955677 |  |
|  | d3307e93d3 |  |
Dockerfile (101 changed lines)
@@ -5,6 +5,12 @@ ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
 
 # The requirements-core target is common to all images. Nothing should be placed in it unless every single build will use it.
 FROM ${BASE_IMAGE} AS requirements-core
+# TODO(mudler): install all accelerators here
+# and use make dist instead of build.
+# TODO(mudler): modify make dist to build also go-piper and stablediffusion
+# This way the same binary can work for everything(!)
+# TODO(mudler): also make sure that we bundle all the required libs in the backend-assets/lib
+# For the GPU-accelerated builds we are going to generate a tar file instead that will be extracted by the bash installer, and the libs will also be installed in the final docker image, so no need to pull ALL the dependencies
 
 USER root
@@ -49,10 +55,12 @@ ENV PATH /usr/local/cuda/bin:${PATH}
 # HipBLAS requirements
 ENV PATH /opt/rocm/bin:${PATH}
 
-# OpenBLAS requirements and stable diffusion
+# OpenBLAS requirements and stable diffusion, tts (espeak)
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         libopenblas-dev \
+        espeak-ng \
+        espeak \
         libopencv-dev && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
@@ -77,8 +85,6 @@ ENV PATH="/root/.cargo/bin:${PATH}"
 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-        espeak-ng \
-        espeak \
         python3-pip \
         python-is-python3 \
         python3-dev \
@@ -93,9 +99,8 @@ RUN pip install --user grpcio-tools
 ###################################
 ###################################
 
-# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
-# This target will be built on top of requirements-core or requirements-extras as determined by the IMAGE_TYPE build-arg
-FROM requirements-${IMAGE_TYPE} AS requirements-drivers
+# Base image for the build-type.
+FROM requirements-${IMAGE_TYPE} AS run-requirements-drivers
 
 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=12
@@ -186,6 +191,82 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
         ldconfig \
     ; fi
 
+# The build-requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
+# This target will be built on top of requirements-core or requirements-extras as determined by the IMAGE_TYPE build-arg
+FROM requirements-${IMAGE_TYPE} AS build-requirements-drivers
+
+ARG BUILD_TYPE
+ARG CUDA_MAJOR_VERSION=12
+ARG CUDA_MINOR_VERSION=5
+
+ENV BUILD_TYPE=${BUILD_TYPE}
+
+# Vulkan requirements
+RUN <<EOT bash
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+        software-properties-common pciutils wget gpg-agent && \
+    wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
+    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
+    apt-get update && \
+    apt-get install -y \
+        vulkan-sdk && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+EOT
+
+# CuBLAS requirements
+RUN <<EOT bash
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+        software-properties-common pciutils
+    if [ "amd64" = "$TARGETARCH" ]; then
+        curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
+    fi
+    if [ "arm64" = "$TARGETARCH" ]; then
+        curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
+    fi
+    dpkg -i cuda-keyring_1.1-1_all.deb && \
+    rm -f cuda-keyring_1.1-1_all.deb && \
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+        cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+        libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+        libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+        libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+        libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+        libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+EOT
+
+# clblas
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        libclblast-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# intel
+RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
+    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \
+    apt update && apt install -y intel-basekit && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# hipblas
+RUN wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
+        gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && \
+    apt-get update && \
+    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/6.1.2/ubuntu jammy main" \
+        | tee /etc/apt/sources.list.d/amdgpu.list && \
+    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.1.2 jammy main" | tee --append /etc/apt/sources.list.d/rocm.list && \
+    printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | tee /etc/apt/preferences.d/rocm-pin-600 && \
+    apt update && \
+    apt-get install -y --no-install-recommends \
+        hipblas-dev rocm-dev \
+        rocblas-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
+    # The ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
+    # to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
+    ldconfig
+
+###################################
+###################################
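The driver layer is now split in two: run-requirements-drivers keeps only what the final image needs at runtime, while the new build-requirements-drivers stage piles the full compile-time toolchains (Vulkan SDK, CUDA, clblast, oneAPI, ROCm) on top of the same base. Note that the heredoc `RUN <<EOT bash` blocks require BuildKit with a Dockerfile frontend of 1.4 or newer. A minimal sketch of targeting each stage directly, assuming BuildKit and the build args shown in the diff (image tags are illustrative, not from the repo):

```bash
# Build only the heavy compile-time dependency stage (tag is illustrative):
DOCKER_BUILDKIT=1 docker build \
  --target build-requirements-drivers \
  --build-arg IMAGE_TYPE=core \
  --build-arg BUILD_TYPE=cublas \
  --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=5 \
  -t localai:build-deps .

# Build the lean runtime dependency stage used by the final image:
DOCKER_BUILDKIT=1 docker build \
  --target run-requirements-drivers \
  --build-arg IMAGE_TYPE=core \
  --build-arg BUILD_TYPE=cublas \
  -t localai:run-deps .
```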
@@ -237,7 +318,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
 
 # The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
 # Adjustments to the build process should likely be made here.
-FROM requirements-drivers AS builder
+FROM build-requirements-drivers AS builder
 
 ARG GO_TAGS="stablediffusion tts p2p"
 ARG GRPC_BACKENDS
@@ -282,7 +363,8 @@ COPY --from=grpc /opt/grpc /usr/local
 
 # Rebuild with defaults backends
 WORKDIR /build
-RUN make build
+# Need to build tts and stablediffusion separately first (?)
+RUN make dist && rm release/*.sha256 && mv release/* local-ai
 
 RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
         mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
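Switching the builder from `make build` to `make dist` means the image build now produces the full release artifact set, including the extra llama.cpp backend binaries added in the Makefile hunk further down, rather than a single default build. A sketch of the equivalent steps outside Docker, assuming a checked-out source tree:

```bash
# Equivalent of the new RUN line, run from the source root (sketch):
make dist                 # builds local-ai plus the extra GPU backends into release/
rm release/*.sha256       # discard the generated checksum files
mv release/* local-ai     # leave a single binary for the later COPY --from=builder
```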
@@ -294,7 +376,7 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
 
 # This is the final target. The result of this target will be the image uploaded to the registry.
 # If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
-FROM requirements-drivers
+FROM run-requirements-drivers
 
 ARG FFMPEG
 ARG BUILD_TYPE
@@ -339,6 +421,7 @@ RUN make prepare-sources
 COPY --from=builder /build/local-ai ./
 
 # Copy shared libraries for piper
+# TODO(mudler): bundle these libs in backend-assets/lib/ (like we do for llama.cpp deps)
 COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
 
 # do not let stablediffusion rebuild (requires an older version of absl)
Makefile (4 changed lines)
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
 
 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=e57dc62057d41211ac018056c19c02cd544694df
+CPPLLAMA_VERSION?=9ef07800622e4c371605f9419864d15667c3558f
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
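The Makefile consumes the CPPLLAMA_VERSION hash when fetching llama.cpp sources. Checking out the same revision by hand corresponds to (upstream repository URL assumed, hash taken from the diff):

```bash
# Manually pin the same llama.cpp revision (sketch):
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp
git checkout 9ef07800622e4c371605f9419864d15667c3558f
```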
@@ -337,10 +337,12 @@ dist:
 ifeq ($(OS),Darwin)
 	$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
 else
+ifneq ($(ARCH),arm64)
 	$(MAKE) backend-assets/grpc/llama-cpp-cuda
 	$(MAKE) backend-assets/grpc/llama-cpp-hipblas
 	$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
 	$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
+endif
 endif
 	STATIC=true $(MAKE) build
 	mkdir -p release
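The new `ifneq ($(ARCH),arm64)` guard skips the CUDA, hipblas, and SYCL backend builds when targeting arm64, mirroring the per-arch handling of the CUDA keyring in the Dockerfile above; only the portable backends and the static binary are produced there. A hedged sketch of exercising both paths (command-line variable assignments always override in make):

```bash
# Native architecture; on amd64 this also builds the cuda/hipblas/sycl backends:
make dist

# Force the arm64 path so the GPU backends are skipped (hypothetical override):
make dist ARCH=arm64
```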
@@ -76,7 +76,7 @@
 					<option value="" disabled class="text-gray-400" >Select a model</option>
 
 					{{ range .ModelsConfig }}
-					<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
+					<option value="{{.ID}}" class="bg-gray-700 text-white">{{.ID}}</option>
 					{{ end }}
 				</select>
 			</div>
@@ -89,7 +89,7 @@
 				>
 					<option value="" disabled class="text-gray-400" >Select a model</option>
 					{{ range .ModelsConfig }}
-					<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
+					<option value="{{.ID}}" class="bg-gray-700 text-white">{{.ID}}</option>
 					{{ end }}
 				</select>
 			</div>
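Both dropdowns now render `{{.ID}}` instead of `{{.Name}}`, presumably so the value the UI submits matches the identifier the API reports for each model. An illustrative check against the standard OpenAI-compatible listing route (host, port, and output abridged and illustrative):

```bash
# List model identifiers as the API reports them:
curl http://localhost:8080/v1/models
# => {"object":"list","data":[{"id":"gemma-2-9b-it","object":"model"}, ...]}
```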
@@ -1,3 +1,3 @@
 {
-    "version": "v2.17.1"
+    "version": "v2.18.0"
 }
@@ -318,28 +318,28 @@
     - https://huggingface.co/google/gemma-2-27b-it
     - https://huggingface.co/bartowski/gemma-2-27b-it-GGUF
   description: |
     Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights for both pre-trained variants and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as a laptop, desktop or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone.
   overrides:
     parameters:
       model: gemma-2-27b-it-Q4_K_M.gguf
   files:
     - filename: gemma-2-27b-it-Q4_K_M.gguf
-      sha256: e54e7b800d464af4fa9966020e4a1b1d386cd9346de2d851a7bfe7d0797c44c4
       uri: huggingface://bartowski/gemma-2-27b-it-GGUF/gemma-2-27b-it-Q4_K_M.gguf
+      sha256: ca86fbdb791842cf2e5eb276a6916e326b3b5d58d9ab60ee3e18b1c6f01fc181
 - !!merge <<: *gemma
   name: "gemma-2-9b-it"
   urls:
     - https://huggingface.co/google/gemma-2-9b-it
     - https://huggingface.co/bartowski/gemma-2-9b-it-GGUF
   description: |
     Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights for both pre-trained variants and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as a laptop, desktop or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone.
   overrides:
     parameters:
       model: gemma-2-9b-it-Q4_K_M.gguf
   files:
     - filename: gemma-2-9b-it-Q4_K_M.gguf
-      sha256: 0874bf61be2e4b3d0a4a75e58fbd442dc410745d513c1e1e5de0b54ae33e65db
       uri: huggingface://bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q4_K_M.gguf
+      sha256: c70fd20caec79fb953b83031c46ddea4e99905835a66af7b8a856aa1b2534614
 - &llama3
   url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
   icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
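Only the pinned sha256 values change for the two gemma-2 GGUF files; the URIs stay the same, presumably because the upstream files were regenerated, so a previously downloaded copy will no longer match the gallery pin. Verifying a local download against the new checksum by hand:

```bash
# Compare a local download against the new pinned checksum (gemma-2-27b shown):
echo "ca86fbdb791842cf2e5eb276a6916e326b3b5d58d9ab60ee3e18b1c6f01fc181  gemma-2-27b-it-Q4_K_M.gguf" | sha256sum -c -
```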
@@ -1268,6 +1268,83 @@
     - filename: LLaMA3-iterative-DPO-final-Q4_K_M.gguf
       sha256: 480703ff85af337e1db2a9d9a678a3ac8ca0802e366b14d9c59b81d3fc689da8
       uri: huggingface://bartowski/LLaMA3-iterative-DPO-final-GGUF/LLaMA3-iterative-DPO-final-Q4_K_M.gguf
+- !!merge <<: *llama3
+  name: "new-dawn-llama-3-70b-32K-v1.0"
+  urls:
+    - https://huggingface.co/bartowski/New-Dawn-Llama-3-70B-32K-v1.0-GGUF
+    - https://huggingface.co/sophosympatheia/New-Dawn-Llama-3-70B-32K-v1.0
+  icon: https://imgur.com/tKzncGo.png
+  description: |
+    This model is a multi-level SLERP merge of several Llama 3 70B variants. See the merge recipe below for details. I extended the context window for this model out to 32K by snagging some layers from abacusai/Smaug-Llama-3-70B-Instruct-32K using a technique similar to what I used for Midnight Miqu, which was further honed by jukofyork.
+    This model is uncensored. You are responsible for whatever you do with it.
+
+    This model was designed for roleplaying and storytelling and I think it does well at both. It may also perform well at other tasks, but I have not tested its performance in other areas.
+  overrides:
+    parameters:
+      model: New-Dawn-Llama-3-70B-32K-v1.0-Q4_K_M.gguf
+  files:
+    - filename: New-Dawn-Llama-3-70B-32K-v1.0-Q4_K_M.gguf
+      sha256: 30561ae5decac4ad46775c76a9a40fb43436ade96bc132b4b9cc6749b9e2f448
+      uri: huggingface://bartowski/New-Dawn-Llama-3-70B-32K-v1.0-GGUF/New-Dawn-Llama-3-70B-32K-v1.0-Q4_K_M.gguf
+- !!merge <<: *llama3
+  name: "l3-aethora-15b-v2"
+  urls:
+    - https://huggingface.co/bartowski/L3-Aethora-15B-V2-GGUF
+    - https://huggingface.co/ZeusLabs/L3-Aethora-15B-V2
+  icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/yJpwVd5UTnAVDoEPVVCS1.png
+  description: |
+    L3-Aethora-15B v2 is an advanced language model built upon the Llama 3 architecture. It employs state-of-the-art training techniques and a curated dataset to deliver enhanced performance across a wide range of tasks.
+  overrides:
+    parameters:
+      model: L3-Aethora-15B-V2-Q4_K_M.gguf
+  files:
+    - filename: L3-Aethora-15B-V2-Q4_K_M.gguf
+      sha256: 014a215739e1574e354780f218776e54807548d0c32555274c4d96d7628f29b6
+      uri: huggingface://bartowski/L3-Aethora-15B-V2-GGUF/L3-Aethora-15B-V2-Q4_K_M.gguf
+- !!merge <<: *llama3
+  name: "bungo-l3-8b-iq-imatrix"
+  urls:
+    - https://huggingface.co/Lewdiculous/Bungo-L3-8B-GGUF-IQ-Imatrix-Request
+  icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/ezaxE50ef-7RsFi3gUbNp.webp
+  description: |
+    An experimental model that turned out really well. It scores high on the Chai leaderboard (as slerp8bv2 there) and feels smarter than average L3 merges for RP.
+  overrides:
+    parameters:
+      model: Bungo-L3-8B-Q4_K_M-imat.gguf
+  files:
+    - filename: Bungo-L3-8B-Q4_K_M-imat.gguf
+      sha256: 88d0139954e8f9525b80636a6269df885008c4837a1332f84f9a5dc6f37c9b8f
+      uri: huggingface://Lewdiculous/Bungo-L3-8B-GGUF-IQ-Imatrix-Request/Bungo-L3-8B-Q4_K_M-imat.gguf
+- !!merge <<: *llama3
+  name: "llama3-8b-darkidol-2.1-uncensored-1048k-iq-imatrix"
+  urls:
+    - https://huggingface.co/LWDCLS/llama3-8B-DarkIdol-2.1-Uncensored-1048K-GGUF-IQ-Imatrix-Request
+  icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/tKL5W1G5WCHm4609LEmiM.png
+  description: |
+    The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones.
+    Uncensored, 1048K context.
+  overrides:
+    parameters:
+      model: llama3-8B-DarkIdol-2.1-Uncensored-1048K-Q4_K_M-imat.gguf
+  files:
+    - filename: llama3-8B-DarkIdol-2.1-Uncensored-1048K-Q4_K_M-imat.gguf
+      sha256: 86f0f1e10fc315689e09314aebb7354bb40d8fe95de008d21a75dc8fff1cd2fe
+      uri: huggingface://LWDCLS/llama3-8B-DarkIdol-2.1-Uncensored-1048K-GGUF-IQ-Imatrix-Request/llama3-8B-DarkIdol-2.1-Uncensored-1048K-Q4_K_M-imat.gguf
+- !!merge <<: *llama3
+  name: "llama3-turbcat-instruct-8b"
+  urls:
+    - https://huggingface.co/turboderp/llama3-turbcat-instruct-8b
+    - https://huggingface.co/bartowski/llama3-turbcat-instruct-8b-GGUF
+  icon: https://huggingface.co/turboderp/llama3-turbcat-instruct-8b/resolve/main/8.png
+  description: |
+    This is a direct upgrade over cat 70B, with 2x the dataset size (2GB -> 5GB) and added Chinese support with quality on par with the original English dataset. The medical COT portion of the dataset was sponsored by steelskull, and the action-packed character play portion was donated by Gryphe (aesir dataset). Note that 8b is based on llama3 with limited Chinese support due to the base model choice. The chat format in 8b is llama3. The 72b has more comprehensive Chinese support and its format will be chatml.
+  overrides:
+    parameters:
+      model: llama3-turbcat-instruct-8b-Q4_K_M.gguf
+  files:
+    - filename: llama3-turbcat-instruct-8b-Q4_K_M.gguf
+      sha256: a9a36e3220d901a8ad80c75608a81aaeed3a9cdf111247462bf5e3443aad5461
+      uri: huggingface://bartowski/llama3-turbcat-instruct-8b-GGUF/llama3-turbcat-instruct-8b-Q4_K_M.gguf
 - &dolphin
   name: "dolphin-2.9-llama3-8b"
   url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
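Each new entry reuses the shared `*llama3` base via a YAML merge key (`!!merge <<: *llama3`), overriding only the name, urls, description, and file pins. Once the gallery index ships, an entry can be installed through LocalAI's documented gallery endpoint; a hedged example (host, port, and the `localai@` gallery prefix depend on the deployment):

```bash
# Install one of the newly added gallery models via the gallery API (sketch):
curl http://localhost:8080/models/apply \
  -H "Content-Type: application/json" \
  -d '{"id": "localai@llama3-turbcat-instruct-8b"}'
```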
@@ -2627,6 +2704,69 @@
     - filename: "Codestral-22B-v0.1-Q4_K_M.gguf"
       uri: "huggingface://bartowski/Codestral-22B-v0.1-GGUF/Codestral-22B-v0.1-Q4_K_M.gguf"
       sha256: 003e48ed892850b80994fcddca2bd6b833b092a4ef2db2853c33a3144245e06c
+- &llm-compiler
+  url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
+  name: "llm-compiler-13b-imat"
+  license: other
+  description: |
+    LLM Compiler is a state-of-the-art LLM that builds upon Code Llama with improved performance for code optimization and compiler reasoning.
+    LLM Compiler is free for both research and commercial use.
+    LLM Compiler is available in two flavors:
+
+    LLM Compiler, the foundational models, pretrained on over 500B tokens of LLVM-IR, x86_64, ARM, and CUDA assembly codes and trained to predict the effect of LLVM optimizations;
+    and LLM Compiler FTD, which is further fine-tuned to predict the best optimizations for code in LLVM assembly to reduce code size, and to disassemble assembly code to LLVM-IR.
+  urls:
+    - https://huggingface.co/legraphista/llm-compiler-13b-IMat-GGUF
+    - https://huggingface.co/facebook/llm-compiler-13b
+  tags:
+    - llm
+    - gguf
+    - gpu
+    - code
+    - cpu
+  overrides:
+    parameters:
+      model: llm-compiler-13b.Q4_K.gguf
+  files:
+    - filename: "llm-compiler-13b.Q4_K.gguf"
+      uri: "huggingface://legraphista/llm-compiler-13b-IMat-GGUF/llm-compiler-13b.Q4_K.gguf"
+      sha256: dad41a121d0d67432c289aba8ffffc93159e2b24ca3d1c62e118c9f4cbf0c890
+- !!merge <<: *llm-compiler
+  name: "llm-compiler-13b-ftd"
+  urls:
+    - https://huggingface.co/QuantFactory/llm-compiler-13b-ftd-GGUF
+    - https://huggingface.co/facebook/llm-compiler-13b-ftd
+  overrides:
+    parameters:
+      model: llm-compiler-13b-ftd.Q4_K_M.gguf
+  files:
+    - filename: "llm-compiler-13b-ftd.Q4_K_M.gguf"
+      uri: "huggingface://QuantFactory/llm-compiler-13b-ftd-GGUF/llm-compiler-13b-ftd.Q4_K_M.gguf"
+      sha256: a5d19ae6b3fbe6724784363161b66cd2c8d8a3905761c0fb08245b3c03697db1
+- !!merge <<: *llm-compiler
+  name: "llm-compiler-7b-imat-GGUF"
+  urls:
+    - https://huggingface.co/legraphista/llm-compiler-7b-IMat-GGUF
+    - https://huggingface.co/facebook/llm-compiler-7b
+  overrides:
+    parameters:
+      model: llm-compiler-7b.Q4_K.gguf
+  files:
+    - filename: "llm-compiler-7b.Q4_K.gguf"
+      uri: "huggingface://legraphista/llm-compiler-7b-IMat-GGUF/llm-compiler-7b.Q4_K.gguf"
+      sha256: 84926979701fa4591ff5ede94a6c5829a62efa620590e5815af984707d446926
+- !!merge <<: *llm-compiler
+  name: "llm-compiler-7b-ftd-imat"
+  urls:
+    - https://huggingface.co/legraphista/llm-compiler-7b-ftd-IMat-GGUF
+    - https://huggingface.co/facebook/llm-compiler-7b-ftd
+  overrides:
+    parameters:
+      model: llm-compiler-7b-ftd.Q4_K.gguf
+  files:
+    - filename: "llm-compiler-7b-ftd.Q4_K.gguf"
+      uri: "huggingface://legraphista/llm-compiler-7b-ftd-IMat-GGUF/llm-compiler-7b-ftd.Q4_K.gguf"
+      sha256: d862dd18ed335413787d0ad196522a9902a3c10a6456afdab8721822cb0ddde8
 - &openvino
   ### START OpenVINO
   url: "github:mudler/LocalAI/gallery/openvino.yaml@master"