Compare commits


14 Commits

Author SHA1 Message Date
Ettore Di Giacinto
95f773ee4b experiment: build with a single image with all the deps
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-01 19:43:18 +02:00
LocalAI [bot]
ad85c5a1e7 models(gallery): ⬆️ update checksum (#2690)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-01 00:23:58 +00:00
LocalAI [bot]
421eb8a727 ⬆️ Update ggerganov/llama.cpp (#2689)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-01 00:20:11 +00:00
Ettore Di Giacinto
b7ff441cc0 models(gallery): add llama3-turbcat-instruct-8b (#2687)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-06-30 14:40:01 +02:00
LocalAI [bot]
83d867ad46 ⬆️ Update ggerganov/llama.cpp (#2683)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-06-30 01:51:51 +00:00
Ettore Di Giacinto
6acba2bcbe models(gallery): add llm-compiler (#2684)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-06-29 23:32:01 +02:00
Ettore Di Giacinto
6a2a10603c fix(talk): identify the model by ID instead of name (#2685)
This fixes a breakage in rendering the template. The models passed to the renderer now carry the ID field rather than the Name.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-06-29 23:31:50 +02:00
Ettore Di Giacinto
356907a5cf models(gallery): add llama3-8b-darkidol-2.1-uncensored-1048k-iq-imatrix (#2686)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-06-29 23:31:37 +02:00
Ettore Di Giacinto
7ab7a188d0 models(gallery): add bungo-l3-8b-iq-imatrix (#2682)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-06-29 22:05:27 +02:00
Ettore Di Giacinto
ff1a5bfc62 models(gallery): add l3-aethora-15b-v2 (#2679)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-06-29 11:33:00 +02:00
LocalAI [bot]
522f185baf ⬆️ Update docs version mudler/LocalAI (#2676)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-06-29 01:20:50 +00:00
LocalAI [bot]
f7b5a4ca7d models(gallery): ⬆️ update checksum (#2678)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-06-29 01:06:08 +00:00
LocalAI [bot]
1d30955677 ⬆️ Update ggerganov/llama.cpp (#2677)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-06-29 00:43:02 +00:00
Ettore Di Giacinto
d3307e93d3 models(gallery): add new-dawn-llama (#2672)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-06-28 23:33:27 +02:00
5 changed files with 242 additions and 17 deletions

View File: Dockerfile

@@ -5,6 +5,12 @@ ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
# The requirements-core target is common to all images. Nothing should be placed in requirements-core unless every single build will use it.
FROM ${BASE_IMAGE} AS requirements-core
# TODO(mudler): install all accelerators here
# and use make dist instead of build.
# TODO(mudler): modify make dist to build also go-piper and stablediffusion
# This way the same binary can work for everything(!)
# TODO(mudler): also make sure that we bundle all the required libs in the backend-assets/lib
# For the GPU-accelerated builds we are going to generate a tar file instead, which will be extracted by the bash installer; the libs will also be installed in the final docker image, so there is no need to pull ALL the dependencies
USER root
@@ -49,10 +55,12 @@ ENV PATH /usr/local/cuda/bin:${PATH}
# HipBLAS requirements
ENV PATH /opt/rocm/bin:${PATH}
# OpenBLAS requirements and stable diffusion
# OpenBLAS requirements and stable diffusion, tts (espeak)
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libopenblas-dev \
espeak-ng \
espeak \
libopencv-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
@@ -77,8 +85,6 @@ ENV PATH="/root/.cargo/bin:${PATH}"
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
RUN apt-get update && \
apt-get install -y --no-install-recommends \
espeak-ng \
espeak \
python3-pip \
python-is-python3 \
python3-dev \
@@ -93,9 +99,8 @@ RUN pip install --user grpcio-tools
###################################
###################################
# The requirements-drivers target is for BUILD_TYPE-specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
# This target will be built on top of requirements-core or requirements-extras as determined by the IMAGE_TYPE build-arg
FROM requirements-${IMAGE_TYPE} AS requirements-drivers
# Base image for the build-type.
FROM requirements-${IMAGE_TYPE} AS run-requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
@@ -186,6 +191,82 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
ldconfig \
; fi
# The build-requirements-drivers target is for BUILD_TYPE-specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
# This target will be built on top of requirements-core or requirements-extras as determined by the IMAGE_TYPE build-arg
FROM requirements-${IMAGE_TYPE} AS build-requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=5
ENV BUILD_TYPE=${BUILD_TYPE}
# Vulkan requirements
RUN <<EOT bash
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils wget gpg-agent && \
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
apt-get update && \
apt-get install -y \
vulkan-sdk && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
EOT
# CuBLAS requirements
RUN <<EOT bash
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils
if [ "amd64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
fi
if [ "arm64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
fi
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
apt-get install -y --no-install-recommends \
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
EOT
# clblas
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libclblast-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# intel
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && apt update && apt install -y intel-basekit && apt-get clean && \
rm -rf /var/lib/apt/lists/*
# hipblas
RUN wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && apt-get update && \
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/6.1.2/ubuntu jammy main" \
| tee /etc/apt/sources.list.d/amdgpu.list && \
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.1.2 jammy main" | tee --append /etc/apt/sources.list.d/rocm.list && printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | tee /etc/apt/preferences.d/rocm-pin-600 && \
apt update && \
apt-get install -y --no-install-recommends \
hipblas-dev rocm-dev \
rocblas-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
ldconfig
###################################
###################################
@@ -237,7 +318,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
# Adjustments to the build process should likely be made here.
FROM requirements-drivers AS builder
FROM build-requirements-drivers AS builder
ARG GO_TAGS="stablediffusion tts p2p"
ARG GRPC_BACKENDS
@@ -282,7 +363,8 @@ COPY --from=grpc /opt/grpc /usr/local
# Rebuild with defaults backends
WORKDIR /build
RUN make build
# Need to build tts and stablediffusion separately first (?)
RUN make dist && rm release/*.sha256 && mv release/* local-ai
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
@@ -294,7 +376,7 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
# This is the final target. The result of this target will be the image uploaded to the registry.
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
FROM requirements-drivers
FROM run-requirements-drivers
ARG FFMPEG
ARG BUILD_TYPE
@@ -339,6 +421,7 @@ RUN make prepare-sources
COPY --from=builder /build/local-ai ./
# Copy shared libraries for piper
# TODO(mudler): bundle these libs in backend-assets/lib/ (like we do for llama.cpp deps)
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
# do not let stablediffusion rebuild (requires an older version of absl)
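
To make the experiment above concrete, here is a minimal sketch of how the all-in-one image might be built locally. The IMAGE_TYPE, BUILD_TYPE and CUDA version build-args come from the Dockerfile stages shown in this diff; the specific values and the image tag below are illustrative assumptions, not defaults introduced by this change.

  # Hedged sketch: build the experimental single image locally.
  # IMAGE_TYPE selects requirements-core vs requirements-extras; BUILD_TYPE and the
  # CUDA version args are consumed by the new build-requirements-drivers stage above.
  docker build \
    --build-arg IMAGE_TYPE=core \
    --build-arg BUILD_TYPE=cublas \
    --build-arg CUDA_MAJOR_VERSION=12 \
    --build-arg CUDA_MINOR_VERSION=5 \
    -t local-ai:single-image-experiment .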

View File: Makefile

@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=e57dc62057d41211ac018056c19c02cd544694df
CPPLLAMA_VERSION?=9ef07800622e4c371605f9419864d15667c3558f
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -337,10 +337,12 @@ dist:
ifeq ($(OS),Darwin)
$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
else
ifneq ($(ARCH),arm64)
$(MAKE) backend-assets/grpc/llama-cpp-cuda
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
endif
endif
STATIC=true $(MAKE) build
mkdir -p release
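
For orientation, a hedged sketch of what the updated dist target does on a plain Linux/amd64 checkout; the expected output layout is inferred from the Dockerfile step above (make dist && rm release/*.sha256 && mv release/* local-ai) rather than stated in this diff.

  # Hedged sketch: running the updated dist target outside Docker (Linux/amd64 assumed).
  # The extra llama-cpp backends (cuda, hipblas, sycl_f16, sycl_f32) are built first,
  # then a static local-ai build is produced and collected under release/ together with
  # .sha256 checksums. On Darwin or arm64 the backend targets are skipped by the guards above.
  make dist
  ls release/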

View File

@@ -76,7 +76,7 @@
<option value="" disabled class="text-gray-400" >Select a model</option>
{{ range .ModelsConfig }}
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
<option value="{{.ID}}" class="bg-gray-700 text-white">{{.ID}}</option>
{{ end }}
</select>
</div>
@@ -89,7 +89,7 @@
>
<option value="" disabled class="text-gray-400" >Select a model</option>
{{ range .ModelsConfig }}
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
<option value="{{.ID}}" class="bg-gray-700 text-white">{{.ID}}</option>
{{ end }}
</select>
</div>

View File

@@ -1,3 +1,3 @@
{
"version": "v2.17.1"
"version": "v2.18.0"
}

View File: gallery/index.yaml

@@ -318,28 +318,28 @@
- https://huggingface.co/google/gemma-2-27b-it
- https://huggingface.co/bartowski/gemma-2-27b-it-GGUF
description: |
Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights for both pre-trained variants and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as a laptop, desktop or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone.
Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights for both pre-trained variants and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as a laptop, desktop or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone.
overrides:
parameters:
model: gemma-2-27b-it-Q4_K_M.gguf
files:
- filename: gemma-2-27b-it-Q4_K_M.gguf
sha256: e54e7b800d464af4fa9966020e4a1b1d386cd9346de2d851a7bfe7d0797c44c4
uri: huggingface://bartowski/gemma-2-27b-it-GGUF/gemma-2-27b-it-Q4_K_M.gguf
sha256: ca86fbdb791842cf2e5eb276a6916e326b3b5d58d9ab60ee3e18b1c6f01fc181
- !!merge <<: *gemma
name: "gemma-2-9b-it"
urls:
- https://huggingface.co/google/gemma-2-9b-it
- https://huggingface.co/bartowski/gemma-2-9b-it-GGUF
description: |
Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights for both pre-trained variants and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as a laptop, desktop or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone.
Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights for both pre-trained variants and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as a laptop, desktop or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone.
overrides:
parameters:
model: gemma-2-9b-it-Q4_K_M.gguf
files:
- filename: gemma-2-9b-it-Q4_K_M.gguf
sha256: 0874bf61be2e4b3d0a4a75e58fbd442dc410745d513c1e1e5de0b54ae33e65db
uri: huggingface://bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q4_K_M.gguf
sha256: c70fd20caec79fb953b83031c46ddea4e99905835a66af7b8a856aa1b2534614
- &llama3
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
@@ -1268,6 +1268,83 @@
- filename: LLaMA3-iterative-DPO-final-Q4_K_M.gguf
sha256: 480703ff85af337e1db2a9d9a678a3ac8ca0802e366b14d9c59b81d3fc689da8
uri: huggingface://bartowski/LLaMA3-iterative-DPO-final-GGUF/LLaMA3-iterative-DPO-final-Q4_K_M.gguf
- !!merge <<: *llama3
name: "new-dawn-llama-3-70b-32K-v1.0"
urls:
- https://huggingface.co/bartowski/New-Dawn-Llama-3-70B-32K-v1.0-GGUF
- https://huggingface.co/sophosympatheia/New-Dawn-Llama-3-70B-32K-v1.0
icon: https://imgur.com/tKzncGo.png
description: |
This model is a multi-level SLERP merge of several Llama 3 70B variants. See the merge recipe below for details. I extended the context window for this model out to 32K by snagging some layers from abacusai/Smaug-Llama-3-70B-Instruct-32K using a technique similar to what I used for Midnight Miqu, which was further honed by jukofyork.
This model is uncensored. You are responsible for whatever you do with it.
This model was designed for roleplaying and storytelling and I think it does well at both. It may also perform well at other tasks but I have not tested its performance in other areas.
overrides:
parameters:
model: New-Dawn-Llama-3-70B-32K-v1.0-Q4_K_M.gguf
files:
- filename: New-Dawn-Llama-3-70B-32K-v1.0-Q4_K_M.gguf
sha256: 30561ae5decac4ad46775c76a9a40fb43436ade96bc132b4b9cc6749b9e2f448
uri: huggingface://bartowski/New-Dawn-Llama-3-70B-32K-v1.0-GGUF/New-Dawn-Llama-3-70B-32K-v1.0-Q4_K_M.gguf
- !!merge <<: *llama3
name: "l3-aethora-15b-v2"
urls:
- https://huggingface.co/bartowski/L3-Aethora-15B-V2-GGUF
- https://huggingface.co/ZeusLabs/L3-Aethora-15B-V2
icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/yJpwVd5UTnAVDoEPVVCS1.png
description: |
L3-Aethora-15B v2 is an advanced language model built upon the Llama 3 architecture. It employs state-of-the-art training techniques and a curated dataset to deliver enhanced performance across a wide range of tasks.
overrides:
parameters:
model: L3-Aethora-15B-V2-Q4_K_M.gguf
files:
- filename: L3-Aethora-15B-V2-Q4_K_M.gguf
sha256: 014a215739e1574e354780f218776e54807548d0c32555274c4d96d7628f29b6
uri: huggingface://bartowski/L3-Aethora-15B-V2-GGUF/L3-Aethora-15B-V2-Q4_K_M.gguf
- !!merge <<: *llama3
name: "bungo-l3-8b-iq-imatrix"
urls:
- https://huggingface.co/Lewdiculous/Bungo-L3-8B-GGUF-IQ-Imatrix-Request
icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/ezaxE50ef-7RsFi3gUbNp.webp
description: |
An experimental model that turned out really well. It scores high on the Chai leaderboard (slerp8bv2 there) and feels smarter than average L3 merges for RP.
overrides:
parameters:
model: Bungo-L3-8B-Q4_K_M-imat.gguf
files:
- filename: Bungo-L3-8B-Q4_K_M-imat.gguf
sha256: 88d0139954e8f9525b80636a6269df885008c4837a1332f84f9a5dc6f37c9b8f
uri: huggingface://Lewdiculous/Bungo-L3-8B-GGUF-IQ-Imatrix-Request/Bungo-L3-8B-Q4_K_M-imat.gguf
- !!merge <<: *llama3
name: "llama3-8b-darkidol-2.1-uncensored-1048k-iq-imatrix"
urls:
- https://huggingface.co/LWDCLS/llama3-8B-DarkIdol-2.1-Uncensored-1048K-GGUF-IQ-Imatrix-Request
icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/tKL5W1G5WCHm4609LEmiM.png
description: |
The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones.
Uncensored 1048K
overrides:
parameters:
model: llama3-8B-DarkIdol-2.1-Uncensored-1048K-Q4_K_M-imat.gguf
files:
- filename: llama3-8B-DarkIdol-2.1-Uncensored-1048K-Q4_K_M-imat.gguf
sha256: 86f0f1e10fc315689e09314aebb7354bb40d8fe95de008d21a75dc8fff1cd2fe
uri: huggingface://LWDCLS/llama3-8B-DarkIdol-2.1-Uncensored-1048K-GGUF-IQ-Imatrix-Request/llama3-8B-DarkIdol-2.1-Uncensored-1048K-Q4_K_M-imat.gguf
- !!merge <<: *llama3
name: "llama3-turbcat-instruct-8b"
urls:
- https://huggingface.co/turboderp/llama3-turbcat-instruct-8b
- https://huggingface.co/bartowski/llama3-turbcat-instruct-8b-GGUF
icon: https://huggingface.co/turboderp/llama3-turbcat-instruct-8b/resolve/main/8.png
description: |
This is a direct upgrade over cat 70B, with 2x the dataset size (2GB -> 5GB) and added Chinese support with quality on par with the original English dataset. The medical COT portion of the dataset has been sponsored by steelskull, and the action-packed character play portion was donated by Gryphe (aesir dataset). Note that 8b is based on llama3 with limited Chinese support due to base model choice. The chat format in 8b is llama3. The 72b has more comprehensive Chinese support and the format will be chatml.
overrides:
parameters:
model: llama3-turbcat-instruct-8b-Q4_K_M.gguf
files:
- filename: llama3-turbcat-instruct-8b-Q4_K_M.gguf
sha256: a9a36e3220d901a8ad80c75608a81aaeed3a9cdf111247462bf5e3443aad5461
uri: huggingface://bartowski/llama3-turbcat-instruct-8b-GGUF/llama3-turbcat-instruct-8b-Q4_K_M.gguf
- &dolphin
name: "dolphin-2.9-llama3-8b"
url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
@@ -2627,6 +2704,69 @@
- filename: "Codestral-22B-v0.1-Q4_K_M.gguf"
uri: "huggingface://bartowski/Codestral-22B-v0.1-GGUF/Codestral-22B-v0.1-Q4_K_M.gguf"
sha256: 003e48ed892850b80994fcddca2bd6b833b092a4ef2db2853c33a3144245e06c
- &llm-compiler
url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
name: "llm-compiler-13b-imat"
license: other
description: |
LLM Compiler is a state-of-the-art LLM that builds upon Code Llama with improved performance for code optimization and compiler reasoning.
LLM Compiler is free for both research and commercial use.
LLM Compiler is available in two flavors:
LLM Compiler, the foundational models, pretrained on over 500B tokens of LLVM-IR, x86_64, ARM, and CUDA assembly codes and trained to predict the effect of LLVM optimizations;
and LLM Compiler FTD, which is further fine-tuned to predict the best optimizations for code in LLVM assembly to reduce code size, and to disassemble assembly code to LLVM-IR.
urls:
- https://huggingface.co/legraphista/llm-compiler-13b-IMat-GGUF
- https://huggingface.co/facebook/llm-compiler-13b
tags:
- llm
- gguf
- gpu
- code
- cpu
overrides:
parameters:
model: llm-compiler-13b.Q4_K.gguf
files:
- filename: "llm-compiler-13b.Q4_K.gguf"
uri: "huggingface://legraphista/llm-compiler-13b-IMat-GGUF/llm-compiler-13b.Q4_K.gguf"
sha256: dad41a121d0d67432c289aba8ffffc93159e2b24ca3d1c62e118c9f4cbf0c890
- !!merge <<: *llm-compiler
name: "llm-compiler-13b-ftd"
urls:
- https://huggingface.co/QuantFactory/llm-compiler-13b-ftd-GGUF
- https://huggingface.co/facebook/llm-compiler-13b-ftd
overrides:
parameters:
model: llm-compiler-13b-ftd.Q4_K_M.gguf
files:
- filename: "llm-compiler-13b-ftd.Q4_K_M.gguf"
uri: "huggingface://QuantFactory/llm-compiler-13b-ftd-GGUF/llm-compiler-13b-ftd.Q4_K_M.gguf"
sha256: a5d19ae6b3fbe6724784363161b66cd2c8d8a3905761c0fb08245b3c03697db1
- !!merge <<: *llm-compiler
name: "llm-compiler-7b-imat-GGUF"
urls:
- https://huggingface.co/legraphista/llm-compiler-7b-IMat-GGUF
- https://huggingface.co/facebook/llm-compiler-7b
overrides:
parameters:
model: llm-compiler-7b.Q4_K.gguf
files:
- filename: "llm-compiler-7b.Q4_K.gguf"
uri: "huggingface://legraphista/llm-compiler-7b-IMat-GGUF/llm-compiler-7b.Q4_K.gguf"
sha256: 84926979701fa4591ff5ede94a6c5829a62efa620590e5815af984707d446926
- !!merge <<: *llm-compiler
name: "llm-compiler-7b-ftd-imat"
urls:
- https://huggingface.co/legraphista/llm-compiler-7b-ftd-IMat-GGUF
- https://huggingface.co/facebook/llm-compiler-7b-ftd
overrides:
parameters:
model: llm-compiler-7b-ftd.Q4_K.gguf
files:
- filename: "llm-compiler-7b-ftd.Q4_K.gguf"
uri: "huggingface://legraphista/llm-compiler-7b-ftd-IMat-GGUF/llm-compiler-7b-ftd.Q4_K.gguf"
sha256: d862dd18ed335413787d0ad196522a9902a3c10a6456afdab8721822cb0ddde8
- &openvino
### START OpenVINO
url: "github:mudler/LocalAI/gallery/openvino.yaml@master"