Mirror of https://github.com/ollama/ollama.git, synced 2026-01-03 13:10:17 -05:00

Compare commits: brucemacd/... ... brucemacd/... (20 commits)

Commit SHA1s: c2b11611a8, 90698c7d15, 4b4a5a28bf, 3c95c21ddf, 8ab13e4d3e, 144f63e2fb, 294b6f5a22, 7bb356c680, 021817e59a, a420a453b4, 42cf4db601, 93a8daf285, a041b4df7c, 2539f2dbf9, 61676fb506, f6f3713001, a30f347201, 74ea4fb604, 6982e9cc96, ab39872cb4
.gitattributes (vendored, 9 changed lines)

```diff
@@ -7,5 +7,14 @@ llama/**/*.cuh linguist-vendored
 llama/**/*.m linguist-vendored
 llama/**/*.metal linguist-vendored
 
+ml/backend/**/*.c linguist-vendored
+ml/backend/**/*.h linguist-vendored
+ml/backend/**/*.cpp linguist-vendored
+ml/backend/**/*.hpp linguist-vendored
+ml/backend/**/*.cu linguist-vendored
+ml/backend/**/*.cuh linguist-vendored
+ml/backend/**/*.m linguist-vendored
+ml/backend/**/*.metal linguist-vendored
+
 * text=auto
 *.go text eol=lf
```
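The added patterns can be sanity-checked locally; git reports which attributes apply to any path (the path below is illustrative):

```bash
# Ask git which attributes apply to a newly vendored path.
git check-attr linguist-vendored -- ml/backend/ggml/ggml/src/ggml.c
```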
.github/workflows/release.yaml (vendored, 270 changed lines)

```
@@ -478,243 +478,77 @@ jobs:
          dist/OllamaSetup.exe
          dist/ollama-windows-*.zip

  # Linux x86 assets built using the container based build
  build-linux-amd64:
  build-linux:
    environment: release
    runs-on: linux
    env:
      PLATFORM: linux/amd64
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Set Version
        shell: bash
        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
      - run: |
          ./scripts/build_linux.sh
      - uses: actions/upload-artifact@v4
        with:
          name: dist-linux-amd64
          path: |
            dist/*linux*
            !dist/*-cov

  # Linux ARM assets built using the container based build
  # (at present, docker isn't pre-installed on arm ubuntu images)
  build-linux-arm64:
    environment: release
    runs-on: linux-arm64
    env:
      PLATFORM: linux/arm64
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Set Version
        shell: bash
        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
      - name: 'Install Docker'
        run: |
          # Add Docker's official GPG key:
          env
          uname -a
          sudo apt-get update
          sudo apt-get install -y ca-certificates curl
          sudo install -m 0755 -d /etc/apt/keyrings
          sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
          sudo chmod a+r /etc/apt/keyrings/docker.asc

          # Add the repository to Apt sources:
          echo \
            "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
            $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
            sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
          sudo apt-get update
          sudo apt-get install -y docker-ce docker-ce-cli containerd.io
          sudo usermod -aG docker $USER
          sudo apt-get install acl
          sudo setfacl --modify user:$USER:rw /var/run/docker.sock
      - run: |
          ./scripts/build_linux.sh
      - uses: actions/upload-artifact@v4
        with:
          name: dist-linux-arm64
          path: |
            dist/*linux*
            !dist/*-cov

  # Container image build
  build-container-image:
    environment: release
    strategy:
      matrix:
        runner:
          - linux
          - linux-arm64
    runs-on: ${{ matrix.runner }}
    env:
      FINAL_IMAGE_REPO: ollama/ollama
        include:
          - os: linux
            arch: amd64
            targets: [archive, rocm]
          - os: linux
            arch: arm64
            targets: [archive]
    steps:
      - uses: actions/checkout@v4
      - uses: docker/setup-qemu-action@v3
      - uses: docker/setup-buildx-action@v3
      - run: |
          apt-get update && apt-get install pigz
          for TARGET in ${{ matrix.targets }}; do docker buildx build --platform $PLATFORM --target $TARGET --output type=local,dest=dist/$PLATFORM .; done
          tar c -C dist/$PLATFORM . | pigz -9cv >dist/ollama-${PLATFORM//\//-}.tar.gz
        env:
          PLATFORM: ${{ matrix.os }}/${{ matrix.arch }}
      - uses: actions/upload-artifact@v4
        with:
          submodules: recursive
      - name: 'Install Docker'
        if: ${{ startsWith(matrix.runner, 'linux-arm64') }}
        run: |
          sudo apt-get update
          sudo apt-get install -y ca-certificates curl
          sudo install -m 0755 -d /etc/apt/keyrings
          sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
          sudo chmod a+r /etc/apt/keyrings/docker.asc
          echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
            $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
            sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
          sudo apt-get update
          sudo apt-get install -y docker-ce docker-ce-cli containerd.io
          sudo usermod -aG docker $USER
          sudo apt-get install acl
          sudo setfacl --modify user:$USER:rw /var/run/docker.sock
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.FINAL_IMAGE_REPO }}
          flavor: |
            latest=false
          tags: |
            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
            type=semver,pattern={{version}}
      - name: Set Version
        shell: bash
        run: |
          machine=$(uname -m)
          case ${machine} in
            x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;;
            aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;;
          esac >>$GITHUB_ENV
          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ vars.DOCKER_USER }}
          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
      - name: Build and push by digest
        id: build
        uses: docker/build-push-action@v6
        with:
          context: "."
          platforms: linux/${{ env.ARCH }}
          build-args: |
            GOFLAGS
          outputs: type=image,name=${{ env.FINAL_IMAGE_REPO }},push-by-digest=true,name-canonical=true,push=true
      - name: Export digest
        run: |
          mkdir -p /tmp/digests
          digest="${{ steps.build.outputs.digest }}"
          touch "/tmp/digests/${digest#sha256:}"
      - name: Upload digest
        uses: actions/upload-artifact@v4
        with:
          name: digests-${{ env.PLATFORM_PAIR }}
          path: /tmp/digests/*
          if-no-files-found: error
          retention-days: 1
  merge:
          name: dist-${{ matrix.os }}-${{ matrix.arch }}
          path: |
            dist/ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.gz

  build-docker:
    environment: release
    runs-on: linux
    needs:
      - build-container-image
    env:
      FINAL_IMAGE_REPO: ollama/ollama
    strategy:
      matrix:
        include:
          - flavor: |
              latest=auto
            platforms: linux/amd64,linux/arm64
            build-args: [GOFLAGS]
          - flavor: |
              suffix=-rocm,onlatest=false
            platforms: linux/amd64
            build-args: [GOFLAGS, FLAVOR=rocm]
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Download digests
        uses: actions/download-artifact@v4
        with:
          path: /tmp/digests
          pattern: digests-*
          merge-multiple: true
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.FINAL_IMAGE_REPO }}
          flavor: |
            latest=false
          tags: |
            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
            type=semver,pattern={{version}}
      - name: Set Version
        shell: bash
        run: |
          machine=$(uname -m)
          case ${machine} in
            x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;;
            aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;;
          esac >>$GITHUB_ENV
          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
      - name: Login to Docker Hub
        uses: docker/login-action@v3
      - uses: docker/setup-qemu-action@v2
      - uses: docker/setup-buildx-action@v2
      - uses: docker/login-action@v3
        with:
          username: ${{ vars.DOCKER_USER }}
          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
      - name: Create manifest list and push
        working-directory: /tmp/digests
        run: |
          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
            $(printf '${{ env.FINAL_IMAGE_REPO }}@sha256:%s ' *)
      - name: Inspect image
        run: |
          docker buildx imagetools inspect ${{ env.FINAL_IMAGE_REPO }}:${{ steps.meta.outputs.version }}
  build-container-image-rocm:
    environment: release
    runs-on: linux
    env:
      FINAL_IMAGE_REPO: ollama/ollama
      ARCH: amd64
      PLATFORM_PAIR: linux-amd64
    steps:
      - uses: actions/checkout@v4
      - id: metadata
        uses: docker/metadata-action@v4
        with:
          submodules: recursive
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.FINAL_IMAGE_REPO }}
          flavor: |
            latest=false
          flavor: ${{ matrix.flavor }}
          images: |
            ollama/ollama
          tags: |
            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
            type=semver,pattern={{version}}
      - name: Set Version
        shell: bash
        run: |
          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to Docker Hub
        uses: docker/login-action@v3
      - uses: docker/build-push-action@v6
        with:
          username: ${{ vars.DOCKER_USER }}
          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
      - name: Build and push by digest
        id: build
        uses: docker/build-push-action@v6
        with:
          context: "."
          target: runtime-rocm
          build-args: |
            GOFLAGS
          tags: ${{ env.FINAL_IMAGE_REPO }}:${{ env.DOCKER_METADATA_OUTPUT_VERSION}}-rocm
          context: .
          push: true
          platforms: ${{ matrix.platforms }}
          build-args: ${{ matrix.build-args }}
          tags: ${{ steps.metadata.outputs.tags }}
          labels: ${{ steps.metadata.outputs.labels }}
          cache-from: type=registry,ref=ollama/ollama:latest
          cache-to: type=inline
          provenance: false
        env:
          GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ steps.metadata.outputs.version }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'"

  # Aggregate all the assets and ship a release
  release:
```
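The merge step above stitches the per-architecture digests into one multi-arch manifest. A minimal local sketch of that command, assuming the digest files sit in the current directory and `$DOCKER_METADATA_OUTPUT_JSON` holds the metadata-action output:

```bash
# Create and push a manifest list that points at each per-arch digest.
# Tag arguments come from the metadata JSON; digest file names are the shas.
docker buildx imagetools create \
  $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
  $(printf 'ollama/ollama@sha256:%s ' *)
```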
.github/workflows/test.yaml (vendored, 313 changed lines)

```
@@ -1,11 +1,5 @@
name: test

env:
  ROCM_WINDOWS_URL: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe
  MSYS2_URL: https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-x86_64-20240727.exe
  CUDA_12_WINDOWS_URL: https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe
  CUDA_12_WINDOWS_VER: 12.4

concurrency:
  # For PRs, later CI runs preempt previous ones. e.g. a force push on a PR
  # cancels running CI jobs and starts all new ones.
@@ -27,7 +21,7 @@ jobs:
  changes:
    runs-on: ubuntu-latest
    outputs:
      RUNNERS: ${{ steps.changes.outputs.RUNNERS }}
      changed: ${{ steps.changes.outputs.changed }}
    steps:
      - uses: actions/checkout@v4
        with:
@@ -35,309 +29,66 @@ jobs:
      - id: changes
        run: |
          changed() {
            git diff-tree -r --no-commit-id --name-only \
              $(git merge-base ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}) \
              ${{ github.event.pull_request.head.sha }} \
            local BASE=${{ github.event.pull_request.base.sha }}
            local HEAD=${{ github.event.pull_request.head.sha }}
            local MERGE_BASE=$(git merge-base $BASE $HEAD)
            git diff-tree -r --no-commit-id --name-only "$MERGE_BASE" "$HEAD" \
              | xargs python3 -c "import sys; from pathlib import Path; print(any(Path(x).match(glob) for x in sys.argv[1:] for glob in '$*'.split(' ')))"
          }

          {
            echo RUNNERS=$(changed 'llama/**')
          } >>$GITHUB_OUTPUT
          echo changed=$(changed 'llama/llama.cpp/**' 'ml/backend/ggml/ggml/**') | tee -a $GITHUB_OUTPUT

  runners-linux-cuda:
  linux:
    needs: [changes]
    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
    if: ${{ needs.changes.outputs.changed == 'True' }}
    strategy:
      matrix:
        cuda-version:
          - '11.8.0'
    runs-on: linux
    container: nvidia/cuda:${{ matrix.cuda-version }}-devel-ubuntu20.04
        include:
          - container: nvidia/cuda:11.8.0-devel-ubuntu22.04
            preset: CUDA
          - container: rocm/dev-ubuntu-22.04:6.1.2
            preset: ROCm
            extra-packages: rocm-libs
    runs-on: ubuntu-latest
    container: ${{ matrix.container }}
    steps:
      - uses: actions/checkout@v4
      - run: |
          apt-get update && apt-get install -y git build-essential curl
          apt-get update
          apt-get install -y cmake pkg-config ccache ${{ matrix.extra-packages }}
          ccache -o cache_dir=${{ github.workspace }}\.ccache
        env:
          DEBIAN_FRONTEND: noninteractive
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v4
      - uses: actions/cache@v4
        with:
          go-version-file: go.mod
          cache: true
      - run: go get ./...
          path: ${{ github.workspace }}\.ccache
          key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}
      - run: |
          git config --global --add safe.directory /__w/ollama/ollama
          cores=$(grep '^core id' /proc/cpuinfo |sort -u|wc -l)
          make -j $cores cuda_v11
  runners-linux-rocm:
    needs: [changes]
    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
    strategy:
      matrix:
        rocm-version:
          - '6.1.2'
    runs-on: linux
    container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}
    steps:
      - run: |
          apt-get update && apt-get install -y git build-essential curl rocm-libs
        env:
          DEBIAN_FRONTEND: noninteractive
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v4
        with:
          go-version-file: go.mod
          cache: true
      - run: go get ./...
      - run: |
          git config --global --add safe.directory /__w/ollama/ollama
          cores=$(grep '^core id' /proc/cpuinfo |sort -u|wc -l)
          make -j $cores rocm
          cmake --preset ${{ matrix.preset }}
          cmake --build --preset ${{ matrix.preset }} --parallel

  # ROCm generation step
  runners-windows-rocm:
    needs: [changes]
    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
    runs-on: windows
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version-file: go.mod
          cache: true
      - name: Set make jobs default
        run: |
          echo "MAKEFLAGS=--jobs=$((Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append

      # ROCM installation steps
      - name: 'Cache ROCm installer'
        id: cache-rocm
        uses: actions/cache@v4
        with:
          path: rocm-install.exe
          key: ${{ env.ROCM_WINDOWS_URL }}
      - name: 'Conditionally Download ROCm'
        if: steps.cache-rocm.outputs.cache-hit != 'true'
        run: |
          $ErrorActionPreference = "Stop"
          Invoke-WebRequest -Uri "${env:ROCM_WINDOWS_URL}" -OutFile "rocm-install.exe"
      - name: 'Install ROCm'
        run: |
          Start-Process "rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
      - name: 'Verify ROCm'
        run: |
          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
          echo "HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path | select -first 1)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append

      - name: Add msys paths
        run: |
          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
      - name: Install msys2 tools
        run: |
          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait

      - name: make rocm runner
        run: |
          import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
          Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
          if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
          make -C llama print-HIP_PATH print-HIP_LIB_DIR
          make rocm

  # CUDA generation step
  runners-windows-cuda:
    needs: [changes]
    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
    runs-on: windows
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version-file: go.mod
          cache: true
      - name: Set make jobs default
        run: |
          echo "MAKEFLAGS=--jobs=$((Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append

      # CUDA installation steps
      - name: 'Cache CUDA installer'
        id: cache-cuda
        uses: actions/cache@v4
        with:
          path: cuda-install.exe
          key: ${{ env.CUDA_12_WINDOWS_URL }}
      - name: 'Conditionally Download CUDA'
        if: steps.cache-cuda.outputs.cache-hit != 'true'
        run: |
          $ErrorActionPreference = "Stop"
          Invoke-WebRequest -Uri "${env:CUDA_12_WINDOWS_URL}" -OutFile "cuda-install.exe"
      - name: 'Install CUDA'
        run: |
          $subpackages = @("cudart", "nvcc", "cublas", "cublas_dev") | foreach-object {"${_}_${{ env.CUDA_12_WINDOWS_VER }}"}
          Start-Process "cuda-install.exe" -ArgumentList (@("-s") + $subpackages) -NoNewWindow -Wait
      - name: 'Verify CUDA'
        run: |
          & (resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0] --version
          $cudaPath=((resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0].path | split-path | split-path)
          $cudaVer=($cudaPath | split-path -leaf ) -replace 'v(\d+).(\d+)', '$1_$2'
          echo "$cudaPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "CUDA_PATH=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
          echo "CUDA_PATH_V${cudaVer}=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
          echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append

      - name: Add msys paths
        run: |
          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
      - name: Install msys2 tools
        run: |
          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
      - name: make cuda runner
        run: |
          import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
          Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
          if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
          make cuda_v$(($env:CUDA_PATH | split-path -leaf) -replace 'v(\d+).*', '$1')

  runners-cpu:
    needs: [changes]
    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-2019]
        arch: [amd64, arm64]
        exclude:
          - os: ubuntu-latest
            arch: arm64
          - os: windows-2019
            arch: arm64
    runs-on: ${{ matrix.os }}
    env:
      GOARCH: ${{ matrix.arch }}
      ARCH: ${{ matrix.arch }}
      CGO_ENABLED: '1'
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version-file: go.mod
          cache: true
      - name: Add msys paths
        if: ${{ startsWith(matrix.os, 'windows-') }}
        run: |
          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
      - name: Install msys2 tools
        if: ${{ startsWith(matrix.os, 'windows-') }}
        run: |
          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
      - name: 'Build Windows Go Runners'
        if: ${{ startsWith(matrix.os, 'windows-') }}
        run: |
          $gopath=(get-command go).source | split-path -parent
          $gccpath=(get-command gcc).source | split-path -parent
          import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
          Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
          $env:PATH="$gopath;$gccpath;$env:PATH"
          echo $env:PATH
          if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
          make -j 4
      - name: 'Build Unix Go Runners'
        if: ${{ ! startsWith(matrix.os, 'windows-') }}
        run: make -j 4
      - run: go build .

  lint:
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-2019]
        arch: [amd64, arm64]
        exclude:
          - os: ubuntu-latest
            arch: arm64
          - os: windows-2019
            arch: arm64
          - os: macos-latest
            arch: amd64
    runs-on: ${{ matrix.os }}
    env:
      GOARCH: ${{ matrix.arch }}
      CGO_ENABLED: '1'
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Add msys paths
        if: ${{ startsWith(matrix.os, 'windows-') }}
        run: |
          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
      - name: Install msys2 tools
        if: ${{ startsWith(matrix.os, 'windows-') }}
        run: |
          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
      - uses: actions/setup-go@v5
        with:
          go-version-file: go.mod
          cache: false
      - run: |
          case ${{ matrix.arch }} in
            amd64) echo ARCH=x86_64 ;;
            arm64) echo ARCH=arm64 ;;
          esac >>$GITHUB_ENV
        shell: bash
      - uses: golangci/golangci-lint-action@v6
        with:
          args: --timeout 10m0s -v
  test:
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-2019]
        arch: [amd64]
        exclude:
          - os: ubuntu-latest
            arch: arm64
          - os: windows-2019
            arch: arm64
        os: [ubuntu-latest, macos-latest, windows-latest]
    runs-on: ${{ matrix.os }}
    env:
      GOARCH: ${{ matrix.arch }}
      CGO_ENABLED: '1'
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Add msys paths
        if: ${{ startsWith(matrix.os, 'windows-') }}
        run: |
          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
      - name: Install msys2 tools
        if: ${{ startsWith(matrix.os, 'windows-') }}
        run: |
          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
      - uses: actions/setup-go@v5
        with:
          go-version-file: go.mod
          cache: true
      - run: |
          case ${{ matrix.arch }} in
            amd64) echo ARCH=amd64 ;;
            arm64) echo ARCH=arm64 ;;
          esac >>$GITHUB_ENV
        shell: bash
      - uses: golangci/golangci-lint-action@v6
        with:
          args: --timeout 10m0s -v
      - run: go test ./...

  patches:
    needs: [changes]
    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Verify patches carry all the changes
      - name: Verify patches apply cleanly and do not change files
        run: |
          make apply-patches sync && git diff --compact-summary --exit-code llama
          make -f Makefile2 clean checkout sync
          git diff --compact-summary --exit-code
```
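The rewritten `changed()` helper in the `changes` job is easy to exercise outside Actions. A sketch that takes the base and head refs as arguments instead of the workflow-injected PR SHAs:

```bash
# Prints True when a file touched between BASE and HEAD matches a glob.
changed() {
  local BASE=$1 HEAD=$2; shift 2
  local MERGE_BASE=$(git merge-base $BASE $HEAD)
  git diff-tree -r --no-commit-id --name-only "$MERGE_BASE" "$HEAD" \
    | xargs python3 -c "import sys; from pathlib import Path; print(any(Path(x).match(glob) for x in sys.argv[1:] for glob in '$*'.split(' ')))"
}
changed origin/main HEAD 'llama/llama.cpp/**' 'ml/backend/ggml/ggml/**'
```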
Deleted file (a Prettier configuration; its path was not captured):

```diff
@@ -1,10 +0,0 @@
-{
-  "trailingComma": "es5",
-  "tabWidth": 2,
-  "useTabs": false,
-  "semi": false,
-  "singleQuote": true,
-  "jsxSingleQuote": true,
-  "printWidth": 120,
-  "arrowParens": "avoid"
-}
```
CMakeLists.txt (new file, 54 lines)

```cmake
cmake_minimum_required(VERSION 3.21)

project(Ollama C CXX)

include(CheckLanguage)

find_package(Threads REQUIRED)

set(CMAKE_BUILD_TYPE Release)
set(BUILD_SHARED_LIBS ON)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

set(GGML_BUILD ON)
set(GGML_SHARED ON)
set(GGML_CCACHE ON)
set(GGML_BACKEND_DL ON)
set(GGML_BACKEND_SHARED ON)
set(GGML_SCHED_MAX_COPIES 4)

set(GGML_LLAMAFILE ON)
set(GGML_CPU_ALL_VARIANTS ON)
set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
set(GGML_CUDA_GRAPHS ON)

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)

include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu/amx)

set(GGML_CPU ON)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
set_property(TARGET ggml PROPERTY EXCLUDE_FROM_ALL TRUE)

check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
    if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24" AND NOT CMAKE_CUDA_ARCHITECTURES)
        set(CMAKE_CUDA_ARCHITECTURES "native")
    endif()

    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cuda)
endif()

check_language(HIP)
if(CMAKE_HIP_COMPILER)
    set(HIP_PLATFORM "amd")

    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-hip)
endif()
```
CMakePresets.json (new file, 109 lines)

```json
{
  "version": 3,
  "configurePresets": [
    {
      "name": "Default",
      "binaryDir": "${sourceDir}/build",
      "cacheVariables": {
        "CMAKE_BUILD_TYPE": "Release"
      }
    },
    {
      "name": "CPU",
      "inherits": [ "Default" ]
    },
    {
      "name": "CUDA",
      "inherits": [ "Default" ]
    },
    {
      "name": "CUDA 11",
      "inherits": [ "CUDA" ],
      "cacheVariables": {
        "CMAKE_CUDA_ARCHITECTURES": "50;52;53;60;61;62;70;72;75;80;86"
      }
    },
    {
      "name": "CUDA 12",
      "inherits": [ "CUDA" ],
      "cacheVariables": {
        "CMAKE_CUDA_ARCHITECTURES": "60;61;62;70;72;75;80;86;87;89;90;90a"
      }
    },
    {
      "name": "JetPack 5",
      "inherits": [ "CUDA" ],
      "cacheVariables": {
        "CMAKE_CUDA_ARCHITECTURES": "72;87"
      }
    },
    {
      "name": "JetPack 6",
      "inherits": [ "CUDA" ],
      "cacheVariables": {
        "CMAKE_CUDA_ARCHITECTURES": "87"
      }
    },
    {
      "name": "ROCm",
      "inherits": [ "Default" ],
      "cacheVariables": {
        "CMAKE_HIP_PLATFORM": "amd"
      }
    },
    {
      "name": "ROCm 6",
      "inherits": [ "ROCm" ],
      "cacheVariables": {
        "AMDGPU_TARGETS": "gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
      }
    }
  ],
  "buildPresets": [
    {
      "name": "Default",
      "configurePreset": "Default",
      "configuration": "Release"
    },
    {
      "name": "CPU",
      "configurePreset": "Default",
      "targets": [ "ggml-cpu" ]
    },
    {
      "name": "CUDA",
      "configurePreset": "CUDA",
      "targets": [ "ggml-cuda" ]
    },
    {
      "name": "CUDA 11",
      "inherits": [ "CUDA" ],
      "configurePreset": "CUDA 11"
    },
    {
      "name": "CUDA 12",
      "inherits": [ "CUDA" ],
      "configurePreset": "CUDA 12"
    },
    {
      "name": "JetPack 5",
      "inherits": [ "CUDA" ],
      "configurePreset": "JetPack 5"
    },
    {
      "name": "JetPack 6",
      "inherits": [ "CUDA" ],
      "configurePreset": "JetPack 6"
    },
    {
      "name": "ROCm",
      "configurePreset": "ROCm",
      "targets": [ "ggml-hip" ]
    },
    {
      "name": "ROCm 6",
      "inherits": [ "ROCm" ],
      "configurePreset": "ROCm 6"
    }
  ]
}
```
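These presets are exercised throughout the Dockerfile and CI below; configuring and building locally follows the same two-step pattern (the GPU presets assume the matching toolkit is installed):

```bash
# CPU-only backend build using the presets defined above.
cmake --preset 'CPU'
cmake --build --parallel --preset 'CPU'

# GPU variants follow the same pattern:
cmake --preset 'CUDA 12' && cmake --build --parallel --preset 'CUDA 12'
cmake --preset 'ROCm 6' && cmake --build --parallel --preset 'ROCm 6'
```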
Dockerfile (316 changed lines)

```
@@ -1,201 +1,161 @@
ARG GOLANG_VERSION=1.22.8
ARG CUDA_VERSION_11=11.3.1
ARG CUDA_VERSION_12=12.4.0
ARG ROCM_VERSION=6.1.2
ARG JETPACK_6=r36.2.0
ARG JETPACK_5=r35.4.1
# vim: filetype=dockerfile

### To create a local image for building linux binaries on mac or windows with efficient incremental builds
#
#   docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile --target unified-builder-amd64 .
#   docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
#
### Then incremental builds will be much faster in this container
#
#   make -j 10 dist
#
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64
ARG GOLANG_VERSION
ARG CUDA_VERSION_11
ARG CUDA_VERSION_12
COPY ./scripts/rh_linux_deps.sh /
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:/usr/local/cuda/bin:$PATH
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
    dnf clean all && \
    dnf install -y \
    zsh \
    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
# TODO intel oneapi goes here...
ENV GOARCH amd64
ENV CGO_ENABLED 1
WORKDIR /go/src/github.com/ollama/ollama/
ENTRYPOINT [ "zsh" ]
ARG FLAVOR=${TARGETARCH}

### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds
# Note: this does not contain jetson variants
#
#   docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile --target unified-builder-arm64 .
#   docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
#
FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64
ARG GOLANG_VERSION
ARG CUDA_VERSION_11
ARG CUDA_VERSION_12
COPY ./scripts/rh_linux_deps.sh /
RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo && \
    dnf config-manager --set-enabled appstream && \
    dnf clean all && \
    dnf install -y \
    zsh \
    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH:/usr/local/cuda/bin
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
ENV GOARCH arm64
ENV CGO_ENABLED 1
WORKDIR /go/src/github.com/ollama/ollama/
ENTRYPOINT [ "zsh" ]
ARG ROCMVERSION=6.1.2
ARG JETPACK5VERSION=r35.4.1
ARG JETPACK6VERSION=r36.2.0
ARG CMAKEVERSION=3.31.2

FROM --platform=linux/amd64 unified-builder-amd64 AS build-amd64
COPY . .
ARG OLLAMA_SKIP_CUDA_GENERATE
ARG OLLAMA_SKIP_ROCM_GENERATE
ARG OLLAMA_FAST_BUILD
ARG VERSION
ARG CUSTOM_CPU_FLAGS
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCMVERSION}-complete AS base-amd64
RUN sed -i -e 's/mirror.centos.org/vault.centos.org/g' -e 's/^#.*baseurl=http/baseurl=http/g' -e 's/^mirrorlist=http/#mirrorlist=http/g' /etc/yum.repos.d/*.repo \
    && yum install -y yum-utils devtoolset-10-gcc devtoolset-10-gcc-c++ \
    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo \
    && curl -s -L https://github.com/ccache/ccache/releases/download/v4.10.2/ccache-4.10.2-linux-x86_64.tar.xz | tar -Jx -C /usr/local/bin --strip-components 1
ENV PATH=/opt/rh/devtoolset-10/root/usr/bin:/opt/rh/devtoolset-11/root/usr/bin:$PATH

FROM --platform=linux/arm64 rockylinux:8 AS base-arm64
# install epel-release for ccache
RUN yum install -y yum-utils epel-release \
    && yum install -y clang ccache \
    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
ENV CC=clang CXX=clang++

FROM base-${TARGETARCH} AS base
ARG CMAKEVERSION
RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
ENV LDFLAGS=-s

FROM base AS cpu
# amd64 uses gcc which requires devtoolset-11 for AVX extensions while arm64 uses clang
RUN if [ "$(uname -m)" = "x86_64" ]; then yum install -y devtoolset-11-gcc devtoolset-11-gcc-c++; fi
ENV PATH=/opt/rh/devtoolset-11/root/usr/bin:$PATH
RUN --mount=type=cache,target=/root/.ccache \
    if grep "^flags" /proc/cpuinfo|grep avx>/dev/null; then \
        make -j $(nproc) dist ; \
    else \
        make -j 5 dist ; \
    fi
RUN cd dist/linux-$GOARCH && \
    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
RUN if [ -z ${OLLAMA_SKIP_ROCM_GENERATE} ] ; then \
        cd dist/linux-$GOARCH-rocm && \
        tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz ;\
    fi
    cmake --preset 'CPU' && cmake --build --parallel --preset 'CPU'

# Jetsons need to be built in discrete stages
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS runners-jetpack5-arm64
ARG GOLANG_VERSION
RUN apt-get update && apt-get install -y git curl ccache && \
    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
WORKDIR /go/src/github.com/ollama/ollama/
COPY . .
ARG CGO_CFLAGS
ENV GOARCH arm64
ARG VERSION
FROM base AS cuda-11
ARG CUDA11VERSION=11.3
RUN yum install -y cuda-toolkit-${CUDA11VERSION//./-}
ENV PATH=/usr/local/cuda-11/bin:$PATH
RUN --mount=type=cache,target=/root/.ccache \
    make -j 5 dist_cuda_v11 \
        CUDA_ARCHITECTURES="72;87" \
        GPU_RUNNER_VARIANT=_jetpack5 \
        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama \
        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama/cuda_jetpack5
    cmake --preset 'CUDA 11' && cmake --build --parallel --preset 'CUDA 11'

FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS runners-jetpack6-arm64
ARG GOLANG_VERSION
RUN apt-get update && apt-get install -y git curl ccache && \
    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
WORKDIR /go/src/github.com/ollama/ollama/
COPY . .
ARG CGO_CFLAGS
ENV GOARCH arm64
ARG VERSION
FROM base AS cuda-12
ARG CUDA12VERSION=12.4
RUN yum install -y cuda-toolkit-${CUDA12VERSION//./-}
ENV PATH=/usr/local/cuda-12/bin:$PATH
RUN --mount=type=cache,target=/root/.ccache \
    make -j 5 dist_cuda_v12 \
        CUDA_ARCHITECTURES="87" \
        GPU_RUNNER_VARIANT=_jetpack6 \
        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama \
        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama/cuda_jetpack6
    cmake --preset 'CUDA 12' && cmake --build --parallel --preset 'CUDA 12'

FROM --platform=linux/arm64 unified-builder-arm64 AS build-arm64
COPY . .
ARG OLLAMA_SKIP_CUDA_GENERATE
ARG OLLAMA_FAST_BUILD
ARG VERSION
FROM base AS rocm-6
RUN --mount=type=cache,target=/root/.ccache \
    make -j 5 dist
COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
RUN cd dist/linux-$GOARCH && \
    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
RUN cd dist/linux-$GOARCH-jetpack5 && \
    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack5.tgz
RUN cd dist/linux-$GOARCH-jetpack6 && \
    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack6.tgz
    cmake --preset 'ROCm 6' && cmake --build --parallel --preset 'ROCm 6'

FROM --platform=linux/amd64 scratch AS dist-amd64
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
FROM --platform=linux/arm64 scratch AS dist-arm64
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
FROM dist-$TARGETARCH AS dist
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK5VERSION} AS jetpack-5
ARG CMAKEVERSION
RUN apt-get update && apt-get install -y curl ccache \
    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'JetPack 5' && cmake --build --parallel --preset 'JetPack 5'

FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK6VERSION} AS jetpack-6
ARG CMAKEVERSION
RUN apt-get update && apt-get install -y curl ccache \
    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'JetPack 6' && cmake --build --parallel --preset 'JetPack 6'

# For amd64 container images, filter out cuda/rocm to minimize size
FROM build-amd64 AS runners-cuda-amd64
RUN rm -rf \
    ./dist/linux-amd64/lib/ollama/libggml_hipblas.so \
    ./dist/linux-amd64/lib/ollama/runners/rocm*
FROM base AS build
ARG GOVERSION=1.23.4
RUN curl -fsSL https://golang.org/dl/go${GOVERSION}.linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
ENV PATH=/usr/local/go/bin:$PATH
WORKDIR /go/src/github.com/ollama/ollama
COPY . .
ARG GOFLAGS="'-ldflags=-w -s'"
ENV CGO_ENABLED=1
RUN --mount=type=cache,target=/root/.cache/go-build \
    go build -trimpath -buildmode=pie -o /bin/ollama .

FROM build-amd64 AS runners-rocm-amd64
RUN rm -rf \
    ./dist/linux-amd64/lib/ollama/libggml_cuda*.so \
    ./dist/linux-amd64/lib/ollama/libcu*.so* \
    ./dist/linux-amd64/lib/ollama/runners/cuda*
FROM --platform=linux/amd64 scratch AS amd64
COPY --from=cuda-11 --chmod=644 \
    build/lib/libggml-cuda.so \
    /usr/local/cuda/lib64/libcublas.so.11 \
    /usr/local/cuda/lib64/libcublasLt.so.11 \
    /usr/local/cuda/lib64/libcudart.so.11.0 \
    /lib/ollama/cuda_v11/
COPY --from=cuda-12 --chmod=644 \
    build/lib/libggml-cuda.so \
    /usr/local/cuda/lib64/libcublas.so.12 \
    /usr/local/cuda/lib64/libcublasLt.so.12 \
    /usr/local/cuda/lib64/libcudart.so.12 \
    /lib/ollama/cuda_v12/

FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
RUN apt-get update && \
    apt-get install -y ca-certificates && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
FROM --platform=linux/arm64 scratch AS arm64
COPY --from=cuda-11 --chmod=644 \
    build/lib/libggml-cuda.so \
    /usr/local/cuda/lib64/libcublas.so.11 \
    /usr/local/cuda/lib64/libcublasLt.so.11 \
    /usr/local/cuda/lib64/libcudart.so.11.0 \
    /lib/ollama/cuda_v11/
COPY --from=cuda-12 --chmod=644 \
    build/lib/libggml-cuda.so \
    /usr/local/cuda/lib64/libcublas.so.12 \
    /usr/local/cuda/lib64/libcublasLt.so.12 \
    /usr/local/cuda/lib64/libcudart.so.12 \
    /lib/ollama/cuda_v12/
COPY --from=jetpack-5 --chmod=644 \
    build/lib/libggml-cuda.so \
    /usr/local/cuda/lib64/libcublas.so.11 \
    /usr/local/cuda/lib64/libcublasLt.so.11 \
    /usr/local/cuda/lib64/libcudart.so.11.0 \
    /lib/ollama/cuda_jetpack5/
COPY --from=jetpack-6 --chmod=644 \
    build/lib/libggml-cuda.so \
    /usr/local/cuda/lib64/libcublas.so.12 \
    /usr/local/cuda/lib64/libcublasLt.so.12 \
    /usr/local/cuda/lib64/libcudart.so.12 \
    /lib/ollama/cuda_jetpack6/

FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
RUN apt-get update && \
    apt-get install -y ca-certificates && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ /lib/
COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ /lib/
FROM --platform=linux/arm64 scratch AS rocm
COPY --from=rocm-6 --chmod=644 \
    build/lib/libggml-hip.so \
    /opt/rocm/lib/libamdhip64.so.6 \
    /opt/rocm/lib/libhipblas.so.2 \
    /opt/rocm/lib/librocblas.so.4 \
    /opt/rocm/lib/libamd_comgr.so.2 \
    /opt/rocm/lib/libhsa-runtime64.so.1 \
    /opt/rocm/lib/librocprofiler-register.so.0 \
    /opt/amdgpu/lib64/libdrm_amdgpu.so.1 \
    /opt/amdgpu/lib64/libdrm.so.2 \
    /usr/lib64/libnuma.so.1 \
    /lib/ollama/rocm/
COPY --from=rocm-6 /opt/rocm/lib/rocblas/ /lib/ollama/rocm/rocblas/

FROM ${FLAVOR} AS archive
COPY --from=cpu --chmod=644 \
    build/lib/libggml-base.so \
    build/lib/libggml-cpu-*.so \
    /lib/ollama/
COPY --from=build /bin/ollama /bin/ollama

# The ROCm libraries are larger, so we keep them distinct from the CPU/CUDA image
FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
# Frontload the rocm libraries, which are large and rarely change, to increase the chance of a common layer
# across releases
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/
RUN apt-get update && \
    apt-get install -y ca-certificates && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/

EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0

ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]

FROM runtime-$TARGETARCH
EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0
FROM ubuntu:20.04
RUN apt-get update \
    && apt-get install -y ca-certificates \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
COPY --from=archive /bin/ /usr/bin/
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
COPY --from=archive /lib/ollama/ /usr/lib/ollama/
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/lib/ollama
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_VISIBLE_DEVICES=all

ENV OLLAMA_HOST=0.0.0.0:11434
EXPOSE 11434
ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]
```
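The release workflow above consumes this Dockerfile by exporting build targets to the local filesystem rather than pushing images. A sketch of that flow, with `PLATFORM` and the target list chosen to match the amd64 release job:

```bash
# Export each target's files into dist/, then pack them as the release
# tarball, mirroring the build-container-image job.
PLATFORM=linux/amd64
for TARGET in archive rocm; do
  docker buildx build --platform $PLATFORM --target $TARGET \
    --output type=local,dest=dist/$PLATFORM .
done
tar c -C dist/$PLATFORM . | pigz -9cv > dist/ollama-${PLATFORM//\//-}.tar.gz
```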
Makefile (deleted, 103 lines)

```make
# top level makefile for Ollama
include make/common-defs.make


# Determine which if any GPU runners we should build
include make/cuda-v11-defs.make
include make/cuda-v12-defs.make
include make/rocm-defs.make

ifeq ($(CUSTOM_CPU_FLAGS),)
ifeq ($(ARCH),amd64)
	RUNNER_TARGETS=cpu
endif
# Without CUSTOM_CPU_FLAGS we default to build both v11 and v12 if present
ifeq ($(OLLAMA_SKIP_CUDA_GENERATE),)
ifneq ($(CUDA_11_COMPILER),)
	RUNNER_TARGETS += cuda_v11
endif
ifneq ($(CUDA_12_COMPILER),)
	RUNNER_TARGETS += cuda_v12
endif
endif
else # CUSTOM_CPU_FLAGS is set, we'll build only the latest cuda version detected
ifneq ($(CUDA_12_COMPILER),)
	RUNNER_TARGETS += cuda_v12
else ifneq ($(CUDA_11_COMPILER),)
	RUNNER_TARGETS += cuda_v11
endif
endif

ifeq ($(OLLAMA_SKIP_ROCM_GENERATE),)
ifneq ($(HIP_COMPILER),)
	RUNNER_TARGETS += rocm
endif
endif


all: runners exe

dist: $(addprefix dist_, $(RUNNER_TARGETS)) dist_exe

dist_%:
	@$(MAKE) --no-print-directory -f make/Makefile.$* dist

runners: $(RUNNER_TARGETS)

$(RUNNER_TARGETS):
	@$(MAKE) --no-print-directory -f make/Makefile.$@

exe dist_exe:
	@$(MAKE) --no-print-directory -f make/Makefile.ollama $@

help-sync apply-patches create-patches sync sync-clean:
	@$(MAKE) --no-print-directory -f make/Makefile.sync $@

test integration lint:
	@$(MAKE) --no-print-directory -f make/Makefile.test $@

clean:
	rm -rf $(BUILD_DIR) $(DIST_LIB_DIR) $(OLLAMA_EXE) $(DIST_OLLAMA_EXE)
	go clean -cache

help:
	@echo "The following make targets will help you build Ollama"
	@echo ""
	@echo "  make all          # (default target) Build Ollama llm subprocess runners, and the primary ollama executable"
	@echo "  make runners      # Build Ollama llm subprocess runners; after you may use 'go build .' to build the primary ollama executable"
	@echo "  make <runner>     # Build specific runners. Enabled: '$(RUNNER_TARGETS)'"
	@echo "  make dist         # Build the runners and primary ollama executable for distribution"
	@echo "  make help-sync    # Help information on vendor update targets"
	@echo "  make help-runners # Help information on runner targets"
	@echo ""
	@echo "The following make targets will help you test Ollama"
	@echo ""
	@echo "  make test         # Run unit tests"
	@echo "  make integration  # Run integration tests. You must 'make all' first"
	@echo "  make lint         # Run lint and style tests"
	@echo ""
	@echo "For more information see 'docs/development.md'"
	@echo ""


help-runners:
	@echo "The following runners will be built based on discovered GPU libraries: '$(RUNNER_TARGETS)'"
	@echo ""
	@echo "GPU Runner CPU Flags: '$(GPU_RUNNER_CPU_FLAGS)' (Override with CUSTOM_CPU_FLAGS)"
	@echo ""
	@echo "# CUDA_PATH sets the location where CUDA toolkits are present"
	@echo "CUDA_PATH=$(CUDA_PATH)"
	@echo "  CUDA_11_PATH=$(CUDA_11_PATH)"
	@echo "  CUDA_11_COMPILER=$(CUDA_11_COMPILER)"
	@echo "  CUDA_12_PATH=$(CUDA_12_PATH)"
	@echo "  CUDA_12_COMPILER=$(CUDA_12_COMPILER)"
	@echo ""
	@echo "# HIP_PATH sets the location where the ROCm toolkit is present"
	@echo "HIP_PATH=$(HIP_PATH)"
	@echo "  HIP_COMPILER=$(HIP_COMPILER)"

.PHONY: all exe dist help help-sync help-runners test integration lint runners clean $(RUNNER_TARGETS)

# Handy debugging for make variables
print-%:
	@echo '$*=$($*)'
```
Makefile2 (new file, 46 lines)

```make
UPSTREAM=https://github.com/ggerganov/llama.cpp.git
WORKDIR=llama/vendor
FETCH_HEAD=46e3556e01b824e52395fb050b29804b6cff2a7c

all: sync

.PHONY: sync
sync: llama/llama.cpp ml/backend/ggml/ggml

.PHONY: llama/llama.cpp
llama/llama.cpp: llama/vendor/ apply_patches
	rsync -arvzc -f "merge $@/.rsync-filter" $< $@

.PHONY: ml/backend/ggml/ggml apply_patches
ml/backend/ggml/ggml: llama/vendor/ggml/ apply_patches
	rsync -arvzc -f "merge $@/.rsync-filter" $< $@

PATCHES=$(wildcard llama/patches/*.patch)

.PHONY: apply_patches
.NOTPARALLEL:
apply_patches: $(addsuffix ed, $(PATCHES))

%.patched: %.patch
	@if git -c user.name=nobody -c 'user.email=<>' -C $(WORKDIR) am -3 $(realpath $<); then touch $@; else git -C $(WORKDIR) am --abort; exit 1; fi

.PHONY: checkout
checkout: $(WORKDIR)
	git -C $(WORKDIR) fetch
	git -C $(WORKDIR) checkout -f $(FETCH_HEAD)

$(WORKDIR):
	git clone $(UPSTREAM) $(WORKDIR)

.PHONY: format_patches
format_patches: llama/patches
	git -C $(WORKDIR) format-patch \
		--no-signature \
		--no-numbered \
		--zero-commit \
		-o $(realpath $<) \
		$(FETCH_HEAD)

.PHONY: clean
clean: checkout
	$(RM) $(addsuffix ed, $(PATCHES))
```
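The CI `patches` job drives this makefile end to end; the same check runs locally to verify the vendored tree matches the patch set:

```bash
# Reset the vendor checkout, re-apply all patches, re-sync the vendored
# copies, then confirm nothing in the tree changed.
make -f Makefile2 clean checkout sync
git diff --compact-summary --exit-code
```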
README.md

````diff
@@ -137,7 +137,7 @@ ollama run mario
 Hello! It's your friend Mario.
 ```
 
-For more examples, see the [examples](examples) directory. For more information on working with a Modelfile, see the [Modelfile](docs/modelfile.md) documentation.
+For more information on working with a Modelfile, see the [Modelfile](docs/modelfile.md) documentation.
 
 ## CLI Reference
 
@@ -441,6 +441,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
 - [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
 - [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
+- [LangChain for .NET](https://github.com/tryAGI/LangChain) with [example](https://github.com/tryAGI/LangChain/blob/main/examples/LangChain.Samples.OpenAI/Program.cs)
 - [LLPhant](https://github.com/theodo-group/LLPhant?tab=readme-ov-file#ollama)
 - [LlamaIndex](https://docs.llamaindex.ai/en/stable/examples/llm/ollama/) and [LlamaIndexTS](https://ts.llamaindex.ai/modules/llms/available_llms/ollama)
 - [LiteLLM](https://github.com/BerriAI/litellm)
@@ -538,4 +539,5 @@ See the [API documentation](./docs/api.md) for all endpoints.
 ### Observability
 
 - [OpenLIT](https://github.com/openlit/openlit) is an OpenTelemetry-native tool for monitoring Ollama Applications & GPUs using traces and metrics.
-- [HoneyHive](https://docs.honeyhive.ai/integrations/ollama) is an AI observability and evaluation platform for AI agents. Use HoneyHive to evaluate agent performance, interrogate failures, and monitor quality in production.
+- [HoneyHive](https://docs.honeyhive.ai/integrations/ollama) is an AI observability and evaluation platform for AI agents. Use HoneyHive to evaluate agent performance, interrogate failures, and monitor quality in production.
+- [Langfuse](https://langfuse.com/docs/integrations/ollama) is an open source LLM observability platform that enables teams to collaboratively monitor, evaluate and debug AI applications.
````
25
benchmark/README.md
Normal file
@@ -0,0 +1,25 @@
# Benchmark

Performance benchmarking for Ollama.

## Prerequisites
- Ollama server running locally (`127.0.0.1:11434`)
- Desired models pre-downloaded (e.g., `llama3.2:1b`)

## Run Benchmark

```bash
# Run all tests
go test -bench=. -timeout 30m ./...
```

## New Runner Benchmark

```bash
go test -bench=Runner
```

Or, to test multiple models:

```bash
# run this from within the benchmark directory
# requires: llama3.2:1b, llama3.1:8b, llama3.3:70b
sh new_runner.sh
```
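
Note that Go's `-bench` flag takes a regular expression matched against the full benchmark name, including `b.Run` sub-benchmark names, so a single model or scenario can be selected, e.g. `go test -bench='BenchmarkServerInference/llama3.2' -timeout 30m ./...`.
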
72
benchmark/new_runner.sh
Normal file
@@ -0,0 +1,72 @@
#!/bin/bash

kill_process_tree() {
    local pid=$1
    # Get all child processes using pgrep
    local children=$(pgrep -P $pid)

    # Kill children first
    for child in $children; do
        kill_process_tree $child
    done

    # Kill the parent process
    kill -9 $pid 2>/dev/null || true
}

# Function to run the runner and benchmark for a given model
run_benchmark() {
    local model=$1

    echo "Starting runner with model: $model"
    # Start the runner in background and save its PID
    go run ../cmd/runner/main.go --new-runner -model "$model" &
    runner_pid=$!

    # Wait for the runner to initialize (adjust sleep time as needed)
    sleep 5

    echo "Running benchmark..."
    # Run test and wait for it to complete
    go test -bench=Runner
    test_exit_code=$?

    echo "Stopping runner process..."
    # Kill the runner process and all its children
    kill_process_tree $runner_pid

    # Wait for the process to fully terminate
    wait $runner_pid 2>/dev/null || true

    # Make sure no processes are still listening on port 8080
    lsof -t -i:8080 | xargs kill -9 2>/dev/null || true

    # Additional sleep to ensure port is freed
    sleep 2

    # Check if test failed
    if [ $test_exit_code -ne 0 ]; then
        echo "Warning: Benchmark test failed with exit code $test_exit_code"
    fi

    echo "Benchmark complete for model: $model"
    echo "----------------------------------------"
}


HOME_DIR="$HOME"
# llama3.2:1b: ~/.ollama/models/blobs/sha256-74701a8c35f6c8d9a4b91f3f3497643001d63e0c7a84e085bed452548fa88d45
# llama3.1:8b: ~/.ollama/models/blobs/sha256-667b0c1932bc6ffc593ed1d03f895bf2dc8dc6df21db3042284a6f4416b06a29
# llama3.3:70b: ~/.ollama/models/blobs/sha256-4824460d29f2058aaf6e1118a63a7a197a09bed509f0e7d4e2efb1ee273b447d
models=(
    "${HOME_DIR}/.ollama/models/blobs/sha256-74701a8c35f6c8d9a4b91f3f3497643001d63e0c7a84e085bed452548fa88d45"
    "${HOME_DIR}/.ollama/models/blobs/sha256-667b0c1932bc6ffc593ed1d03f895bf2dc8dc6df21db3042284a6f4416b06a29"
    # "${HOME_DIR}/.ollama/models/blobs/sha256-4824460d29f2058aaf6e1118a63a7a197a09bed509f0e7d4e2efb1ee273b447d"
)

# Run benchmarks for each model
for model in "${models[@]}"; do
    run_benchmark "$model"
done

echo "All benchmarks completed!"
175
benchmark/new_runner_benchmark_test.go
Normal file
@@ -0,0 +1,175 @@
package benchmark

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"testing"
	"time"
)

const (
	runnerURL     = "http://localhost:8080"
	warmupPrompts = 2  // Number of warm-up requests per test case
	warmupTokens  = 50 // Smaller token count for warm-up requests
)

var runnerMetrics []BenchmarkMetrics

// CompletionRequest represents the request body for the completion endpoint
type CompletionRequest struct {
	Prompt      string  `json:"prompt"`
	NumPredict  int     `json:"n_predict"`
	Temperature float32 `json:"temperature"`
}

// CompletionResponse represents a single response chunk from the streaming API
type CompletionResponse struct {
	Content string `json:"content"`
	Stop    bool   `json:"stop"`
	Timings struct {
		PredictedN  int `json:"predicted_n"`
		PredictedMs int `json:"predicted_ms"`
		PromptN     int `json:"prompt_n"`
		PromptMs    int `json:"prompt_ms"`
	} `json:"timings"`
}
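
For reference, the JSON these struct tags produce for the runner's `/completion` endpoint looks like the following. This is a minimal sketch derived purely from the struct definition above, not from separate runner documentation:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Mirrors the CompletionRequest struct above; the field tags determine
// the wire format the runner's /completion endpoint receives.
type completionRequest struct {
	Prompt      string  `json:"prompt"`
	NumPredict  int     `json:"n_predict"`
	Temperature float32 `json:"temperature"`
}

func main() {
	body, _ := json.Marshal(completionRequest{
		Prompt:      "Write a long story",
		NumPredict:  100,
		Temperature: 0.1,
	})
	fmt.Println(string(body))
	// {"prompt":"Write a long story","n_predict":100,"temperature":0.1}
}
```
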
// warmUp performs warm-up requests before the actual benchmark
func warmUp(b *testing.B, tt TestCase) {
	b.Logf("Warming up for test case %s", tt.name)
	warmupTest := TestCase{
		name:      tt.name + "_warmup",
		prompt:    tt.prompt,
		maxTokens: warmupTokens,
	}

	for i := 0; i < warmupPrompts; i++ {
		runCompletion(context.Background(), warmupTest, b)
		time.Sleep(100 * time.Millisecond) // Brief pause between warm-up requests
	}
	b.Logf("Warm-up complete")
}

func BenchmarkRunnerInference(b *testing.B) {
	b.Logf("Starting benchmark suite")

	// Verify server availability
	if _, err := http.Get(runnerURL + "/health"); err != nil {
		b.Fatalf("Runner unavailable: %v", err)
	}
	b.Log("Runner available")

	tests := []TestCase{
		{
			name:      "short_prompt",
			prompt:    formatPrompt("Write a long story"),
			maxTokens: 100,
		},
		{
			name:      "medium_prompt",
			prompt:    formatPrompt("Write a detailed economic analysis"),
			maxTokens: 500,
		},
		{
			name:      "long_prompt",
			prompt:    formatPrompt("Write a comprehensive AI research paper"),
			maxTokens: 1000,
		},
	}

	// Register cleanup handler for results reporting
	b.Cleanup(func() { reportMetrics(metrics) })

	// Main benchmark loop
	for _, tt := range tests {
		b.Run(tt.name, func(b *testing.B) {
			// Perform warm-up requests
			warmUp(b, tt)

			// Wait a bit after warm-up before starting the actual benchmark
			time.Sleep(500 * time.Millisecond)

			m := make([]BenchmarkMetrics, b.N)

			// Reset the timer once, after warm-up, so that all b.N
			// iterations are included in the measured time
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				m[i] = runCompletion(context.Background(), tt, b)
			}
			metrics = append(metrics, m...)
		})
	}
}

func formatPrompt(text string) string {
	return fmt.Sprintf("<|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n%s<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", text)
}

func runCompletion(ctx context.Context, tt TestCase, b *testing.B) BenchmarkMetrics {
	start := time.Now()
	var ttft time.Duration
	var tokens int
	lastToken := start

	// Create request body
	reqBody := CompletionRequest{
		Prompt:      tt.prompt,
		NumPredict:  tt.maxTokens,
		Temperature: 0.1,
	}
	jsonData, err := json.Marshal(reqBody)
	if err != nil {
		b.Fatalf("Failed to marshal request: %v", err)
	}

	// Create HTTP request
	req, err := http.NewRequestWithContext(ctx, "POST", runnerURL+"/completion", bytes.NewBuffer(jsonData))
	if err != nil {
		b.Fatalf("Failed to create request: %v", err)
	}
	req.Header.Set("Content-Type", "application/json")

	// Execute request
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		b.Fatalf("Request failed: %v", err)
	}
	defer resp.Body.Close()

	// Process streaming response
	decoder := json.NewDecoder(resp.Body)
	for {
		var chunk CompletionResponse
		if err := decoder.Decode(&chunk); err != nil {
			if err == io.EOF {
				break
			}
			b.Fatalf("Failed to decode response: %v", err)
		}

		if ttft == 0 && chunk.Content != "" {
			ttft = time.Since(start)
		}

		if chunk.Content != "" {
			tokens++
			lastToken = time.Now()
		}

		if chunk.Stop {
			break
		}
	}

	totalTime := lastToken.Sub(start)
	return BenchmarkMetrics{
		testName:        tt.name,
		ttft:            ttft,
		totalTime:       totalTime,
		totalTokens:     tokens,
		tokensPerSecond: float64(tokens) / totalTime.Seconds(),
	}
}
293
benchmark/server_benchmark_test.go
Normal file
@@ -0,0 +1,293 @@
// Package benchmark provides tools for performance testing of the Ollama inference server and supported models.
package benchmark

import (
	"context"
	"fmt"
	"net/http"
	"net/url"
	"os"
	"testing"
	"text/tabwriter"
	"time"

	"github.com/ollama/ollama/api"
)

// serverURL is the default Ollama server URL for benchmarking
const serverURL = "http://127.0.0.1:11434"

// metrics collects all benchmark results for final reporting
var metrics []BenchmarkMetrics

// models contains the list of model names to benchmark
var models = []string{
	"llama3.2:1b",
	// "qwen2.5:7b",
	// "llama3.3:70b",
}

// TestCase defines a benchmark test scenario with prompt characteristics
type TestCase struct {
	name      string // Human-readable test name
	prompt    string // Input prompt text
	maxTokens int    // Maximum tokens to generate
}

// BenchmarkMetrics contains performance measurements for a single test run
type BenchmarkMetrics struct {
	model           string        // Model being tested
	scenario        string        // cold_start or warm_start
	testName        string        // Name of the test case
	ttft            time.Duration // Time To First Token (TTFT)
	totalTime       time.Duration // Total time for complete response
	totalTokens     int           // Total generated tokens
	tokensPerSecond float64       // Calculated throughput
}

// ScenarioType defines the initialization state for benchmarking
type ScenarioType int

const (
	ColdStart ScenarioType = iota // Model is loaded from cold state
	WarmStart                     // Model is already loaded in memory
)

// String implements fmt.Stringer for ScenarioType
func (s ScenarioType) String() string {
	return [...]string{"cold_start", "warm_start"}[s]
}

// BenchmarkServerInference is the main entry point for benchmarking Ollama inference performance.
// It tests all configured models with different prompt lengths and start scenarios.
func BenchmarkServerInference(b *testing.B) {
	b.Logf("Starting benchmark suite with %d models", len(models))

	// Verify server availability
	if _, err := http.Get(serverURL + "/api/version"); err != nil {
		b.Fatalf("Server unavailable: %v", err)
	}
	b.Log("Server available")

	tests := []TestCase{
		{"short_prompt", "Write a long story", 100},
		{"medium_prompt", "Write a detailed economic analysis", 500},
		{"long_prompt", "Write a comprehensive AI research paper", 1000},
	}

	// Register cleanup handler for results reporting
	b.Cleanup(func() { reportMetrics(metrics) })

	// Main benchmark loop
	for _, model := range models {
		client := api.NewClient(mustParse(serverURL), http.DefaultClient)
		// Verify model availability
		if _, err := client.Show(context.Background(), &api.ShowRequest{Model: model}); err != nil {
			b.Fatalf("Model unavailable: %v", err)
		}

		for _, tt := range tests {
			testName := fmt.Sprintf("%s/%s/%s", model, ColdStart, tt.name)
			b.Run(testName, func(b *testing.B) {
				m := runBenchmark(b, tt, model, ColdStart, client)
				metrics = append(metrics, m...)
			})
		}

		for _, tt := range tests {
			testName := fmt.Sprintf("%s/%s/%s", model, WarmStart, tt.name)
			b.Run(testName, func(b *testing.B) {
				m := runBenchmark(b, tt, model, WarmStart, client)
				metrics = append(metrics, m...)
			})
		}
	}
}

// runBenchmark executes multiple iterations of a specific test case and scenario.
// Returns collected metrics for all iterations.
func runBenchmark(b *testing.B, tt TestCase, model string, scenario ScenarioType, client *api.Client) []BenchmarkMetrics {
	results := make([]BenchmarkMetrics, b.N)

	// Run benchmark iterations
	for i := 0; i < b.N; i++ {
		// Exclude per-iteration setup (pre-warming or unloading) from the timed region
		b.StopTimer()
		switch scenario {
		case WarmStart:
			// Pre-warm the model by generating some tokens
			for i := 0; i < 2; i++ {
				client.Generate(
					context.Background(),
					&api.GenerateRequest{
						Model:   model,
						Prompt:  tt.prompt,
						Options: map[string]interface{}{"num_predict": tt.maxTokens, "temperature": 0.1},
					},
					func(api.GenerateResponse) error { return nil },
				)
			}
		case ColdStart:
			unloadModel(client, model, b)
		}
		b.StartTimer()

		results[i] = runSingleIteration(context.Background(), client, tt, model, b)
		results[i].scenario = scenario.String()
	}
	return results
}

// unloadModel forces model unloading by sending a request with a zero KeepAlive duration.
// Includes a short delay to ensure unloading completes before the next test.
func unloadModel(client *api.Client, model string, b *testing.B) {
	req := &api.GenerateRequest{
		Model:     model,
		KeepAlive: &api.Duration{Duration: 0},
	}
	if err := client.Generate(context.Background(), req, func(api.GenerateResponse) error { return nil }); err != nil {
		b.Logf("Unload error: %v", err)
	}
	time.Sleep(100 * time.Millisecond)
}

// runSingleIteration measures performance metrics for a single inference request.
// Captures TTFT, total generation time, and calculates tokens/second.
func runSingleIteration(ctx context.Context, client *api.Client, tt TestCase, model string, b *testing.B) BenchmarkMetrics {
	start := time.Now()
	var ttft time.Duration
	var tokens int
	lastToken := start

	req := &api.GenerateRequest{
		Model:   model,
		Prompt:  tt.prompt,
		Options: map[string]interface{}{"num_predict": tt.maxTokens, "temperature": 0.1},
	}

	if b != nil {
		b.Logf("Prompt length: %d chars", len(tt.prompt))
	}

	// Execute generation request with metrics collection
	client.Generate(ctx, req, func(resp api.GenerateResponse) error {
		if ttft == 0 {
			ttft = time.Since(start)
		}
		if resp.Response != "" {
			tokens++
			lastToken = time.Now()
		}
		return nil
	})

	totalTime := lastToken.Sub(start)
	return BenchmarkMetrics{
		model:           model,
		testName:        tt.name,
		ttft:            ttft,
		totalTime:       totalTime,
		totalTokens:     tokens,
		tokensPerSecond: float64(tokens) / totalTime.Seconds(),
	}
}

// reportMetrics processes collected metrics and prints formatted results.
// Generates both human-readable tables and CSV output with averaged statistics.
func reportMetrics(results []BenchmarkMetrics) {
	if len(results) == 0 {
		return
	}

	// Aggregate results by test case
	type statsKey struct {
		model    string
		scenario string
		testName string
	}
	stats := make(map[statsKey]*struct {
		ttftSum      time.Duration
		totalTimeSum time.Duration
		tokensSum    int
		iterations   int
	})

	for _, m := range results {
		key := statsKey{m.model, m.scenario, m.testName}
		if _, exists := stats[key]; !exists {
			stats[key] = &struct {
				ttftSum      time.Duration
				totalTimeSum time.Duration
				tokensSum    int
				iterations   int
			}{}
		}

		stats[key].ttftSum += m.ttft
		stats[key].totalTimeSum += m.totalTime
		stats[key].tokensSum += m.totalTokens
		stats[key].iterations++
	}

	// Calculate averages
	var averaged []BenchmarkMetrics
	for key, data := range stats {
		count := data.iterations
		averaged = append(averaged, BenchmarkMetrics{
			model:           key.model,
			scenario:        key.scenario,
			testName:        key.testName,
			ttft:            data.ttftSum / time.Duration(count),
			totalTime:       data.totalTimeSum / time.Duration(count),
			totalTokens:     data.tokensSum / count,
			tokensPerSecond: float64(data.tokensSum) / data.totalTimeSum.Seconds(),
		})
	}

	// Print formatted results
	printTableResults(averaged)
	printCSVResults(averaged)
}

// printTableResults displays averaged metrics in a formatted table
func printTableResults(averaged []BenchmarkMetrics) {
	w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
	fmt.Fprintln(w, "\nAVERAGED BENCHMARK RESULTS")
	fmt.Fprintln(w, "Model\tScenario\tTest Name\tTTFT (ms)\tTotal Time (ms)\tTokens\tTokens/sec")
	for _, m := range averaged {
		fmt.Fprintf(w, "%s\t%s\t%s\t%.2f\t%.2f\t%d\t%.2f\n",
			m.model,
			m.scenario,
			m.testName,
			float64(m.ttft.Milliseconds()),
			float64(m.totalTime.Milliseconds()),
			m.totalTokens,
			m.tokensPerSecond,
		)
	}
	w.Flush()
}

// printCSVResults outputs averaged metrics in CSV format
func printCSVResults(averaged []BenchmarkMetrics) {
	fmt.Println("\nCSV OUTPUT")
	fmt.Println("model,scenario,test_name,ttft_ms,total_ms,tokens,tokens_per_sec")
	for _, m := range averaged {
		fmt.Printf("%s,%s,%s,%.2f,%.2f,%d,%.2f\n",
			m.model,
			m.scenario,
			m.testName,
			float64(m.ttft.Milliseconds()),
			float64(m.totalTime.Milliseconds()),
			m.totalTokens,
			m.tokensPerSecond,
		)
	}
}

// mustParse is a helper function to parse URLs with panic on error
func mustParse(rawURL string) *url.URL {
	u, err := url.Parse(rawURL)
	if err != nil {
		panic(err)
	}
	return u
}
420
cache/cache.go
vendored
Normal file
@@ -0,0 +1,420 @@
package cache

import (
	"errors"
	"fmt"
	"log/slog"
	"math"
	"slices"

	"github.com/ollama/ollama/ml"
)

var ErrNotSupported = errors.New("model does not support operation")

type Cache interface {
	// ** used by model implementations **

	// Returns an instance of the cache for layer 'i'
	Sub(i int) Cache

	// Returns the history of key and value tensors plus a mask
	//
	// The tensors are of shape embed dim, kv heads, batch size
	// The mask is of shape history size, batch size
	Get(ctx ml.Context) (ml.Tensor, ml.Tensor, ml.Tensor)

	// Stores a batch of key and value in the cache
	//
	// The tensors must be of shape embed dim, kv heads, batch size
	Put(ctx ml.Context, key, value ml.Tensor)

	// ** cache management **

	// Closes the cache and frees resources associated with it
	Close()

	// Called before the start of the model's forward pass. For each
	// token in the coming batch, there must be a corresponding entry
	// in positions and seqs.
	StartForward(ctx ml.Context, positions []int32, seqs []int) error

	// Copies tokens in the range [0, len) from srcSeq to dstSeq
	CopyPrefix(srcSeq, dstSeq int, len int32)

	// Removes tokens in the range [beginIndex, endIndex) from seq. Set
	// endIndex to math.MaxInt32 to remove everything starting at beginIndex
	Remove(seq int, beginIndex, endIndex int32) error
}

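
The interface above is easiest to read from the caller's side. Below is a minimal usage sketch; the `forward` function, its tensor arguments, and the per-layer loop are hypothetical, and only the StartForward/Sub/Put/Get calls reflect the interface as defined:

```go
// Hypothetical caller: StartForward runs once per batch, then each
// layer stores its batch and reads back the full history plus mask.
func forward(ctx ml.Context, cache Cache, positions []int32, seqs []int, k, v []ml.Tensor) error {
	// Register the incoming batch before any layer touches the cache.
	if err := cache.StartForward(ctx, positions, seqs); err != nil {
		return err
	}

	for layer := range k {
		c := cache.Sub(layer)          // per-layer view of the cache
		c.Put(ctx, k[layer], v[layer]) // store this batch's keys/values
		key, value, mask := c.Get(ctx) // history plus causal mask
		_, _, _ = key, value, mask     // ...attention would consume these
	}
	return nil
}
```
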
type Causal struct {
	DType    ml.DType
	Capacity int32

	// current forward pass
	curLayer     int
	curLoc       int
	curBatchSize int
	curMask      ml.Tensor
	curCellRange cellRange

	// metadata
	cells      []cacheCell
	cellRanges map[int]cellRange

	// cache data storage
	backend      ml.Backend
	cacheCtx     ml.Context
	keys, values []ml.Tensor
}

type seqCell struct {
	seq int
	pos int32
}

type cacheCell struct {
	sequences []seqCell
}

type cellRange struct {
	min int
	max int
}

func (cell cacheCell) findSeq(seq int) *seqCell {
	for i := range cell.sequences {
		if cell.sequences[i].seq == seq {
			return &cell.sequences[i]
		}
	}
	return nil
}

func NewCausalCache(backend ml.Backend, dtype ml.DType, capacity int32) Cache {
	return &Causal{
		Capacity:   capacity,
		DType:      dtype,
		cells:      make([]cacheCell, capacity),
		cellRanges: make(map[int]cellRange),
		backend:    backend,
		cacheCtx:   backend.NewContext(),
	}
}

func (c *Causal) Close() {
	c.cacheCtx.Close()
}

var ErrKvCacheFull = errors.New("could not find a kv cache slot")

func (c *Causal) StartForward(ctx ml.Context, positions []int32, seqs []int) error {
	if len(positions) != len(seqs) {
		return fmt.Errorf("length of positions (%v) must match length of seqs (%v)", len(positions), len(seqs))
	}

	c.curBatchSize = len(positions)

	if c.curBatchSize < 1 {
		return errors.New("batch size cannot be less than 1")
	}

	var err error
	c.curLoc, err = c.findStartLoc()
	if errors.Is(err, ErrKvCacheFull) {
		c.defrag()
		c.curLoc, err = c.findStartLoc()
	}
	if err != nil {
		return err
	}

	c.curCellRange = newRange()
	for i, pos := range positions {
		seq := seqs[i]

		c.cells[c.curLoc+i] = cacheCell{sequences: []seqCell{{seq: seq, pos: pos}}}

		ranges, ok := c.cellRanges[seq]
		if !ok {
			ranges = newRange()
		}

		if c.curLoc+i > ranges.max {
			ranges.max = c.curLoc + i
		}
		if ranges.max > c.curCellRange.max {
			c.curCellRange.max = ranges.max
		}

		if c.curLoc+i < ranges.min {
			ranges.min = c.curLoc + i
		}
		if ranges.min < c.curCellRange.min {
			c.curCellRange.min = ranges.min
		}
		c.cellRanges[seq] = ranges
	}

	c.curMask, err = c.buildMask(ctx, positions, seqs)

	return err
}

func newRange() cellRange {
	return cellRange{
		min: math.MaxInt,
		max: 0,
	}
}

func (c *Causal) findStartLoc() (int, error) {
	var start, count int
	for i := range c.cells {
		if len(c.cells[i].sequences) == 0 {
			count++
			if count >= c.curBatchSize {
				return start, nil
			}
		} else {
			start = i + 1
			count = 0
		}
	}

	return 0, fmt.Errorf("%w (length: %v)", ErrKvCacheFull, c.Capacity)
}

func (c *Causal) buildMask(ctx ml.Context, positions []int32, seqs []int) (ml.Tensor, error) {
	// TODO(jessegross): This makes a number of simplifications such as no padding,
	// which could be an issue for CUDA graphs and/or flash attention
	len := c.curCellRange.max - c.curCellRange.min + 1
	mask := make([]float32, c.curBatchSize*len)

	for i := range c.curBatchSize {
		for j := c.curCellRange.min; j <= c.curCellRange.max; j++ {
			cellSeq := c.cells[j].findSeq(seqs[i])
			if cellSeq == nil || cellSeq.pos > positions[i] {
				mask[i*len+(j-c.curCellRange.min)] = float32(math.Inf(-1))
			}
		}
	}

	return ctx.FromFloatSlice(mask, len, c.curBatchSize)
}

func moveCell(ctx ml.Context, objs []ml.Tensor, src, dst, len int) {
	for _, obj := range objs {
		srcView := obj.View(ctx, int(obj.Stride(2))*src, int(obj.Dim(0)*obj.Dim(1))*len)
		dstView := obj.View(ctx, int(obj.Stride(2))*dst, int(obj.Dim(0)*obj.Dim(1))*len)

		ctx.Forward(srcView.Copy(ctx, dstView))
	}
}

func (c *Causal) defrag() {
	slog.Debug("defragmenting kv cache")

	// Defrag strategy:
	// - Search for empty holes at the beginning of the cache,
	//   filling them with active data starting at the end
	// - If there are contiguous elements that need to be moved,
	//   combine them into a single operation by holding new moves
	//   until we see the next one is non-contiguous
	// - Fill up the context with the maximum number of operations it
	//   can hold then compute that and continue with a new context
	//
	// We could try to optimize placement by grouping blocks from
	// the same sequences together but most likely the next forward
	// pass will disrupt this anyways, so the real world benefit
	// seems limited at this time.

	ctx := c.backend.NewContext()

	// For every move, 6 tensors are required per layer (2 views and a
	// copy for each of k and v). For efficiency, we try to group
	// multiple contiguous blocks into a single move. However, if we
	// exceed the maximum number of tensors then we need to compute
	// what we have and start a new batch.
	maxMoves := ctx.MaxTensors() / (6 * len(c.keys))
	moves := 0

	var pendingSrc, pendingDst, pendingLen int

	for dst := range c.cells {
		if len(c.cells[dst].sequences) == 0 {
			for src := len(c.cells) - 1; src > dst; src-- {
				if len(c.cells[src].sequences) != 0 {
					c.cells[dst] = c.cells[src]
					c.cells[src] = cacheCell{}

					if pendingLen > 0 {
						if src == pendingSrc-pendingLen && dst == pendingDst+pendingLen {
							pendingSrc = src
							pendingLen++
							break
						} else {
							moveCell(ctx, c.keys, pendingSrc, pendingDst, pendingLen)
							moveCell(ctx, c.values, pendingSrc, pendingDst, pendingLen)
							moves++
						}
					}

					pendingSrc = src
					pendingDst = dst
					pendingLen = 1

					break
				}
			}
		}

		if moves >= maxMoves {
			ctx.Compute(nil)
			ctx.Close()
			ctx = c.backend.NewContext()

			moves = 0
		}
	}

	if pendingLen > 0 {
		moveCell(ctx, c.keys, pendingSrc, pendingDst, pendingLen)
		moveCell(ctx, c.values, pendingSrc, pendingDst, pendingLen)
		moves++
	}

	if moves > 0 {
		ctx.Compute(nil)
	}
	ctx.Close()

	for seq := range c.cellRanges {
		seqRange := newRange()

		for i, cell := range c.cells {
			if cell.findSeq(seq) != nil {
				if i < seqRange.min {
					seqRange.min = i
				}
				if i > seqRange.max {
					seqRange.max = i
				}
			}
		}

		c.cellRanges[seq] = seqRange
	}
}
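
The hole-filling part of this strategy can be illustrated in isolation. The following toy simulation, with strings standing in for cache cells (the slice contents are made up), compacts occupied cells to the front exactly as the dst/src scan above does, minus the move batching and tensor copies:

```go
package main

import "fmt"

func main() {
	// "" marks an empty cache slot; letters are occupied cells.
	cells := []string{"", "A", "", "", "B", "C", "D"}

	// For each hole from the front, pull the last occupied cell back.
	for dst := range cells {
		if cells[dst] != "" {
			continue
		}
		for src := len(cells) - 1; src > dst; src-- {
			if cells[src] != "" {
				cells[dst], cells[src] = cells[src], ""
				break
			}
		}
	}

	fmt.Println(cells) // [D A C B   ] — occupied cells compacted to the front
}
```
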
func (c *Causal) Sub(i int) Cache {
	if i >= len(c.keys) {
		c.keys = append(c.keys, make([]ml.Tensor, i-len(c.keys)+1)...)
		c.values = append(c.values, make([]ml.Tensor, i-len(c.values)+1)...)
	}

	c.curLayer = i

	return c
}

func (c *Causal) Get(ctx ml.Context) (ml.Tensor, ml.Tensor, ml.Tensor) {
	key := c.keys[c.curLayer]
	value := c.values[c.curLayer]

	key = key.View(ctx, int(key.Stride(2))*c.curCellRange.min,
		int(key.Dim(0)), int(key.Stride(1)),
		int(key.Dim(1)), int(key.Stride(2)),
		int(c.curMask.Dim(0)),
	)

	value = value.View(ctx, int(key.Stride(2))*c.curCellRange.min,
		int(value.Dim(0)), int(value.Stride(1)),
		int(value.Dim(1)), int(value.Stride(2)),
		int(c.curMask.Dim(0)),
	)

	return key, value, c.curMask
}

func (c *Causal) Put(ctx ml.Context, key, value ml.Tensor) {
	if c.curBatchSize != int(key.Dim(2)) {
		panic(fmt.Errorf("inconsistent batch sizes (layer: %v, batch size: %v layer batch size: %v)", c.curLayer, c.curBatchSize, int(key.Dim(2))))
	}

	if c.keys[c.curLayer] == nil || c.values[c.curLayer] == nil {
		c.keys[c.curLayer] = c.cacheCtx.Zeros(c.DType, key.Dim(0), key.Dim(1), int64(c.Capacity))
		c.values[c.curLayer] = c.cacheCtx.Zeros(c.DType, value.Dim(0), value.Dim(1), int64(c.Capacity))
	}

	ctx.Forward(key.Copy(ctx, c.keys[c.curLayer].View(ctx, int(key.Stride(2))*c.curLoc, int(key.Dim(0)*key.Dim(1)*key.Dim(2)))))
	ctx.Forward(value.Copy(ctx, c.values[c.curLayer].View(ctx, int(value.Stride(2))*c.curLoc, int(value.Dim(0)*value.Dim(1)*value.Dim(2)))))
}

func (c *Causal) CopyPrefix(srcSeq, dstSeq int, len int32) {
	seqRange := newRange()

	for i := range c.cells {
		srcCellSeq := c.cells[i].findSeq(srcSeq)
		dstCellSeq := c.cells[i].findSeq(dstSeq)

		if dstCellSeq != nil {
			c.cells[i].sequences = slices.DeleteFunc(c.cells[i].sequences, func(s seqCell) bool { return s.seq == dstSeq })
		}

		if srcCellSeq != nil && srcCellSeq.pos < len {
			c.cells[i].sequences = append(c.cells[i].sequences, seqCell{seq: dstSeq, pos: srcCellSeq.pos})
			if i < seqRange.min {
				seqRange.min = i
			}
			if i > seqRange.max {
				seqRange.max = i
			}
		}
	}

	c.cellRanges[dstSeq] = seqRange
}

func (c *Causal) shift(seq int, beginIndex, offset int32) error {
	panic("Shift not yet implemented")
}

func (c *Causal) Remove(seq int, beginIndex, endIndex int32) error {
	var offset int32
	if endIndex != math.MaxInt32 {
		offset = beginIndex - endIndex
	}

	seqRange := newRange()

	for i := range c.cells {
		cellSeq := c.cells[i].findSeq(seq)
		if cellSeq != nil {
			if cellSeq.pos >= beginIndex && cellSeq.pos < endIndex {
				c.cells[i].sequences = slices.DeleteFunc(c.cells[i].sequences, func(s seqCell) bool { return s.seq == seq })
			} else {
				if cellSeq.pos >= endIndex {
					cellSeq.pos += offset
				}
				if i < seqRange.min {
					seqRange.min = i
				}
				if i > seqRange.max {
					seqRange.max = i
				}
			}
		}
	}

	if endIndex != math.MaxInt32 {
		err := c.shift(seq, endIndex, offset)
		if err != nil {
			return err
		}
	}

	c.cellRanges[seq] = seqRange

	return nil
}
47
cache/tensor.go
vendored
Normal file
@@ -0,0 +1,47 @@
package cache

import (
	"github.com/ollama/ollama/ml"
)

type TensorCache struct {
	curLayer int

	cacheCtx     ml.Context
	keys, values []ml.Tensor
}

func NewTensorCache(backend ml.Backend) *TensorCache {
	return &TensorCache{
		cacheCtx: backend.NewContext(),
	}
}

func (c *TensorCache) Close() {
	c.cacheCtx.Close()
}

func (c *TensorCache) Sub(i int) *TensorCache {
	if i >= len(c.keys) {
		c.keys = append(c.keys, make([]ml.Tensor, i-len(c.keys)+1)...)
		c.values = append(c.values, make([]ml.Tensor, i-len(c.values)+1)...)
	}

	c.curLayer = i

	return c
}

func (c *TensorCache) Get(ctx ml.Context) (ml.Tensor, ml.Tensor, ml.Tensor) {
	return c.keys[c.curLayer], c.values[c.curLayer], nil
}

func (c *TensorCache) Put(ctx ml.Context, key, value ml.Tensor) {
	if c.keys[c.curLayer] == nil || c.values[c.curLayer] == nil {
		c.keys[c.curLayer] = c.cacheCtx.Zeros(key.DType(), key.Shape()...)
		c.values[c.curLayer] = c.cacheCtx.Zeros(value.DType(), value.Shape()...)
	}

	ctx.Forward(key.Copy(ctx, c.keys[c.curLayer]))
	ctx.Forward(value.Copy(ctx, c.values[c.curLayer]))
}
@@ -35,9 +35,9 @@ import (
	"github.com/ollama/ollama/envconfig"
	"github.com/ollama/ollama/format"
	"github.com/ollama/ollama/llama"
	"github.com/ollama/ollama/llama/runner"
	"github.com/ollama/ollama/parser"
	"github.com/ollama/ollama/progress"
	"github.com/ollama/ollama/runner"
	"github.com/ollama/ollama/server"
	"github.com/ollama/ollama/types/model"
	"github.com/ollama/ollama/version"
@@ -59,7 +59,7 @@ func getModelfileName(cmd *cobra.Command) (string, error) {

	_, err = os.Stat(absName)
	if err != nil {
		return filename, err
		return "", err
	}

	return absName, nil
@@ -338,7 +338,10 @@ func RunHandler(cmd *cobra.Command, args []string) error {
		return err
	}

	opts.MultiModal = len(info.ProjectorInfo) != 0
	// TODO(jessegross): We should either find another way to know if this is
	// a vision model or remove the logic. Also consider that other modalities will
	// need different behavior anyways.
	opts.MultiModal = true
	opts.ParentModel = info.Details.ParentModel

	if interactive {

@@ -279,7 +279,7 @@ func TestGetModelfileName(t *testing.T) {
			name:          "no modelfile specified, no modelfile exists",
			modelfileName: "",
			fileExists:    false,
			expectedName:  "Modelfile",
			expectedName:  "",
			expectedErr:   os.ErrNotExist,
		},
		{
@@ -293,7 +293,7 @@ func TestGetModelfileName(t *testing.T) {
			name:          "modelfile specified, no modelfile exists",
			modelfileName: "crazyfile",
			fileExists:    false,
			expectedName:  "crazyfile",
			expectedName:  "",
			expectedErr:   os.ErrNotExist,
		},
		{
@@ -4,7 +4,7 @@ import (
	"fmt"
	"os"

	"github.com/ollama/ollama/llama/runner"
	"github.com/ollama/ollama/runner"
)

func main() {
@@ -9,7 +9,7 @@ import (
	"log/slog"
	"strings"

	"github.com/ollama/ollama/llm"
	"github.com/ollama/ollama/fs/ggml"
)

type ModelParameters struct {
@@ -27,8 +27,8 @@ type AdapterParameters struct {
	} `json:"lora_parameters"`
}

func (ModelParameters) KV(t *Tokenizer) llm.KV {
	kv := llm.KV{
func (ModelParameters) KV(t *Tokenizer) ggml.KV {
	kv := ggml.KV{
		"general.file_type":            uint32(1),
		"general.quantization_version": uint32(2),
		"tokenizer.ggml.pre":           t.Pre,
@@ -54,7 +54,7 @@ func (ModelParameters) KV(t *Tokenizer) llm.KV {
	return kv
}

func (p AdapterParameters) KV() llm.KV {
func (p AdapterParameters) KV() ggml.KV {
	var alpha float32
	if p.LoraParameters.Alpha == 0 {
		alpha = float32(p.Alpha)
@@ -62,7 +62,7 @@ func (p AdapterParameters) KV() llm.KV {
		alpha = p.LoraParameters.Alpha
	}

	kv := llm.KV{
	kv := ggml.KV{
		"adapter.lora.alpha": alpha,
		"adapter.type":       "lora",
		"general.file_type":  uint32(1),
@@ -79,19 +79,19 @@ func (ModelParameters) specialTokenTypes() []string {
	}
}

func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
	return llm.WriteGGUF(ws, kv, ts)
func (ModelParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
	return ggml.WriteGGUF(ws, kv, ts)
}

func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
	return llm.WriteGGUF(ws, kv, ts)
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
	return ggml.WriteGGUF(ws, kv, ts)
}

type ModelConverter interface {
	// KV maps parameters to LLM key-values
	KV(*Tokenizer) llm.KV
	KV(*Tokenizer) ggml.KV
	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
	Tensors([]Tensor) []llm.Tensor
	Tensors([]Tensor) []ggml.Tensor
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string
@@ -99,7 +99,7 @@ type ModelConverter interface {
	// specialTokenTypes returns any special token types the model uses
	specialTokenTypes() []string
	// writeFile writes the model to the provided io.WriteSeeker
	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
	writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
}

type moreParser interface {
@@ -108,17 +108,17 @@ type moreParser interface {

type AdapterConverter interface {
	// KV maps parameters to LLM key-values
	KV(llm.KV) llm.KV
	KV(ggml.KV) ggml.KV
	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
	Tensors([]Tensor) []llm.Tensor
	Tensors([]Tensor) []ggml.Tensor
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string

	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
	writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
}

func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error {
	bts, err := fs.ReadFile(fsys, "adapter_config.json")
	if err != nil {
		return err
@@ -187,8 +187,12 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
		conv = &gemma2Model{}
	case "Phi3ForCausalLM":
		conv = &phi3Model{}
	case "Qwen2ForCausalLM":
		conv = &qwen2Model{}
	case "BertModel":
		conv = &bertModel{}
	case "CohereForCausalLM":
		conv = &commandrModel{}
	default:
		return errors.New("unsupported architecture")
	}
@@ -8,7 +8,7 @@ import (
	"slices"
	"strings"

	"github.com/ollama/ollama/llm"
	"github.com/ollama/ollama/fs/ggml"
)

type bertModel struct {
@@ -85,7 +85,7 @@ func (p *bertModel) parseMore(fsys fs.FS) error {
	return nil
}

func (p *bertModel) KV(t *Tokenizer) llm.KV {
func (p *bertModel) KV(t *Tokenizer) ggml.KV {
	kv := p.ModelParameters.KV(t)
	kv["general.architecture"] = "bert"
	kv["bert.attention.causal"] = false
@@ -132,8 +132,8 @@ func (p *bertModel) KV(t *Tokenizer) llm.KV {
	return kv
}

func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
	var out []llm.Tensor
func (p *bertModel) Tensors(ts []Tensor) []ggml.Tensor {
	var out []ggml.Tensor
	for _, t := range ts {
		if slices.Contains([]string{
			"embeddings.position_ids",
@@ -143,7 +143,7 @@ func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
			continue
		}

		out = append(out, llm.Tensor{
		out = append(out, ggml.Tensor{
			Name:  t.Name(),
			Kind:  t.Kind(),
			Shape: t.Shape(),
76
convert/convert_commandr.go
Normal file
@@ -0,0 +1,76 @@
package convert

import (
	"cmp"

	"github.com/ollama/ollama/fs/ggml"
)

type commandrModel struct {
	ModelParameters
	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
	HiddenSize            uint32  `json:"hidden_size"`
	HiddenLayers          uint32  `json:"num_hidden_layers"`
	IntermediateSize      uint32  `json:"intermediate_size"`
	NumAttentionHeads     uint32  `json:"num_attention_heads"`
	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
	LayerNormEPS          float32 `json:"layer_norm_eps"`
	RopeTheta             float32 `json:"rope_theta"`
	UseQKNorm             bool    `json:"use_qk_norm"`
	MaxLength             uint32  `json:"model_max_length"`
	LogitScale            float32 `json:"logit_scale"`
	NCtx                  uint32  `json:"n_ctx"`
}

var _ ModelConverter = (*commandrModel)(nil)

func (p *commandrModel) KV(t *Tokenizer) ggml.KV {
	kv := p.ModelParameters.KV(t)
	kv["general.architecture"] = "command-r"
	kv["general.name"] = "command-r"
	kv["command-r.context_length"] = cmp.Or(p.MaxLength, p.MaxPositionEmbeddings, p.NCtx)
	kv["command-r.embedding_length"] = p.HiddenSize
	kv["command-r.block_count"] = p.HiddenLayers
	kv["command-r.feed_forward_length"] = p.IntermediateSize
	kv["command-r.attention.head_count"] = p.NumAttentionHeads
	kv["command-r.attention.head_count_kv"] = p.NumKeyValueHeads
	kv["command-r.attention.layer_norm_epsilon"] = p.LayerNormEPS
	kv["command-r.rope.freq_base"] = p.RopeTheta
	kv["command-r.max_position_embeddings"] = cmp.Or(p.MaxLength, p.MaxPositionEmbeddings)
	kv["command-r.logit_scale"] = p.LogitScale
	kv["command-r.rope.scaling.type"] = "none"

	return kv
}

func (p *commandrModel) Tensors(ts []Tensor) []ggml.Tensor {
	var out []ggml.Tensor
	for _, t := range ts {
		out = append(out, ggml.Tensor{
			Name:     t.Name(),
			Kind:     t.Kind(),
			Shape:    t.Shape(),
			WriterTo: t,
		})
	}

	return out
}

func (p *commandrModel) Replacements() []string {
	return []string{
		"self_attn.q_norm", "attn_q_norm",
		"self_attn.k_norm", "attn_k_norm",
		"model.layers", "blk",
		"input_layernorm", "attn_norm",
		"mlp.down_proj", "ffn_down",
		"mlp.gate_proj", "ffn_gate",
		"mlp.up_proj", "ffn_up",
		"self_attn.k_proj", "attn_k",
		"self_attn.o_proj", "attn_output",
		"self_attn.q_proj", "attn_q",
		"self_attn.v_proj", "attn_v",
		"model.norm", "output_norm",
		"model.embed_tokens", "token_embd",
	}
}
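
The context-length fallbacks above use `cmp.Or` from the Go standard library (Go 1.22+), which returns its first non-zero argument. A quick illustration with made-up values:

```go
package main

import (
	"cmp"
	"fmt"
)

func main() {
	// cmp.Or returns the first argument that is not the zero value,
	// so unset (zero) config fields fall through to the next source.
	var maxLength, maxPosEmb, nCtx uint32 = 0, 131072, 8192
	fmt.Println(cmp.Or(maxLength, maxPosEmb, nCtx)) // 131072
}
```
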
@@ -6,7 +6,7 @@ import (
	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"

	"github.com/ollama/ollama/llm"
	"github.com/ollama/ollama/fs/ggml"
)

type gemmaModel struct {
@@ -23,7 +23,7 @@ type gemmaModel struct {

var _ ModelConverter = (*gemmaModel)(nil)

func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
func (p *gemmaModel) KV(t *Tokenizer) ggml.KV {
	kv := p.ModelParameters.KV(t)
	kv["general.architecture"] = "gemma"
	kv["gemma.context_length"] = p.MaxPositionEmbeddings
@@ -42,14 +42,14 @@ func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
	return kv
}

func (p *gemmaModel) Tensors(ts []Tensor) []llm.Tensor {
	var out []llm.Tensor
func (p *gemmaModel) Tensors(ts []Tensor) []ggml.Tensor {
	var out []ggml.Tensor
	for _, t := range ts {
		if strings.HasSuffix(t.Name(), "_norm.weight") {
			t.SetRepacker(p.addOne)
		}

		out = append(out, llm.Tensor{
		out = append(out, ggml.Tensor{
			Name:  t.Name(),
			Kind:  t.Kind(),
			Shape: t.Shape(),
@@ -1,8 +1,6 @@
package convert

import (
	"github.com/ollama/ollama/llm"
)
import "github.com/ollama/ollama/fs/ggml"

type gemma2Model struct {
	gemmaModel
@@ -11,7 +9,7 @@ type gemma2Model struct {
	FinalLogitSoftcap float32 `json:"final_logit_softcapping"`
}

func (p *gemma2Model) KV(t *Tokenizer) llm.KV {
func (p *gemma2Model) KV(t *Tokenizer) ggml.KV {
	kv := p.ModelParameters.KV(t)
	kv["general.architecture"] = "gemma2"
	kv["gemma2.context_length"] = p.MaxPositionEmbeddings
@@ -6,7 +6,7 @@ import (
	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"

	"github.com/ollama/ollama/llm"
	"github.com/ollama/ollama/fs/ggml"
)

type gemma2Adapter struct {
@@ -15,14 +15,14 @@ type gemma2Adapter struct {

var _ AdapterConverter = (*gemma2Adapter)(nil)

func (p *gemma2Adapter) KV(baseKV llm.KV) llm.KV {
func (p *gemma2Adapter) KV(baseKV ggml.KV) ggml.KV {
	kv := p.AdapterParameters.KV()
	kv["general.architecture"] = "gemma2"
	return kv
}

func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
	var out []llm.Tensor
func (p *gemma2Adapter) Tensors(ts []Tensor) []ggml.Tensor {
	var out []ggml.Tensor
	for _, t := range ts {
		shape := t.Shape()
		if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
@@ -31,7 +31,7 @@ func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
			t.SetRepacker(p.repack)
		}

		out = append(out, llm.Tensor{
		out = append(out, ggml.Tensor{
			Name:  t.Name(),
			Kind:  t.Kind(),
			Shape: t.Shape(),
@@ -9,7 +9,7 @@ import (
	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"

	"github.com/ollama/ollama/llm"
	"github.com/ollama/ollama/fs/ggml"
)

type llamaModel struct {
@@ -46,7 +46,7 @@ type llamaModel struct {

var _ ModelConverter = (*llamaModel)(nil)

func (p *llamaModel) KV(t *Tokenizer) llm.KV {
func (p *llamaModel) KV(t *Tokenizer) ggml.KV {
	kv := p.ModelParameters.KV(t)
	kv["general.architecture"] = "llama"
	kv["llama.vocab_size"] = p.VocabSize
@@ -120,11 +120,11 @@ func (p *llamaModel) KV(t *Tokenizer) llm.KV {
	return kv
}

func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
	var out []llm.Tensor
func (p *llamaModel) Tensors(ts []Tensor) []ggml.Tensor {
	var out []ggml.Tensor

	if p.RopeScaling.factors != nil {
		out = append(out, llm.Tensor{
		out = append(out, ggml.Tensor{
			Name:  "rope_freqs.weight",
			Kind:  0,
			Shape: []uint64{uint64(len(p.RopeScaling.factors))},
@@ -138,7 +138,7 @@ func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
			t.SetRepacker(p.repack)
		}

		out = append(out, llm.Tensor{
		out = append(out, ggml.Tensor{
			Name:  t.Name(),
			Kind:  t.Kind(),
			Shape: t.Shape(),
@@ -7,7 +7,7 @@ import (
	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"

	"github.com/ollama/ollama/llm"
	"github.com/ollama/ollama/fs/ggml"
)

type llamaAdapter struct {
@@ -18,7 +18,7 @@ type llamaAdapter struct {

var _ AdapterConverter = (*llamaAdapter)(nil)

func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV {
func (p *llamaAdapter) KV(baseKV ggml.KV) ggml.KV {
	kv := p.AdapterParameters.KV()
	kv["general.architecture"] = "llama"
	kv["llama.attention.head_count"] = baseKV["llama.attention.head_count"]
@@ -29,8 +29,8 @@ func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV {
	return kv
}

func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor {
	var out []llm.Tensor
func (p *llamaAdapter) Tensors(ts []Tensor) []ggml.Tensor {
	var out []ggml.Tensor
	for _, t := range ts {
		shape := t.Shape()
		if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
@@ -41,7 +41,7 @@ func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor {
			t.SetRepacker(p.repack)
		}

		out = append(out, llm.Tensor{
		out = append(out, ggml.Tensor{
			Name:  t.Name(),
			Kind:  t.Kind(),
			Shape: shape,
@@ -6,7 +6,7 @@ import (
	"slices"
	"strings"

	"github.com/ollama/ollama/llm"
	"github.com/ollama/ollama/fs/ggml"
)

type mixtralModel struct {
@@ -15,7 +15,7 @@ type mixtralModel struct {
	NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
}

func (p *mixtralModel) KV(t *Tokenizer) llm.KV {
func (p *mixtralModel) KV(t *Tokenizer) ggml.KV {
	kv := p.llamaModel.KV(t)

	if p.NumLocalExperts > 0 {
@@ -29,7 +29,7 @@ func (p *mixtralModel) KV(t *Tokenizer) llm.KV {
	return kv
}

func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor {
func (p *mixtralModel) Tensors(ts []Tensor) []ggml.Tensor {
	oldnew := []string{
		"model.layers", "blk",
		"w1", "ffn_gate_exps",
@@ -56,10 +56,10 @@ func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor {
		return true
	})

	var out []llm.Tensor
	var out []ggml.Tensor
	for n, e := range experts {
		// TODO(mxyng): sanity check experts
		out = append(out, llm.Tensor{
		out = append(out, ggml.Tensor{
			Name:  n,
			Kind:  e[0].Kind(),
			Shape: append([]uint64{uint64(len(e))}, e[0].Shape()...),
@@ -8,7 +8,7 @@ import (
	"strings"
	"sync"

	"github.com/ollama/ollama/llm"
	"github.com/ollama/ollama/fs/ggml"
)

type phi3Model struct {
@@ -37,7 +37,7 @@ type phi3Model struct {

var _ ModelConverter = (*phi3Model)(nil)

func (p *phi3Model) KV(t *Tokenizer) llm.KV {
func (p *phi3Model) KV(t *Tokenizer) ggml.KV {
	kv := p.ModelParameters.KV(t)
	kv["general.architecture"] = "phi3"
	kv["phi3.context_length"] = p.MaxPositionEmbeddings
@@ -68,19 +68,19 @@ func (p *phi3Model) KV(t *Tokenizer) llm.KV {
	return kv
}

func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
func (p *phi3Model) Tensors(ts []Tensor) []ggml.Tensor {
	var addRopeFactors sync.Once

	out := make([]llm.Tensor, 0, len(ts)+2)
	out := make([]ggml.Tensor, 0, len(ts)+2)
	for _, t := range ts {
		if strings.HasPrefix(t.Name(), "blk.0.") {
			addRopeFactors.Do(func() {
				out = append(out, llm.Tensor{
				out = append(out, ggml.Tensor{
					Name:     "rope_factors_long.weight",
					Kind:     0,
					Shape:    []uint64{uint64(len(p.RopeScaling.LongFactor))},
					WriterTo: p.RopeScaling.LongFactor,
				}, llm.Tensor{
				}, ggml.Tensor{
					Name:  "rope_factors_short.weight",
					Kind:  0,
					Shape: []uint64{uint64(len(p.RopeScaling.ShortFactor))},
@@ -89,7 +89,7 @@ func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
			})
		}

		out = append(out, llm.Tensor{
		out = append(out, ggml.Tensor{
			Name:  t.Name(),
			Kind:  t.Kind(),
			Shape: t.Shape(),
79
convert/convert_qwen2.go
Normal file
@@ -0,0 +1,79 @@
package convert

import "github.com/ollama/ollama/fs/ggml"

type qwen2Model struct {
	ModelParameters
	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
	HiddenSize            uint32  `json:"hidden_size"`
	HiddenLayers          uint32  `json:"num_hidden_layers"`
	IntermediateSize      uint32  `json:"intermediate_size"`
	NumAttentionHeads     uint32  `json:"num_attention_heads"`
	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
	RopeTheta             float32 `json:"rope_theta"`
	RopeScaling           struct {
		Type                          string     `json:"type"`
		Factor                        ropeFactor `json:"factor"`
		OriginalMaxPositionEmbeddings uint32     `json:"original_max_position_embeddings"`
	} `json:"rope_scaling"`
	RMSNormEPS float32 `json:"rms_norm_eps"`
}

var _ ModelConverter = (*qwen2Model)(nil)

func (q *qwen2Model) KV(t *Tokenizer) ggml.KV {
	kv := q.ModelParameters.KV(t)
	kv["general.architecture"] = "qwen2"
	kv["qwen2.block_count"] = q.HiddenLayers
	kv["qwen2.context_length"] = q.MaxPositionEmbeddings
	kv["qwen2.embedding_length"] = q.HiddenSize
	kv["qwen2.feed_forward_length"] = q.IntermediateSize
	kv["qwen2.attention.head_count"] = q.NumAttentionHeads
	kv["qwen2.attention.head_count_kv"] = q.NumKeyValueHeads
	kv["qwen2.rope.freq_base"] = q.RopeTheta
	kv["qwen2.attention.layer_norm_rms_epsilon"] = q.RMSNormEPS

	switch q.RopeScaling.Type {
	case "":
		// no scaling
	case "yarn":
		kv["qwen2.rope.scaling.type"] = q.RopeScaling.Type
		kv["qwen2.rope.scaling.factor"] = q.RopeScaling.Factor
	default:
		panic("unknown rope scaling type")
	}
	return kv
}

func (q *qwen2Model) Tensors(ts []Tensor) []ggml.Tensor {
	var out []ggml.Tensor
	for _, t := range ts {
		out = append(out, ggml.Tensor{
			Name:     t.Name(),
			Kind:     t.Kind(),
			Shape:    t.Shape(),
			WriterTo: t,
		})
	}

	return out
}

func (p *qwen2Model) Replacements() []string {
	return []string{
		"lm_head", "output",
		"model.embed_tokens", "token_embd",
		"model.layers", "blk",
		"input_layernorm", "attn_norm",
		"self_attn.k_proj", "attn_k",
		"self_attn.v_proj", "attn_v",
		"self_attn.q_proj", "attn_q",
		"self_attn.o_proj", "attn_output",
		"mlp.down_proj", "ffn_down",
		"mlp.gate_proj", "ffn_gate",
		"mlp.up_proj", "ffn_up",
		"post_attention_layernorm", "ffn_norm",
		"model.norm", "output_norm",
	}
}
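
Per the ModelConverter doc comment, these pairs are old/new arguments for `strings.Replacer`. A small sketch of how one tensor name is mapped; the pairs are taken from the list above, while the surrounding `main` is illustrative only:

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// The same old/new pairs qwen2Model.Replacements returns,
	// fed directly to strings.NewReplacer.
	r := strings.NewReplacer(
		"model.layers", "blk",
		"self_attn.q_proj", "attn_q",
	)
	fmt.Println(r.Replace("model.layers.0.self_attn.q_proj.weight"))
	// blk.0.attn_q.weight
}
```
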
@@ -20,7 +20,7 @@ import (

	"golang.org/x/exp/maps"

	"github.com/ollama/ollama/llm"
	"github.com/ollama/ollama/fs/ggml"
)

type tensorData struct {
@@ -29,7 +29,7 @@ type tensorData struct {
	Shape  []int  `json:"shape"`
}

func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, *llm.Tensors) {
func convertFull(t *testing.T, fsys fs.FS) (*os.File, ggml.KV, ggml.Tensors) {
	t.Helper()

	f, err := os.CreateTemp(t.TempDir(), "f16")
@@ -48,7 +48,7 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, *llm.Tensors) {
	}
	t.Cleanup(func() { r.Close() })

	m, _, err := llm.DecodeGGML(r, math.MaxInt)
	m, _, err := ggml.Decode(r, math.MaxInt)
	if err != nil {
		t.Fatal(err)
	}
@@ -60,7 +60,7 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, *llm.Tensors) {
	return r, m.KV(), m.Tensors()
}

func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors *llm.Tensors) map[string]string {
func generateResultsJSON(t *testing.T, f *os.File, kv ggml.KV, tensors ggml.Tensors) map[string]string {
	actual := make(map[string]string)
	for k, v := range kv {
		if s, ok := v.(json.Marshaler); !ok {
@@ -75,7 +75,7 @@ func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors *llm.Tenso
		}
	}

	for _, tensor := range tensors.Items {
	for _, tensor := range tensors.Items() {
		sha256sum := sha256.New()
		sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
		if _, err := io.Copy(sha256sum, sr); err != nil {
@@ -108,6 +108,8 @@ func TestConvertModel(t *testing.T) {
		"Phi-3-mini-128k-instruct",
		"all-MiniLM-L6-v2",
		"gemma-2-9b-it",
		"Qwen2.5-0.5B-Instruct",
		"c4ai-command-r-v01",
	}

	for i := range cases {
@@ -330,7 +332,7 @@ func TestConvertAdapter(t *testing.T) {
	}
	defer r.Close()

	m, _, err := llm.DecodeGGML(r, math.MaxInt)
	m, _, err := ggml.Decode(r, math.MaxInt)
	if err != nil {
		t.Fatal(err)
	}
314
convert/testdata/Qwen2.5-0.5B-Instruct.json
vendored
Normal file
314
convert/testdata/Qwen2.5-0.5B-Instruct.json
vendored
Normal file
@@ -0,0 +1,314 @@
|
||||
{
  "general.architecture": "qwen2",
  "general.file_type": "1",
  "general.parameter_count": "494032768",
  "general.quantization_version": "2",
  "output_norm.weight": "93a01a6db3419e85320a244bbf8ae81c43033b1d10c342bea3797ff2ce348390",
  "qwen2.attention.head_count": "14",
  "qwen2.attention.head_count_kv": "2",
  "qwen2.attention.layer_norm_rms_epsilon": "1e-06",
  "qwen2.block_count": "24",
  "qwen2.context_length": "32768",
  "qwen2.embedding_length": "896",
  "qwen2.feed_forward_length": "4864",
  "qwen2.rope.freq_base": "1e+06",
  "token_embd.weight": "d74257dc547b48be5ae7b93f1c9af072c0c42dbbb85503078e25c59cd09e68d0",
  "tokenizer.ggml.add_eos_token": "false",
  "tokenizer.ggml.add_padding_token": "false",
  "tokenizer.ggml.eos_token_id": "151645",
  "tokenizer.ggml.merges": "6b1b1c58f1223d74f9095929d3e6416cdd74784440221a5507b87b8197f2bfd2",
  "tokenizer.ggml.model": "gpt2",
  "tokenizer.ggml.padding_token_id": "151643",
  "tokenizer.ggml.pre": "qwen2",
  "tokenizer.ggml.scores": "94e247e531e8b0fa3d248f3de09c9beae0c87da8106208a8edfaac0b8ec4b53d",
  "tokenizer.ggml.token_type": "b178dbc9d1b2e08f84d02918e00fc2de2619a250e6c188c91a6605f701860055",
  "tokenizer.ggml.tokens": "1d93f6679b23a1152b725f7f473792d54d53c1040c5250d3e46b42f81e0a1a34",
  "blk.0.attn_k.bias": "5ce6617845f66c34515978d23d52e729c298d8bffa28c356a0428bef17142cf1",
  "blk.0.attn_k.weight": "a960832a9e0e83e4d95402e5d1a01cc74300fcca0c381237162126330e1a7af8",
  "blk.0.attn_norm.weight": "32c7d51cd0958f1f1771174192db341f9770516d7595a2f0fd18a4d78bd5aba3",
  "blk.0.attn_output.weight": "c67e6e7e868354a11bf9121c70ee56c140b20eec611a8955e7dfe54a21d40a98",
  "blk.0.attn_q.bias": "3e9e994eb1f03bccfc82f8bb3c324c920d42d547e07de5be83be12c428645063",
  "blk.0.attn_q.weight": "dc12132f789b97cfa1e3f5775ceb835247fa67aa47400fd09c8f9f3769208583",
  "blk.0.attn_v.bias": "a3fd0757b31fdc78af5ec320332d239c1a79d34e8804df06c5454e86955e8cc9",
  "blk.0.attn_v.weight": "f43094a2134c7ee2dcc52aac3c8b7d9d64fb0295a8adb94cabfd49213f017b84",
  "blk.0.ffn_down.weight": "18c2aec92db14f21976838a8c35d5575f80d0e4b1e05ccc0d8388d5877e80147",
  "blk.0.ffn_gate.weight": "a3a1c4ef38f8f750eabadfe3d83bbb0f77941eec1cc1a388e51852e99c8691f6",
  "blk.0.ffn_norm.weight": "b59b779c42d44b5c4cec41e39b4eb61e0491a07c1b3e946ccb5b8d5c657eda3f",
  "blk.0.ffn_up.weight": "db64f09987ea59449e90abae5a2ffcc20efd9203f0eebec77a6aacb5809d6cff",
  "blk.1.attn_k.bias": "a5c8c5671703ec0aa0143ff70a20ffdd67b5d5790ca1dfa5bba4e87e4071ed9f",
  "blk.1.attn_k.weight": "835c7c7cc95b3cb2e55bd9cac585aa0760a033896621d3e06421f3378c540f7d",
  "blk.1.attn_norm.weight": "f4c36fb6c14fce721fab0de78cc118d6f66e3a3d3ea0017bb14aade24c3c5434",
  "blk.1.attn_output.weight": "cc1e80310c97cef068e48e40b7096f32fa2138519d6209c6a1a9994985999016",
  "blk.1.attn_q.bias": "bc332780e66b0aac80ec5e63ac32344919a840db2fcc8f87bcef16a43a54138e",
  "blk.1.attn_q.weight": "d766f06c925cce38d4b31b2165b3448e1fb49a7d561985f95d9cd2fcba52367a",
  "blk.1.attn_v.bias": "9f486626fb6ed9ac84970a71e9b9818dd2758501fd3f61bb1c08540dcc7a8631",
  "blk.1.attn_v.weight": "e873d1e5bd4f4d6abfd47c0f55119c2c111105838753ee273a03c5ccea25ce5c",
  "blk.1.ffn_down.weight": "b3ce82b093f187344de04284b1783a452de1b72640914609b8f830dc81580521",
  "blk.1.ffn_gate.weight": "5cd44ad237edaca525a28a3ac13975d1b565f576d6a8003237a341ae0d156f2e",
  "blk.1.ffn_norm.weight": "4ac774ee8afaee119610c46aa1ff89fc6c9084a29d226075bc4aa4d2f15f746c",
  "blk.1.ffn_up.weight": "042d81ab5f1983d85c81213232f3bfc05a9302d9dfaa98d931ebba326b6058b8",
  "blk.10.attn_k.bias": "767ecfeacd60a2c2221ac4d76c357190849dd9cdf64ced418d9d0c7949101401",
  "blk.10.attn_k.weight": "a9f3df343227537636be8202303453086375091944e498bad11e0b91e45e8c71",
  "blk.10.attn_norm.weight": "01acd0e7b3e363f873dbfde6f0995ffcce83f5aaa10ff91c31dbf775035f6d5a",
  "blk.10.attn_output.weight": "a531fe660769604ab869f01b203eb115e025cad4c0baeacdd1bcca99cf6d0264",
  "blk.10.attn_q.bias": "356a02c9163dd660c1340fbe1e049b335ac6178891e00996131bba9ab4cb3e59",
  "blk.10.attn_q.weight": "81be0cfb227339d83f954cd8dcf35828441211c6e1d184060e3eb76085041e2f",
  "blk.10.attn_v.bias": "ed0450653284b62f8bf2c2db19c0ff7a6cf3cda1324d0a044c5e3db7bb692bd3",
  "blk.10.attn_v.weight": "c1247ff7092babd2ed979883095b9aa022b2996cab1c77fb9e6176ddc1498d16",
  "blk.10.ffn_down.weight": "fda7544965dc9af874f1062c22151c6cefc8ba08cbe15dc67aa89979e77b2de4",
  "blk.10.ffn_gate.weight": "9f2632b1dee7304d10c70bd38d85bb1f148a628a8468f894f57975b8a2f1d945",
  "blk.10.ffn_norm.weight": "94f8cbd6b17a4d5aabd93fa32930a687db3b11f086142f1cd71c535c11adcad4",
  "blk.10.ffn_up.weight": "8dc2f8db0474939a277a3d89db34c3bcc3381cfea57bd05a8426a164634d9112",
  "blk.11.attn_k.bias": "3b8e5a662b19411e3f6530714b766aad2ee41eebc8161bec9db0bc82d383a6e0",
  "blk.11.attn_k.weight": "2c29f1ed1ce53ce9604e9ea3663c2c373157e909a0d6064a8920005f6d15dad9",
  "blk.11.attn_norm.weight": "48f68a99c3da4ab4c9e492677b606d1b8e0e3de1fdbf6a977523f97b8c21ec31",
  "blk.11.attn_output.weight": "5859f3838a94898b020c23040941ed88f4fcb132db400d0849f30a01f62c0f1c",
  "blk.11.attn_q.bias": "c5ad89a5628f2bd81252ef44ef6bbcbff15c33ad16fba66435509b959c2af6d3",
  "blk.11.attn_q.weight": "d102104e5d61c1e3219564f1d0149fd593db6c6daa9f3872460c84403323cfef",
  "blk.11.attn_v.bias": "8653f7d48c5f75a5b55630819f99ecf01c932f12d33fd1a3ee634613e70edde8",
  "blk.11.attn_v.weight": "e0a7c7d89b9f2d0d781ce85330022229126e130a8600a09d4a5f920f0bbd50b2",
  "blk.11.ffn_down.weight": "4a22b3361eba8bbe1d9a6fda1812618e894c49f13bcacb505defa9badb6b96a6",
  "blk.11.ffn_gate.weight": "484698b206760d3fd8df68b252a3c5bae65c8bf6392fb53a5261b021b6f39144",
  "blk.11.ffn_norm.weight": "da69e96338cbe30882cf5a9544004387f5bbc0bcb6038e61ba2baabbd2623bac",
  "blk.11.ffn_up.weight": "26ec74f1f504d1281715680dfbcc321db4e9900c53932fa40955daceb891b9aa",
  "blk.12.attn_k.bias": "f94b49ec3e498f14f6bc3ebefe1f82018935bbe594df03253bfffae36bc20751",
  "blk.12.attn_k.weight": "ae6323d0bbcfcea01f598d308993d1a7530317e78c1f64923e36d4b1649e9e73",
  "blk.12.attn_norm.weight": "3784536a7611a839a42a29a5cc538c74ee4f9793092e5efe1b227b48f8c4d37f",
  "blk.12.attn_output.weight": "46826c00b066829355db78293ab216e890f5eaaed3a70499ee68785189a6b0d9",
  "blk.12.attn_q.bias": "b14db2d327ce0deec97beda7d3965a56c43e1e63dc9181840fb176b114cf643a",
  "blk.12.attn_q.weight": "30f67df52ced06f76b6c85531657584276a454d6ec9bb7d0c7d2ca8f067f5551",
  "blk.12.attn_v.bias": "57ab4b7e43f4fc5853bca7bfbb2702f8c2c391a49252a760abbb7b26330dc4aa",
  "blk.12.attn_v.weight": "3ccd9da0cfe241cd33a63310f3ca6d81c5bc5a50d200bfea6612ac376166aca2",
  "blk.12.ffn_down.weight": "a095774413198a83c549ce132d7c9684c0baef33145eaa889be370ef9c881c81",
  "blk.12.ffn_gate.weight": "bb3b2bbdfb065d2a0a795909c53beec327781a4a7e974bf9f99c436cea459991",
  "blk.12.ffn_norm.weight": "3b486c6cd97eb4b17967d9d6c0cc3821a1a6ad73d96b4d8fbf980101b32b8dab",
  "blk.12.ffn_up.weight": "d020b82dd39a5d5a9d3881397bf53a567790a07f395284e6eb0f5fe0fef53de3",
  "blk.13.attn_k.bias": "69381f8254586eba3623eceb18697fe79f9b4d8f2c30136acb10d5926e3ba1d0",
  "blk.13.attn_k.weight": "c4d7a31495d71269f81b586203a50abea3a9e2985667faf258c9306ec6030f1d",
  "blk.13.attn_norm.weight": "907da11075d16eda668dabe548af3cfd794df26b8ab53939af1344d91bec6fba",
  "blk.13.attn_output.weight": "ca01cf6d2b8ece2fb3b0f56f1eb76194471ac27b54fe264f99c909f5eb7fef4a",
  "blk.13.attn_q.bias": "2f5ecebafe03b1d485b93c41cff756ca57fb65b02e9d8336f14a3d26ab5d159a",
  "blk.13.attn_q.weight": "f557f8acad7f0fa62da06b5da134182fe04a5bed8bdb269e316f970c9cc440fb",
  "blk.13.attn_v.bias": "a492a88ae131e95714b092545a8752eaea7c7d2f9cb77852628ca8296c415525",
  "blk.13.attn_v.weight": "d1220b1fe9f1cc0a5a88ee239d65fec900f5eaf6c448b6c2cbe74c81e15ed333",
  "blk.13.ffn_down.weight": "53184e33440b49848a896304eb16a983efbc6b8bee0b93de8c8de716e1585fcb",
  "blk.13.ffn_gate.weight": "684bf8896f148c851506c62717e45c426921b93c10d536ecdeb0fb28259a106d",
  "blk.13.ffn_norm.weight": "6cb4e547ad8665eb7c174855c08afe1e5490fece66122522c1e9e8132d9064eb",
  "blk.13.ffn_up.weight": "c64107897e38c06727075aba4ea7940b2cdd0e278b5c555dffb2790ef553bb57",
  "blk.14.attn_k.bias": "2814ca9b160b16ae39557c9b629482fbe3a7592d372c1e1bf1ac59a2d578fde1",
  "blk.14.attn_k.weight": "3377177396463afba667742972920ebb45dfdc37e9950e1f0e1d60a2f936b27d",
  "blk.14.attn_norm.weight": "5cae870477d51dd35a6d22aaeacfce4dff218ffba693820ede6a4e11f02afd6d",
  "blk.14.attn_output.weight": "3cfe9ccf3d48ae9e95b93a132a1c6240189a277d764f58590fb36fdbb714cad0",
  "blk.14.attn_q.bias": "6a75acc2f090b2e67bfc26f7fca080ae8bd7c7aa090ec252e694be66b8b8f038",
  "blk.14.attn_q.weight": "5ef45c86d7dda1df585aa1b827b89823adf679a6bb9c164bd0f97b2aa6eb96f1",
  "blk.14.attn_v.bias": "5534480443e10ed72c31a917f3d104b0f49df5e6dbfa58d0eb5e7318120e3aee",
  "blk.14.attn_v.weight": "58f45cf3240c4623626ec415c7d5441eaa8d2fb184f101aba973f222989422d1",
  "blk.14.ffn_down.weight": "2dc82a0f20c05b77512458738130d8d05ce150cc078680ae7ee6dd7ed68d955d",
  "blk.14.ffn_gate.weight": "d4a6c6f0fcccddfd1fcaa074846622f4a74cb22b9a654ab497abdc1d0dde9450",
  "blk.14.ffn_norm.weight": "777e444932a0212ff3feac98442444e17bd8a98cb758ea3356697d0846d12c56",
  "blk.14.ffn_up.weight": "6b75f6bd00195198447b69a417ed9d98f8ca28b3cb8be82f4bad908be0777d57",
  "blk.15.attn_k.bias": "2d07211a58e6c2f23aa3a6dc03c80a7d135dfb28726b60b0e0fdd0f35ea5c37b",
  "blk.15.attn_k.weight": "e77f3c0075a1810e70df956cc51fd08612f576cc09b6de8708dcae5daedb0739",
  "blk.15.attn_norm.weight": "379a10d90609a5d5ba67d633803eda1424fc61ba5cca8d3bffe70c8b18b58ebf",
  "blk.15.attn_output.weight": "402751c12ee9dbc9db5e3bf66a7b23ebe7d36c0500e0be67be4c8b1c4357fa62",
  "blk.15.attn_q.bias": "acb37fc409ee725ceedf7a3a41b40106086abc47b76780728f781942c5120208",
  "blk.15.attn_q.weight": "89cd3047a09b46ed2bb57c69dd687f67a1f0235149b30376fa31b525898e4a55",
  "blk.15.attn_v.bias": "f081a37289cbe811978feb4da3ef543bdeb7355414d476f44e09b498da10cb2c",
  "blk.15.attn_v.weight": "8404f242a11e6d512c9ead9b2f083cda031e9b269f8a0a83f57ee4c56934764e",
  "blk.15.ffn_down.weight": "93438f43ee8cc4f1a7fd3840a6afdd5f02123e76db4f0d9474430c0100d148fc",
  "blk.15.ffn_gate.weight": "ff935a2698843e87fad9dbf7125f53e460190ec71ee128b650b3fc027fe37bfc",
  "blk.15.ffn_norm.weight": "4be80f199841cba831982e988451e1833c3c938a4d6ca1169319087bf0bd723e",
  "blk.15.ffn_up.weight": "ee9ba63c66d71053e33551ddd519878bb30b88eeb03cfe047119c5c4000fb0a6",
  "blk.16.attn_k.bias": "3f5fbabed4510c620b99d9d542739295fa6a262a7157f3a00a4889253f8341b8",
  "blk.16.attn_k.weight": "8ca6eb139b281c257324cddea97a8e9aa7c048b53075cf00153123b967c27ee5",
  "blk.16.attn_norm.weight": "290157f005e5aa7dddf4bd60100e7ee7b0baa7f11ec5c2cea5e0ead2aad3a4c6",
  "blk.16.attn_output.weight": "b1f4d80a7447f08f1c331712527f750d00147f35c042442ade96fd029dadc5a1",
  "blk.16.attn_q.bias": "e3e4e442ad4416791b468cad8de0d0d2d68c7e7df8d06002f4d49b4da9cb25e4",
  "blk.16.attn_q.weight": "cc7392fa5bb1107d3816e7e7363de252d37efd4165d065e258806291ce0a147b",
  "blk.16.attn_v.bias": "a7629830f2f6293e018916849614636d40b1bcd11245f75dbc34d38abae8f324",
  "blk.16.attn_v.weight": "b6c7856c7d594437630929c8cf3b31d476e817875daf1095334ec08e40c5e355",
  "blk.16.ffn_down.weight": "f9c0a777a00170990a4982d5a06717511bf9b0dd08aeaab64d9040d59bcbebba",
  "blk.16.ffn_gate.weight": "ed88f11bc3176c9f22004e3559ccb9830a278b75edd05e11971d51c014bd5cd2",
  "blk.16.ffn_norm.weight": "ab24abdcc4957895e434c6bb3a5237a71ff5044efb9f76c1a9e76e280c128410",
  "blk.16.ffn_up.weight": "99f594dc8db37f554efa606e71d215fbc3907aa464a54038d6e40e9229a547ff",
  "blk.17.attn_k.bias": "f236625676f9b2faa6781c7184d12d84c089c130d2a9350a6cf70210990f6bf1",
  "blk.17.attn_k.weight": "c2a4f20cd3e98538308a13afe9cc5880bdd90d543449c6072dedd694b511ee1a",
  "blk.17.attn_norm.weight": "5a9da4ee168311f487a79fc9d065a035432c6cafa8adb963a84954cf32f57a2a",
  "blk.17.attn_output.weight": "d5df7031e354186ce65dc09d6f8a92eb721c0319816f8596b0c8a5d148ed0a2a",
  "blk.17.attn_q.bias": "3212d5eeaa7ed7fac93cc99e16544de93c01bb681ae9391256ed4a8671fc6b00",
  "blk.17.attn_q.weight": "d18cd9aa7ee10c551cb705549fa1ae974aea233f86471c9a19022dc29b63d0d5",
  "blk.17.attn_v.bias": "a74ad11a1f8357742f80e2a0c0b3a2578fc8bbaf14c8223000767e07a5d79703",
  "blk.17.attn_v.weight": "da18ac0e90884436a1cb0ad6a067f97a37f321b03c70b8b03bf481339fef5c80",
  "blk.17.ffn_down.weight": "81a8a5d7a194fb53d976558e0347efbe9fdb1effffde9634c70162e1a20eff51",
  "blk.17.ffn_gate.weight": "72870d83ab62f2dcd45f593924e291a45e4ae1b87f804b5b88aa34cfd76dd15e",
  "blk.17.ffn_norm.weight": "cae39ac69b9bdaeefab7533796fdf11dbb7a4bdbdeed601e20f209503aafe008",
  "blk.17.ffn_up.weight": "e7cb40b0842468507cec0e502bbed8a86428b51d439e3466bc12f44b2754e28f",
  "blk.18.attn_k.bias": "8bfc02b94f9587aa125e2d8bbc2b15f0a5eb8f378d8b3e64a8150ae0a8ca3df2",
  "blk.18.attn_k.weight": "434bc3b3332ea48afee890aa689eb458a75c50bc783492b0cbf64d42db40e8ad",
  "blk.18.attn_norm.weight": "d6ffc09396c42a70d1f0e97d81113eee704d3bfc9eeae2bed022075a5dd08075",
  "blk.18.attn_output.weight": "133f001f81f3b082468a7de67cb2e7a76508fce34bcc4dee7f0858e06eee082c",
  "blk.18.attn_q.bias": "758d0e28bf5e660b3090aafb70e2a3191b4f3bb218d65e9139a086ceacaf599f",
  "blk.18.attn_q.weight": "12d7b86fc1b09b9fa7f8b7ed43d8a410892cec8672d0c752f8346f6193343696",
  "blk.18.attn_v.bias": "9efd15bab0519462431d6c6e8a5b7dd4e151dc449468097ee0ddca369c0ecc2e",
  "blk.18.attn_v.weight": "f631231a79d4a2e9730fb2e386d8c18621eb3fb7900fbfdff5e6d52cc42db122",
  "blk.18.ffn_down.weight": "874a2dddf456f3ab56b958b0860d71c8c680a6f89322c9bf6b2f32a113592300",
  "blk.18.ffn_gate.weight": "4549ef8976c345a511df4a7133bdaf6fe387335f52dfd8a4605a8ae3f728c403",
  "blk.18.ffn_norm.weight": "80c258a2536a860e19bfcbd9f29afa13214fbb4c34bde0d4da51287d354e9a59",
  "blk.18.ffn_up.weight": "8b03308a581457a3c038b7a086f3cdf14941d7ad4107c4bd6d9d6b062fd00d73",
  "blk.19.attn_k.bias": "e77f7b0c8e3e0a9b0d61918cd88371047752a1b02b1576936f4ec807d4d870ee",
  "blk.19.attn_k.weight": "a2a318e93355230c0d0f95c441b080bf9c4914507255f363fb67a5e771d4d1e6",
  "blk.19.attn_norm.weight": "9a4bdeb3970be21ac74a94c2c81eb36986533db81b78db6edec48d9802910d59",
  "blk.19.attn_output.weight": "2369b103dd3947e2cef02b2669b405af5957fb3a7f9d0ff40646078c4b4317ad",
  "blk.19.attn_q.bias": "e20bf427bef69059ae84a5d9f98f7d688489627f198fb6153def018ff9fd2e34",
  "blk.19.attn_q.weight": "45a3bb3bdfd2f29dd76e5f78ddae73678b9a2a85dfaf609e460240ef5b7be2ad",
  "blk.19.attn_v.bias": "a441f58a3e02ed86ee1819eefc9bd4e8b70d11b864a929d58a2c2ac0aeb8203d",
  "blk.19.attn_v.weight": "30b0b04480c510450a7abb2ce9fa05c65b150a3cc4dc76f8916bf8d013f1b6be",
  "blk.19.ffn_down.weight": "eebb9ab8fdb6a6efcfff8cf383adac9ec2d64aeeff703d16ed60d3621f86c395",
  "blk.19.ffn_gate.weight": "3fef1493029298378886586478410b3d2e4e879f6aa83c07e210a7ce6481817f",
  "blk.19.ffn_norm.weight": "e1be99ea1e8fb9678f7b8ba200f3f37e03878f3574d65d57bcd3a9fd796e2112",
  "blk.19.ffn_up.weight": "f07cf25e09394fb69fe3ef324bdc0df9a4cecf3dc53070b8acc39e6d1689bf82",
  "blk.2.attn_k.bias": "b29baa8221f125eff6b8ac1a950fa1d7cfc1bce7bdc636bf3df7d4065ab6466c",
  "blk.2.attn_k.weight": "4bd0c179bced8bc37a09f5748c394e0cf50273942fb38a866e5cf50b6c96c437",
  "blk.2.attn_norm.weight": "07b3edc6a6325c3428aa12f29bcae0be0de363ce61a6af487bc5c93fb8c468d9",
  "blk.2.attn_output.weight": "056b5b31dbc81087c81b9d41c25960aa66c7190004c842ba343979644d7f4d88",
  "blk.2.attn_q.bias": "479b6212401e097767c9d52b12a1adb8961c0fce9fcaaab81f202a9d85744376",
  "blk.2.attn_q.weight": "f89196076f446a6dd8a9eee017f303504f9c03094c326449cee5a7fc0a97fade",
  "blk.2.attn_v.bias": "ef9b1b986dbd9d7291027a88b67dc31434435b20e76e4f1e9d6273ebd31224f0",
  "blk.2.attn_v.weight": "9322f4f00e85f8c0936845c51ca64b202a93df104f36886986a8452a8e4967a5",
  "blk.2.ffn_down.weight": "7beac0d2440dc49af33ededb85a6cc3ba23ab33ad3ffa5760714b2ef84d94f6e",
  "blk.2.ffn_gate.weight": "818a93864a5890c1f4dc66429004fad07645a50142350e9bff9a68fe24608a52",
  "blk.2.ffn_norm.weight": "152c924d5514942ad274aafb8cc91b35c1db3627c3d973d92f60ff75f3daf9ba",
  "blk.2.ffn_up.weight": "9c9579e600f209546db6015c9acfeda4f51b6d3cca6e8db4d20a04285fe61a37",
  "blk.20.attn_k.bias": "fd22bfeffb63d818ce2ff1ea2ace0db5d940f7a9489b6bfc1ec4a5398848d7fe",
  "blk.20.attn_k.weight": "f74439bc74c2f9252130c9c28384fd7352368b58bb7ce3f2444cf0288dfff861",
  "blk.20.attn_norm.weight": "5c15d2613df87be6495fb7546b7dcedd2801d12fa5ecc02c877df889330e8f37",
  "blk.20.attn_output.weight": "6731a39286a67f6859832f96695732e579e14e0c36956eccd1edce3db11595b8",
  "blk.20.attn_q.bias": "04466e5a3f454a19b9b433fc2585396feac780027ece7ccb4e4bb3e406fc14d8",
  "blk.20.attn_q.weight": "ead4c71daaeb17bf20d014a34c88b97f238456488e815ae0f281a5daf6fc99b8",
  "blk.20.attn_v.bias": "adcc848e043025de9bd55ccb14dd8fb6343e8b5185ed07e12964be41d0faf99f",
  "blk.20.attn_v.weight": "81bfc23f83526386a4761c2c16b6a93cd0bbf9d846c1a51b82c71f1474a465f1",
  "blk.20.ffn_down.weight": "9bf660af3bafad919d03173c89a65fc9c89440a76c42c9e55e4d171076f3c17f",
  "blk.20.ffn_gate.weight": "c04b4f3ccce44917ee228b998e2c19dd702aef10a43413afb152e808b5ac5c42",
  "blk.20.ffn_norm.weight": "3d5b555d7746a71220143c6b8fff5ce4eb63283d9d9c772f1233d848f69f4ff4",
  "blk.20.ffn_up.weight": "d7a196505c39e5469dfc7c6958bdbb54e93629ac1a047a6663ed96b318753094",
  "blk.21.attn_k.bias": "4db1f48e5c6a3bc5720a5da813bbef08283e6269e12d83f8a9c54e52715d8011",
  "blk.21.attn_k.weight": "c687b2f0e132a5e220a2a059b61aa2a537f37d8a674d7709f87880637b263b31",
  "blk.21.attn_norm.weight": "ec23b0ff847a4b45585ab8e04f10fc20bb1637c5f1fbcdc4d73f336bcb5d1bd0",
  "blk.21.attn_output.weight": "01255390576316c1731ef201e32c6e934eba356c28438cd06d9027ac6a3ff84f",
  "blk.21.attn_q.bias": "3098f37205a15418e1681e407c82b7ce7c6fda6c6826b0590a13e1b68a38a1ea",
  "blk.21.attn_q.weight": "30ea62cbb702a5359229dc96819df17ee535e2e9988d044b005c73ea536e1005",
  "blk.21.attn_v.bias": "7bbedb2c22a04737f21993115701d4a06b985b7ca3b64681f53cd1be8d7ea39e",
  "blk.21.attn_v.weight": "e11905e63579e36fbee978062af7599339ae29633765a4835628d79a795ec8df",
  "blk.21.ffn_down.weight": "84def2ffd8aca766f9ce12ed9ac76919ab81eb34bdeae44fa4224417c38af527",
  "blk.21.ffn_gate.weight": "4e99f05377b4a0b8d875045530a5c59dee6a46ac8a45597f6579f6fdfa800787",
  "blk.21.ffn_norm.weight": "af48f13d03fba38ff8794a5f5005e666e501f971ca2e30bbded2777a8096f37d",
  "blk.21.ffn_up.weight": "a29541c39a6acbc364be86994632a5bf55d701027cb7f23320f8c6d55ee42c91",
  "blk.22.attn_k.bias": "c97f84db6c75422df6ef5768676d4e9abefaa3b8337aa2730ff260f8fc350480",
  "blk.22.attn_k.weight": "af9a0c56f68779513e95be11611b7be6175ddae27d48bee9dd72fdbf05f6cbfa",
  "blk.22.attn_norm.weight": "1c7518eb5bcff4a202c6f4a2827f14abd76f9bcc64ce75fe9db60b69437a5c9c",
  "blk.22.attn_output.weight": "1abcf1f3caa2f59dd018646b93f9cf8fd30d49e98a473e6a8704419a751be46f",
  "blk.22.attn_q.bias": "7221e01cb692faf2f7f8c2eb6e2fac38a1b751a9c9fdb6a21a0a936eb0bf4b96",
  "blk.22.attn_q.weight": "faaf8fb7b6c19f343d47f3ea6b57151fb46c787e0b3bd2c292fd327d3d4d8e35",
  "blk.22.attn_v.bias": "3ec05942e82d735de99dfd0d8228d8425e63e2fc584da98b3326bdef89ecb2e5",
  "blk.22.attn_v.weight": "42e7b0ad06db76227837da9d4e74b2db97f3df4050ecb3a87cb9b55e08dfcb42",
  "blk.22.ffn_down.weight": "87ef98ad2d0e824b0fa5ad8aa18787162922e527c9b1b721a99bc07d3bf97c82",
  "blk.22.ffn_gate.weight": "562d6e5a1654b03aaa0e33864d23c10297fd4bcaa72d30fac69fb771ee1df9d6",
  "blk.22.ffn_norm.weight": "f8a405dee467749d59427ce05cdd4b9c11bb18934a89258ea461f013b7d251f5",
  "blk.22.ffn_up.weight": "90e1f4ae4062649d4d838399eb353e8bb8d56a49982b6a7f64aa3945377f7187",
  "blk.23.attn_k.bias": "9ad22178a85f3be7e25f5aff462f31627466364f2f5e92f265cc91db0da9a8a8",
  "blk.23.attn_k.weight": "d813beffb10f03278f5b58eea0f9d73cdcb7b5b4045ae025c379592e854f7dfd",
  "blk.23.attn_norm.weight": "f583c9836044bdb056d6f8911088ac28add68e500043ae1f97b5d9158fe3d769",
  "blk.23.attn_output.weight": "02789911ac3b97f6b761e958b7dd6dc7da61a46a1be92bd0b346039ca7ecd2b2",
  "blk.23.attn_q.bias": "38c4970fb9b4f7e4a139258a45639d848653814b4bc89ea9849709b13f16414b",
  "blk.23.attn_q.weight": "eb694be9a5ab5858b8dab064ee4cce247dc757424e65282989bd4d015b8580ce",
  "blk.23.attn_v.bias": "0a25f6533aa7e7a152a4b198cf6c411c2408a34afa4f161bb4d5ffba2f74e33f",
  "blk.23.attn_v.weight": "187e1bac6b70f74e6364de226565aa8275ee2854d09cbe5895451a689596049e",
  "blk.23.ffn_down.weight": "88880dd9ba7ee80ade972927f810b5d2c30a69520c615190b27f9daabc0a8c5a",
  "blk.23.ffn_gate.weight": "5abec63197935ab3eb8e6de0a5307396ec46cdb1cc5de25d87c845f3c4a3e887",
  "blk.23.ffn_norm.weight": "60e1f5e6310c3a531c554a6bb7cd883aed58db1e51853f739436ea461c1843d7",
  "blk.23.ffn_up.weight": "3d7f502771743f4a634188dfcd8b8a384fb07467ca8528366aee59ddb25b7bce",
  "blk.3.attn_k.bias": "0b6b442ebbac29c8c4b67e8e3876d0382dd2dc52efdf4ab0ebbc6f71b6252393",
  "blk.3.attn_k.weight": "480f40584fbda692c26f2cee45f5923780b236f8b4e8ec7bbee0237777a0918d",
  "blk.3.attn_norm.weight": "39872be2af31bc9cd6b583ebba6fb759f621d586d66e5a2fc0b85991615a8923",
  "blk.3.attn_output.weight": "924b2c80d8513bf637f8ebb3756a340d9cf2243de723fd08d7f5dccd46b3f8b6",
  "blk.3.attn_q.bias": "863c9d848156847a3fe9bbc44415a4395245b5d13e95673c014fdb71e494ab0a",
  "blk.3.attn_q.weight": "bff73ee5de92fba8f6c089bbb19ce57e17ab3c9c29295712804bb752711b882e",
  "blk.3.attn_v.bias": "e1b6fea126e86189112fcdfee79ffc66a087461527bc9c2dc52dc80f3b7de95e",
  "blk.3.attn_v.weight": "7812b7f5133636f06cdbb4dcc48ef7803206538641b6c960777b37f60a8e6752",
  "blk.3.ffn_down.weight": "00b393d6a7e3ad9b5224211ccdbc54a96aae151f24ed631764ac224972a6bc82",
  "blk.3.ffn_gate.weight": "cfd63fa3a038af05dc53c6eeb3c192f1602f26ff24cb840bcf1510fcb37b5513",
  "blk.3.ffn_norm.weight": "7389fc240a282949580ea2f5b0d7973ac79f32f76dc0155b537bb6b751f8e27a",
  "blk.3.ffn_up.weight": "2a945f47090df9cb16f92f1f06c520f156f8e232182eaaed09f257b8947a2a62",
  "blk.4.attn_k.bias": "62533c31f0de498187593f238c6597503fef2a92e920cd540a96bc5311b3b2a0",
  "blk.4.attn_k.weight": "93e829868bffd980a8e589b9c4566cd81e6ce4296a5f357a2ae93febe1284156",
  "blk.4.attn_norm.weight": "9e0aaa4bbdd1389890f8abec20533f3ab16d61b872b1a8dbd623023921c660a9",
  "blk.4.attn_output.weight": "74467d6f44357d67f452ac49da861468b38e98057017bd38bc9a449f9d3538e6",
  "blk.4.attn_q.bias": "8e6d9026fd69b314c1773c5946be2e11daf806ef22a5d91d744344fd30c58c59",
  "blk.4.attn_q.weight": "e5bfbafd94a4d530f3769f5edbba8cc08d9b5bee8f66ebf4cb54e69bc0b7f63b",
  "blk.4.attn_v.bias": "20c570f92022d9905eb85c0e41d1fdb30db22007a9628b51f512f8268d6c34a2",
  "blk.4.attn_v.weight": "9638d459d61da03c9dd34dad985e03c43b4f8a5bc9701a82153478329b0517e0",
  "blk.4.ffn_down.weight": "9d91b06e89d52f4365dece7eaeec50f81e52cb2407b333248a81e6e2f84c05b8",
  "blk.4.ffn_gate.weight": "bf6350a79c6a6ee9146edfd788b88d4a4c2b54db1aa0adcc1464dbba8a84b646",
  "blk.4.ffn_norm.weight": "11a70a6b9f7ce336292f4e3a2c6c92d366d4ee4306ad4fdb1870fde107e9cc31",
  "blk.4.ffn_up.weight": "64f23f493d02b147a72a59605e6b7dd1c4c74f6813a38a2a60818bd66f697347",
  "blk.5.attn_k.bias": "f6c2c279c0ed686f298ad1e5514b5cd882199341f896abbb2c2129d4c64ce9c5",
  "blk.5.attn_k.weight": "0e682f75870abf9efaca10dac5f04c580f42820ecf4e234d43af967019acb86f",
  "blk.5.attn_norm.weight": "01efae7653705e741932fcd79dff3be643d7e97f4b5719b887835dffe44b3a82",
  "blk.5.attn_output.weight": "69e841d00d196acc489cd70bc5ffbbb63530ac5fabb169d40c4fb3a32ebb8ed8",
  "blk.5.attn_q.bias": "f3304d76ccd44fed887565857c8e513b1211d89a5d3e81782de507ab3f6fc045",
  "blk.5.attn_q.weight": "98612a6b7920a247853ada95c240807d4ca8e43604279e7a2fc9bb265ae40469",
  "blk.5.attn_v.bias": "39940a9b353ceed3edfd4a39b985c9520490aa1b9f11749c94fdf6d879d1a259",
  "blk.5.attn_v.weight": "839f84b828cf83aecf479a0dc7bc86cce05145ef77dcf29916dc3e0680f5b665",
  "blk.5.ffn_down.weight": "1f48cbb0960f15e06ab8a3754ade792995a655856389ddbca629c07e89d1b114",
  "blk.5.ffn_gate.weight": "33d8219fce3189e1aab376039896eebd4ad36ebd26a8278cd19b26e4357e4f81",
  "blk.5.ffn_norm.weight": "0f4a0f83d37127fa4483f2905cb4f38ef6ddc71584b6cb05632c62a9af313dda",
  "blk.5.ffn_up.weight": "22a64a11e5f0a1ff45ca327bf9e1efa258f085ff6a96edc398b7474f725b4514",
  "blk.6.attn_k.bias": "baa91df99d4df2d25e8d590bca4e334b97f2d9aa3df8e748fedc8a6188499111",
  "blk.6.attn_k.weight": "121f3b9f4b9491996499392e2688a929cafe102a67920b4cb2a039349c43d8eb",
  "blk.6.attn_norm.weight": "b4cf987e923d71f2f84c58d20ea8af7576b225bf61952145b489fdd395e3d411",
  "blk.6.attn_output.weight": "a112642150a138d54b2a4038042fd33619035a35694771e966f3575856c635d6",
  "blk.6.attn_q.bias": "a97ea10469cdfa3fdddf8bad6de683ef99f6170eb8d29d15dcf6bf4bce37c5a3",
  "blk.6.attn_q.weight": "d80c787019317a87361de6bbc7df6701357216bdd9b404522cede34a719a5500",
  "blk.6.attn_v.bias": "d846269db9cd77ae28da26ba0914cace1b6754bd5301af9c44607085dfcbd2d7",
  "blk.6.attn_v.weight": "06567c433e8a391647633291b50828a076ad7c2436106bb9278c60a3f8fccb3b",
  "blk.6.ffn_down.weight": "f15f66f56b3c474eac8c6315c5fff07c3e29c6e483d7efd4d303c7f43814be91",
  "blk.6.ffn_gate.weight": "47768f89c6da8eefb29adb766ff4eb38c9dfd79320bbc1386248319fcbcf567f",
  "blk.6.ffn_norm.weight": "7f8195e6b148212967145fc9d86ce36b699cff0de026042245c2d344f1ef8510",
  "blk.6.ffn_up.weight": "53d7707ae4347aadb445289f9f87a008b72df5cb855b00080a605442fdd8edf3",
  "blk.7.attn_k.bias": "63e274df3217dde25b8369a383e480fe4f6b403a74385f15ac0b5db71dce2744",
  "blk.7.attn_k.weight": "f6fce88602f5945eee09767acbcad387d132614e6da39ae359f2bbf380d94b1f",
  "blk.7.attn_norm.weight": "bbf5dc7336c0f9a511afef6bf5efeffd78f1b83940850c3eb7eb20c621b75656",
  "blk.7.attn_output.weight": "d9fb907a138396a859cecbfcb377927308dc93c24c7fb52dba5eb59265feadec",
  "blk.7.attn_q.bias": "f02ba1318346af77e309f40aee716e2de7ee8cab67e67b17636db9bf40894fb0",
  "blk.7.attn_q.weight": "54a691e824be287a61c35c172edc01922ed792d2addeee029afc17ba6c7e11b9",
  "blk.7.attn_v.bias": "3a4f182f51e84ce862d558fb2751b91802b65d74596bb14d624808513a8a83ec",
  "blk.7.attn_v.weight": "a142fe6e106d3ab484e2dc6f9c72b8fc0a385279dde08deb1ad1fd05ac25deb1",
  "blk.7.ffn_down.weight": "8daf7e8c430d183a4d6ab3eb575fafa4b5e31689f68b290c8b370411ad9d0f12",
  "blk.7.ffn_gate.weight": "a2a786b45eb660994254b48e2aaf22f3e9821cfb383dee0ba04cc4350a2f8e72",
  "blk.7.ffn_norm.weight": "73828bbc8c9610cc139fcf03e96272648cdc291263251fe3a67367408deb69e1",
  "blk.7.ffn_up.weight": "e85dd0f63fed449ce16893c5795ea6a050a2d7a66d9534410a227e22c905dafa",
  "blk.8.attn_k.bias": "91a752a6e2c364e5ee6a015770fe289aece4911ae6c6bbfe74ac52f465465f93",
  "blk.8.attn_k.weight": "99c069e92c43a2efb74e23188256b3cabbbe06399878e681ce203a05d5da378a",
  "blk.8.attn_norm.weight": "c76d36d3cc06aa2a9edb1abf9f602bb7ed61ac9d61f8ef7ed736a1e619abe717",
  "blk.8.attn_output.weight": "ee5ff156a2625e1f203f65e69b514f9df04bd9a5e82b28e3876e16cf1c6f65c5",
  "blk.8.attn_q.bias": "8fbd868a93b330c8b0418b488c5301f42a7eb0c58445a4e515d56777f1d96ed5",
  "blk.8.attn_q.weight": "9f20ef86e80098ba52a3a31ebcc315bea3a614dac9cba7ac1db02f156db9b577",
  "blk.8.attn_v.bias": "c4813571d5d618742183a7890c0b89cd7f18e210c758f63aad564659bc38a26d",
  "blk.8.attn_v.weight": "ea88e1a4cf8bd56e9a88ada427d2b0cd352234827640757ee2a9ed594fb67a53",
  "blk.8.ffn_down.weight": "b0d1a7495811580b189aaa3e20ea871d6d01ed7b6c23e59825078ef786944ff2",
  "blk.8.ffn_gate.weight": "0a17c0caa0b06721c49b59b2a63a5dcbf744dd1cffa55962b404ba910c658a62",
  "blk.8.ffn_norm.weight": "f15f109d4a8e9d1ff7c71fa5bc6373df7ee80c5f7d1de3fa0d4849d747e36bcb",
  "blk.8.ffn_up.weight": "bbf4c5c4c5c8a0f9ae8b88e3cc8b86f81b98148722d5a350995af176c0b774f2",
  "blk.9.attn_k.bias": "a7f60d962686b8ca60f69643e0e0fa8614688be738fb0b1c6bd54de35c2beb5e",
  "blk.9.attn_k.weight": "dd80ce4adb00e338fc04b307e4c18a27071f4ba4397184a24d765e6e4a268ef4",
  "blk.9.attn_norm.weight": "721e6487547e2b3986ab4b4e2500ceade59d908bccf4436e1e8031f246deb2bd",
  "blk.9.attn_output.weight": "5a800af39107b363861e5f5173483cdcd644d8ac3b0c8a443b9c759d71285db8",
  "blk.9.attn_q.bias": "0a19b4925ea8ca8067acc909b058adc327de3874cfc94cc9eb4a106d3f370123",
  "blk.9.attn_q.weight": "93e84906684c0c7ede79967236d9fc8344da84a9f1daa04e8295c2c9b6b26a24",
  "blk.9.attn_v.bias": "615421f812f821e230ecde4e6da35d868823248355ce7e4e51e2d650ead565f9",
  "blk.9.attn_v.weight": "7f4913e289aefd9ceecbdaf9767b1e95303f5d59dd67ecb2cc15768477f4d08e",
  "blk.9.ffn_down.weight": "95d1b3933221e87dc4af70dd566daec9498bf358070b8d26f1fc70766a84a152",
  "blk.9.ffn_gate.weight": "530f2d04f6a1fbffaaa5f2fbc3a328ebed7b330e3af14b4fc7d8a51b13ad8d42",
  "blk.9.ffn_norm.weight": "28077de416217ea1df94b96017bef4cc562ab62e51b1a03a671c70abc29ce52a",
  "blk.9.ffn_up.weight": "b87b6190778aaee4695938e24ac6c90dbbee6dce7c5c2ab5bc26ba4564581822"
}
344
convert/testdata/c4ai-command-r-v01.json
vendored
Normal file
@@ -0,0 +1,344 @@
{
  "general.architecture": "command-r",
  "general.name": "command-r",
  "command-r.attention.head_count": "64",
  "command-r.attention.head_count_kv": "64",
  "command-r.attention.layer_norm_epsilon": "1e-05",
  "command-r.block_count": "40",
  "command-r.context_length": "131072",
  "command-r.embedding_length": "8192",
  "command-r.feed_forward_length": "22528",
  "command-r.logit_scale": "0.0625",
  "command-r.rope.freq_base": "8e+06",
  "command-r.rope.scaling.type": "none",
  "tokenizer.ggml.add_bos_token": "true",
  "tokenizer.ggml.add_eos_token": "false",
  "tokenizer.ggml.bos_token_id": "5",
  "tokenizer.ggml.eos_token_id": "255001",
  "tokenizer.ggml.merges": "902a060cac8884a5793d2a857dd2e53a259de46c8d08c4deb243c239671e1350",
  "tokenizer.ggml.model": "gpt2",
  "tokenizer.ggml.padding_token_id": "0",
  "tokenizer.ggml.token_type": "b7a352ccd1c99d4413bcf452c2db707b0526d0e1216616b865560fab80296462",
  "tokenizer.ggml.tokens": "815ac90ff23565081522d7258f46648c8a0619eb847a9c7c31b238a9b984e4ae",
  "blk.0.attn_k.weight": "6fcfdb466f9ceb1229404ce4ec4e480751b8d00da12707a11783dad7256cb864",
  "blk.0.attn_norm.weight": "6063317f731371864049c7704a70772f1eb632194201ebdc2ed0f8e483507c72",
  "blk.0.attn_output.weight": "920f49716a1e2fc73b6794ec777947f1c122701e63ed302422ac89e90f06e9da",
  "blk.0.attn_q.weight": "ddbcd7cde197e632564ac58e4f25d9e3a8ca52917329eeb6081eb41a797932ab",
  "blk.0.attn_v.weight": "318fc02a189d87420f0cbf57f47f11e00c21ec1ed472ce0a2a895b44f7fa0fca",
  "blk.0.ffn_down.weight": "aa71975b6eb1f4c77b03d2ac4a194cf8d95718efac741bb12f0f3ff79a27f9bc",
  "blk.0.ffn_gate.weight": "42967702fa0bc738b88dc50007ace26dbe74a5a9e0978124dd093f818241a9e1",
  "blk.0.ffn_up.weight": "5282c8788b086bd30f46525e7995a17464882a72703fd27165491afdd8bfd4af",
  "blk.1.attn_k.weight": "cd248882e64fd2c3402c44790ebe12440133dc671b6893fdad0564c461973adc",
  "blk.1.attn_norm.weight": "ba84e1c8fd30af6ec94208db4078befac8c921aad3acb887812887f3282ea2be",
  "blk.1.attn_output.weight": "2efa3ef7c5666ccceb05e339b83ad680cc0d2c3ec78203f5da5959f23a80e14f",
  "blk.1.attn_q.weight": "5106f2e255358a1303c22e8b5f0ec044852bb30a866c52cabefd30017a7a6b7d",
  "blk.1.attn_v.weight": "a211a634a1a5df1d5f973645438be0461dd922210f9747c6b04e386c7f1ebe95",
  "blk.1.ffn_down.weight": "37093afe48d32c578ec956c9ed85242cd000d6aa979e60526aafa10c822dbb10",
  "blk.1.ffn_gate.weight": "469860819e9159caefb1aad0bc66db790f3393f05fd87b08e52256a7ed256543",
  "blk.1.ffn_up.weight": "736742c97d35d1a011f9cafd3c0ce947ad559bb2fba6da73c816f6bfd0fa9aeb",
  "blk.2.attn_k.weight": "92c219d92804d832ab404bd6dc7339c90877bb7cf405dd030c121f8b27757739",
  "blk.2.attn_norm.weight": "61e4466069474b76b6d1e702566420eb669faf3556b00ff7b824784aca13a2d6",
  "blk.2.attn_output.weight": "d2fb38a2b2171fd91caf037faa585a62225819aa232d86fd4f7f9d2c3c8a45e9",
  "blk.2.attn_q.weight": "f6faf5cc6844e3daa4f9f68d90f5458c64879de68a7728860e38374e30c3429d",
  "blk.2.attn_v.weight": "f340ef8f7341d987a6f37c0e9afe0aef5be67be00c0ce5f57612daf73319cce1",
  "blk.2.ffn_down.weight": "c7be61a701d779860b621b143fb6365b607bf99ec7c0f153b07908ac8120885a",
  "blk.2.ffn_gate.weight": "b64f0878187bd3392abfa4c3e8ad2f8b4c133903e54246747ff8f3b4639ad83e",
  "blk.2.ffn_up.weight": "50b11c712652e90ee7428dbb45cffebb80662ac982bc72bd9eafff361b5eb5a8",
  "blk.3.attn_k.weight": "2b7bcbe9ee5c9c630c8c8d7483887e78b73581016f4cbb6933db2a147a25f431",
  "blk.3.attn_norm.weight": "0181dac7f4eee7252980323e8032cf339bef2046ce0a16c0fd72af7c98a8a37b",
  "blk.3.attn_output.weight": "aef8843b636ce231da9e7c9acbee197883cc15df0e2887709324c6a50f16da7b",
  "blk.3.attn_q.weight": "55404130fa10e81322d33eb378aa0de31a92990ce7730f1338c0ace0406bb1b1",
  "blk.3.attn_v.weight": "76f7fb8040d82b957d689ce34fea2302a6640ad5bbaa0052ad2b7ebce270c33d",
  "blk.3.ffn_down.weight": "648628933eff3b357c3729c33c5b1ae51c28e59b9c19acd1601a2ff7c5d5d9a5",
  "blk.3.ffn_gate.weight": "6a588885d16e98d5f50ebed05af089154f680085ca9c97691e5b489088630a4a",
  "blk.3.ffn_up.weight": "e12455a1d702f4986e1a663493e3d5102b367af74d45557522002a35d63ecac2",
  "blk.4.attn_k.weight": "40d943380a8a85e4eab147934bf6e16f23cc8ab753f6636526382c074d182288",
  "blk.4.attn_norm.weight": "4ab2c098983d4599fe540eef624c4df954adb7473faebda7471ef0ba4134814c",
  "blk.4.attn_output.weight": "d14b91e40f58bf4a3c8c2eca0b12bb541de406574af39027d56f6c588a147082",
  "blk.4.attn_q.weight": "e1224960a3562107488589f883fa32414bae41712fa8dbd47c5f3e3a7801452f",
  "blk.4.attn_v.weight": "063f297bc4aa6e709fc32c4c32e35af7d07d80e83cb939b76adbba858006c03d",
  "blk.4.ffn_down.weight": "f88a18020c5e1caaa29596895eb348e76ee5bfad27ed57651a86cd8cd1f9b5aa",
  "blk.4.ffn_gate.weight": "48e7e1eed3fb52e92e61d3557dd0ec002418327090e034ce4322fd68542266f8",
  "blk.4.ffn_up.weight": "1ca8a7aa17355b6ce0d9ad5539fdad3899fa47fd359c285fbfb31f19f47bf073",
  "blk.5.attn_k.weight": "2bdf15f8e73d068d972380f25d207004cf0bf3b5bfa46946803ba6fba07d9175",
  "blk.5.attn_norm.weight": "60448d7cde6e1b6467aa31bdea012e39cdb08c88081cee7d102dca4f93f766ef",
  "blk.5.attn_output.weight": "f9f687d7c457537f9fca8a4087a59f1c3bebfaf5537b94e42c831a13224f7799",
  "blk.5.attn_q.weight": "987db7a2ad68657a92625e1980effbb1f79697c2183f2b9f3b3a0570c51b0ab9",
  "blk.5.attn_v.weight": "cf696891148f3e4783ad1d20f93462ae091eb8651c656bba9b662253b6263e02",
  "blk.5.ffn_down.weight": "c0662b0bd0929136005fb9d691fdd9b2c33867d9ce9622339a6a456b720b059a",
  "blk.5.ffn_gate.weight": "200bbdfab615d7a3a84719b6ced7751e3ce52757ef212d96f87798bc1de5e987",
  "blk.5.ffn_up.weight": "df5d23e7e035fb1b9d163da7ddfdfe38da6a37e86e96534dc02ad20f011b55b3",
  "blk.6.attn_k.weight": "c0dae2d272a7c5a2fa004bbb8475dbab362fc1f6d008e73d5a4434a9382ac6ba",
  "blk.6.attn_norm.weight": "51c57ac8b55e04354d5dca6bb9c0cf4177639d3b038e80209e33036209688f64",
  "blk.6.attn_output.weight": "229d97892c62f85bcdf431675250e01c976ad69ffa450b01fb543bf88f14a2fb",
  "blk.6.attn_q.weight": "c20e49621821bd46ed156e6823864a5bda4f317750e71ab8dc54e44eb48cf7c2",
  "blk.6.attn_v.weight": "53ceb1a2ee43fce3c7b5b33c58a9fc5ee7f44dc1c6f29bc9dbefc37582102dc9",
  "blk.6.ffn_down.weight": "7923c943b7629d560a032d1efa210d1d75c6692140f1be94464ee7ed24f44ed0",
  "blk.6.ffn_gate.weight": "57593d350361af753a6a39f53b066282634c0fb44f396f6f2966a574b01d8f8c",
  "blk.6.ffn_up.weight": "327b6a7a387098b8899d3ded04a4d4e7c658ca61b80d4e7b17594be232721602",
  "blk.7.attn_k.weight": "9ca48b87a10116fd8868e62b76f211d4bb91f166096be9061439ee2e1c3a5c20",
  "blk.7.attn_norm.weight": "cd56cfcc4e2ad6b96e23ea7b0d32b4caf236107d99a0b22c56760b62e63c8cfd",
  "blk.7.attn_output.weight": "7352b509a03cae2491ffc060e577d189341a0f861233f18c96f9d275dc4234bf",
  "blk.7.attn_q.weight": "2b3791c8c008c33ddbe12bedba8191322ceea2dcce5cf0eb7a93d40ad254e672",
  "blk.7.attn_v.weight": "3ae721d52466487a3d48150581e57f6d64ea1e83ab929f23b28c3d777422eeb6",
  "blk.7.ffn_down.weight": "3b6fa8ececdb3c34af3a5363863d6f94289c1c95bf47fce3a3ddcf184c5f0848",
  "blk.7.ffn_gate.weight": "dbd7df6c5ae5eb4adb859f0d36453813a4e289a359a1ba8f72d67fcbf21c3e22",
  "blk.7.ffn_up.weight": "de68380a334b4c5cfd4c318b0e9854aec59bd79aa0f0c30af3f56414f83482b0",
  "blk.8.attn_k.weight": "7303c4e4480abc72a7ee271811311199245fb5c2ea27a2bd3b8cad3a53a03c27",
  "blk.8.attn_norm.weight": "2e3d1921898d1b943ce1a1b6818546c8b471d6d542da24f51a8b514b8c3dd4ef",
  "blk.8.attn_output.weight": "30421520887b66bf97a18dbcdc283bc8d0b60590b612fd638a319a6eae923227",
  "blk.8.attn_q.weight": "73e064d5433c9b500068a1c31744dbd53f4ade298fb450a0e8c97f62cf1f8a8d",
  "blk.8.attn_v.weight": "27e21f8b9a9a8533e8178ca34a72aa1d786393d57302b7806dcdf3e51de511a8",
  "blk.8.ffn_down.weight": "bf694bd8e00047982108000e7b3dee7b225db8b19abc595e5697b6bbefd92e7c",
  "blk.8.ffn_gate.weight": "d55fdbf8606d9141b774b0500c58944fd1253b9e69d1f765eaa9a680b9f2ca40",
  "blk.8.ffn_up.weight": "1ae3f580655e7c8e8dd6c34fa4ac574fdfc5e3f1a8536da0c5442d3a2976f0e7",
  "blk.9.attn_k.weight": "b18080626012d8aabcf78542d6c7bf31c712bf55a70172fbfe173fcf34481036",
  "blk.9.attn_norm.weight": "2e3620620dc09998c6d3063a7d5de5433fbbae8c11e5b00d13f145d39140e162",
  "blk.9.attn_output.weight": "69c3c0e27ef1c0fc933eeb7b612b70909f18cde238873c0d576a2ba9714ef174",
  "blk.9.attn_q.weight": "68330e5aa28a28873c9a6e67f032186ef651df2df5844e0f27094ba349fbe4ab",
  "blk.9.attn_v.weight": "3df8d45a102be082d0793a51cb82aa62a43cd0e9d047ba4115ca0f2414b39325",
  "blk.9.ffn_down.weight": "1d6cc162b73745b135b4f040a0aac3c06d5135a3dc5b2421e7ee2af48662fd7f",
  "blk.9.ffn_gate.weight": "034a9d40fb1e32b534b45f4bccd65cbe43c4a6a3f5d01132bd245ca0005de5fc",
  "blk.9.ffn_up.weight": "c838c38d0e1a0ac0da17eb2a66023ed31929f07d8fcfe1cc546df26096c91f0c",
  "blk.10.attn_k.weight": "a78507cb72f744b86ceaa032596e74e5571c822d0226d334881169addb32cbd5",
  "blk.10.attn_norm.weight": "35f48d0b28ee0e6b4cad4e983925737562d64824be5b168b3e26df3d6b260cf1",
  "blk.10.attn_output.weight": "53712db06796de39b131323e7abf9a58551b6d52da6db66a471580386d396252",
  "blk.10.attn_q.weight": "efe08429ba196026b81cd1c471e1c7418afd9e966659feb3936b674aa0803b58",
  "blk.10.attn_v.weight": "7ec6055e134f89da0cbe79ec9f13ef2e442ac584b1f03c3e13e7d0cdad0078bd",
  "blk.10.ffn_down.weight": "37e66af4bcd1f3079e841e892255b8255070655901864ea3a8c602a7f681a640",
  "blk.10.ffn_gate.weight": "1825282bc34830d371c6edcc3c1e73e6ecc1e10f4aea0122dbb7acc1d6f7b1bc",
  "blk.10.ffn_up.weight": "819b3b276a4d4c14a35ed6682d5ef18a5e8ed468e5ce3f12e8c75ec18ac20ec4",
  "blk.11.attn_k.weight": "5327e6a2af82dfff0619a14971f5864a15553c36fead84e1af42c7630f2729c6",
  "blk.11.attn_norm.weight": "fec363b3c4a43036d2c635fb8aa9e122dd87ee79811839f2f6cd955be3373e7b",
  "blk.11.attn_output.weight": "ccf7b38f18ee8798b8a6a35018e2df3eb3e007de62876befb68025dd66c79763",
  "blk.11.attn_q.weight": "da8c4a1c824ffe174e39f126cd72f7ef83c56aff1259d452a1212de80f98f5e9",
  "blk.11.attn_v.weight": "d17ae6bb77f03982b55d341eb67acb5969e9ad3da5994b96eafc09793dcfe3a0",
  "blk.11.ffn_down.weight": "a6bac521e2791345f22c57205fa1c2f2f687794dfd24d0e98d50ae0d0eb6088a",
  "blk.11.ffn_gate.weight": "5ed902c488cb51ba5635f3df08258c5f84f31a679a00211ea5f9d8b824ef6d9d",
  "blk.11.ffn_up.weight": "ee9f1437eb890d2cf9df2574afa1cecf20aafdd847cd75b152d7eb74419afd34",
  "blk.12.attn_k.weight": "5a069c06e1019b0f889088e67458f7a11ec77fa190ada6069e46211f62219947",
  "blk.12.attn_norm.weight": "194d7e5fcc8c49aea62daf1940532419cf3c505afdce6be377286b677db5db8f",
  "blk.12.attn_output.weight": "6534995fd4d6fecb55e317add4b1723aba4d825e1e9471d0b08813dfdc247176",
  "blk.12.attn_q.weight": "4ab51ca519b5995581fa34f846276feca3b907ef2b51f192f6cc0b3263c3f5a2",
  "blk.12.attn_v.weight": "5652ca3fa81ef9a1ac1543d71fc6813f8517f8ec54b25c701f6f98061614830f",
  "blk.12.ffn_down.weight": "4b2c263f54c88516b8eb273bb8d9615b01c5c8b484dc70358adb91b50b300edd",
  "blk.12.ffn_gate.weight": "8f50c3c3e3e8568991d6c1b0e74b500cf4f208e7700bbb8e87c3f6a6d359b6b5",
  "blk.12.ffn_up.weight": "1c1a581fec1fbe959e1427fa513f400100b5e1ee9d83932630be9905fb49c231",
  "blk.13.attn_k.weight": "efd7a38c46f08d8376d82974f33c644e3a02220e142d63b1704718699a8a884c",
  "blk.13.attn_norm.weight": "d28fa4f1bd75abbd063b0e622e08f579c89cd0c0c5ce63c1952ec9f944f8ee13",
  "blk.13.attn_output.weight": "71e0068a639288718bdb70a6cfdefd50bc8b3ec3993347a65129e70001ca5827",
  "blk.13.attn_q.weight": "b97077adc92cff07a2e07d80ee38f214ad8713571c69cd5c70ebd43dc501ac87",
  "blk.13.attn_v.weight": "79b3e2749ab4b459c81e96e322b215f1e8af645eb346e176c326bd00cf6ed2fd",
  "blk.13.ffn_down.weight": "9f8687d11effa1db7cfecf7bec5631734bcf2962aad74a9f519144491e08ec85",
  "blk.13.ffn_gate.weight": "7d14dfa0543852e7777fe8fff29ca533744cbcf1ebcf10067e5adfc4eb345e65",
  "blk.13.ffn_up.weight": "852b9527b97fdab211ff3f832a660ee1d93ccb56906144c50f01319a6e8ee615",
  "blk.14.attn_k.weight": "79e926b20f36f66d58226cb358881f2f68ae7b468787d33cafae5110287a14a0",
  "blk.14.attn_norm.weight": "97d481b63deb0df6142c2c6cd23043720c62eb609e390f47a7113751c79974ec",
  "blk.14.attn_output.weight": "aa6e94d7176d5c79fbb89b96e5f13ce75702ce3dd23ee52986446da436a6c3d6",
  "blk.14.attn_q.weight": "214becb6d1bb460da9fb8ace0f99b9a5afa9edf7aa7acc19606c7401b11d6305",
  "blk.14.attn_v.weight": "488b0e6d7f1a7a2ed0972aaa6d10ef9c775ee5373460324efcf5b3e3da9311df",
  "blk.14.ffn_down.weight": "29c7ad16cf9542e30996a1a01ab95b844533b28051f04cc7949c371afb796471",
  "blk.14.ffn_gate.weight": "b7ef208f2b054803665b377f5a5980c122c026841809cf855c6ba06d1c3a885a",
  "blk.14.ffn_up.weight": "76a5cc28100748d79c4398ce7b9176aab4d661548b6293a82f99144812e5b70e",
  "blk.15.attn_k.weight": "a6b8f9e98ab878fa7ebc5d080978ebf2d050acc2ab2fa8ea9188eb10e27702c8",
  "blk.15.attn_norm.weight": "a26d07a9752d6dccb68e3a8a2a49fd0752cdd0a415e05547819bc37d9ba63d5e",
  "blk.15.attn_output.weight": "c63616c69048ccbee801e05be4f56d21fda21aa0cc470f41d57c31b4d9283a4d",
  "blk.15.attn_q.weight": "fd595a67bf96c6ba16eb148a9d02fa52fa3c1d33ed10be28a08f851409fd6e64",
  "blk.15.attn_v.weight": "1c5c9d33fa07c05d5f4ed0032c6c4aa83d863f0d31c94a66109d239dcd03cea3",
  "blk.15.ffn_down.weight": "585ea62ab8aff7d7d212ea5c1a03226fda6b68370c890b776834af70c948dcbc",
  "blk.15.ffn_gate.weight": "a13c63f86f879b03a573d5dd2a25cfd1f4dc73e8132e6454ecc23e538b4cdf6f",
  "blk.15.ffn_up.weight": "f7112450f57c12fcd511f049e0dc0b541625a107a7901c3261ed9e984299f65c",
  "blk.16.attn_k.weight": "2d2c8b11dd71fba6d1c106aa1673c113a5448653cca7eab897c8739212ed5003",
  "blk.16.attn_norm.weight": "95c2ec7be9469690e18a9a1779684acb3e9da44b13e263a0da840305646fbf8a",
  "blk.16.attn_output.weight": "31a65046e677f54dae654ded4e733479fcc0f7283d83076b7dc7cbcae8528230",
  "blk.16.attn_q.weight": "bfc6292b9c6d49b7118d08060242a138182eb182d136ba5dfaf469437c16081d",
  "blk.16.attn_v.weight": "68f81d037340217d87c7853ff4d6edfbc46d9e827ee6d5bff7c3f6238e3a95ad",
  "blk.16.ffn_down.weight": "bbd6629691950cef4d5113e1c6670e91b216a9b872cb92cee02dfda4d6c4f7b8",
  "blk.16.ffn_gate.weight": "63cb56f282b7401ed6c76e5bb6fdf1bf68a64f9af0c82c014209b55bcb5191d0",
  "blk.16.ffn_up.weight": "b54f39a2541063cbfb6f713aa81c3b69a04100e999aa2ebbeec195dc382eceec",
  "blk.17.attn_k.weight": "3d9ba49799cc56664ec30a002bcad61eb651294212a68c3ddb573eb042aef5a4",
  "blk.17.attn_norm.weight": "42ee0db4b9d63257bca0012a30b12737ead1caafeb5ed3d93c8f48ffec4b46de",
  "blk.17.attn_output.weight": "a38fd100f05c9041c592bc739e287de0b10d08ef2bda41a879225bdca9002f71",
  "blk.17.attn_q.weight": "8a3bee285b0180a9eb35662e449ee4cbe16d992bdd48fb3a94bc4a347728cfa2",
  "blk.17.attn_v.weight": "d7f8f1b8b863494ed4392a1656775912e9b264ad36016547b12e832a1d6757d6",
  "blk.17.ffn_down.weight": "bb7ee58f61da8630972e25b621996fbe8ec06f4dc9ab1e268ab5b120c526ca28",
  "blk.17.ffn_gate.weight": "6b652dbf167fee09a45ebfd78d500ff6548fb2756dbe5343ffec3f7e6207179f",
  "blk.17.ffn_up.weight": "3b67f727e55e742715de978fab80457781e7a3762bc48f79d13b45dcb8de664c",
  "blk.18.attn_k.weight": "ff7fe57c57b90c6fcc0aefc39ec24593c3a7d1ea1c23770480075a015450e0f5",
  "blk.18.attn_norm.weight": "1d40faca082d2633ef0ccf19e121870dd6c7c3e2154607c7f3543fa96e99cb2d",
  "blk.18.attn_output.weight": "9adfecaaa397a92db4687efd5fcabfa0daef9e6b0493763b7ff5ebc185c43a6c",
  "blk.18.attn_q.weight": "ad1803eb9b291948639277afe981e666b07167eb3fcae903ba5b73bf86d8f50b",
  "blk.18.attn_v.weight": "308cf23399adccf27401a4ab60d74dac6fb9d4cd4b9c5940d9145118d1881b34",
  "blk.18.ffn_down.weight": "7de4ac9a561fb580619b745687dfd7ca8a69ef70471dee978741b80e9ff7bead",
  "blk.18.ffn_gate.weight": "0c66970f696b33bd5ee8f1f2fbcb41fd78fa5ccabdc927e11a4d5a4089f19c69",
  "blk.18.ffn_up.weight": "66a42e988e8a1f468fabf976c48e9e4bb045eaac6916ef16555ac101cd674abc",
  "blk.19.attn_k.weight": "a928ab50390bacbcebe2e4b66922498134ce22d7b93beaa87d6cf4ab52eb7174",
  "blk.19.attn_norm.weight": "b4a02c55b46c2a96aec9c64a254087cf48e6c1d4b6f31782c77a46fc4daebad1",
  "blk.19.attn_output.weight": "b768319c641dff1eac5d1f8ceb960c9899c795bf2b24c1d6bf70aa24fda45f77",
  "blk.19.attn_q.weight": "79ef3f57d187d3954a26362096e1b6c222d76f537dff73e034d6e9999935b8bc",
  "blk.19.attn_v.weight": "ce13d6b13e24fcb2d5bc6a2662e5bd295b31b12db10a6d0307f86cf29b8d5001",
  "blk.19.ffn_down.weight": "cf90d7e2137482cfd50934a8223ad774621d08554969da80a9712df5e6227eb0",
  "blk.19.ffn_gate.weight": "71ce30150f003b6eeb3bf7464e05b6ae615f135110d8e47f0a47fd973e537c0f",
  "blk.19.ffn_up.weight": "7f92aca0cc29866633feec701ec01a85a8ee2fd4e2b9630173a6cffb1d9d50ee",
  "blk.20.attn_k.weight": "a2df23159d6fb74ef28e14b61028fe8b00a693a2fc9234a980be74f20b958682",
  "blk.20.attn_norm.weight": "c6cd5f1b096fc5efa4eb59ca1c8c4bd28730f3dcedd59a63601663eccc6724ed",
  "blk.20.attn_output.weight": "896a8a166d0f006d4b09867ae4345426303cbc3fb13a18d3d4e1bde00f16dbdf",
  "blk.20.attn_q.weight": "01eb79588fe61baea0da43e99f4dc5939590e1bafd01e12dadb8326f102bfea2",
  "blk.20.attn_v.weight": "bd39630fdd5a7c859ac1addaf53e63faf524c3f32f5f4896d86b6e746b1d5c06",
  "blk.20.ffn_down.weight": "0304a5d39957a0e3f031c4bcc4549a135d396c8d97c8d276fd1c823ce86560c2",
  "blk.20.ffn_gate.weight": "117b79d595b1dca0c8b37586beaecc4d84411507276212dc286cde7fc36c9bef",
  "blk.20.ffn_up.weight": "6e799346db145c125f01783539749d3828fcc451cd4f10c5352f047a47e28714",
  "blk.21.attn_k.weight": "1c37e4c0664147e775bb006b226b9553e3421140cd96288ea755f81731ab80ba",
  "blk.21.attn_norm.weight": "00ae783a29000ccda5e4bdbff03df0752fb82805dc3f9b987500ebd80714476e",
  "blk.21.attn_output.weight": "7588b84f9fb19f15095b5265c60b4a4e7ae74bcc47d4607dfa5d0bfab6f136cb",
  "blk.21.attn_q.weight": "a65f1c0dd06d45bb97532d3e932689c1eecfe7359089b39174a96a149335cbc1",
  "blk.21.attn_v.weight": "4220b77e7d5e8709b4eef33a679b5dad11f297085ef44c9977f9e54ef08f7a2d",
  "blk.21.ffn_down.weight": "b8c082a0530d4b5328e67db0df84c5498f2af956de23c639fa0198ffea853950",
  "blk.21.ffn_gate.weight": "cd1b656ee72d00e9835ef667c19ef89a88de261eb8eb7c0e936e0f9ddf83ef9f",
  "blk.21.ffn_up.weight": "dc445f73e36ec7a3bd86884186b728f8e0187f32848c3b8b69d4d41f8571bf31",
  "blk.22.attn_k.weight": "e37cf0b893ec8b9ee8c78dd139b8d9c45cb997a3bc0c3d93a70ca1c3f6af8859",
  "blk.22.attn_norm.weight": "248a27838d3c46cc03a5c312facc84e2e0e2c990ef8401e93da25918497f88d1",
  "blk.22.attn_output.weight": "fc191a18f6d18332c66761f7ab28008bfe295dd1f5c8741a2488442f9e00d0f5",
  "blk.22.attn_q.weight": "4b193a2ab8bc2b085db18f2bf3eeba26e02b537b2cdd738160c8f14b165d0f5a",
  "blk.22.attn_v.weight": "7a60ce5ccac7e045e55ba1e1e85bd2a0f93f8c781daee96c5223665e22f0c666",
  "blk.22.ffn_down.weight": "e0a34fb4244e2c7168f3dbaa1904c15d339ec39999cdf27128bbaf619ee0a237",
  "blk.22.ffn_gate.weight": "8bac872d4b8549c8812f927efa309f1792b524f33601095fff61b826de5a5615",
  "blk.22.ffn_up.weight": "b67fa2b94dd901b6ec64c0853ce8ca2d86fe9cb1cc6d2f15fbbbe0e691c0c648",
  "blk.23.attn_k.weight": "2c32e66ad01942b819ac09a197c71579fe66f02226a264fdd72ad1e02c67a27e",
  "blk.23.attn_norm.weight": "825fdc94deb439cb93c713eeb077c1052b90ed658d6d464fc4ad3d611e911d48",
  "blk.23.attn_output.weight": "95ca6707a95b8750b0c7c5d379d368f0f2e7ebef631954e7d4d8ec0f41f13a3a",
  "blk.23.attn_q.weight": "6eccc84faca5fac015d1b26e2854501edcfd292a302228fe14cf99f5eb59a34b",
  "blk.23.attn_v.weight": "b343ac3d226040f1033ee049668aa1d89b1774bc18431965682e5dbdce78ccdc",
  "blk.23.ffn_down.weight": "9fc599befea8d3b1e342d564a110074f66d2542df406c4b90b6bdc5828fbb2b2",
  "blk.23.ffn_gate.weight": "488556c1b0c9f0b20b0c99b4bac2e0f4046b81edb601d7b91e7e5b3bab47d667",
  "blk.23.ffn_up.weight": "1088e291d7008dd9c7c2dd6830af686a8a84b724d123a016209bd5156d6898f1",
  "blk.24.attn_k.weight": "a923fbe35e61e009a53927d7828818e0592bb737d6a1106c4b0b5a1efc367e07",
  "blk.24.attn_norm.weight": "9b51aaaa939cefafdd9b13a7e5b74ac7fa2d603427e55a16a909d6f3f353750a",
  "blk.24.attn_output.weight": "1beb2baba56f8409466434b037771248c2f620ec5f53e15f44c271d5a2d9ecf4",
  "blk.24.attn_q.weight": "4b0194fe5bfae0c6bf6131dcf8cb6e2b994f6ea10b27cb03574f0f4f8cc0c950",
  "blk.24.attn_v.weight": "6ac34b1ab0f66226d85bca1194a7c212cd93d384ecbc8b8395de48aec0970a61",
  "blk.24.ffn_down.weight": "5508f74cb732a662c2936b32ac5e90742d172b9f961a747b0e5cba0e5906a89d",
  "blk.24.ffn_gate.weight": "095e39b8584403835f9bb1ac33e0e81f54175575e4800273d281b845bff381e7",
  "blk.24.ffn_up.weight": "2d43ec21637dda12973de367b0113ee9840b0d815bf6fce042f7c3f270b0b530",
  "blk.25.attn_k.weight": "9e2aee029f3d2c7f67dfc7926e72c8228fb978382c8e5a4701bbf82c93801419",
  "blk.25.attn_norm.weight": "220cd7164fb4cdbe22d26058e4153b26c27c7b5ce2bec8e95bf2c0ea08d23103",
  "blk.25.attn_output.weight": "a17f4a5dc6aa51f03dbd75602d98e9491767c205cdc2c3a5f8667fc54bbf7c64",
  "blk.25.attn_q.weight": "f60827496835c440c794bf57ce9780704d10a59d8229886bf75ebb18900ba4ef",
  "blk.25.attn_v.weight": "9cac217e9e9f4f4c85f14ee51165a77c580165bd4a34b202389169bbe61a1ced",
  "blk.25.ffn_down.weight": "a0f36949b663e80849581dfb71e7babcc73580793bbcb0c80ab26d5a6e000359",
  "blk.25.ffn_gate.weight": "df4d1be4d50d6afe5ad3ef0d0e0fac76a33e85c963dea769641d612dd53e7d13",
  "blk.25.ffn_up.weight": "992da76be762632e25ebc5ef4d03728eece1b43f7c4e31827df19ca724aea694",
  "blk.26.attn_k.weight": "34199ff856ac32a500c754539d070258574192a34ecba87a182897cb59fdff52",
  "blk.26.attn_norm.weight": "a8e9dfb2dae5d22b5c0aec5f3675991c0e3c3e6a44153db2579136b73f456e00",
  "blk.26.attn_output.weight": "1c4f257ffb0d7db0f11cfb275e38b4af736917b43ad82de1badce3f1d227da4d",
  "blk.26.attn_q.weight": "33d55786274c2e718cf61e8fbecf3dfa5ee0c208f0b716d42b061f55459acb3c",
  "blk.26.attn_v.weight": "684b636939cd4ffcfec5a6238a0790ffa43d853c95783af9b9e8275e74071a7a",
  "blk.26.ffn_down.weight": "89d0bf066db154e6d312b5433aed1714f6a28b40f4c52e3e1530ee07703303c8",
  "blk.26.ffn_gate.weight": "393d649bebe5e2940e1b043649f6c860b4b8b9f380f30e9da1744a830f358156",
  "blk.26.ffn_up.weight": "179edc85ababd9d8440cc6093eecd1004290aa1cb96434b26ecf7585b6cca17b",
  "blk.27.attn_k.weight": "334841445a7f1e14731b08f56eb0b1f0938c63823d28bc6d078c4c5f05b36f19",
  "blk.27.attn_norm.weight": "57344471bbda2e9deffdfdb2dd05a07aa47f8761e24de53525588639145bf551",
  "blk.27.attn_output.weight": "506126af9ee54b535d49f97e36f630e74834f480329f098d6d62e96246d8d65a",
  "blk.27.attn_q.weight": "dd984df1acb4783849e25ba7ae378bfd385cd9efc540fb798cd5bdd873f0118f",
  "blk.27.attn_v.weight": "b4b3fe9a4455d34c297ff20a2f537b647cef424741d840a747b265f23d320ac0",
  "blk.27.ffn_down.weight": "621fdb185ba0d35ba5476dae73d2c81ec1482a0e878d5bfd5c3b29fe837af013",
  "blk.27.ffn_gate.weight": "e4fbab45f2ec506fa374103251a0bdb7baa6f576080bdd796f3e9db92098e08f",
  "blk.27.ffn_up.weight": "a0c57e463e988002bbd6a6c6792baa21a65e6f89ae303a2c301951b0ae6e4bbe",
  "blk.28.attn_k.weight": "bac36cbd52ec5056841663865e1291ddab4b47ef9a2544dd285d4503bfb0e4a0",
  "blk.28.attn_norm.weight": "5774a9df2bbb2e86d1f70179c7b92d81e1f401160148b3328fb64db6646a5425",
  "blk.28.attn_output.weight": "e8712622d1569557000c75f26c3f55fad267fd300463c2c2cfe3afbfa1c8f908",
  "blk.28.attn_q.weight": "11677751fddee52cc739699c02836f7be54d96038be4240be5d4f53d00161608",
  "blk.28.attn_v.weight": "e5ee459b8958d65e1445997b9aa1e90e2f5d17761ebcf5357313119a45322507",
  "blk.28.ffn_down.weight": "3934518f9f85292da8475fe38a8edcbfc4e24ac56c351b472d6351f98750871e",
  "blk.28.ffn_gate.weight": "6ba735d57e98d0847e487f25ffaa25256deaa8abec76f428cb70bd9774279d83",
  "blk.28.ffn_up.weight": "977fae6e1e5353114fc645dd98429464749758765cbc6e6457593d596e57850c",
  "blk.29.attn_k.weight": "8122a457307d580ad6f1e0acea09a2f593d97f595ba0d6737f5fea16d2433642",
  "blk.29.attn_norm.weight": "d626f721e05aa1202439b01027031d4caf1adace61ed37870a277cb6297c77cc",
  "blk.29.attn_output.weight": "7fb7122ab1b6b1e6615ca746897da27bc52c92cb70d3147183cdde61795b72b3",
  "blk.29.attn_q.weight": "be43e94ff6b6e391024dc824101efa0ddf4005d5b002ac26cb03765c0c73c2fa",
  "blk.29.attn_v.weight": "af93c85ebff908f74f9935b81bde0516ca487c84139868a1ce079c3ae20036b1",
  "blk.29.ffn_down.weight": "39dae12340ed3120bd19c495fe0872b559613641e41fde69d02d8631900b84c0",
  "blk.29.ffn_gate.weight": "36fd482439840ef197c9f3b8905d86acfcea49bcf018544106ca465d4bf8d5c7",
  "blk.29.ffn_up.weight": "5243fbdfdc1e2a1dd84b6210a9869d18a014db9088897e345240cdc99990bd5d",
  "blk.30.attn_k.weight": "948f263616bd3788b2b968baafd69b9c5bd1b77578665f096c4b7e247b4cea42",
  "blk.30.attn_norm.weight": "e168df981e744874ff303faf2eb470e5f6868c2040ba5f383f6c5148669975e7",
  "blk.30.attn_output.weight": "4cf0ccca04b792573b756655a24fc89cfb1f272da8305633f0bc66ef14990b93",
  "blk.30.attn_q.weight": "21e07d6cba6c50d65350289258209717174a13c42be57e8141d69712cbaf32c1",
  "blk.30.attn_v.weight": "65a8ca29c7237b3182ccf03e2fc94e84f9a53d0e160fb679ab401c853170dd9c",
  "blk.30.ffn_down.weight": "8b00500a6d00d84058f6658ee1d6f06fb4fcae2f90d4341792259362923b3c13",
  "blk.30.ffn_gate.weight": "5bc0e19ab7a31b50ac2118ad1b36e31055271a322cd8ff661d47c3ac0210703c",
  "blk.30.ffn_up.weight": "f37a0561955725bd59ee2d064fa9f4e00a12a1b620b624db3bc3add5330bc321",
  "blk.31.attn_k.weight": "9a5663edda227f5d87533897146764f8e8a7481b9e71fae197c39204f8463221",
  "blk.31.attn_norm.weight": "060a4f438a1ee5e220b5b5278ad2f5c085a428bf38c515766781815597c87529",
  "blk.31.attn_output.weight": "6ada5d3cad9dea4780ffbb43302bb6ccc2f24eddd0fc4f5f84c9ce0fc0c6e5dd",
  "blk.31.attn_q.weight": "bb5d08c08603907981ad388d5d8b70fcc9b98034ba264b8474c8890cc0297af0",
  "blk.31.attn_v.weight": "e01b4252ea9c6a889c32b21144b441a347464d04536ef4f6572425be55759796",
  "blk.31.ffn_down.weight": "8ba4d679c36e93ba65ba03180385ef35ea86b3b7cdf2fded9df59369f1c09630",
  "blk.31.ffn_gate.weight": "e5b41dc93645f8b5e8eebae3ada3ea43a18f97ce2654228655170b07b463ccb0",
  "blk.31.ffn_up.weight": "25b88cdddc8b547af294ed107d3d1312e90b983cae87936fa6062ecd8ea02539",
  "blk.32.attn_k.weight": "4bcf86dc0858c8ca2fbdf6aa76674d43eb698f78979fdc1a38f556a7af1facc4",
  "blk.32.attn_norm.weight": "cdcc12f3b8b9773c6722736bfb748a2729230b21478cbcc4104859d3148df815",
  "blk.32.attn_output.weight": "d43f1196822995ed89a9365c97054753a8b30ce20b6e273c8edcc42673a1e141",
  "blk.32.attn_q.weight": "ebf2972bb3865cbc5be4840113a322089752038344beab2a0122c7cb4fb399b6",
  "blk.32.attn_v.weight": "714db81704ff34fa137512903c1013acee7877467473e46600728b9240582eb7",
  "blk.32.ffn_down.weight": "2cde3da1258bb170a79d5d3cdfe10c86a71eb34b77da46b74c5ed71e7f4fe274",
  "blk.32.ffn_gate.weight": "c7e1ed792532613ff9d4e5834b6536e2e0f47df2303bc0fdaa90aac0c1f4e8db",
  "blk.32.ffn_up.weight": "d8d6f13fe66a716e28f79101a29817f0c0d6f99969a6f017d51bafd1a16c600c",
  "blk.33.attn_k.weight": "a0a28f6cbca88da00cab2ca37094d9b0503bf9defdae77b91895b911c408cbb6",
  "blk.33.attn_norm.weight": "0251200c24cc8445607ace6dc8c5aa0566567997262b7cca53a11ac23cc564b2",
  "blk.33.attn_output.weight": "b2423205bdf6a1096d43c44d8d12f1a84fcd4e1bb70fcf6dc8542b8b8a71a13c",
  "blk.33.attn_q.weight": "00b425c3ef71065ce5e0234e702bf38143b4952da78a85f52ab2c2e3073d97ab",
  "blk.33.attn_v.weight": "035edd2335df816c42c765a5e66b9d9b9e15a822a8dc1863508145499c942c14",
  "blk.33.ffn_down.weight": "4894a923a3db75bae4496ba3ce5f28796ad31fe33996a066271fb8654964310e",
  "blk.33.ffn_gate.weight": "8f6c819b8bbfbe3357fae89e1ac5a3d58be85b3b04be3bacf7b62775869046ff",
  "blk.33.ffn_up.weight": "257c3544b5b544fd5d839665bf5caf107a329b59dbc3751efcaa24ae63c56179",
  "blk.34.attn_k.weight": "b6cd8bba892e38dac4a2ebc3ba1bce49e71b967fc436fde30c6d76f54a18935f",
  "blk.34.attn_norm.weight": "2b3c8e60a064cba9955752bbbbdd92c71ba5c2f1bd721097bdbe88b5abc68787",
  "blk.34.attn_output.weight": "8cc272551c9aaca9db5a660c6927bab94a0243d74a30b2bc165f06bd577714ea",
  "blk.34.attn_q.weight": "74b561eb4792484e6a94b58fe2583848c3ae28ff2f1bf3d02939a0cfdfa49990",
  "blk.34.attn_v.weight": "dba19e24ff05154dc5a1f55c023729303a583d13d68732ce22ea74d4410dc8f0",
  "blk.34.ffn_down.weight": "76eca5dfeb274c35774e0bf9f22ee420ed9085c8e99aa2cd5a236e4918b44c61",
  "blk.34.ffn_gate.weight": "9af0862d5fcbc24732846488e653db8242a467765c0cdbc00332b3a40256b4a6",
  "blk.34.ffn_up.weight": "2a03126bf73587eaba99ece2066103d12e47bcd4ce30ff6c17b2f383b81d40df",
  "blk.35.attn_k.weight": "52513fc0cd4e997a842729af7d21dd09399bce0a339558374738be266d0fa2f0",
  "blk.35.attn_norm.weight": "e5281fa911964263ccf1630b14762edbd41d0b9472d6ec695fc600fed4892c35",
  "blk.35.attn_output.weight": "b391d6705d5dc6f48326b5fd16573f679edf64109d86fb729a498819676590ca",
  "blk.35.attn_q.weight": "d16446921966db9b0e0539626ad22a2511ace780e59379d6a4162d8c5441440b",
  "blk.35.attn_v.weight": "9d8cdf23ffdb0c5c74106843390b94b24c9f33ef0eb9998d39f78c73390101ea",
  "blk.35.ffn_down.weight": "938eb6301f7bbf162d7dd965682a5ed11d0a4a530c6fedd7e5469ce80012fc17",
  "blk.35.ffn_gate.weight": "5ad84f5a0c8edcfea1ecf1a3e3d21d85ceda0c4ad9e3c6ca68885eeff8ed3c2f",
  "blk.35.ffn_up.weight": "1c4330d9dc71bf4c98812c34356c51f520f47610a534152aa6d29284b758090d",
  "blk.36.attn_k.weight": "ef720655e5ca2465f13db2dfc4732fb4ef2c9d53acde52f514fd4f301e974081",
  "blk.36.attn_norm.weight": "88f4b9310b3c8c2644e3029160cd35678c79dfa59280430e03f5c29a6fe84a58",
  "blk.36.attn_output.weight": "aec6f915fffd7bb72cd783273e871b4f09605950089d45e72059d1316b6c4b01",
  "blk.36.attn_q.weight": "72f9408a2405d42f8db6ce5fcf1d26a3660b6f225fc60e77d0277109cfcb82ed",
  "blk.36.attn_v.weight": "0f3b3d851dc44b3893ef53f6cca5b4acc9658bacfe1cc2d13c3d704ddd409b67",
  "blk.36.ffn_down.weight": "470aec48ce8c5129a6654d9fd26fcae72776f9fc1429a8bb05818072a876475d",
  "blk.36.ffn_gate.weight": "7f5f296d09cf55679767b5d15de3eff489c456782119f25204be4b1647f18dcf",
|
||||
"blk.36.ffn_up.weight": "b7ef74a1f7ffb4982711d93f1787be3a70edc3d2358d5203c41d8900508037d4",
|
||||
"blk.37.attn_k.weight": "c4ffa5412e4ff2dcfe1aed991c1f54169fd171a4c7638e4b9f21a1ca64c5e1d6",
|
||||
"blk.37.attn_norm.weight": "4eb6c888d841cccfacf5b963f8611120f6ff24b84af0b5714fd9ab36dcda422f",
|
||||
"blk.37.attn_output.weight": "db2a7bbf9682f9f6eea672dae8e150738f1bf74dbc80edc7022017a3f040c8ac",
|
||||
"blk.37.attn_q.weight": "e38c0462aff139afcbab289189823527e453abc9e541154adde5e7af88cacf0b",
|
||||
"blk.37.attn_v.weight": "952eb2492ed452a72f96bcc12d4b2affad9dfdf46ee39ce4a5d7b57a5dc301e5",
|
||||
"blk.37.ffn_down.weight": "25f23a8fbc44febf6dc4848fd7fe03a580e2822bd3b3b5a51f4990826bfe3e4e",
|
||||
"blk.37.ffn_gate.weight": "707da5eb40118b035305d3262444382351f170a20a537386a70e90c5a83a7817",
|
||||
"blk.37.ffn_up.weight": "d2d2ba5cfc4ef47338dd7384219e22bf030a5a2209e0354d88f5bbaaafd20e87",
|
||||
"blk.38.attn_k.weight": "abc4bb189dedf7ce661e79028427623a4f91ac091c2cd60e31b58bc62b1cda71",
|
||||
"blk.38.attn_norm.weight": "9f4803a7d03fd40fcb83d85f84eb1d5682ea4e5bb084f210c02850675d804c3d",
|
||||
"blk.38.attn_output.weight": "77cb66007f1a41df7135d0e7f900ceb499c2f667dfc3f1a6ac01a3203bbd3ccf",
|
||||
"blk.38.attn_q.weight": "d94a8b26cd375bf2bcaa76597e314aa8268ee50a479d00931e5e0e021feadb5d",
|
||||
"blk.38.attn_v.weight": "660c907888bc5016dc69b7d35fe6f55c7ded697c93be0e2d332a2f17aff88758",
|
||||
"blk.38.ffn_down.weight": "6f06173bae5b00ffaf88ef383619a8b9c6a8d0d5c6494695d17f6c1de1a68a13",
|
||||
"blk.38.ffn_gate.weight": "89f99be149d03f116527bfcabe073c50001c874de40fb6e817f6619027f3cd05",
|
||||
"blk.38.ffn_up.weight": "8d57557c8d5e2d2688b73f01dddf1ce8d5194990cda6358153320aea88aac7f8",
|
||||
"blk.39.attn_k.weight": "21be09c988b46c8393e6c2ec9230f3b5136eb7607dd1953ba92d0811c2f0dd75",
|
||||
"blk.39.attn_norm.weight": "ba7c1912dd1c4e2d16917201f62396fd0600e4a451137eaddff255548c209abd",
|
||||
"blk.39.attn_output.weight": "acfaf4abb3fd27fd899b5563c3877f176b597d8f6cdb2f2fd3f3a0bd4da15ed6",
|
||||
"blk.39.attn_q.weight": "e8adbc140d4c8f0db2a27ca584c5531d5b1e080555fe627e34d80d0814a92bed",
|
||||
"blk.39.attn_v.weight": "92f96b0e1f724e73a0f90a76c145654418844c04a6d4b14c05eb5af8a62bf8dc",
|
||||
"blk.39.ffn_down.weight": "4d9ee7c65fc16fe95d10c47b79ac6a525741947600a64b5fcea5d300a82c50de",
|
||||
"blk.39.ffn_gate.weight": "7e18507989f39b32191133d2657c2ee3b74f42f070579204d727eb72215793d1",
|
||||
"blk.39.ffn_up.weight": "22cda752269c9757ba918abede1df95bb0f83a5c772dea13c8deea3d5f2723d9",
|
||||
"output_norm.weight": "2858cf0e39d32caf52b7861378ace076000241e147f10b9eb21d8a5cd149e3cb"
|
||||
}
|
||||
@@ -100,6 +100,8 @@ func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error)
		t.Pre = "deepseek-llm"
	case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
		t.Pre = "deepseek-coder"
	case "1ff7f41064896984db5d1bb6ff64fa4bc29007d08c1b439e505b7392777a319e":
		t.Pre = "qwen2"
	case "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855":
		// noop, empty pretokenizer
	default:
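The `case` values above are SHA-256 digests of the tokenizer's pretokenizer data, so a new variant is wired in by hashing its configuration and adding a branch. A minimal sketch of producing such a digest in Go (exactly which bytes are hashed is an implementation detail of the converter; a plain byte slice is assumed here). Note that the digest of empty input is the well-known `e3b0c442...` value, which is why that case above is a no-op:

```go
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

func main() {
	// Hashing an empty pretokenizer configuration yields
	// e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855,
	// the SHA-256 of the empty string.
	sum := sha256.Sum256([]byte(""))
	fmt.Println(hex.EncodeToString(sum[:]))
}
```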
@@ -718,23 +718,18 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
func LibraryDirs() []string {
	// dependencies can exist wherever we found the runners (e.g. build tree for developers) and relative to the executable
	// This can be simplified once we no longer carry runners as payloads
	paths := []string{}
	appExe, err := os.Executable()
	exe, err := os.Executable()
	if err != nil {
		slog.Warn("failed to lookup executable path", "error", err)
	} else {
		appRelative := filepath.Join(filepath.Dir(appExe), envconfig.LibRelativeToExe(), "lib", "ollama")
		if _, err := os.Stat(appRelative); err == nil {
			paths = append(paths, appRelative)
		}
		return nil
	}
	rDir := runners.Locate()
	if err != nil {
		slog.Warn("unable to locate gpu dependency libraries", "error", err)
	} else {
		paths = append(paths, filepath.Dir(rDir))

	lib := filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama")
	if _, err := os.Stat(lib); err != nil {
		return nil
	}
	return paths

	return []string{lib}
}

func GetSystemInfo() SystemInfo {
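The new version collapses the search to a single directory next to the executable. A sketch of how a caller might use the result, with an illustrative subprocess and environment handling (this is not the project's actual startup code):

```go
package main

import (
	"os"
	"os/exec"
	"path/filepath"
	"strings"
)

// libraryDirs mirrors the simplified logic above: look only in
// <exe dir>/<relative>/lib/ollama. The ".." segment stands in for
// envconfig.LibRelativeToExe() and is an assumption.
func libraryDirs() []string {
	exe, err := os.Executable()
	if err != nil {
		return nil
	}
	lib := filepath.Join(filepath.Dir(exe), "..", "lib", "ollama")
	if _, err := os.Stat(lib); err != nil {
		return nil
	}
	return []string{lib}
}

func main() {
	cmd := exec.Command("./runner") // hypothetical subprocess
	if dirs := libraryDirs(); len(dirs) > 0 {
		ld := strings.Join(append(dirs, os.Getenv("LD_LIBRARY_PATH")), ":")
		cmd.Env = append(os.Environ(), "LD_LIBRARY_PATH="+ld)
	}
	_ = cmd.Run()
}
```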
@@ -2,7 +2,7 @@

### Getting Started
* [Quickstart](../README.md#quickstart)
* [Examples](../examples)
* [Examples](./examples.md)
* [Importing models](./import.md)
* [Linux Documentation](./linux.md)
* [Windows Documentation](./windows.md)
110
docs/api.md
@@ -928,14 +928,25 @@ A single JSON object is returned:
POST /api/create
```

Create a model from a [`Modelfile`](./modelfile.md). It is recommended to set `modelfile` to the content of the Modelfile rather than just set `path`. This is a requirement for remote create. Remote model creation must also create any file blobs, fields such as `FROM` and `ADAPTER`, explicitly with the server using [Create a Blob](#create-a-blob) and the value to the path indicated in the response.
Create a model from:
* another model;
* a safetensors directory; or
* a GGUF file.

If you are creating a model from a safetensors directory or from a GGUF file, you must [create a blob](#create-a-blob) for each of the files and then use the file name and SHA256 digest associated with each blob in the `files` field.

### Parameters

- `model`: name of the model to create
- `modelfile` (optional): contents of the Modelfile
- `from`: (optional) name of an existing model to create the new model from
- `files`: (optional) a dictionary of file names to SHA256 digests of blobs to create the model from
- `adapters`: (optional) a dictionary of file names to SHA256 digests of blobs for LORA adapters
- `template`: (optional) the prompt template for the model
- `license`: (optional) a string or list of strings containing the license or licenses for the model
- `system`: (optional) a string containing the system prompt for the model
- `parameters`: (optional) a dictionary of parameters for the model (see [Modelfile](./modelfile.md#valid-parameters-and-values) for a list of parameters)
- `messages`: (optional) a list of message objects used to create a conversation
- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
- `path` (optional): path to the Modelfile
- `quantize` (optional): quantize a non-quantized (e.g. float16) model

#### Quantization types
@@ -961,14 +972,15 @@ Create a model from a [`Modelfile`](./modelfile.md). It is recommended to set `m

#### Create a new model

Create a new model from a `Modelfile`.
Create a new model from an existing model.

##### Request

```shell
curl http://localhost:11434/api/create -d '{
  "model": "mario",
  "modelfile": "FROM llama3\nSYSTEM You are mario from Super Mario Bros."
  "from": "llama3.2",
  "system": "You are Mario from Super Mario Bros."
}'
```

@@ -999,7 +1011,7 @@ Quantize a non-quantized model.
```shell
curl http://localhost:11434/api/create -d '{
  "model": "llama3.1:quantized",
  "modelfile": "FROM llama3.1:8b-instruct-fp16",
  "from": "llama3.1:8b-instruct-fp16",
  "quantize": "q4_K_M"
}'
```
@@ -1019,52 +1031,112 @@ A stream of JSON objects is returned:
{"status":"success"}
```

#### Create a model from GGUF

### Check if a Blob Exists
Create a model from a GGUF file. The `files` parameter should be filled out with the file name and SHA256 digest of the GGUF file you wish to use. Use [/api/blobs/:digest](#push-a-blob) to push the GGUF file to the server before calling this API.


##### Request

```shell
curl http://localhost:11434/api/create -d '{
  "model": "my-gguf-model",
  "files": {
    "test.gguf": "sha256:432f310a77f4650a88d0fd59ecdd7cebed8d684bafea53cbff0473542964f0c3"
  }
}'
```

##### Response

A stream of JSON objects is returned:

```
{"status":"parsing GGUF"}
{"status":"using existing layer sha256:432f310a77f4650a88d0fd59ecdd7cebed8d684bafea53cbff0473542964f0c3"}
{"status":"writing manifest"}
{"status":"success"}
```


#### Create a model from a Safetensors directory

The `files` parameter should include a dictionary of the safetensors model files, mapping each file name to its SHA256 digest. Use [/api/blobs/:digest](#push-a-blob) to first push each of the files to the server before calling this API. Files will remain in the cache until the Ollama server is restarted.

##### Request

```shell
curl http://localhost:11434/api/create -d '{
  "model": "fred",
  "files": {
    "config.json": "sha256:dd3443e529fb2290423a0c65c2d633e67b419d273f170259e27297219828e389",
    "generation_config.json": "sha256:88effbb63300dbbc7390143fbbdd9d9fa50587b37e8bfd16c8c90d4970a74a36",
    "special_tokens_map.json": "sha256:b7455f0e8f00539108837bfa586c4fbf424e31f8717819a6798be74bef813d05",
    "tokenizer.json": "sha256:bbc1904d35169c542dffbe1f7589a5994ec7426d9e5b609d07bab876f32e97ab",
    "tokenizer_config.json": "sha256:24e8a6dc2547164b7002e3125f10b415105644fcf02bf9ad8b674c87b1eaaed6",
    "model.safetensors": "sha256:1ff795ff6a07e6a68085d206fb84417da2f083f68391c2843cd2b8ac6df8538f"
  }
}'
```

##### Response

A stream of JSON objects is returned:

```shell
{"status":"converting model"}
{"status":"creating new layer sha256:05ca5b813af4a53d2c2922933936e398958855c44ee534858fcfd830940618b6"}
{"status":"using autodetected template llama3-instruct"}
{"status":"using existing layer sha256:56bb8bd477a519ffa694fc449c2413c6f0e1d3b1c88fa7e3c9d88d3ae49d4dcb"}
{"status":"writing manifest"}
{"status":"success"}
```

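Each value in `files` is the SHA256 digest of the corresponding local file, prefixed with `sha256:`. A minimal Go sketch for computing one (the file name is illustrative):

```go
package main

import (
	"crypto/sha256"
	"fmt"
	"io"
	"os"
)

// fileDigest returns the digest string expected in the "files" field,
// e.g. "sha256:1ff795ff6a...".
func fileDigest(path string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return "", err
	}
	return fmt.Sprintf("sha256:%x", h.Sum(nil)), nil
}

func main() {
	digest, err := fileDigest("model.safetensors")
	if err != nil {
		panic(err)
	}
	fmt.Println(digest)
}
```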
## Check if a Blob Exists

```shell
HEAD /api/blobs/:digest
```

Ensures that the file blob used for a FROM or ADAPTER field exists on the server. This is checking your Ollama server and not ollama.com.
Ensures that the file blob (Binary Large Object) used when creating a model exists on the server. This checks your Ollama server and not ollama.com.

#### Query Parameters
### Query Parameters

- `digest`: the SHA256 digest of the blob

#### Examples
### Examples

##### Request
#### Request

```shell
curl -I http://localhost:11434/api/blobs/sha256:29fdb92e57cf0827ded04ae6461b5931d01fa595843f55d36f5b275a52087dd2
```

##### Response
#### Response

Return 200 OK if the blob exists, 404 Not Found if it does not.

### Create a Blob
## Push a Blob

```shell
POST /api/blobs/:digest
```

Create a blob from a file on the server. Returns the server file path.
Push a file to the Ollama server to create a "blob" (Binary Large Object).

#### Query Parameters
### Query Parameters

- `digest`: the expected SHA256 digest of the file

#### Examples
### Examples

##### Request
#### Request

```shell
curl -T model.bin -X POST http://localhost:11434/api/blobs/sha256:29fdb92e57cf0827ded04ae6461b5931d01fa595843f55d36f5b275a52087dd2
curl -T model.gguf -X POST http://localhost:11434/api/blobs/sha256:29fdb92e57cf0827ded04ae6461b5931d01fa595843f55d36f5b275a52087dd2
```

##### Response
#### Response

Return 201 Created if the blob was successfully created, 400 Bad Request if the digest used is not expected.
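The two endpoints compose naturally: check with HEAD and push with POST only when the blob is missing. A Go sketch equivalent to the curl calls above (the file name is illustrative):

```go
package main

import (
	"fmt"
	"net/http"
	"os"
)

func main() {
	digest := "sha256:29fdb92e57cf0827ded04ae6461b5931d01fa595843f55d36f5b275a52087dd2"
	url := "http://localhost:11434/api/blobs/" + digest

	// HEAD returns 200 if the blob already exists, 404 otherwise.
	if resp, err := http.Head(url); err == nil && resp.StatusCode == http.StatusOK {
		fmt.Println("blob already on server")
		return
	}

	// Equivalent to: curl -T model.gguf -X POST <url>
	f, err := os.Open("model.gguf")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	req, err := http.NewRequest(http.MethodPost, url, f)
	if err != nil {
		panic(err)
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	fmt.Println(resp.Status) // expect "201 Created"
}
```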
@@ -38,7 +38,7 @@ Numeric IDs may be used, however ordering may vary, so UUIDs are more reliable.
You can discover the UUID of your GPUs by running `nvidia-smi -L`. If you want to
ignore the GPUs and force CPU usage, use an invalid GPU ID (e.g., "-1")

### Laptop Suspend Resume
### Linux Suspend Resume

On Linux, after a suspend/resume cycle, sometimes Ollama will fail to discover
your NVIDIA GPU, and fall back to running on the CPU. You can work around this
@@ -155,7 +155,6 @@ PARAMETER <parameter> <parametervalue>
| temperature | The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8) | float | temperature 0.7 |
| seed | Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: 0) | int | seed 42 |
| stop | Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return. Multiple stop patterns may be set by specifying multiple separate `stop` parameters in a modelfile. | string | stop "AI assistant:" |
| tfs_z | Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (default: 1) | float | tfs_z 1 |
| num_predict | Maximum number of tokens to predict when generating text. (Default: -1, infinite generation) | int | num_predict 42 |
| top_k | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) | int | top_k 40 |
| top_p | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) | float | top_p 0.9 |
@@ -165,6 +165,8 @@ var (
	IntelGPU = Bool("OLLAMA_INTEL_GPU")
	// MultiUserCache optimizes prompt caching for multi-user scenarios
	MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
	// Enable the new Ollama engine
	NewRunners = Bool("OLLAMA_NEW_RUNNERS")
)

func String(s string) func() string {
@@ -250,6 +252,7 @@ func AsMap() map[string]EnvVar {
		"OLLAMA_ORIGINS":         {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
		"OLLAMA_SCHED_SPREAD":    {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
		"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
		"OLLAMA_NEW_RUNNERS":     {"OLLAMA_NEW_RUNNERS", NewRunners(), "Enable the new Ollama engine"},

		// Informational
		"HTTP_PROXY": {"HTTP_PROXY", String("HTTP_PROXY")(), "HTTP proxy"},
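`Bool` returns a lazily evaluated accessor, which is why the table above can call `NewRunners()` at display time. A standalone sketch of the pattern (the real helper's parsing rules may differ; `strconv.ParseBool` semantics are assumed here):

```go
package main

import (
	"fmt"
	"os"
	"strconv"
)

// boolEnv returns a function that reads and parses the variable on each
// call; unset or unparsable values are treated as false (an assumption).
func boolEnv(key string) func() bool {
	return func() bool {
		v, err := strconv.ParseBool(os.Getenv(key))
		return err == nil && v
	}
}

func main() {
	newRunners := boolEnv("OLLAMA_NEW_RUNNERS")
	fmt.Println("new engine enabled:", newRunners())
}
```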
@@ -1,15 +1,15 @@
package llm
package ggml

import (
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"slices"
	"strings"
	"sync"

	"github.com/ollama/ollama/util/bufioutil"
	"github.com/ollama/ollama/fs/util/bufioutil"
)

type GGML struct {
@@ -19,145 +19,168 @@ type GGML struct {

type model interface {
	KV() KV
	Tensors() *Tensors
	Tensors() Tensors
}

type KV map[string]any

func (kv KV) u64(key string) uint64 {
	switch v := kv[key].(type) {
	case uint64:
		return v
	case uint32:
		return uint64(v)
	case float64:
		return uint64(v)
	default:
		return 0
	}
}

func (kv KV) Architecture() string {
	if s, ok := kv["general.architecture"].(string); ok {
		return s
	}

	return "unknown"
	return kv.String("general.architecture", "unknown")
}

func (kv KV) Kind() string {
	if s, ok := kv["general.type"].(string); ok {
		return s
	}

	return "unknown"
	return kv.String("general.type", "unknown")
}

func (kv KV) ParameterCount() uint64 {
	return kv.u64("general.parameter_count")
	return keyValue[uint64](kv, "general.parameter_count")
}

func (kv KV) FileType() fileType {
	if u64 := kv.u64("general.file_type"); u64 > 0 {
		return fileType(uint32(u64))
	if t := kv.Uint("general.file_type"); t > 0 {
		return fileType(t)
	}

	return fileTypeUnknown
}

func (kv KV) BlockCount() uint64 {
	return kv.u64(fmt.Sprintf("%s.block_count", kv.Architecture()))
	return uint64(kv.Uint("block_count"))
}

func (kv KV) EmbeddingLength() uint64 {
	return uint64(kv.Uint("embedding_length"))
}

func (kv KV) HeadCount() uint64 {
	return kv.u64(fmt.Sprintf("%s.attention.head_count", kv.Architecture()))
	return uint64(kv.Uint("attention.head_count"))
}

func (kv KV) HeadCountKV() uint64 {
	if headCountKV := kv.u64(fmt.Sprintf("%s.attention.head_count_kv", kv.Architecture())); headCountKV > 0 {
		return headCountKV
	}

	return 1
	return uint64(kv.Uint("attention.head_count_kv", 1))
}

func (kv KV) EmbeddingHeadCount() uint64 {
	if heads := kv.HeadCount(); heads > 0 {
		return kv.EmbeddingLength() / kv.HeadCount()
		return kv.EmbeddingLength() / heads
	}

	return 0
}

func (kv KV) EmbeddingHeadCountK() uint64 {
	if k := kv.u64(fmt.Sprintf("%s.attention.key_length", kv.Architecture())); k > 0 {
		return k
	}

	return kv.EmbeddingHeadCount()
	return uint64(kv.Uint("attention.key_length", uint32(kv.EmbeddingHeadCount())))
}

func (kv KV) EmbeddingHeadCountV() uint64 {
	if v := kv.u64(fmt.Sprintf("%s.attention.value_length", kv.Architecture())); v > 0 {
		return v
	}

	return kv.EmbeddingHeadCount()
	return uint64(kv.Uint("attention.value_length", uint32(kv.EmbeddingHeadCount())))
}

func (kv KV) GQA() uint64 {
	return kv.HeadCount() / kv.HeadCountKV()
}

func (kv KV) EmbeddingLength() uint64 {
	return kv.u64(fmt.Sprintf("%s.embedding_length", kv.Architecture()))
}

func (kv KV) ContextLength() uint64 {
	return kv.u64(fmt.Sprintf("%s.context_length", kv.Architecture()))
	return uint64(kv.Uint("context_length"))
}

func (kv KV) ChatTemplate() string {
	s, _ := kv["tokenizer.chat_template"].(string)
	return kv.String("tokenizer.chat_template")
}

func (kv KV) String(key string, defaultValue ...string) string {
	return keyValue(kv, key, append(defaultValue, "")...)
}

func (kv KV) Uint(key string, defaultValue ...uint32) uint32 {
	return keyValue(kv, key, append(defaultValue, 0)...)
}

func (kv KV) Float(key string, defaultValue ...float32) float32 {
	return keyValue(kv, key, append(defaultValue, 0)...)
}

func (kv KV) Strings(key string, defaultValue ...[]string) []string {
	r := keyValue(kv, key, &array{})
	s := make([]string, r.size)
	for i := range r.size {
		s[i] = r.values[i].(string)
	}

	return s
}
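These typed accessors all funnel into a generic `keyValue` lookup (shown further below) that prefixes any key outside `general.` and `tokenizer.` with the architecture name. A usage sketch, assuming the package is importable as `ggml` from `fs/ggml` (an import path inferred from the `fs/util/bufioutil` change above):

```go
package main

import (
	"fmt"

	"github.com/ollama/ollama/fs/ggml" // assumed import path
)

func main() {
	kv := ggml.KV{
		"general.architecture":       "llama",
		"llama.attention.head_count": uint32(32),
	}

	// "attention.head_count" expands to "llama.attention.head_count".
	fmt.Println(kv.Uint("attention.head_count")) // 32

	// Missing keys fall back to the supplied default, here 1.
	fmt.Println(kv.Uint("attention.head_count_kv", 1)) // 1
}
```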
type Tensors struct {
	Items []*Tensor
	Offset uint64
func (kv KV) Uints(key string, defaultValue ...[]uint32) []uint32 {
	r := keyValue(kv, key, &array{})
	s := make([]uint32, r.size)
	for i := range r.size {
		s[i] = uint32(r.values[i].(int32))
	}

	layers map[string]Layer
	layersOnce sync.Once
	return s
}

func (ts *Tensors) Layers() map[string]Layer {
	ts.layersOnce.Do(func() {
		ts.layers = make(map[string]Layer)
		for _, t := range ts.Items {
			parts := strings.Split(t.Name, ".")
			if index := slices.IndexFunc(parts, func(s string) bool { return s == "blk" || s == "mm" }); index != -1 {
				if len(parts) > index+2 {
					// blk and mm should have a number after them, join it
					parts = append(
						[]string{strings.Join(parts[:index+2], ".")},
						parts[index+2:]...)
				}
			}
func keyValue[T string | uint32 | uint64 | float32 | *array](kv KV, key string, defaultValue ...T) T {
	if !strings.HasPrefix(key, "tokenizer.") && !strings.HasPrefix(key, "general.") {
		key = kv.Architecture() + "." + key
	}

			if _, ok := ts.layers[parts[0]]; !ok {
				ts.layers[parts[0]] = make(Layer)
			}
	if val, ok := kv[key]; ok {
		return val.(T)
	}

			ts.layers[parts[0]][strings.Join(parts[1:], ".")] = t
	slog.Warn("key not found", "key", key, "default", defaultValue[0])
	return defaultValue[0]
}

type Tensors struct {
	items []*Tensor
	Offset uint64
}

func (s Tensors) Items(prefix ...string) []*Tensor {
	if len(prefix) == 0 {
		return s.items
	}

	var items []*Tensor
	for _, t := range s.items {
		if strings.HasPrefix(t.Name, prefix[0]) {
			items = append(items, t)
		}
	})
}

	return ts.layers
	return items
}

func (ts Tensors) Layers() map[string]Layer {
	layers := make(map[string]Layer)
	for _, t := range ts.items {
		parts := strings.Split(t.Name, ".")
		if i := slices.Index(parts, "blk"); i > 0 {
			parts = append([]string{
				strings.Join(parts[:i], "."),
				strings.Join(parts[i:i+2], "."),
			}, parts[i+2:]...)
		} else if i == 0 {
			parts = append([]string{
				strings.Join(parts[i:i+2], "."),
			}, parts[i+2:]...)
		}

		if _, ok := layers[parts[0]]; !ok {
			layers[parts[0]] = make(Layer)
		}

		layers[parts[0]][strings.Join(parts[1:], ".")] = t
	}

	return layers
}

type Layer map[string]*Tensor

func (l Layer) size() (size uint64) {
func (l Layer) Size() (size uint64) {
	for _, t := range l {
		size += t.Size()
	}
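`keyValue` leans on Go generics plus a variadic default so every accessor shares one body. A self-contained sketch of the idea (simplified: here a failed type assertion falls back to the default rather than panicking):

```go
package main

import "fmt"

// get looks a key up in a loosely typed map, returning the first
// supplied default (or the zero value) when the key is absent or the
// stored value has the wrong type.
func get[T any](m map[string]any, key string, defaultValue ...T) T {
	if v, ok := m[key].(T); ok {
		return v
	}
	var zero T
	return append(defaultValue, zero)[0]
}

func main() {
	kv := map[string]any{"llama.attention.head_count": uint32(32)}
	fmt.Println(get[uint32](kv, "llama.attention.head_count"))       // 32
	fmt.Println(get(kv, "llama.attention.head_count_kv", uint32(8))) // 8
}
```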
@@ -255,8 +278,6 @@ func (t Tensor) typeSize() uint64 {
		return 8
	case 29: // IQ1_M
		return blockSize/8 + blockSize/16 + blockSize/32
	case 30: // BF16
		return 2
	default:
		return 0
	}
@@ -295,7 +316,7 @@ const (

var ErrUnsupportedFormat = errors.New("unsupported model format")

func DetectGGMLType(b []byte) string {
func DetectContentType(b []byte) string {
	switch binary.LittleEndian.Uint32(b[:4]) {
	case FILE_MAGIC_GGML:
		return "ggml"
@@ -312,12 +333,12 @@ func DetectGGMLType(b []byte) string {
	}
}

// DecodeGGML decodes a GGML model from the given reader.
// Decode decodes a GGML model from the given reader.
//
// It collects array values for arrays with a size less than or equal to
// maxArraySize. If maxArraySize is 0, the default value of 1024 is used. If
// the maxArraySize is negative, all arrays are collected.
func DecodeGGML(rs io.ReadSeeker, maxArraySize int) (*GGML, int64, error) {
func Decode(rs io.ReadSeeker, maxArraySize int) (*GGML, int64, error) {
	if maxArraySize == 0 {
		maxArraySize = 1024
	}
@@ -331,10 +352,6 @@ func DecodeGGML(rs io.ReadSeeker, maxArraySize int) (*GGML, int64, error) {

	var c container
	switch magic {
	case FILE_MAGIC_GGML, FILE_MAGIC_GGMF, FILE_MAGIC_GGJT:
		return nil, 0, ErrUnsupportedFormat
	case FILE_MAGIC_GGLA:
		c = &containerGGLA{}
	case FILE_MAGIC_GGUF_LE:
		c = &containerGGUF{ByteOrder: binary.LittleEndian, maxArraySize: maxArraySize}
	case FILE_MAGIC_GGUF_BE:
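Putting the renamed entry point together: a sketch of loading a GGUF file and querying its metadata, assuming the `fs/ggml` import path used above and a local `model.gguf`:

```go
package main

import (
	"fmt"
	"os"

	"github.com/ollama/ollama/fs/ggml" // assumed import path
)

func main() {
	f, err := os.Open("model.gguf")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// maxArraySize of 0 means "use the default of 1024" per the comment above.
	g, _, err := ggml.Decode(f, 0)
	if err != nil {
		panic(err)
	}

	kv := g.KV()
	fmt.Println("architecture:", kv.Architecture())
	fmt.Println("blocks:", kv.BlockCount())
	fmt.Println("flash attention:", g.SupportsFlashAttention())
}
```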
@@ -530,21 +547,20 @@ func (llm GGML) GraphSize(context, batch uint64, kvCacheType string) (kv, partia
}

// SupportsKVCacheType checks if the requested cache type is supported
func (ggml GGML) SupportsKVCacheType(cacheType string) bool {
	validKVCacheTypes := []string{"f16", "q8_0", "q4_0"}
	return slices.Contains(validKVCacheTypes, cacheType)
func (llm GGML) SupportsKVCacheType(cacheType string) bool {
	return slices.Contains([]string{"f16", "q8_0", "q4_0"}, cacheType)
}

// SupportsFlashAttention checks if the model supports flash attention
func (ggml GGML) SupportsFlashAttention() bool {
	_, isEmbedding := ggml.KV()[fmt.Sprintf("%s.pooling_type", ggml.KV().Architecture())]
func (llm GGML) SupportsFlashAttention() bool {
	_, isEmbedding := llm.KV()[fmt.Sprintf("%s.pooling_type", llm.KV().Architecture())]
	if isEmbedding {
		return false
	}

	// Check head counts match and are non-zero
	headCountK := ggml.KV().EmbeddingHeadCountK()
	headCountV := ggml.KV().EmbeddingHeadCountV()
	headCountK := llm.KV().EmbeddingHeadCountK()
	headCountV := llm.KV().EmbeddingHeadCountV()
	return headCountK != 0 && headCountV != 0 && headCountK == headCountV
}
@@ -1,4 +1,4 @@
package llm
package ggml

import (
	"bytes"
@@ -8,10 +8,9 @@ import (
	"fmt"
	"io"
	"log/slog"
	"maps"
	"slices"
	"strings"

	"golang.org/x/exp/maps"
)

type containerGGUF struct {
@@ -110,9 +109,9 @@ func (llm *gguf) KV() KV {
	return llm.kv
}

func (llm *gguf) Tensors() *Tensors {
	return &Tensors{
		Items: llm.tensors,
func (llm *gguf) Tensors() Tensors {
	return Tensors{
		items: llm.tensors,
		Offset: llm.tensorOffset,
	}
}
@@ -523,7 +522,7 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
		return err
	}

	keys := maps.Keys(kv)
	keys := slices.Collect(maps.Keys(kv))
	slices.Sort(keys)

	for _, key := range keys {
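The `keys` change swaps `golang.org/x/exp/maps.Keys`, which returned a slice, for the Go 1.23 standard-library pair: `maps.Keys` now yields an iterator and `slices.Collect` materializes it. A minimal demonstration:

```go
package main

import (
	"fmt"
	"maps"
	"slices"
)

func main() {
	kv := map[string]any{"b": 2, "a": 1, "c": 3}

	// maps.Keys returns an iter.Seq[string]; slices.Collect turns it
	// into a []string, matching the old x/exp behavior.
	keys := slices.Collect(maps.Keys(kv))
	slices.Sort(keys)
	fmt.Println(keys) // [a b c]
}
```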
@@ -1,4 +1,4 @@
package llm
package ggml

import "fmt"

@@ -32,10 +32,9 @@ const (
	fileTypeIQ1_S
	fileTypeIQ4_NL
	fileTypeIQ3_S
	fileTypeIQ3_M
	fileTypeIQ2_S
	fileTypeIQ2_M
	fileTypeIQ4_XS
	fileTypeIQ2_M
	fileTypeIQ1_M
	fileTypeBF16

@@ -94,8 +93,6 @@ func ParseFileType(s string) (fileType, error) {
		return fileTypeIQ4_NL, nil
	case "IQ3_S":
		return fileTypeIQ3_S, nil
	case "IQ3_M":
		return fileTypeIQ3_M, nil
	case "IQ2_S":
		return fileTypeIQ2_S, nil
	case "IQ4_XS":
@@ -163,8 +160,6 @@ func (t fileType) String() string {
		return "IQ4_NL"
	case fileTypeIQ3_S:
		return "IQ3_S"
	case fileTypeIQ3_M:
		return "IQ3_M"
	case fileTypeIQ2_S:
		return "IQ2_S"
	case fileTypeIQ4_XS:
3
go.mod
@@ -17,12 +17,14 @@ require (
require (
	github.com/agnivade/levenshtein v1.1.1
	github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
	github.com/dlclark/regexp2 v1.11.4
	github.com/emirpasic/gods/v2 v2.0.0-alpha
	github.com/google/go-cmp v0.6.0
	github.com/mattn/go-runewidth v0.0.14
	github.com/nlpodyssey/gopickle v0.3.0
	github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c
	golang.org/x/image v0.22.0
	gonum.org/v1/gonum v0.15.0
)

require (
@@ -42,7 +44,6 @@ require (
	github.com/xtgo/set v1.0.0 // indirect
	go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 // indirect
	golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
	gonum.org/v1/gonum v0.15.0 // indirect
	gorgonia.org/vecf32 v0.9.0 // indirect
	gorgonia.org/vecf64 v0.9.0 // indirect
)
2
go.sum
@@ -42,6 +42,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48 h1:fRzb/w+pyskVMQ+UbP35JkH8yB7MYb4q/qhBarqZE6g=
github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo=
github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/emirpasic/gods/v2 v2.0.0-alpha h1:dwFlh8pBg1VMOXWGipNMRt8v96dKAIvBehtCt6OtunU=
github.com/emirpasic/gods/v2 v2.0.0-alpha/go.mod h1:W0y4M2dtBB9U5z3YlghmpuUhiaZT2h6yoeE+C1sCp6A=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
@@ -37,8 +37,7 @@ go build -tags avx .
```shell
# go doesn't recognize `-mfma` as a valid compiler flag
# see https://github.com/golang/go/issues/17895
go env -w "CGO_CFLAGS_ALLOW=-mfma|-mf16c"
go env -w "CGO_CXXFLAGS_ALLOW=-mfma|-mf16c"
go env -w "CGO_CPPFLAGS_ALLOW=-mfma|-mf16c"
go build -tags=avx,avx2 .
```

34
llama/amx.h
vendored
34
llama/amx.h
vendored
@@ -1,34 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "ggml-backend.h"
|
||||
#include "ggml-cpu-impl.h"
|
||||
|
||||
// GGML internal header
|
||||
|
||||
#if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
|
||||
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void);
|
||||
#endif
|
||||
51
llama/ggml-blas.h
vendored
51
llama/ggml-blas.h
vendored
@@ -1,51 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// backend API
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void);
|
||||
|
||||
GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend);
|
||||
|
||||
// number of threads used for conversion to float
|
||||
// for openblas and blis, this will also set the number of threads used for blas operations
|
||||
GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
34
llama/ggml-cpu-aarch64.h
vendored
34
llama/ggml-cpu-aarch64.h
vendored
@@ -1,34 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ggml-cpu-traits.h"
|
||||
#include "ggml.h"
|
||||
|
||||
// GGML internal header
|
||||
|
||||
ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);
|
||||
64
llama/ggml-cpu-traits.h
vendored
64
llama/ggml-cpu-traits.h
vendored
@@ -1,64 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "ggml-backend-impl.h"
|
||||
#include "ggml-cpu-impl.h"
|
||||
#include "ggml.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
# include <vector>
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// return true if op part of extra "accelerator"
|
||||
bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op);
|
||||
bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size_t * size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
namespace ggml::cpu {
|
||||
// register in tensor->extra
|
||||
class tensor_traits {
|
||||
public:
|
||||
virtual ~tensor_traits();
|
||||
virtual bool work_size(int n_threads, const struct ggml_tensor * op, size_t & size) = 0;
|
||||
virtual bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) = 0;
|
||||
};
|
||||
|
||||
class extra_buffer_type {
|
||||
public:
|
||||
virtual ~extra_buffer_type();
|
||||
virtual bool supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) = 0;
|
||||
virtual tensor_traits * get_tensor_traits(const struct ggml_tensor * op) = 0;
|
||||
};
|
||||
} // namespace ggml::cpu
|
||||
|
||||
// implemented in ggml-cpu.cpp.
|
||||
std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffers_type();
|
||||
|
||||
#endif
|
||||
31
llama/ggml-cuda/acc.cuh
vendored
31
llama/ggml-cuda/acc.cuh
vendored
@@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_ACC_BLOCK_SIZE 256
|
||||
|
||||
void ggml_cuda_op_acc(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
60
llama/ggml-cuda/arange.cu
vendored
60
llama/ggml-cuda/arange.cu
vendored
@@ -1,60 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "arange.cuh"
|
||||
|
||||
static __global__ void arange_f32(float * dst, const int ne0, const float start, const float step) {
|
||||
// blockIDx.x: idx of ne0 / BLOCK_SIZE
|
||||
int nidx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
if (nidx >= ne0) {
|
||||
return;
|
||||
}
|
||||
dst[nidx] = start + step * nidx;
|
||||
}
|
||||
|
||||
static void arange_f32_cuda(float * dst, const int ne0, const float start, const float step, cudaStream_t stream) {
|
||||
int num_blocks = (ne0 + CUDA_ARANGE_BLOCK_SIZE - 1) / CUDA_ARANGE_BLOCK_SIZE;
|
||||
arange_f32<<<num_blocks, CUDA_ARANGE_BLOCK_SIZE, 0, stream>>>(dst, ne0, start, step);
|
||||
}
|
||||
|
||||
void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
||||
float * dst_d = (float *)dst->data;
|
||||
cudaStream_t stream = ctx.stream();
|
||||
|
||||
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
||||
|
||||
float start;
|
||||
float stop;
|
||||
float step;
|
||||
memcpy(&start, (float *)dst->op_params + 0, sizeof(float));
|
||||
memcpy(&stop, (float *)dst->op_params + 1, sizeof(float));
|
||||
memcpy(&step, (float *)dst->op_params + 2, sizeof(float));
|
||||
|
||||
int64_t steps = (int64_t)ceil((stop - start) / step);
|
||||
GGML_ASSERT(ggml_nelements(dst) == steps);
|
||||
|
||||
arange_f32_cuda(dst_d, dst->ne[0], start, step, stream);
|
||||
}
|
||||
31
llama/ggml-cuda/arange.cuh
vendored
31
llama/ggml-cuda/arange.cuh
vendored
@@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_ARANGE_BLOCK_SIZE 256
|
||||
|
||||
void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
29
llama/ggml-cuda/argmax.cuh
vendored
29
llama/ggml-cuda/argmax.cuh
vendored
@@ -1,29 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
void ggml_cuda_argmax(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
29
llama/ggml-cuda/argsort.cuh
vendored
29
llama/ggml-cuda/argsort.cuh
vendored
@@ -1,29 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
35
llama/ggml-cuda/binbcast.cuh
vendored
35
llama/ggml-cuda/binbcast.cuh
vendored
@@ -1,35 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
void ggml_cuda_op_repeat(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
void ggml_cuda_op_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
void ggml_cuda_op_sub(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
void ggml_cuda_op_mul(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
void ggml_cuda_op_div(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
|
||||
void ggml_cuda_op_repeat_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
60
llama/ggml-cuda/clamp.cu
vendored
60
llama/ggml-cuda/clamp.cu
vendored
@@ -1,60 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "clamp.cuh"
|
||||
|
||||
static __global__ void clamp_f32(const float * x, float * dst, const float min, const float max, const int k) {
|
||||
const int i = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
|
||||
if (i >= k) {
|
||||
return;
|
||||
}
|
||||
|
||||
dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]);
|
||||
}
|
||||
|
||||
static void clamp_f32_cuda(const float * x, float * dst, const float min, const float max, const int k, cudaStream_t stream) {
|
||||
const int num_blocks = (k + CUDA_CLAMP_BLOCK_SIZE - 1) / CUDA_CLAMP_BLOCK_SIZE;
|
||||
clamp_f32<<<num_blocks, CUDA_CLAMP_BLOCK_SIZE, 0, stream>>>(x, dst, min, max, k);
|
||||
}
|
||||
|
||||
|
||||
void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
||||
const ggml_tensor * src0 = dst->src[0];
|
||||
const float * src0_d = (const float *)src0->data;
|
||||
float * dst_d = (float *)dst->data;
|
||||
cudaStream_t stream = ctx.stream();
|
||||
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
||||
|
||||
float min;
|
||||
float max;
|
||||
memcpy(&min, dst->op_params, sizeof(float));
|
||||
memcpy(&max, (float *) dst->op_params + 1, sizeof(float));
|
||||
|
||||
clamp_f32_cuda(src0_d, dst_d, min, max, ggml_nelements(src0), stream);
|
||||
}
|
||||
31
llama/ggml-cuda/clamp.cuh
vendored
31
llama/ggml-cuda/clamp.cuh
vendored
@@ -1,31 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

#define CUDA_CLAMP_BLOCK_SIZE 256

void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
31
llama/ggml-cuda/concat.cuh
vendored
31
llama/ggml-cuda/concat.cuh
vendored
@@ -1,31 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

#define CUDA_CONCAT_BLOCK_SIZE 256

void ggml_cuda_op_concat(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
31
llama/ggml-cuda/conv-transpose-1d.cuh
vendored
31
llama/ggml-cuda/conv-transpose-1d.cuh
vendored
@@ -1,31 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

#define CUDA_CONV_TRANPOSE_1D_BLOCK_SIZE 256

void ggml_cuda_op_conv_transpose_1d(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
39
llama/ggml-cuda/convert.cuh
vendored
39
llama/ggml-cuda/convert.cuh
vendored
@@ -1,39 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

#define CUDA_DEQUANTIZE_BLOCK_SIZE 256

template<typename T>
using to_t_cuda_t = void (*)(const void * __restrict__ x, T * __restrict__ y, int64_t k, cudaStream_t stream);

typedef to_t_cuda_t<float> to_fp32_cuda_t;
typedef to_t_cuda_t<half> to_fp16_cuda_t;

to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type);

to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type);
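The to_t_cuda_t alias above makes dequantization pluggable per ggml_type: ggml_get_to_fp32_cuda and ggml_get_to_fp16_cuda return a kernel-launching function pointer for the requested quantization format. A hedged usage sketch; the buffer names and the null check are illustrative assumptions, not code from the vendored headers:

// dequantize k elements of a Q4_0 buffer into FP32 on the given stream
const to_fp32_cuda_t to_fp32 = ggml_get_to_fp32_cuda(GGML_TYPE_Q4_0);
if (to_fp32 != nullptr) {
    to_fp32(src_quantized, dst_f32, k, stream); // src_quantized/dst_f32/k/stream assumed in scope
}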
31
llama/ggml-cuda/count-equal.cuh
vendored
31
llama/ggml-cuda/count-equal.cuh
vendored
@@ -1,31 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

#define CUDA_COUNT_EQUAL_CHUNK_SIZE 128

void ggml_cuda_count_equal(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
35
llama/ggml-cuda/cpy.cuh
vendored
35
llama/ggml-cuda/cpy.cuh
vendored
@@ -1,35 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

#define CUDA_CPY_BLOCK_SIZE 64

void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, ggml_tensor * src1);

void ggml_cuda_dup(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1);
33
llama/ggml-cuda/cross-entropy-loss.cuh
vendored
33
llama/ggml-cuda/cross-entropy-loss.cuh
vendored
@@ -1,33 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

#define CUDA_CROSS_ENTROPY_LOSS_BLOCK_SIZE 256

void ggml_cuda_cross_entropy_loss(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

void ggml_cuda_cross_entropy_loss_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
31
llama/ggml-cuda/diagmask.cuh
vendored
31
llama/ggml-cuda/diagmask.cuh
vendored
@@ -1,31 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

#define CUDA_DIAG_MASK_INF_BLOCK_SIZE 32

void ggml_cuda_op_diag_mask_inf(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
29
llama/ggml-cuda/fattn-tile-f16.cuh
vendored
29
llama/ggml-cuda/fattn-tile-f16.cuh
vendored
@@ -1,29 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

void ggml_cuda_flash_attn_ext_tile_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
29
llama/ggml-cuda/fattn-tile-f32.cuh
vendored
29
llama/ggml-cuda/fattn-tile-f32.cuh
vendored
@@ -1,29 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

void ggml_cuda_flash_attn_ext_tile_f32(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
29
llama/ggml-cuda/fattn.cuh
vendored
29
llama/ggml-cuda/fattn.cuh
vendored
@@ -1,29 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
31
llama/ggml-cuda/getrows.cuh
vendored
31
llama/ggml-cuda/getrows.cuh
vendored
@@ -1,31 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

#define CUDA_GET_ROWS_BLOCK_SIZE 256

void ggml_cuda_op_get_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
31
llama/ggml-cuda/im2col.cuh
vendored
31
llama/ggml-cuda/im2col.cuh
vendored
@@ -1,31 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

#define CUDA_IM2COL_BLOCK_SIZE 256

void ggml_cuda_op_im2col(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
38
llama/ggml-cuda/mmv.cuh
vendored
38
llama/ggml-cuda/mmv.cuh
vendored
@@ -1,38 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

// maximum number of src0 rows with which to use mul_mat_vec over cuBLAS if FP16 tensor cores are available
#define MMV_MAX_ROWS 512

void ggml_cuda_mul_mat_vec(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);

void ggml_cuda_op_mul_mat_vec(
    ggml_backend_cuda_context & ctx,
    const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,
    const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols,
    const int64_t src1_padded_row_size, cudaStream_t stream);
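MMV_MAX_ROWS above encodes a crossover point: for small enough src0 row counts the custom matrix-vector kernel is expected to beat cuBLAS even when FP16 tensor cores are available. A sketch of the kind of dispatch test the comment describes; the condition and variable uses here are illustrative assumptions, not the actual selection code:

// choose the custom matrix-vector kernel only for skinny problems
const bool use_mul_mat_vec =
    src1->ne[1] == 1 &&              // src1 is a single column, i.e. a vector
    src0->ne[1] <= MMV_MAX_ROWS;     // src0 has few enough rows to beat cuBLAS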
35
llama/ggml-cuda/mmvq.cuh
vendored
35
llama/ggml-cuda/mmvq.cuh
vendored
@@ -1,35 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

#define MMVQ_MAX_BATCH_SIZE 8 // Max. batch size for which to use MMVQ kernels.

void ggml_cuda_op_mul_mat_vec_q(
    ggml_backend_cuda_context & ctx,
    const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,
    const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols,
    const int64_t src1_padded_row_size, cudaStream_t stream);
33
llama/ggml-cuda/norm.cuh
vendored
33
llama/ggml-cuda/norm.cuh
vendored
@@ -1,33 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

void ggml_cuda_op_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

void ggml_cuda_op_group_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

void ggml_cuda_op_rms_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
31
llama/ggml-cuda/opt-step-adamw.cuh
vendored
31
llama/ggml-cuda/opt-step-adamw.cuh
vendored
@@ -1,31 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

#define CUDA_OPT_STEP_ADAMW_BLOCK_SIZE 256

void ggml_cuda_opt_step_adamw(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
29
llama/ggml-cuda/out-prod.cuh
vendored
29
llama/ggml-cuda/out-prod.cuh
vendored
@@ -1,29 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

void ggml_cuda_out_prod(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
32
llama/ggml-cuda/pad.cuh
vendored
32
llama/ggml-cuda/pad.cuh
vendored
@@ -1,32 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

#define CUDA_PAD_BLOCK_SIZE 256

void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_unpad(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
31
llama/ggml-cuda/pool2d.cuh
vendored
31
llama/ggml-cuda/pool2d.cuh
vendored
@@ -1,31 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

#define CUDA_POOL2D_BLOCK_SIZE 256

void ggml_cuda_op_pool2d(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
50
llama/ggml-cuda/quantize.cuh
vendored
50
llama/ggml-cuda/quantize.cuh
vendored
@@ -1,50 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#pragma once

#include "common.cuh"
#include "mmq.cuh"

#include <cstdint>

#define CUDA_QUANTIZE_BLOCK_SIZE 256
#define CUDA_QUANTIZE_BLOCK_SIZE_MMQ 128

static_assert(MATRIX_ROW_PADDING % CUDA_QUANTIZE_BLOCK_SIZE == 0, "Risk of out-of-bounds access.");
static_assert(MATRIX_ROW_PADDING % (4*CUDA_QUANTIZE_BLOCK_SIZE_MMQ) == 0, "Risk of out-of-bounds access.");

typedef void (*quantize_cuda_t)(
    const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
    const ggml_type type_x, cudaStream_t stream);

void quantize_row_q8_1_cuda(
    const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
    const ggml_type type_x, cudaStream_t stream);

void quantize_mmq_q8_1_cuda(
    const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
    const ggml_type type_x, cudaStream_t stream);
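The two static_asserts above guarantee that a padded matrix row is always a whole number of quantization work units, so the last block of a quantize kernel never reads past the row. Spelled out, assuming MATRIX_ROW_PADDING is 512 as defined in the vendored common.cuh (that value is an assumption here, it is not restated in this file):

// 512 % 256       == 0  -> rows split evenly into CUDA_QUANTIZE_BLOCK_SIZE chunks
// 512 % (4 * 128) == 0  -> likewise for the MMQ path, where each thread
//                          quantizes 4 values per step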
31
llama/ggml-cuda/rope.cuh
vendored
31
llama/ggml-cuda/rope.cuh
vendored
@@ -1,31 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

#define CUDA_ROPE_BLOCK_SIZE 256

void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
57
llama/ggml-cuda/scale.cu
vendored
57
llama/ggml-cuda/scale.cu
vendored
@@ -1,57 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "scale.cuh"

static __global__ void scale_f32(const float * x, float * dst, const float scale, const int k) {
    const int i = blockDim.x*blockIdx.x + threadIdx.x;

    if (i >= k) {
        return;
    }

    dst[i] = scale * x[i];
}

static void scale_f32_cuda(const float * x, float * dst, const float scale, const int k, cudaStream_t stream) {
    const int num_blocks = (k + CUDA_SCALE_BLOCK_SIZE - 1) / CUDA_SCALE_BLOCK_SIZE;
    scale_f32<<<num_blocks, CUDA_SCALE_BLOCK_SIZE, 0, stream>>>(x, dst, scale, k);
}

void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    const ggml_tensor * src0 = dst->src[0];
    const float * src0_d = (const float *)src0->data;
    float * dst_d = (float *)dst->data;
    cudaStream_t stream = ctx.stream();

    GGML_ASSERT(src0->type == GGML_TYPE_F32);
    GGML_ASSERT( dst->type == GGML_TYPE_F32);

    float scale;
    memcpy(&scale, dst->op_params, sizeof(float));

    scale_f32_cuda(src0_d, dst_d, scale, ggml_nelements(src0), stream);
}
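ggml_cuda_op_scale above reads its scalar out of dst->op_params with memcpy rather than a pointer cast: op_params is an int32 array, and memcpy sidesteps strict-aliasing and alignment pitfalls when reinterpreting its bytes as a float. A self-contained sketch of the round-trip; the fake_tensor struct is an illustrative stand-in for ggml_tensor, not the real type:

#include <cstdint>
#include <cstring>

struct fake_tensor { int32_t op_params[16]; }; // stand-in, not the real ggml_tensor

int main() {
    fake_tensor t{};
    const float scale = 0.125f;
    std::memcpy(t.op_params, &scale, sizeof(float));      // writer: pack the float

    float read_back;
    std::memcpy(&read_back, t.op_params, sizeof(float));  // reader: same pattern as scale.cu
    return read_back == scale ? 0 : 1;                    // exits 0 on the expected round-trip
}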
31
llama/ggml-cuda/scale.cuh
vendored
31
llama/ggml-cuda/scale.cuh
vendored
@@ -1,31 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

#define CUDA_SCALE_BLOCK_SIZE 256

void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
31
llama/ggml-cuda/softmax.cuh
vendored
31
llama/ggml-cuda/softmax.cuh
vendored
@@ -1,31 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

#define CUDA_SOFT_MAX_BLOCK_SIZE 1024

void ggml_cuda_op_soft_max(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
31
llama/ggml-cuda/sum.cuh
vendored
31
llama/ggml-cuda/sum.cuh
vendored
@@ -1,31 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int64_t ne, cudaStream_t stream);

void ggml_cuda_op_sum(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
65
llama/ggml-cuda/sumrows.cu
vendored
65
llama/ggml-cuda/sumrows.cu
vendored
@@ -1,65 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "sumrows.cuh"

static __global__ void k_sum_rows_f32(const float * x, float * dst, const int ncols) {
    const int row = blockIdx.x;
    const int col = threadIdx.x;

    float sum = 0.0f;
    for (int i = col; i < ncols; i += blockDim.x) {
        sum += x[row * ncols + i];
    }

    sum = warp_reduce_sum(sum);

    if (col == 0) {
        dst[row] = sum;
    }
}

void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream) {
    const dim3 block_dims(WARP_SIZE, 1, 1);
    const dim3 block_nums(nrows, 1, 1);
    k_sum_rows_f32<<<block_nums, block_dims, 0, stream>>>(x, dst, ncols);
}

void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    const ggml_tensor * src0 = dst->src[0];
    const float * src0_d = (const float *)src0->data;
    float * dst_d = (float *)dst->data;
    cudaStream_t stream = ctx.stream();

    GGML_ASSERT(src0->type == GGML_TYPE_F32);
    GGML_ASSERT( dst->type == GGML_TYPE_F32);
    GGML_ASSERT(ggml_is_contiguous(src0));

    const int64_t ncols = src0->ne[0];
    const int64_t nrows = ggml_nrows(src0);

    sum_rows_f32_cuda(src0_d, dst_d, ncols, nrows, stream);
}
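k_sum_rows_f32 above launches one block of WARP_SIZE threads per row: each lane strides across the columns accumulating a partial sum, then warp_reduce_sum combines the 32 partials without touching shared memory. The real helper lives in the vendored common.cuh; a shuffle-based sketch of its likely shape (an assumption, not a copy of it):

static __device__ __forceinline__ float warp_reduce_sum_sketch(float x) {
    // butterfly reduction: after 5 halving steps every lane holds the full sum
    for (int offset = 16; offset > 0; offset >>= 1) {
        x += __shfl_xor_sync(0xffffffffu, x, offset, 32);
    }
    return x;
}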
31
llama/ggml-cuda/sumrows.cuh
vendored
31
llama/ggml-cuda/sumrows.cuh
vendored
@@ -1,31 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

#include "common.cuh"

void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream);

void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
@@ -1,31 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

// This file has been autogenerated by generate_cu_files.py, do not edit manually.

#include "../fattn-vec-f16.cuh"

DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16);
@@ -1,31 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

// This file has been autogenerated by generate_cu_files.py, do not edit manually.

#include "../fattn-vec-f16.cuh"

DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_0);
@@ -1,31 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

// This file has been autogenerated by generate_cu_files.py, do not edit manually.

#include "../fattn-vec-f16.cuh"

DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_1);
@@ -1,31 +0,0 @@
/** llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file (same MIT License header as above) */

// This file has been autogenerated by generate_cu_files.py, do not edit manually.

#include "../fattn-vec-f16.cuh"

DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_0);
@@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_1);
|
||||
@@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q8_0);
|
||||
@@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_F16);
|
||||
@@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_0);
|
||||
@@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1);
|
||||
@@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_0);
|
||||
@@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_1);
|
||||
@@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q8_0);
|
||||
@@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_F16);
|
||||
Some files were not shown because too many files have changed in this diff.
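Note on the pattern above: each deleted `.cu` file is a one-line explicit instantiation of the f16 flash-attention vector kernel for a single (head size, K cache type, V cache type) combination, split into its own translation unit so the many combinations can compile independently. The Python sketch below shows the kind of loop a generator like `generate_cu_files.py` runs to emit such files. It is illustrative only, not the actual script from llama.cpp: the filename scheme is an assumption, while the head size (128), the GGML type pairs, and the file template are taken from the diff itself.

```python
# Illustrative sketch of a generate_cu_files.py-style generator.
# Assumptions: the output filename scheme; everything else (template
# text, head size 128, GGML type pairs) is visible in the diff above.

TEMPLATE = """// This file has been autogenerated by generate_cu_files.py, do not edit manually.

#include "../fattn-vec-f16.cuh"

DECL_FATTN_VEC_F16_CASE({head_size}, {type_k}, {type_v});
"""

KV_TYPES = [
    "GGML_TYPE_F16", "GGML_TYPE_Q4_0", "GGML_TYPE_Q4_1",
    "GGML_TYPE_Q5_0", "GGML_TYPE_Q5_1", "GGML_TYPE_Q8_0",
]

for type_k in KV_TYPES:
    for type_v in KV_TYPES:
        # One translation unit per (K, V) type pair keeps each compile
        # small and lets the build parallelize across instantiations.
        name = f"fattn-vec-f16-instance-hs128-{type_k.lower()}-{type_v.lower()}.cu"
        with open(name, "w") as f:
            f.write(TEMPLATE.format(head_size=128, type_k=type_k, type_v=type_v))
```

Each `DECL_FATTN_VEC_F16_CASE` invocation presumably expands, via `fattn-vec-f16.cuh`, to one explicit template instantiation, so deleting these files drops the compiled kernels for those K/V quantization pairs from this vendored copy.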